From 8d57af540b12dc864d49dc45ebd8dc449b10e41b Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 10:42:46 +0200
Subject: [PATCH 01/15] Add Dynamic Mode Decomposition functions
 (Reference-LAPACK PR 736)

---
 lapack-netlib/SRC/cgedmd.f90  |  995 +++++++++++++++++++++++++++++++
 lapack-netlib/SRC/cgedmdq.f90 |  689 +++++++++++++++++++++
 lapack-netlib/SRC/dgedmd.f90  | 1054 +++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/dgedmdq.f90 |  704 ++++++++++++++++++++++
 lapack-netlib/SRC/sgedmd.f90  | 1054 +++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/sgedmdq.f90 |  703 ++++++++++++++++++++++
 lapack-netlib/SRC/zgedmd.f90  |  996 +++++++++++++++++++++++++++++++
 lapack-netlib/SRC/zgedmdq.f90 |  689 +++++++++++++++++++++
 8 files changed, 6884 insertions(+)
 create mode 100644 lapack-netlib/SRC/cgedmd.f90
 create mode 100644 lapack-netlib/SRC/cgedmdq.f90
 create mode 100644 lapack-netlib/SRC/dgedmd.f90
 create mode 100644 lapack-netlib/SRC/dgedmdq.f90
 create mode 100644 lapack-netlib/SRC/sgedmd.f90
 create mode 100644 lapack-netlib/SRC/sgedmdq.f90
 create mode 100644 lapack-netlib/SRC/zgedmd.f90
 create mode 100644 lapack-netlib/SRC/zgedmdq.f90

diff --git a/lapack-netlib/SRC/cgedmd.f90 b/lapack-netlib/SRC/cgedmd.f90
new file mode 100644
index 000000000..499489270
--- /dev/null
+++ b/lapack-netlib/SRC/cgedmd.f90
@@ -0,0 +1,995 @@
+      SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+!   March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!     Array arguments
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     CGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, CGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, CGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: CGESVD (the QR SVD algorithm)
+!     2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!   < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, Y contains the data snapshot matrix Y
+!   < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient.
+!     See the descriptions of EIGS, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs,
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!     See the description of EIGS and Z.
+!.....
+!     B (output) COMPLEX(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient.
+!     The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by CGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     ZWORK is used as complex workspace in the complex SVD, as
+!     specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing
+!     the eigenvalues of a Rayleigh quotient.
+!     If the call to CGEDMD is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of work is at least 2.
+!     See the description of LZWORK.
+!.....
+!     LZWORK (input) INTEGER
+!     The minimal length of the workspace vector ZWORK.
+!     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV),
+!     where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal
+!     LZWORK_SVD is calculated as follows
+!     If WHTSVD == 1 :: CGESVD ::
+!        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!     If WHTSVD == 2 :: CGESDD ::
+!        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!     If WHTSVD == 3 :: CGESVDQ ::
+!        LZWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: CGEJSV ::
+!        LZWORK_SVD = obtainable by a query
+!     If on entry LZWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths and returns them in
+!     LZWORK(1) and LZWORK(2), respectively.
+!.....
+!     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!     On exit, RWORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to CGEDMD is only workspace query, then
+!     RWORK(1) contains the minimal workspace length.
+!     See the description of LRWORK.
+!.....
+!     LRWORK (input) INTEGER
+!     The minimal length of the workspace vector RWORK.
+!     LRWORK is calculated as follows:
+!     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where
+!     LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
+!     for the SVD subroutine determined by the input parameter
+!     WHTSVD.
+!     If WHTSVD == 1 :: CGESVD ::
+!        LRWORK_SVD = 5*MIN(M,N)
+!     If WHTSVD == 2 :: CGESDD ::
+!        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+!     If WHTSVD == 3 :: CGESVDQ ::
+!        LRWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: CGEJSV ::
+!        LRWORK_SVD = obtainable by a query
+!     If on entry LRWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     real workspace length and returns it in RWORK(1).
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for  ZWORK, RWORK and
+!     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,   &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2
+      EXTERNAL      CLANGE, SLAMCH, SCNRM2, ICAMAX
+      INTEGER                               ICAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CAXPY,  CGEMM,  CSSCAL
+      EXTERNAL      CGEEV,  CGEJSV, CGESDD, CGESVD, CGESVDQ, &
+                    CLACPY, CLASCL, CLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     FLOAT, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,     &
+                     LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX(MWRSDD,INT( ZWORK(1) ))
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the CGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+                OLWORK = MAX( 2, OLWORK )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')*SLAMCH('P')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using CLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = SCNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )             ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL CSSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using CLASSQ.
+          DO i = 1, N
+            !RWORK(i) = SCNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case CGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 )   ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that CGESVD, CGESVDQ and CGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside CGEDMD).
+          CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                       ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                  ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                        ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, &
+           LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of CGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! CGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+          IF ( WNTRES ) THEN
+              IF ( WNTREF ) THEN
+                ! Here, if the refinement is requested, we have
+                ! A*U(:,1:K) already computed and stored in Z.
+                ! For the residuals, need Y = A * U(:,1;K) * W.
+                CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                           LDW, ZZERO, Y, LDY )              ! BLAS CALL
+                ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+                ! This frees Z; Y contains A * U(:,1:K) * W.
+              ELSE
+                ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+                IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+              END IF
+          ELSE IF ( WNTEX ) THEN
+              ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, B, LDB)
+                ! The above call replaces the following two calls
+                ! that were used in the developing-testing phase.
+                ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                !           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+!
+          ! Compute the Ritz vectors
+          IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                       ZZERO, Z, LDZ )                          ! BLAS CALL
+          ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+          IF ( WNTRES ) THEN
+             DO i = 1, K
+                CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SCNRM2( M, Y(1,i), 1)                        ! BLAS CALL
+             END DO
+          END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE CGEDMD
+
diff --git a/lapack-netlib/SRC/cgedmdq.f90 b/lapack-netlib/SRC/cgedmdq.f90
new file mode 100644
index 000000000..52c1669c7
--- /dev/null
+++ b/lapack-netlib/SRC/cgedmdq.f90
@@ -0,0 +1,689 @@
+SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE 
+      INTEGER, PARAMETER :: WP = real32
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, CGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, CGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022      
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretised operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.   
+!            Q is from the inital QR facorization.    
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     unitary matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned. 
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.   
+!     To be useful on exit, this option needs JOBQ='Q'.    
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: CGESVD (the QR SVD algorithm)
+!     2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F).
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) COMPLEX(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by CGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
+!     See the description of ZWORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the 
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X. 
+!.....
+!     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y.   
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.      
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     Z*V, where Z contains orthonormal matrix (the product of
+!     Q from the initial QR factorization and the SVD/POD_basis
+!     returned by CGEDMD in X) and the second factor (the 
+!     eigenvectors of the Rayleigh quotient) is in the array V, 
+!     as returned by CGEDMD. That is,  X(:,1:K)*V(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns 
+!     of V(1:K,1:K) are the computed eigenvectors of the 
+!     K-by-K Rayleigh quotient.  
+!     See the descriptions of EIGS, X and V.      
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs, 
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!     See the description of EIGS and Z.      
+!.....
+!     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further 
+!     details in the provided references. 
+!     If JOBF == 'E', B(1:N,1;K) contains 
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization. 
+!     Here A denotes a compression of the underlying operator.      
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....
+!     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) V contains the K eigenvectors of
+!     the Rayleigh quotient. The Ritz vectors
+!     (returned in Z) are the product of Q from the initial QR
+!     factorization (see the description of F) X (see the 
+!     description of X) and V.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by CGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
+!     On exit, 
+!     ZWORK(1:MIN(M,N)) contains the scalar factors of the 
+!     elementary reflectors as returned by CGEQRF of the 
+!     M-by-N input matrix F.   
+!     If the call to CGEDMDQ is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length. 
+!     Hence, the length of work is at least 2.
+!     See the description of LZWORK.      
+!.....      
+!     LZWORK (input) INTEGER
+!     The minimal length of the  workspace vector ZWORK.
+!     LZWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for CGEQRF[M,N])
+!         MLWDMD = minimal workspace for CGEDMD (see the
+!                  description of LWORK in CGEDMD)
+!         MLWMQR = N (minimal workspace for 
+!                    ZUNMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!         MINMN  = MIN(M,N)      
+!     Then
+!     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!     is further updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LZWORK = MAX( LZWORK, MINMN+MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LZWORK = MAX( ZLWORK, MINMN+MLWGQR)      
+!
+!.....      
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit,
+!     WORK(1:N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to CGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is the same as in CGEDMD, because in CGEDMDQ
+!     only CGEDMD requires real workspace for snapshots
+!     of dimensions MIN(M,N)-by-(N-1).
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then      
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CGEQRF, CLACPY, CLASET, CUNGQR, & 
+                    CUNMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+           INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF     
+         
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2 
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for CGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
+
+         IF ( LQUERY ) THEN 
+             CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)           
+         END IF
+         CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
+                      S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,&
+                      LIWORK, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK, MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK, MINMN+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK, MINMN+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )               
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK, MINMN+OLWGQR)
+            END IF            
+         END IF          
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place 
+!     one can use an out of core QRF.
+!   
+      CALL CGEQRF( M, N, F, LDF, ZWORK,                & 
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL CLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL CLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL CLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN,    & 
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See CGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by CGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by CGEDMD.
+        CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & 
+                                 Z(N+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to CGEDMDQ is to be 
+      
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL CLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL CLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF    
+!
+!     The orthonormal/unitary factor Q in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F 
+         CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE CGEDMDQ
+    
\ No newline at end of file
diff --git a/lapack-netlib/SRC/dgedmd.f90 b/lapack-netlib/SRC/dgedmd.f90
new file mode 100644
index 000000000..20424808f
--- /dev/null
+++ b/lapack-netlib/SRC/dgedmd.f90
@@ -0,0 +1,1054 @@
+      SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)  :: TOL
+!     Array arguments
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     DGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, DGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, DGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!............................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: DGESVD (the QR SVD algorithm)
+!     2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!     < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!     > On entry, Y contains the data snapshot matrix Y
+!     < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1).
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     REIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, and Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, and Z.
+!.....
+!     Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value; ||Z(:,i)||_2=1.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!        || Z(:,i:i+1)||_F = 1.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!     are similarly structured: If IMEIG(i) == 0 then
+!     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs.
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of REIG, IMEIG and Z.
+!.....
+!     B (output) REAL(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1;K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) REAL(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient (real and
+!     imaginary parts for each complex conjugate pair of the
+!     eigenvalues). The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) REAL(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by DGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, WORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!     scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to DGEDMD is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     leng of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the workspace vector WORK.
+!     LWORK is calculated as follows:
+!     If WHTSVD == 1 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!        If JOBZ == 'N'  then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!        workspace length of DGESVD.
+!     If WHTSVD == 2 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!        minimal workspace length of DGESDD.
+!     If WHTSVD == 3 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = N+M+MAX(3*N+1,
+!                        MAX(1,3*N+M,5*N),MAX(1,N))
+!        is the minimal workspace length of DGESVDQ.
+!     If WHTSVD == 4 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
+!        minimal workspace length of DGEJSV.
+!     The above expressions are not simplified in order to
+!     make the usage of WORK more transparent, and for
+!     easier checking. In any case, LWORK >= 2.
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,    ROOTSC, SCALE,  SMALL,  &
+                       SSUM,   XSCL1,  XSCL2
+      INTEGER       :: i,   j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD,         &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       :: BADXY,  LQUERY, SCCOLX, SCCOLY, &
+                       WNTEX,  WNTREF, WNTRES, WNTVEC
+      CHARACTER     :: JOBZL,  T_OR_N
+      CHARACTER     :: JSVOPT
+
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) DLANGE, DLAMCH, DNRM2
+      EXTERNAL      DLANGE, DLAMCH, DNRM2, IDAMAX
+      INTEGER       IDAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DAXPY,  DGEMM,  DSCAL
+      EXTERNAL      DGEEV,  DGEJSV, DGESDD, DGESVD, DGESVDQ, &
+                    DLACPY, DLASCL, DLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL DGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2)
+             !MLWORK = N +  MWRSVQ
+             !IMINWR = M+N-1
+             CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                             X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, LIWORK, RDUMMY,  &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) )
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK =  MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the DGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL DGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using DLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL DSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using DLASSQ.
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(IDAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case DGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that DGESVD, DGESVDQ and DGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside DGEDMD).
+          CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two DGEMM calls here, can use K for LDZ.
+        CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL DGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of DGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! DGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS)
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = DNRM2( M, Y(1,i), 1)                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = DLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE DGEDMD
+
diff --git a/lapack-netlib/SRC/dgedmdq.f90 b/lapack-netlib/SRC/dgedmdq.f90
new file mode 100644
index 000000000..bedfba472
--- /dev/null
+++ b/lapack-netlib/SRC/dgedmdq.f90
@@ -0,0 +1,704 @@
+SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64      
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,    K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, DGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, DGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022  
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.  
+!            Q is from the initial QR factorization.      
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     orthogonal matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned.    
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!     To be useful on exit, this option needs JOBQ='Q'.
+!.....      
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: DGESVD (the QR SVD algorithm)
+!     2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F).
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by DGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in WORK(1:N). 
+!     See the description of WORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K)of the input data, pre-multiply with the 
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X. 
+!.....
+!     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y.   
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.  
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.       
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<N) entries of REIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!     That is, complex conjugate pairs have consequtive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, Z.     
+!.....      
+!     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of Z*V, where the columns of V are the
+!     eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!     orthonormal. The columns of V are similarly structured:
+!     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
+!     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and V.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs.       
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of Z.
+!.....
+!     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further 
+!     details in the provided references. 
+!     If JOBF == 'E', B(1:N,1;K) contains 
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.
+!     Here A denotes a compression of the underlying operator.
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....      
+!     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The eigenvectors of a complex
+!     conjugate pair of eigenvalues are returned in real form
+!     as explained in the description of Z. The Ritz vectors
+!     (returned in Z) are the product of X and V; see
+!     the descriptions of X and Z.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by DGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, 
+!     WORK(1:MIN(M,N)) contains the scalar factors of the 
+!     elementary reflectors as returned by DGEQRF of the 
+!     M-by-N input matrix F.
+!     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to DGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for DGEQRF[M,N])
+!         MLWDMD = minimal workspace for DGEDMD (see the
+!                  description of LWORK in DGEDMD) for 
+!                  snapshots of dimensions MIN(M,N)-by-(N-1)
+!         MLWMQR = N (minimal workspace for 
+!                    DORMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for DORGQR[M,N,N])
+!     Then
+!     LWORK = MAX(N+MLWQR, N+MLWDMD)
+!     is updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR)
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then    
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     Local array      
+!     ~~~~~~~~~~~      
+      REAL(KIND=WP) :: RDUMMY(2)
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DGEMM 
+      EXTERNAL      DGEQRF, DLACPY, DLASET, DORGQR, & 
+                    DORMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF     
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for DGEQRF.
+         MLWORK = MINMN + MLWQR 
+         IF ( LQUERY ) THEN 
+             CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR  = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR           
+         END IF
+         CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL DORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )        
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF            
+         END IF   
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )      
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place 
+!     one can use an out of core QRF.
+!   
+      CALL DGEQRF( M, N, F, LDF, WORK,               & 
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL DLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL DLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL DLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & 
+                   IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See DGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by DGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by DGEDMD.
+        CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO,   & 
+                                  Z(N+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to DGEDMDQ is to be 
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL DLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL DLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF    
+!
+!     The orthonormal/orthogonal factor Q in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN               ! Q overwrites F 
+         CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE DGEDMDQ
+    
\ No newline at end of file
diff --git a/lapack-netlib/SRC/sgedmd.f90 b/lapack-netlib/SRC/sgedmd.f90
new file mode 100644
index 000000000..49cb11527
--- /dev/null
+++ b/lapack-netlib/SRC/sgedmd.f90
@@ -0,0 +1,1054 @@
+      SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN) ::  TOL
+!     Array arguments
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     SGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, SGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, SGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: SGESVD (the QR SVD algorithm)
+!     2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!     < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!     > On entry, Y contains the data snapshot matrix Y
+!     < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     REIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, and Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, and Z.
+!.....
+!     Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value; ||Z(:,i)||_2=1.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!        || Z(:,i:i+1)||_F = 1.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!     are similarly structured: If IMEIG(i) == 0 then
+!     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs.
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of REIG, IMEIG and Z.
+!.....
+!     B (output) REAL(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1;K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) REAL(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient (real and
+!     imaginary parts for each complex conjugate pair of the
+!     eigenvalues). The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     left singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) REAL(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by SGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, WORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!     scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to SGEDMD is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the workspace vector WORK.
+!     LWORK is calculated as follows:
+!     If WHTSVD == 1 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!        If JOBZ == 'N'  then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!        workspace length of SGESVD.
+!     If WHTSVD == 2 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!        minimal workspace length of SGESDD.
+!     If WHTSVD == 3 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = N+M+MAX(3*N+1,
+!                        MAX(1,3*N+M,5*N),MAX(1,N))
+!        is the minimal workspace length of SGESVDQ.
+!     If WHTSVD == 4 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
+!        minimal workspace length of SGEJSV.
+!     The above expressions are not simplified in order to
+!     make the usage of WORK more transparent, and for
+!     easier checking. In any case, LWORK >= 2.
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) SLANGE, SLAMCH, SNRM2
+      EXTERNAL      SLANGE, SLAMCH, SNRM2, ISAMAX
+      INTEGER       ISAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SAXPY,  SGEMM,  SSCAL
+      EXTERNAL      SGEEV,  SGEJSV, SGESDD, SGESVD, SGESVDQ, &
+                    SLACPY, SLASCL, SLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     INT, FLOAT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL SGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2)
+             !MLWORK = N + MWRSVQ
+             !IMINWR = M+N-1
+             CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                          X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, -1, RDUMMY,   &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(RDUMMY(1))
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK = MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the SGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL SGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using SLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL SSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using SLASSQ.
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(ISAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case SGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that SGESVD, SGESVDQ and SGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside SGEDMD).
+          CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two SGEMM calls here, can use K for LDZ
+        CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL SGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of SGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! SGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SNRM2( M, Y(1,i), 1 )                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = SLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE SGEDMD
+
diff --git a/lapack-netlib/SRC/sgedmdq.f90 b/lapack-netlib/SRC/sgedmdq.f90
new file mode 100644
index 000000000..acd5d56c6
--- /dev/null
+++ b/lapack-netlib/SRC/sgedmdq.f90
@@ -0,0 +1,703 @@
+SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env 
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32     
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, SGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, SGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z. 
+!            Q is from the initial QR factorization.  
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     orthogonal matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned.    
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.   
+!     To be useful on exit, this option needs JOBQ='Q'.      
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: SGESVD (the QR SVD algorithm)
+!     2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F)
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by SGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in WORK(1:N). 
+!     See the description of WORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K)of the input data, pre-multiply with the 
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X 
+!.....
+!     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y   
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.   
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.     
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<N) entries of REIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, Z.     
+!.....      
+!     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of Z*V, where the columns of V are the
+!     eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!     orthonormal. The columns of V are similarly structured:
+!     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
+!     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and V.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs.       
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of Z.
+!.....
+!     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further 
+!     details in the provided references. 
+!     If JOBF == 'E', B(1:N,1;K) contains 
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.     
+!     Here A denotes a compression of the underlying operator.      
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....      
+!     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The eigenvectors of a complex
+!     conjugate pair of eigenvalues are returned in real form
+!     as explained in the description of Z. The Ritz vectors
+!     (returned in Z) are the product of X and V; see
+!     the descriptions of X and Z.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by SGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, 
+!     WORK(1:MIN(M,N)) contains the scalar factors of the 
+!     elementary reflectors as returned by SGEQRF of the 
+!     M-by-N input matrix F.
+!     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to SGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for SGEQRF[M,N])
+!         MLWDMD = minimal workspace for SGEDMD (see the
+!                  description of LWORK in SGEDMD) for 
+!                  snapshots of dimensions MIN(M,N)-by-(N-1)
+!         MLWMQR = N (minimal workspace for 
+!                    SORMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for SORGQR[M,N,N])
+!     Then
+!     LWORK = MAX(N+MLWQR, N+MLWDMD)
+!     is updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR)
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a worskpace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     Local array      
+!     ~~~~~~~~~~~      
+      REAL(KIND=WP) :: RDUMMY(2)
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SGEMM 
+      EXTERNAL      SGEQRF, SLACPY, SLASET, SORGQR, & 
+                    SORMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V') 
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF     
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for SGEQRF.
+         MLWORK = MIN(M,N) + MLWQR 
+         IF ( LQUERY ) THEN 
+             CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR           
+         END IF
+         CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL SORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )        
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF            
+         END IF   
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )      
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place 
+!     one can use an out of core QRF.
+!   
+      CALL SGEQRF( M, N, F, LDF, WORK,         & 
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL SLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL SLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL SLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK,  & 
+                   LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+                     LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by SGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by SGEDMD.
+        CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO,   & 
+                                  Z(N+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to SGEDMDQ is to be 
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL SLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL SLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF    
+!
+!     The orthonormal/orthogonal factor in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN  ! Q overwrites F 
+         CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE SGEDMDQ
+    
\ No newline at end of file
diff --git a/lapack-netlib/SRC/zgedmd.f90 b/lapack-netlib/SRC/zgedmd.f90
new file mode 100644
index 000000000..090641ad8
--- /dev/null
+++ b/lapack-netlib/SRC/zgedmd.f90
@@ -0,0 +1,996 @@
+      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!     Array arguments
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     ZGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, ZGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, ZGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!............................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: ZGESVD (the QR SVD algorithm)
+!     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!   < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, Y contains the data snapshot matrix Y
+!   < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient.
+!     See the descriptions of EIGS, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs,
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!     See the description of EIGS and Z.
+!.....
+!     B (output) COMPLEX(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient.
+!     The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by ZGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     ZWORK is used as complex workspace in the complex SVD, as
+!     specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing
+!     the eigenvalues of a Rayleigh quotient.
+!     If the call to ZGEDMD is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of work is at least 2.
+!     See the description of LZWORK.
+!.....
+!     LZWORK (input) INTEGER
+!     The minimal length of the workspace vector ZWORK.
+!     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV),
+!     where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal
+!     LZWORK_SVD is calculated as follows
+!     If WHTSVD == 1 :: ZGESVD ::
+!        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!     If WHTSVD == 2 :: ZGESDD ::
+!        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!     If WHTSVD == 3 :: ZGESVDQ ::
+!        LZWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: ZGEJSV ::
+!        LZWORK_SVD = obtainable by a query
+!     If on entry LZWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths and returns them in
+!     LZWORK(1) and LZWORK(2), respectively.
+!.....
+!     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!     On exit, RWORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to ZGEDMD is only workspace query, then
+!     RWORK(1) contains the minimal workspace length.
+!     See the description of LRWORK.
+!.....
+!     LRWORK (input) INTEGER
+!     The minimal length of the workspace vector RWORK.
+!     LRWORK is calculated as follows:
+!     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where
+!     LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
+!     for the SVD subroutine determined by the input parameter
+!     WHTSVD.
+!     If WHTSVD == 1 :: ZGESVD ::
+!        LRWORK_SVD = 5*MIN(M,N)
+!     If WHTSVD == 2 :: ZGESDD ::
+!        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+!     If WHTSVD == 3 :: ZGESVDQ ::
+!        LRWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: ZGEJSV ::
+!        LRWORK_SVD = obtainable by a query
+!     If on entry LRWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     real workspace length and returns it in RWORK(1).
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for  ZWORK, RWORK and
+!     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP),    PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP),    PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,    &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j,  IMINWR,  INFO1, INFO2,  &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                        LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                        MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                        OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY,  &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2
+      EXTERNAL      ZLANGE, DLAMCH, DZNRM2, IZAMAX
+      INTEGER                               IZAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZAXPY,  ZGEMM,  ZDSCAL
+      EXTERNAL      ZGEEV,  ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, &
+                    ZLACPY, ZLASCL, ZLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,&
+                     W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD,INT( ZWORK(1) ))
+                ! Possible bug in ZGESDD optimal workspace size.
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK  = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the ZGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 )
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using ZLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DZNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )      ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+             ! All columns of X are zero. Return error code -8.
+             ! (the 8th input variable had an illegal value)
+             K = 0
+             INFO = -8
+             CALL XERBLA('ZGEDMD',-INFO)
+             RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL ZDSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using ZLASSQ.
+          DO i = 1, N
+            !RWORK(i) = DZNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )             ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case ZGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('ZGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that ZGESVD, ZGESVDQ and ZGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside ZGEDMD).
+          CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                         ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, &
+            W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of ZGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! ZGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                       LDW, ZZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+            !           LDS, ZZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the Ritz vectors
+      IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                   ZZERO, Z, LDZ )                          ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         DO i = 1, K
+            CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+            ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+            RES(i) = DZNRM2( M, Y(1,i), 1 )                       ! BLAS CALL
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE ZGEDMD
+
diff --git a/lapack-netlib/SRC/zgedmdq.f90 b/lapack-netlib/SRC/zgedmdq.f90
new file mode 100644
index 000000000..51be72a32
--- /dev/null
+++ b/lapack-netlib/SRC/zgedmdq.f90
@@ -0,0 +1,689 @@
+SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, ZGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, ZGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.
+!            Q is from the initial QR factorization.  
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     unitary matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned. 
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.   
+!     To be useful on exit, this option needs JOBQ='Q'.    
+!.....
+!     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: ZGESVD (the QR SVD algorithm)
+!     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F).
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) COMPLEX(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by ZGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
+!     See the description of ZWORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the 
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X. 
+!.....
+!     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y.   
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.  
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.      
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     Z*V, where Z contains orthonormal matrix (the product of
+!     Q from the initial QR factorization and the SVD/POD_basis
+!     returned by ZGEDMD in X) and the second factor (the 
+!     eigenvectors of the Rayleigh quotient) is in the array V, 
+!     as returned by ZGEDMD. That is,  X(:,1:K)*V(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns 
+!     of V(1:K,1:K) are the computed eigenvectors of the 
+!     K-by-K Rayleigh quotient.  
+!     See the descriptions of EIGS, X and V.      
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs, 
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!     See the description of EIGS and Z.      
+!.....
+!     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further 
+!     details in the provided references. 
+!     If JOBF == 'E', B(1:N,1;K) contains 
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.   
+!     Here A denotes a compression of the underlying operator.      
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....
+!     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) V contains the K eigenvectors of
+!     the Rayleigh quotient. The Ritz vectors
+!     (returned in Z) are the product of Q from the initial QR
+!     factorization (see the description of F) X (see the 
+!     description of X) and V.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by ZGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
+!     On exit, 
+!     ZWORK(1:MIN(M,N)) contains the scalar factors of the 
+!     elementary reflectors as returned by ZGEQRF of the 
+!     M-by-N input matrix F.   
+!     If the call to ZGEDMDQ is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length. 
+!     Hence, the length of work is at least 2.
+!     See the description of LZWORK.      
+!.....      
+!     LZWORK (input) INTEGER
+!     The minimal length of the  workspace vector ZWORK.
+!     LZWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for ZGEQRF[M,N])
+!         MLWDMD = minimal workspace for ZGEDMD (see the
+!                  description of LWORK in ZGEDMD)
+!         MLWMQR = N (minimal workspace for 
+!                    ZUNMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!         MINMN  = MIN(M,N)      
+!     Then
+!     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!     is further updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LZWORK = MAX(LZWORK, MINMN+MLWMQR)
+!        if   JOBQ == 'Q' THEN
+!             LZWORK = MAX(ZLWORK, MINMN+MLWGQR)      
+!
+!.....      
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit,
+!     WORK(1:N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to ZGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is the same as in ZGEDMD, because in ZGEDMDQ
+!     only ZGEDMD requires real workspace for snapshots
+!     of dimensions MIN(M,N)-by-(N-1). 
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace length for WORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZGEQRF, ZLACPY, ZLASET, ZUNGQR, & 
+                    ZUNMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q') 
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+           INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+               ZWORK(1) = 2
+               ZWORK(2) = 2
+               WORK(1)  = 2
+               WORK(2)  = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF   
+         
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2 
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for ZGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
+         
+         IF ( LQUERY ) THEN 
+             CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)         
+         END IF
+         CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
+                      S, LDS, ZWORK, -1, WORK, -1, IWORK,&
+                      -1, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK,MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK,MINMN+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )               
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,MINMN+OLWGQR)
+            END IF            
+         END IF         
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >> N, at this place 
+!     one can use an out of core QRF.
+!   
+      CALL ZGEQRF( M, N, F, LDF, ZWORK,                & 
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL ZLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL ZLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL ZLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & 
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See ZGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by ZGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by ZGEDMD.
+        CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, & 
+                                 Z(N+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to ZGEDMDQ is to be 
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL ZLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL ZLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF    
+!
+!     The orthonormal/unitary factor Q in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F 
+         CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE ZGEDMDQ
+    
\ No newline at end of file

From c0865ab0fe8579a2716b4f12fae4e3f2f6b1bede Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 10:45:29 +0200
Subject: [PATCH 02/15] Add LAPACKE interfaces for Dynamic Mode Decomposition
 (Reference-LAPACK PR 736)

---
 lapack-netlib/LAPACKE/src/lapacke_cgedmd.c    | 115 ++++++++++
 .../LAPACKE/src/lapacke_cgedmd_work.c         | 180 +++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c   | 123 +++++++++++
 .../LAPACKE/src/lapacke_cgedmdq_work.c        | 205 ++++++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_dgedmd.c    | 112 ++++++++++
 .../LAPACKE/src/lapacke_dgedmd_work.c         | 179 +++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c   | 119 ++++++++++
 .../LAPACKE/src/lapacke_dgedmdq_work.c        | 200 +++++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_sgedmd.c    | 112 ++++++++++
 .../LAPACKE/src/lapacke_sgedmd_work.c         | 179 +++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c   | 119 ++++++++++
 .../LAPACKE/src/lapacke_sgedmdq_work.c        | 200 +++++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_zgedmd.c    | 116 ++++++++++
 .../LAPACKE/src/lapacke_zgedmd_work.c         | 182 ++++++++++++++++
 lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c   | 123 +++++++++++
 .../LAPACKE/src/lapacke_zgedmdq_work.c        | 205 ++++++++++++++++++
 16 files changed, 2469 insertions(+)
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmd.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmd.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmd.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmd.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c
 create mode 100644 lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c

diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c
new file mode 100644
index 000000000..a269b0daf
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmd.c
@@ -0,0 +1,115 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function cgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_cgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           lapack_complex_float* x, lapack_int ldx,
+                           lapack_complex_float* y, lapack_int ldy, lapack_int k,
+                           lapack_complex_float* reig, lapack_complex_float* imeig,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_complex_float* res, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* w,
+                           lapack_int ldw, lapack_complex_float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_float* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -20;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_C2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmd", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c
new file mode 100644
index 000000000..534934efb
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmd_work.c
@@ -0,0 +1,180 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function cgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_float* x, lapack_int ldx,
+                                lapack_complex_float* y, lapack_int ldy, lapack_int k,
+                                lapack_complex_float* reig, lapack_complex_float* imeig,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* res, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* w,
+                                lapack_int ldw, lapack_complex_float* s, lapack_int lds,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_float* x_t = NULL;
+        lapack_complex_float* y_t = NULL;
+        lapack_complex_float* z_t = NULL;
+        lapack_complex_float* b_t = NULL;
+        lapack_complex_float* w_t = NULL;
+        lapack_complex_float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 ) {
+            LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                           &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                           work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c
new file mode 100644
index 000000000..60e83729b
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq.c
@@ -0,0 +1,123 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function cgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_cgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, lapack_complex_float* f,
+                            lapack_int ldf, lapack_complex_float* x,
+                            lapack_int ldx, lapack_complex_float* y,
+                            lapack_int ldy, lapack_int nrnk, float tol,
+                            lapack_int k, lapack_complex_float* reig,
+                            lapack_complex_float* imeig,
+                            lapack_complex_float* z, lapack_int ldz,
+                            lapack_complex_float* res, lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* v,
+                            lapack_int ldv, lapack_complex_float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_float* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_query, lwork,
+                                 &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_C2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmdq", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c
new file mode 100644
index 000000000..5bdbd3f56
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_cgedmdq_work.c
@@ -0,0 +1,205 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function cgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_float* f, lapack_int ldf,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 lapack_complex_float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 lapack_complex_float* reig,
+                                 lapack_complex_float* imeig,
+                                 lapack_complex_float* z,
+                                 lapack_int ldz, lapack_complex_float* res,
+                                 lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* v,
+                                 lapack_int ldv, lapack_complex_float* s,
+                                 lapack_int lds, lapack_complex_float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_float* f_t = NULL;
+        lapack_complex_float* x_t = NULL;
+        lapack_complex_float* y_t = NULL;
+        lapack_complex_float* z_t = NULL;
+        lapack_complex_float* b_t = NULL;
+        lapack_complex_float* v_t = NULL;
+        lapack_complex_float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                            imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                            work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_cge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c
new file mode 100644
index 000000000..246d7f649
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmd.c
@@ -0,0 +1,112 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function dgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_dgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           double* x, lapack_int ldx, double* y, lapack_int ldy,
+                           lapack_int k, double* reig, double* imeig, double* z,
+                           lapack_int ldz, double* res, double* b, lapack_int ldb,
+                           double* w, lapack_int ldw, double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    double* work = NULL;
+    lapack_int* iwork = NULL;
+    double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -20;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (double*)LAPACKE_malloc( sizeof(double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmd", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c
new file mode 100644
index 000000000..4d1169de9
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmd_work.c
@@ -0,0 +1,179 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function dgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, double* x, lapack_int ldx,
+                                double* y, lapack_int ldy, lapack_int k,
+                                double* reig, double* imeig, double* z,
+                                lapack_int ldz, double* res, double* b,
+                                lapack_int ldb, double* w, lapack_int ldw,
+                                double* s, lapack_int lds, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        double* x_t = NULL;
+        double* y_t = NULL;
+        double* z_t = NULL;
+        double* b_t = NULL;
+        double* w_t = NULL;
+        double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 ) {
+            LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                           &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                           work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (double*)LAPACKE_malloc( sizeof(double) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c
new file mode 100644
index 000000000..f3d621ba9
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq.c
@@ -0,0 +1,119 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function dgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_dgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, double* f, lapack_int ldf,
+                            double* x, lapack_int ldx, double* y, lapack_int ldy,
+                            lapack_int nrnk, double tol, lapack_int k,
+                            double* reig, double* imeig, double* z,
+                            lapack_int ldz, double* res, double* b, lapack_int ldb,
+                            double* v, lapack_int ldv, double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    double* work = NULL;
+    lapack_int* iwork = NULL;
+    double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        }
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_query, lwork,
+                                 &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (double*)LAPACKE_malloc( sizeof(double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmdq", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c
new file mode 100644
index 000000000..51b2a66d8
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_dgedmdq_work.c
@@ -0,0 +1,200 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function dgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 double* f, lapack_int ldf, double* x,
+                                 lapack_int ldx, double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 double* reig, double* imeig, double* z,
+                                 lapack_int ldz, double* res, double* b,
+                                 lapack_int ldb, double* v, lapack_int ldv,
+                                 double* s, lapack_int lds, double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        double* f_t = NULL;
+        double* x_t = NULL;
+        double* y_t = NULL;
+        double* z_t = NULL;
+        double* b_t = NULL;
+        double* v_t = NULL;
+        double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                            imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                            work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (double*)LAPACKE_malloc( sizeof(double) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (double*)LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_dge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c
new file mode 100644
index 000000000..879631b1d
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmd.c
@@ -0,0 +1,112 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function sgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_sgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           float* x, lapack_int ldx, float* y, lapack_int ldy,
+                           lapack_int k, float* reig, float* imeig, float* z,
+                           lapack_int ldz, float* res, float* b, lapack_int ldb,
+                           float* w, lapack_int ldw, float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    float* work = NULL;
+    lapack_int* iwork = NULL;
+    float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -20;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (float*)LAPACKE_malloc( sizeof(float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmd", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c
new file mode 100644
index 000000000..762a9b271
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmd_work.c
@@ -0,0 +1,179 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function sgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, float* x, lapack_int ldx,
+                                float* y, lapack_int ldy, lapack_int k,
+                                float* reig, float* imeig, float* z,
+                                lapack_int ldz, float* res, float* b,
+                                lapack_int ldb, float* w, lapack_int ldw,
+                                float* s, lapack_int lds, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        float* x_t = NULL;
+        float* y_t = NULL;
+        float* z_t = NULL;
+        float* b_t = NULL;
+        float* w_t = NULL;
+        float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 ) {
+            LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                           &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                           work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (float*)LAPACKE_malloc( sizeof(float) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c
new file mode 100644
index 000000000..e202d7fbd
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq.c
@@ -0,0 +1,119 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function sgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_sgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, float* f, lapack_int ldf,
+                            float* x, lapack_int ldx, float* y, lapack_int ldy,
+                            lapack_int nrnk, float tol, lapack_int k,
+                            float* reig, float* imeig, float* z,
+                            lapack_int ldz, float* res, float* b, lapack_int ldb,
+                            float* v, lapack_int ldv, float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    float* work = NULL;
+    lapack_int* iwork = NULL;
+    float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        }
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_query, lwork,
+                                 &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (float*)LAPACKE_malloc( sizeof(float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmdq", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c
new file mode 100644
index 000000000..9039898d2
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_sgedmdq_work.c
@@ -0,0 +1,200 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function sgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 float* f, lapack_int ldf, float* x,
+                                 lapack_int ldx, float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 float* reig, float* imeig, float* z,
+                                 lapack_int ldz, float* res, float* b,
+                                 lapack_int ldb, float* v, lapack_int ldv,
+                                 float* s, lapack_int lds, float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        float* f_t = NULL;
+        float* x_t = NULL;
+        float* y_t = NULL;
+        float* z_t = NULL;
+        float* b_t = NULL;
+        float* v_t = NULL;
+        float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                            imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                            work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (float*)LAPACKE_malloc( sizeof(float) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_sge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c
new file mode 100644
index 000000000..f3f421c54
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmd.c
@@ -0,0 +1,116 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function zgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_zgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           lapack_complex_double* x, lapack_int ldx,
+                           lapack_complex_double* y, lapack_int ldy,
+                           lapack_int k, lapack_complex_double* reig,
+                           lapack_complex_double* imeig, lapack_complex_double* z,
+                           lapack_int ldz, lapack_complex_double* res,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* w, lapack_int ldw,
+                           lapack_complex_double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_double* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -20;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_Z2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmd", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c
new file mode 100644
index 000000000..2554411ec
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmd_work.c
@@ -0,0 +1,182 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function zgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_double* reig,
+                                lapack_complex_double* imeig, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* res,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* w, lapack_int ldw,
+                                lapack_complex_double* s, lapack_int lds,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_double* x_t = NULL;
+        lapack_complex_double* y_t = NULL;
+        lapack_complex_double* z_t = NULL;
+        lapack_complex_double* b_t = NULL;
+        lapack_complex_double* w_t = NULL;
+        lapack_complex_double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 ) {
+            LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                           &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                           work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c
new file mode 100644
index 000000000..3648ffdf2
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq.c
@@ -0,0 +1,123 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function zgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_zgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, lapack_complex_double* f,
+                            lapack_int ldf, lapack_complex_double* x,
+                            lapack_int ldx, lapack_complex_double* y,
+                            lapack_int ldy, lapack_int nrnk, double tol,
+                            lapack_int k, lapack_complex_double* reig,
+                            lapack_complex_double* imeig,
+                            lapack_complex_double* z, lapack_int ldz,
+                            lapack_complex_double* res, lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* v,
+                            lapack_int ldv, lapack_complex_double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_double* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_query, lwork,
+                                 &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_Z2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmdq", info );
+    }
+    return info;
+}
diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c
new file mode 100644
index 000000000..9afceba07
--- /dev/null
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgedmdq_work.c
@@ -0,0 +1,205 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function zgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_double* f, lapack_int ldf,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 lapack_complex_double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 lapack_complex_double* reig,
+                                 lapack_complex_double* imeig,
+                                 lapack_complex_double* z,
+                                 lapack_int ldz, lapack_complex_double* res,
+                                 lapack_complex_double* b,
+                                 lapack_int ldb, lapack_complex_double* v,
+                                 lapack_int ldv, lapack_complex_double* s,
+                                 lapack_int lds, lapack_complex_double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_double* f_t = NULL;
+        lapack_complex_double* x_t = NULL;
+        lapack_complex_double* y_t = NULL;
+        lapack_complex_double* z_t = NULL;
+        lapack_complex_double* b_t = NULL;
+        lapack_complex_double* v_t = NULL;
+        lapack_complex_double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                            imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                            work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_zge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+    }
+    return info;
+}

From e28fdf71b78613073701b3fc2f05f60ad46bb6ed Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 14:50:54 +0200
Subject: [PATCH 03/15] Add LAPACKE interfaces for Dynamic Mode Decomposition
 (Reference-LAPACK PR 736)

---
 lapack-netlib/LAPACKE/src/Makefile | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile
index 9d81e2416..969288f42 100644
--- a/lapack-netlib/LAPACKE/src/Makefile
+++ b/lapack-netlib/LAPACKE/src/Makefile
@@ -137,6 +137,10 @@ lapacke_cgerqf.o \
 lapacke_cgerqf_work.o \
 lapacke_cgesdd.o \
 lapacke_cgesdd_work.o \
+lapacke_cgedmd.o \
+lapacke_cgedmd_work.o \
+lapacke_cgedmdq.o \
+lapacke_cgedmdq_work.o \
 lapacke_cgesv.o \
 lapacke_cgesv_work.o \
 lapacke_cgesvd.o \
@@ -763,6 +767,10 @@ lapacke_dgerqf.o \
 lapacke_dgerqf_work.o \
 lapacke_dgesdd.o \
 lapacke_dgesdd_work.o \
+lapacke_dgedmd.o \
+lapacke_dgedmd_work.o \
+lapacke_dgedmdq.o \
+lapacke_dgedmdq_work.o \
 lapacke_dgesv.o \
 lapacke_dgesv_work.o \
 lapacke_dgesvd.o \
@@ -1343,6 +1351,10 @@ lapacke_sgerqf.o \
 lapacke_sgerqf_work.o \
 lapacke_sgesdd.o \
 lapacke_sgesdd_work.o \
+lapacke_sgedmd.o \
+lapacke_sgedmd_work.o \
+lapacke_sgedmdq.o \
+lapacke_sgedmdq_work.o \
 lapacke_sgesv.o \
 lapacke_sgesv_work.o \
 lapacke_sgesvd.o \
@@ -1913,6 +1925,10 @@ lapacke_zgerqf.o \
 lapacke_zgerqf_work.o \
 lapacke_zgesdd.o \
 lapacke_zgesdd_work.o \
+lapacke_zgedmd.o \
+lapacke_zgedmd_work.o \
+lapacke_zgedmdq.o \
+lapacke_zgedmdq_work.o \
 lapacke_zgesv.o \
 lapacke_zgesv_work.o \
 lapacke_zgesvd.o \

From 83d6ce12893e182bfb212ea5f6d7f3d2f489cda3 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 14:55:26 +0200
Subject: [PATCH 04/15] Add interfaces for Dynamic Mode Decomposition
 (Reference-LAPACK PR 736)

---
 cmake/lapacke.cmake | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake
index 6713beefc..f43bf10d0 100644
--- a/cmake/lapacke.cmake
+++ b/cmake/lapacke.cmake
@@ -90,6 +90,10 @@ set(CSRC
   lapacke_cgerqf_work.c
   lapacke_cgesdd.c
   lapacke_cgesdd_work.c
+  lapacke_cgedmd.c
+  lapacke_cgedmd_work.c
+  lapacke_cgedmdq.c
+  lapacke_cgedmdq_work.c
   lapacke_cgesv.c
   lapacke_cgesv_work.c
   lapacke_cgesvd.c
@@ -713,6 +717,10 @@ set(DSRC
   lapacke_dgerqf_work.c
   lapacke_dgesdd.c
   lapacke_dgesdd_work.c
+  lapacke_dgedmd.c
+  lapacke_dgedmd_work.c
+  lapacke_dgedmdq.c
+  lapacke_dgedmdq_work.c
   lapacke_dgesv.c
   lapacke_dgesv_work.c
   lapacke_dgesvd.c
@@ -1291,6 +1299,10 @@ set(SSRC
   lapacke_sgerqf_work.c
   lapacke_sgesdd.c
   lapacke_sgesdd_work.c
+  lapacke_sgedmd.c
+  lapacke_sgedmd_work.c
+  lapacke_sgedmdq.c
+  lapacke_sgedmdq_work.c
   lapacke_sgesv.c
   lapacke_sgesv_work.c
   lapacke_sgesvd.c
@@ -1863,6 +1875,10 @@ set(ZSRC
   lapacke_zgerqf_work.c
   lapacke_zgesdd.c
   lapacke_zgesdd_work.c
+  lapacke_zgedmd.c
+  lapacke_zgedmd_work.c
+  lapacke_zgedmdq.c
+  lapacke_zgedmdq_work.c
   lapacke_zgesv.c
   lapacke_zgesv_work.c
   lapacke_zgesvd.c

From defafd1353a13e20452f94a3320593ecf6ffe0b4 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 15:07:53 +0200
Subject: [PATCH 05/15] Add functions for Dynamic Mode Decomposition
 (Reference-LAPACK PR 736)

---
 cmake/lapack.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake
index 544e226ab..077390d90 100644
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -124,7 +124,7 @@ set(SLASRC
    ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
    sgesvdq.f slaorhr_col_getrfnp.f
    slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f 
-   slatrs3.f strsyl3.f sgelst.f)
+   slatrs3.f strsyl3.f sgelst.f sgedmd.f90 sgedmdq.f90)
 
 set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
    sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@@ -223,7 +223,7 @@ set(CLASRC
    chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
    cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f 
    cungtsqr.f cungtsqr_row.f cunhr_col.f 
-   clatrs3.f ctrsyl3.f cgelst.f)
+   clatrs3.f ctrsyl3.f cgelst.f cgedmd.f90 cgedmdq.f90)
 
 set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
    cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -316,7 +316,7 @@ set(DLASRC
    dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
    dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
    dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f 
-   dlatrs3.f dtrsyl3.f dgelst.f)
+   dlatrs3.f dtrsyl3.f dgelst.f dgedmd.f90 dgedmdq.f90)
 
 set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
    dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -419,7 +419,7 @@ set(ZLASRC
    zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
    zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
    zungtsqr.f zungtsqr_row.f zunhr_col.f
-   zlatrs3.f ztrsyl3.f zgelst.f)
+   zlatrs3.f ztrsyl3.f zgelst.f zgedmd.f90 zgedmdq.f90)
 
 set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
    zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

From 9f0ef475b4fe9e287cd253aa9c285155887ee99c Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 15:12:02 +0200
Subject: [PATCH 06/15] Add Dynamic Mode Decomposition functions
 (Reference-LAPACK PR 736)

---
 lapack-netlib/SRC/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile
index 8cac42330..74db14e46 100644
--- a/lapack-netlib/SRC/Makefile
+++ b/lapack-netlib/SRC/Makefile
@@ -207,7 +207,7 @@ SLASRC_O = \
    ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
    ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
    ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
-   sgesvdq.o slatrs3.o strsyl3.o sgelst.o
+   sgesvdq.o slatrs3.o strsyl3.o sgelst.o sgedmd.o sgedmdq.o
    
 endif
 
@@ -316,7 +316,7 @@ CLASRC_O = \
    chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \
    cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
    chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
-   cgesvdq.o clatrs3.o ctrsyl3.o cgelst.o
+   cgesvdq.o clatrs3.o ctrsyl3.o cgelst.o cgedmd.o cgedmdq.o
 endif
 
 ifdef USEXBLAS
@@ -417,7 +417,7 @@ DLASRC_O = \
    dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
    dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
    dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
-   dgesvdq.o dlatrs3.o dtrsyl3.o dgelst.o
+   dgesvdq.o dlatrs3.o dtrsyl3.o dgelst.o dgedmd.o dgedmdq.o
 endif
 
 ifdef USEXBLAS
@@ -526,7 +526,7 @@ ZLASRC_O = \
    zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \
    zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
    zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
-   zgesvdq.o zlatrs3.o ztrsyl3.o zgelst.o
+   zgesvdq.o zlatrs3.o ztrsyl3.o zgelst.o zgedmd.o zgedmdq.o
 endif
 
 ifdef USEXBLAS

From c7a05458cd6d315ac3cc377c3760f59784d9e9e8 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 15:52:44 +0200
Subject: [PATCH 07/15] Add Dynamic Mode Decomposition functions
 (Reference-LAPACK PR 736)

---
 lapack-netlib/LAPACKE/include/lapack.h | 132 +++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h
index a0fcaa259..a5d02b42e 100644
--- a/lapack-netlib/LAPACKE/include/lapack.h
+++ b/lapack-netlib/LAPACKE/include/lapack.h
@@ -3323,6 +3323,138 @@ void LAPACK_zgesdd_base(
     #define LAPACK_zgesdd(...) LAPACK_zgesdd_base(__VA_ARGS__)
 #endif
 
+#define LAPACK_cgedmd LAPACK_GLOBAL(cgedmd,CGEDMD)
+void LAPACK_cgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    lapack_complex_float* x, lapack_int const* ldx,
+    lapack_complex_float* y, lapack_int const* ldy, lapack_int const* k,
+    lapack_complex_float* reig, lapack_complex_float* imeig,
+    lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res,
+    lapack_complex_float* b, lapack_int const* ldb,
+    lapack_complex_float* w, lapack_int const* ldw,
+    lapack_complex_float* s, lapack_int const* lds,
+    lapack_complex_float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_dgedmd LAPACK_GLOBAL(dgedmd,DGEDMD)
+void LAPACK_dgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    double* x, lapack_int const* ldx,
+    double* y, lapack_int const* ldy, lapack_int const* k,
+    double* reig, double* imeig,
+    double* z, lapack_int const* ldz, double* res,
+    double* b, lapack_int const* ldb,
+    double* w, lapack_int const* ldw,
+    double* s, lapack_int const* lds,
+    double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_sgedmd LAPACK_GLOBAL(sgedmd,SGEDMD)
+void LAPACK_sgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    float* x, lapack_int const* ldx,
+    float* y, lapack_int const* ldy, lapack_int const* k,
+    float* reig, float* imeig,
+    float* z, lapack_int const* ldz, float* res,
+    float* b, lapack_int const* ldb,
+    float* w, lapack_int const* ldw,
+    float* s, lapack_int const* lds,
+    float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_zgedmd LAPACK_GLOBAL(zgedmd,ZGEDMD)
+void LAPACK_zgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    lapack_complex_double* x, lapack_int const* ldx,
+    lapack_complex_double* y, lapack_int const* ldy, lapack_int const* k,
+    lapack_complex_double* reig, lapack_complex_double* imeig,
+    lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res,
+    lapack_complex_double* b, lapack_int const* ldb,
+    lapack_complex_double* w, lapack_int const* ldw,
+    lapack_complex_double* s, lapack_int const* lds,
+    lapack_complex_double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_cgedmdq LAPACK_GLOBAL(cgedmdq,CGEDMDQ)
+void LAPACK_cgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    lapack_complex_float* f, lapack_int const* ldf,
+    lapack_complex_float* x, lapack_int const* ldx,
+    lapack_complex_float* y, lapack_int const* ldy, lapack_int const* nrnk,
+    float const* tol, lapack_int const* k,
+    lapack_complex_float* reig, lapack_complex_float* imeig,
+    lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res,
+    lapack_complex_float* b, lapack_int const* ldb,
+    lapack_complex_float* v, lapack_int const* ldv,
+    lapack_complex_float* s, lapack_int const* lds,
+    lapack_complex_float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_dgedmdq LAPACK_GLOBAL(dgedmdq,DGEDMDQ)
+void LAPACK_dgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    double* f, lapack_int const* ldf,
+    double* x, lapack_int const* ldx,
+    double* y, lapack_int const* ldy, lapack_int const* nrnk,
+    double const* tol, lapack_int const* k,
+    double* reig, double* imeig,
+    double* z, lapack_int const* ldz, double* res,
+    double* b, lapack_int const* ldb,
+    double* v, lapack_int const* ldv,
+    double* s, lapack_int const* lds,
+    double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_sgedmdq LAPACK_GLOBAL(sgedmdq,SGEDMDQ)
+void LAPACK_sgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    float* f, lapack_int const* ldf,
+    float* x, lapack_int const* ldx,
+    float* y, lapack_int const* ldy, lapack_int const* nrnk,
+    float const* tol, lapack_int const* k,
+    float* reig, float* imeig,
+    float* z, lapack_int const* ldz, float* res,
+    float* b, lapack_int const* ldb,
+    float* v, lapack_int const* ldv,
+    float* s, lapack_int const* lds,
+    float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_zgedmdq LAPACK_GLOBAL(zgedmdq,ZGEDMDQ)
+void LAPACK_zgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    lapack_complex_double* f, lapack_int const* ldf,
+    lapack_complex_double* x, lapack_int const* ldx,
+    lapack_complex_double* y, lapack_int const* ldy, lapack_int const* nrnk,
+    double const* tol, lapack_int const* k,
+    lapack_complex_double* reig, lapack_complex_double* imeig,
+    lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res,
+    lapack_complex_double* b, lapack_int const* ldb,
+    lapack_complex_double* v, lapack_int const* ldv,
+    lapack_complex_double* s, lapack_int const* lds,
+    lapack_complex_double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info )
+
 #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV)
 lapack_int LAPACK_cgesv(
     lapack_int const* n, lapack_int const* nrhs,

From de88063aa2795fd25c00efeb92791b9c1d38c5be Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 15:56:10 +0200
Subject: [PATCH 08/15] Add interfaces for Dynamic Mode Decomposition functions
 (Reference-LAPACK PR 736)

---
 lapack-netlib/LAPACKE/include/lapacke.h | 131 ++++++++++++++++++++++--
 1 file changed, 122 insertions(+), 9 deletions(-)

diff --git a/lapack-netlib/LAPACKE/include/lapacke.h b/lapack-netlib/LAPACKE/include/lapacke.h
index 9bd228064..9a9ab4753 100644
--- a/lapack-netlib/LAPACKE/include/lapacke.h
+++ b/lapack-netlib/LAPACKE/include/lapacke.h
@@ -956,7 +956,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp, char jobr,
                            lapack_int lda, double* s, lapack_complex_double* u,
                            lapack_int ldu, lapack_complex_double* v,
                            lapack_int ldv, lapack_int* numrank );
-                           
+
 lapack_int LAPACKE_sgesvj( int matrix_layout, char joba, char jobu, char jobv,
                            lapack_int m, lapack_int n, float* a, lapack_int lda,
                            float* sva, lapack_int mv, float* v, lapack_int ldv,
@@ -5712,6 +5712,120 @@ lapack_int LAPACKE_zgesdd_work( int matrix_layout, char jobz, lapack_int m,
                                 lapack_complex_double* work, lapack_int lwork,
                                 double* rwork, lapack_int* iwork );
 
+lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, float* x, lapack_int ldx,
+                                float* y, lapack_int ldy, lapack_int k,
+                                float* reig, float* imeig, float* z,
+                                lapack_int ldz, float* res, float* b,
+                                lapack_int ldb, float* w, lapack_int ldw,
+                                float* s, lapack_int lds, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, double* x, lapack_int ldx,
+                                double* y, lapack_int ldy, lapack_int k,
+                                double* reig, double* imeig, double* z,
+                                lapack_int ldz, double* res, double* b,
+                                lapack_int ldb, double* w, lapack_int ldw,
+                                double* s, lapack_int lds, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_float* x,
+                                lapack_int ldx, lapack_complex_float* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_float* reig,
+                                lapack_complex_float* imeig,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* res,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* w, lapack_int ldw,
+                                lapack_complex_float* s, lapack_int lds,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_double* reig,
+                                lapack_complex_double* imeig,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* res,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* w, lapack_int ldw,
+                                lapack_complex_double* s, lapack_int lds,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 float* f, lapack_int ldf, float* x,
+                                 lapack_int ldx, float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 float* reig, float* imeig, float* z,
+                                 lapack_int ldz, float* res, float* b,
+                                 lapack_int ldb, float* v, lapack_int ldv,
+                                 float* s, lapack_int lds, float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 double* f, lapack_int ldf, double* x,
+                                 lapack_int ldx, double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 double* reig, double* imeig, double* z,
+                                 lapack_int ldz, double* res, double* b,
+                                 lapack_int ldb, double* v, lapack_int ldv,
+                                 double* s, lapack_int lds, double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_float* f, lapack_int ldf,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 lapack_complex_float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 lapack_complex_float* reig,
+                                 lapack_complex_float* imeig,
+                                 lapack_complex_float* z, lapack_int ldz,
+                                 lapack_complex_float* res,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* v, lapack_int ldv,
+                                 lapack_complex_float* s, lapack_int lds,
+                                 lapack_complex_float* work, lapack_int lwork,
+                                 lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_double* f, lapack_int ldf,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 lapack_complex_double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 lapack_complex_double* reig,
+                                 lapack_complex_double* imeig,
+                                 lapack_complex_double* z, lapack_int ldz,
+                                 lapack_complex_double* res,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* v, lapack_int ldv,
+                                 lapack_complex_double* s, lapack_int lds,
+                                 lapack_complex_double* work, lapack_int lwork,
+                                 lapack_int* iwork,
+                                 lapack_int liwork );
+
 lapack_int LAPACKE_sgesv_work( int matrix_layout, lapack_int n, lapack_int nrhs,
                                float* a, lapack_int lda, lapack_int* ipiv,
                                float* b, lapack_int ldb );
@@ -5833,7 +5947,7 @@ lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp,
                                 lapack_int* iwork, lapack_int liwork,
                                 lapack_complex_double* cwork, lapack_int lcwork,
                                 double* rwork, lapack_int lrwork);
-                                
+
 lapack_int LAPACKE_sgesvj_work( int matrix_layout, char joba, char jobu,
                                 char jobv, lapack_int m, lapack_int n, float* a,
                                 lapack_int lda, float* sva, lapack_int mv,
@@ -12550,7 +12664,7 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char
 //LAPACK 3.8.0
 lapack_int LAPACKE_ssysv_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2, float* b, lapack_int ldb );
 lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, float* a, lapack_int lda,
@@ -12560,7 +12674,7 @@ lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_in
 lapack_int LAPACKE_dsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, double* a, lapack_int lda,
                           double* tb, lapack_int ltb,
-                          lapack_int* ipiv, lapack_int* ipiv2, 
+                          lapack_int* ipiv, lapack_int* ipiv2,
                           double* b, lapack_int ldb );
 lapack_int LAPACKE_dsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, double* a, lapack_int lda,
@@ -12612,10 +12726,10 @@ lapack_int LAPACKE_zhesv_aa_2stage_work( int matrix_layout, char uplo, lapack_in
                                lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2,
                                lapack_complex_double* b, lapack_int ldb,
                                lapack_complex_double* work, lapack_int lwork );
-                               
+
 lapack_int LAPACKE_ssytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2 );
 lapack_int LAPACKE_ssytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                float* a, lapack_int lda,
@@ -12671,7 +12785,7 @@ lapack_int LAPACKE_zhetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_i
 
 lapack_int LAPACKE_ssytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2, float* b, lapack_int ldb );
 lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, float* a, lapack_int lda,
@@ -12680,7 +12794,7 @@ lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i
 lapack_int LAPACKE_dsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, double* a, lapack_int lda,
                           double* tb, lapack_int ltb,
-                          lapack_int* ipiv, lapack_int* ipiv2, 
+                          lapack_int* ipiv, lapack_int* ipiv2,
                           double* b, lapack_int ldb );
 lapack_int LAPACKE_dsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, double* a, lapack_int lda,
@@ -12727,7 +12841,6 @@ lapack_int LAPACKE_zhetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i
                                lapack_int lda, lapack_complex_double* tb,
                                lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2,
                                lapack_complex_double* b, lapack_int ldb );
-
 //LAPACK 3.10.0
 lapack_int LAPACKE_sorhr_col( int matrix_layout, lapack_int m, lapack_int n,
                               lapack_int nb, float* a,

From 5eef0793bac9637b80f9ce02363d80abcda24840 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 18:56:33 +0200
Subject: [PATCH 09/15] Fix missing semicolon

---
 lapack-netlib/LAPACKE/include/lapack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h
index a5d02b42e..f510c8c80 100644
--- a/lapack-netlib/LAPACKE/include/lapack.h
+++ b/lapack-netlib/LAPACKE/include/lapack.h
@@ -3453,7 +3453,7 @@ void LAPACK_zgedmdq(
     lapack_complex_double* s, lapack_int const* lds,
     lapack_complex_double* work, lapack_int const* lwork,
     lapack_int* iwork, lapack_int const* liwork,
-    lapack_int* info )
+    lapack_int* info );
 
 #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV)
 lapack_int LAPACK_cgesv(

From 649ab6481adec7e4bb2a411a8d2956458523733f Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 20:03:44 +0200
Subject: [PATCH 10/15] Add dummy C files for ?GEMDQ?

---
 lapack-netlib/SRC/cgedmd.c  | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/cgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/dgedmd.c  | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/dgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/sgedmd.c  | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/sgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/zgedmd.c  | 511 ++++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/zgedmdq.c | 511 ++++++++++++++++++++++++++++++++++++
 8 files changed, 4088 insertions(+)
 create mode 100644 lapack-netlib/SRC/cgedmd.c
 create mode 100644 lapack-netlib/SRC/cgedmdq.c
 create mode 100644 lapack-netlib/SRC/dgedmd.c
 create mode 100644 lapack-netlib/SRC/dgedmdq.c
 create mode 100644 lapack-netlib/SRC/sgedmd.c
 create mode 100644 lapack-netlib/SRC/sgedmdq.c
 create mode 100644 lapack-netlib/SRC/zgedmd.c
 create mode 100644 lapack-netlib/SRC/zgedmdq.c

diff --git a/lapack-netlib/SRC/cgedmd.c b/lapack-netlib/SRC/cgedmd.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/cgedmd.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/cgedmdq.c b/lapack-netlib/SRC/cgedmdq.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/cgedmdq.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/dgedmd.c b/lapack-netlib/SRC/dgedmd.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/dgedmd.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/dgedmdq.c b/lapack-netlib/SRC/dgedmdq.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/dgedmdq.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/sgedmd.c b/lapack-netlib/SRC/sgedmd.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/sgedmd.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/sgedmdq.c b/lapack-netlib/SRC/sgedmdq.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/sgedmdq.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/zgedmd.c b/lapack-netlib/SRC/zgedmd.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/zgedmd.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
diff --git a/lapack-netlib/SRC/zgedmdq.c b/lapack-netlib/SRC/zgedmdq.c
new file mode 100644
index 000000000..447b23014
--- /dev/null
+++ b/lapack-netlib/SRC/zgedmdq.c
@@ -0,0 +1,511 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
+#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) {ceil(w)}
+#define myhuge_(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+

From 2bfd7ff126934704eb83edae5f3c118cabfb203e Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 21:39:29 +0200
Subject: [PATCH 11/15] Add dummy C sources for ?GEDMD

---
 cmake/lapack.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake
index 077390d90..12127531d 100644
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -624,7 +624,7 @@ set(SLASRC
    ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
    sgesvdq.c slaorhr_col_getrfnp.c
    slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c 
-   slatrs3.c strsyl3.c sgelst.c)
+   slatrs3.c strsyl3.c sgelst.c sgedmd.c sgedmdq.c)
 
 set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
    sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@@ -722,7 +722,7 @@ set(CLASRC
    chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
    cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c 
    cungtsqr.c cungtsqr_row.c cunhr_col.c 
-   clatrs3.c ctrsyl3.c cgelst.c)
+   clatrs3.c ctrsyl3.c cgelst.c cgedmd.c cgedmdq.c)
 
 set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
    cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
@@ -814,7 +814,7 @@ set(DLASRC
    dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
    dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
    dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c 
-   dlatrs3.c dtrsyl3.c dgelst.c)
+   dlatrs3.c dtrsyl3.c dgelst.c dgedmd.c dgedmdq.c)
 
 set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
    dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
@@ -925,7 +925,7 @@ set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
    zla_gbrfsx_extended.c zla_gbamv.c zla_gbrcond_c.c zla_gbrcond_x.c
    zla_gbrpvgrw.c zhesvxx.c zherfsx.c zla_herfsx_extended.c
    zla_heamv.c zla_hercond_c.c zla_hercond_x.c zla_herpvgrw.c
-   zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c)
+   zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c zgedmd.c zgedmdq.c)
 
 
 if(USE_XBLAS)

From 174f4e65e320241c7b57136db95d367cac7583d7 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 20 Jun 2023 23:14:35 +0200
Subject: [PATCH 12/15] Add LAPACK/LAPACKE functions for Dynamic Mode
 Decomposition

---
 exports/gensymbol | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/exports/gensymbol b/exports/gensymbol
index b584167a4..704eab06f 100755
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -844,6 +844,23 @@ lapackobjs2z="$lapackobjs2z
     zungtsqr_row
     "
 
+#functions added for lapack-3.11
+lapackobjs2c="$lapackobjs2c
+    cgedmd
+    cgedmdq
+    "
+lapackobjs2d="$lapackobjs2d
+    dgedmd
+    dgedmdq
+    "
+lapackobjs2s="$lapackobjs2s
+    sgedmd
+    sgedmdq
+    "
+lapackobjs2z="$lapackobjs2z
+    zgedmd
+    zgedmdq
+    "
 lapack_extendedprecision_objs="
     zposvxx clagge clatms chesvxx cposvxx cgesvxx ssyrfssx csyrfsx
     dlagsy dsysvxx sporfsx slatms zlatms zherfsx csysvxx
@@ -1013,6 +1030,10 @@ lapackeobjsc="
     LAPACKE_cgebrd_work
     LAPACKE_cgecon
     LAPACKE_cgecon_work
+    LAPACKE_cgedmd
+    LAPACKE_cgedmd_work
+    LAPACKE_cgedmdq
+    LAPACKE_cgedmdq_work
     LAPACKE_cgeequ
     LAPACKE_cgeequ_work
     LAPACKE_cgeequb
@@ -1672,6 +1693,10 @@ lapackeobjsd="
     LAPACKE_dgebrd_work
     LAPACKE_dgecon
     LAPACKE_dgecon_work
+    LAPACKE_dgedmd
+    LAPACKE_dgedmd_work
+    LAPACKE_dgedmdq
+    LAPACKE_dgedmdq_work
     LAPACKE_dgeequ
     LAPACKE_dgeequ_work
     LAPACKE_dgeequb
@@ -2285,6 +2310,10 @@ lapackeobjss="
     LAPACKE_sgebrd_work
     LAPACKE_sgecon
     LAPACKE_sgecon_work
+    LAPACKE_sgedmd
+    LAPACKE_sgedmd_work
+    LAPACKE_sgedmdq
+    LAPACKE_sgedmdq_work
     LAPACKE_sgeequ
     LAPACKE_sgeequ_work
     LAPACKE_sgeequb
@@ -2894,6 +2923,10 @@ lapackeobjsz="
     LAPACKE_zgebrd_work
     LAPACKE_zgecon
     LAPACKE_zgecon_work
+    LAPACKE_zgedmd
+    LAPACKE_zgedmd_work
+    LAPACKE_zgedmdq
+    LAPACKE_zgedmdq_work
     LAPACKE_zgeequ
     LAPACKE_zgeequ_work
     LAPACKE_zgeequb

From 5cc1f7a0bab55bd8c10c9fceaa88d67a7246dede Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 21 Jun 2023 15:43:20 +0200
Subject: [PATCH 13/15] Add functional C replacements for ?GEDMD?

---
 lapack-netlib/SRC/cgedmd.c  | 1159 ++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/cgedmdq.c |  778 ++++++++++++++++++++++
 lapack-netlib/SRC/dgedmd.c  | 1242 +++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/dgedmdq.c |  789 ++++++++++++++++++++++
 lapack-netlib/SRC/sgedmd.c  | 1235 ++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/sgedmdq.c |  785 ++++++++++++++++++++++
 lapack-netlib/SRC/zgedmd.c  | 1165 ++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/zgedmdq.c |  782 ++++++++++++++++++++++
 8 files changed, 7935 insertions(+)

diff --git a/lapack-netlib/SRC/cgedmd.c b/lapack-netlib/SRC/cgedmd.c
index 447b23014..570395c7b 100644
--- a/lapack-netlib/SRC/cgedmd.c
+++ b/lapack-netlib/SRC/cgedmd.c
@@ -509,3 +509,1162 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+
+/* Subroutine */ int cgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, 
+	integer *whtsvd, integer *m, integer *n, complex *x, integer *ldx, 
+	complex *y, integer *ldy, integer *nrnk, real *tol, integer *k, 
+	complex *eigs, complex *z__, integer *ldz, real *res, complex *b, 
+	integer *ldb, complex *w, integer *ldw, complex *s, integer *lds, 
+	complex *zwork, integer *lzwork, real *rwork, integer *lrwork, 
+	integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, 
+	    b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2, i__3, 
+	    i__4, i__5;
+    real r__1, r__2;
+    complex q__1, q__2;
+
+    /* Local variables */
+    complex zone;
+    real zero, ssum;
+    integer info1, info2;
+    real xscl1, xscl2;
+    integer i__, j;
+    real scale;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
+	    integer *, complex *, complex *, integer *, complex *, integer *, 
+	    complex *, complex *, integer *), cgeev_(char *, 
+	    char *, integer *, complex *, integer *, complex *, complex *, 
+	    integer *, complex *, integer *, complex *, integer *, real *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    logical badxy;
+    real small;
+    char jobzl[1];
+    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
+	    integer *, complex *, integer *);
+    logical wntex;
+    complex zzero;
+    extern real scnrm2_(integer *, complex *, integer *);
+    extern /* Subroutine */ int cgesdd_(char *, integer *, integer *, complex 
+	    *, integer *, real *, complex *, integer *, complex *, integer *, 
+	    complex *, integer *, real *, integer *, integer *), 
+	    clascl_(char *, integer *, integer *, real *, real *, integer *, 
+	    integer *, complex *, integer *, integer *);
+    extern integer icamax_(integer *, complex *, integer *);
+    extern real slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
+	    *), cgesvd_(char *, char *, integer *, integer *, complex *, 
+	    integer *, real *, complex *, integer *, complex *, integer *, 
+	    complex *, integer *, real *, integer *), clacpy_(
+	    char *, integer *, integer *, complex *, integer *, complex *, 
+	    integer *), xerbla_(char *, integer *);
+    char t_or_n__[1];
+    extern /* Subroutine */ int cgejsv_(char *, char *, char *, char *, char *
+	    , char *, integer *, integer *, complex *, integer *, real *, 
+	    complex *, integer *, complex *, integer *, complex *, integer *, 
+	    real *, integer *, integer *, integer *), classq_(integer *, complex *, integer *, 
+	    real *, real *);
+    logical sccolx, sccoly;
+    extern logical sisnan_(real *);
+    integer lwrsdd, mwrsdd, iminwr;
+    logical wntref, wntvec;
+    real rootsc;
+    integer lwrkev, mlwork, mwrkev, numrnk, olwork, lwrsvd, mwrsvd, mlrwrk;
+    logical lquery, wntres;
+    char jsvopt[1];
+    integer lwrsvj, mwrsvj;
+    real rdummy[2];
+    extern /* Subroutine */ int mecago_();
+    integer lwrsvq, mwrsvq;
+    real ofl, one;
+    extern /* Subroutine */ int cgesvdq_(char *, char *, char *, char *, char 
+	    *, integer *, integer *, complex *, integer *, real *, complex *, 
+	    integer *, complex *, integer *, integer *, integer *, integer *, 
+	    complex *, integer *, real *, integer *, integer *);
+
+/*   March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real32 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ............................................................ */
+/*     Purpose */
+/*     ======= */
+/*     CGEDMD computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, CGEDMD computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, CGEDMD returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/* ...................................................................... */
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product X(:,1:K)*W, where X */
+/*            contains a POD basis (leading left singular vectors */
+/*            of the data matrix X) and W contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of K, X, W, Z. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will be */
+/*            computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: CGESVD (the QR SVD algorithm) */
+/*     2 :: CGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M>= 0 */
+/*     The state space dimension (the row dimension of X, Y). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshot pairs */
+/*     (the number of columns of X and Y). */
+/* ..... */
+/*     X (input/output) COMPLEX(KIND=WP) M-by-N array */
+/*   > On entry, X contains the data snapshot matrix X. It is */
+/*     assumed that the column norms of X are in the range of */
+/*     the normalized floating point numbers. */
+/*   < On exit, the leading K columns of X contain a POD basis, */
+/*     i.e. the leading K left singular vectors of the input */
+/*     data matrix X, U(:,1:K). All N columns of X contain all */
+/*     left singular vectors of the input matrix X. */
+/*     See the descriptions of K, Z and W. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= M */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array */
+/*   > On entry, Y contains the data snapshot matrix Y */
+/*   < On exit, */
+/*     If JOBR == 'R', the leading K columns of Y  contain */
+/*     the residual vectors for the computed Ritz pairs. */
+/*     See the description of RES. */
+/*     If JOBR == 'N', Y contains the original input data, */
+/*                     scaled according to the value of JOBS. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= M */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the descriptions of TOL and  K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the POD basis for the data snapshot */
+/*     matrix X and the number of the computed Ritz pairs. */
+/*     The value of K is determined according to the rule set */
+/*     by the parameters NRNK and TOL. */
+/*     See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     EIGS (output) COMPLEX(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of EIGS contain */
+/*     the computed eigenvalues (Ritz values). */
+/*     See the descriptions of K, and Z. */
+/* ..... */
+/*     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array */
+/*     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i) */
+/*     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */
+/*     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) */
+/*     is an eigenvector corresponding to EIGS(i). The columns */
+/*     of W(1:k,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. */
+/*     See the descriptions of EIGS, X and W. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) N-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs, */
+/*     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */
+/*     See the description of EIGS and Z. */
+/* ..... */
+/*     B (output) COMPLEX(KIND=WP)  M-by-N array. */
+/*     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:M,1:K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     If JOBF =='N', then B is not referenced. */
+/*     See the descriptions of X, W, K. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= M */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     W (workspace/output) COMPLEX(KIND=WP) N-by-N array */
+/*     On exit, W(1:K,1:K) contains the K computed */
+/*     eigenvectors of the matrix Rayleigh quotient. */
+/*     The Ritz vectors (returned in Z) are the */
+/*     product of X (containing a POD basis for the input */
+/*     matrix X) and W. See the descriptions of K, S, X and Z. */
+/*     W is also used as a workspace to temporarily store the */
+/*     right singular vectors of X. */
+/* ..... */
+/*     LDW (input) INTEGER, LDW >= N */
+/*     The leading dimension of the array W. */
+/* ..... */
+/*     S (workspace/output) COMPLEX(KIND=WP) N-by-N array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by CGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array */
+/*     ZWORK is used as complex workspace in the complex SVD, as */
+/*     specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing */
+/*     the eigenvalues of a Rayleigh quotient. */
+/*     If the call to CGEDMD is only workspace query, then */
+/*     ZWORK(1) contains the minimal complex workspace length and */
+/*     ZWORK(2) is the optimal complex workspace length. */
+/*     Hence, the length of work is at least 2. */
+/*     See the description of LZWORK. */
+/* ..... */
+/*     LZWORK (input) INTEGER */
+/*     The minimal length of the workspace vector ZWORK. */
+/*     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV), */
+/*     where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal */
+/*     LZWORK_SVD is calculated as follows */
+/*     If WHTSVD == 1 :: CGESVD :: */
+/*        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */
+/*     If WHTSVD == 2 :: CGESDD :: */
+/*        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) */
+/*     If WHTSVD == 3 :: CGESVDQ :: */
+/*        LZWORK_SVD = obtainable by a query */
+/*     If WHTSVD == 4 :: CGEJSV :: */
+/*        LZWORK_SVD = obtainable by a query */
+/*     If on entry LZWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths and returns them in */
+/*     LZWORK(1) and LZWORK(2), respectively. */
+/* ..... */
+/*     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array */
+/*     On exit, RWORK(1:N) contains the singular values of */
+/*     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */
+/*     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain */
+/*     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X */
+/*     and Y to avoid overflow in the SVD of X. */
+/*     This may be of interest if the scaling option is off */
+/*     and as many as possible smallest eigenvalues are */
+/*     desired to the highest feasible accuracy. */
+/*     If the call to CGEDMD is only workspace query, then */
+/*     RWORK(1) contains the minimal workspace length. */
+/*     See the description of LRWORK. */
+/* ..... */
+/*     LRWORK (input) INTEGER */
+/*     The minimal length of the workspace vector RWORK. */
+/*     LRWORK is calculated as follows: */
+/*     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where */
+/*     LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace */
+/*     for the SVD subroutine determined by the input parameter */
+/*     WHTSVD. */
+/*     If WHTSVD == 1 :: CGESVD :: */
+/*        LRWORK_SVD = 5*MIN(M,N) */
+/*     If WHTSVD == 2 :: CGESDD :: */
+/*        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), */
+/*        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) */
+/*     If WHTSVD == 3 :: CGESVDQ :: */
+/*        LRWORK_SVD = obtainable by a query */
+/*     If WHTSVD == 4 :: CGEJSV :: */
+/*        LRWORK_SVD = obtainable by a query */
+/*     If on entry LRWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     real workspace length and returns it in RWORK(1). */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for  ZWORK, RWORK and */
+/*     IWORK. See the descriptions of ZWORK, RWORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     Local arrays */
+/*     ~~~~~~~~~~~~ */
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* ............................................................ */
+    /* Parameter adjustments */
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --eigs;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1 * 1;
+    w -= w_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --zwork;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    zero = 0.f;
+    one = 1.f;
+    zzero.r = 0.f, zzero.i = 0.f;
+    zone.r = 1.f, zone.i = 0.f;
+
+/*    Test the input arguments */
+
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    *info = 0;
+    lquery = *lzwork == -1 || *liwork == -1 || *lrwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || lsame_(jobz, "N") || lsame_(
+	    jobz, "F"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && ! wntvec) {
+	*info = -3;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -4;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*ldx < *m) {
+	*info = -9;
+    } else if (*ldy < *m) {
+	*info = -11;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -12;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -13;
+    } else if (*ldz < *m) {
+	*info = -17;
+    } else if ((wntref || wntex) && *ldb < *m) {
+	*info = -20;
+    } else if (*ldw < *n) {
+	*info = -22;
+    } else if (*lds < *n) {
+	*info = -24;
+    }
+
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0) {
+/* Quick return. All output except K is void. */
+/* INFO=1 signals the void input. */
+/* In case of a workspace query, the default */
+/* minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		rwork[1] = 1.f;
+		zwork[1].r = 2.f, zwork[1].i = 0.f;
+		zwork[2].r = 2.f, zwork[2].i = 0.f;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	iminwr = 1;
+	mlrwrk = f2cmax(1,*n);
+	mlwork = 2;
+	olwork = 2;
+/*         SELECT CASE ( WHTSVD ) */
+	if (*whtsvd == 1) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of CGESVD: */
+/* MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */
+/* Computing MAX */
+	    i__1 = 1, i__2 = (f2cmin(*m,*n) << 1) + f2cmax(*m,*n);
+	    mwrsvd = f2cmax(i__1,i__2);
+	    mlwork = f2cmax(mlwork,mwrsvd);
+/* Computing MAX */
+	    i__1 = mlrwrk, i__2 = *n + f2cmin(*m,*n) * 5;
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		cgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[
+			b_offset], ldb, &w[w_offset], ldw, &zwork[1], &c_n1, 
+			rdummy, &info1);
+		lwrsvd = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvd);
+	    }
+	} else if (*whtsvd == 2) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of CGESDD: */
+/* MWRSDD = 2*f2cmin(M,N)*f2cmin(M,N)+2*f2cmin(M,N)+f2cmax(M,N). */
+/* RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) */
+/* In LAPACK 3.10.1 RWORK is defined differently. */
+/* Below we take f2cmax over the two versions. */
+/* IMINWR = 8*MIN(M,N) */
+	    mwrsdd = (f2cmin(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) << 1) + f2cmax(
+		    *m,*n);
+	    mlwork = f2cmax(mlwork,mwrsdd);
+	    iminwr = f2cmin(*m,*n) << 3;
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 7, i__4 = f2cmin(*
+		    m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 5, i__3 = f2cmax(i__3,
+		    i__4), i__4 = (f2cmax(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n)
+		     << 1) * f2cmin(*m,*n) + f2cmin(*m,*n);
+	    i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4);
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		cgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset],
+			 ldb, &w[w_offset], ldw, &zwork[1], &c_n1, rdummy, &
+			iwork[1], &info1);
+/* Computing MAX */
+		i__1 = mwrsdd, i__2 = (integer) zwork[1].r;
+		lwrsdd = f2cmax(i__1,i__2);
+		olwork = f2cmax(olwork,lwrsdd);
+	    }
+	} else if (*whtsvd == 3) {
+	    cgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[
+		    1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &
+		    iwork[1], &c_n1, &zwork[1], &c_n1, rdummy, &c_n1, &info1);
+	    iminwr = iwork[1];
+	    mwrsvq = (integer) zwork[2].r;
+	    mlwork = f2cmax(mlwork,mwrsvq);
+/* Computing MAX */
+	    i__1 = mlrwrk, i__2 = *n + (integer) rdummy[0];
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		lwrsvq = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvq);
+	    }
+	} else if (*whtsvd == 4) {
+	    *(unsigned char *)jsvopt = 'J';
+	    cgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, 
+		    &rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[
+		    1], &c_n1, rdummy, &c_n1, &iwork[1], &info1);
+	    iminwr = iwork[1];
+	    mwrsvj = (integer) zwork[2].r;
+	    mlwork = f2cmax(mlwork,mwrsvj);
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = 7, i__4 = (integer) rdummy[0];
+	    i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4);
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		lwrsvj = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvj);
+	    }
+/*         END SELECT */
+	}
+	if (wntvec || wntex || lsame_(jobz, "F")) {
+	    *(unsigned char *)jobzl = 'V';
+	} else {
+	    *(unsigned char *)jobzl = 'N';
+	}
+/* Workspace calculation to the CGEEV call */
+/* Computing MAX */
+	i__1 = 1, i__2 = *n << 1;
+	mwrkev = f2cmax(i__1,i__2);
+	mlwork = f2cmax(mlwork,mwrkev);
+/* Computing MAX */
+	i__1 = mlrwrk, i__2 = *n + (*n << 1);
+	mlrwrk = f2cmax(i__1,i__2);
+	if (lquery) {
+	    cgeev_("N", jobzl, n, &s[s_offset], lds, &eigs[1], &w[w_offset], 
+		    ldw, &w[w_offset], ldw, &zwork[1], &c_n1, &rwork[1], &
+		    info1);
+/* LAPACK CALL */
+	    lwrkev = (integer) zwork[1].r;
+	    olwork = f2cmax(olwork,lwrkev);
+	    olwork = f2cmax(2,olwork);
+	}
+
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -30;
+	}
+	if (*lrwork < mlrwrk && ! lquery) {
+	    *info = -28;
+	}
+	if (*lzwork < mlwork && ! lquery) {
+	    *info = -26;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEDMD", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	rwork[1] = (real) mlrwrk;
+	zwork[1].r = (real) mlwork, zwork[1].i = 0.f;
+	zwork[2].r = (real) olwork, zwork[2].i = 0.f;
+	return 0;
+    }
+/* ............................................................ */
+
+    ofl = slamch_("O") * slamch_("P");
+    small = slamch_("S");
+    badxy = FALSE_;
+
+/*     <1> Optional scaling of the snapshots (columns of X, Y) */
+/*     ========================================================== */
+    if (sccolx) {
+/* The columns of X will be normalized. */
+/* To prevent overflows, the column norms of X are */
+/* carefully computed using CLASSQ. */
+	*k = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = SCNRM2( M, X(1,i), 1 ) */
+	    scale = zero;
+	    classq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum);
+	    if (sisnan_(&scale) || sisnan_(&ssum)) {
+		*k = 0;
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("CGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of X(:,i) overflows. First, X(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */
+/*                 Next, the norm of X(:,i) is stored without */
+/*                 overflow as WORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of X(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    r__1 = one / rootsc;
+		    clascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &x[
+			    i__ * x_dim1 + 1], ldx, &info2);
+		    rwork[i__] = -scale * (rootsc / (real) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    rwork[i__] = scale * rootsc;
+		    clascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, &
+			    x[i__ * x_dim1 + 1], ldx, &info2);
+/*                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC */
+/* LAPAC */
+		}
+	    } else {
+		rwork[i__] = zero;
+		++(*k);
+	    }
+	}
+	if (*k == *n) {
+/* All columns of X are zero. Return error code -8. */
+/* (the 8th input variable had an illegal value) */
+	    *k = 0;
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("CGEDMD", &i__1);
+	    return 0;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of Y. */
+	    if (rwork[i__] > zero) {
+		r__1 = one / rwork[i__];
+		csscal_(m, &r__1, &y[i__ * y_dim1 + 1], &c__1);
+/*               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (rwork[i__] < zero) {
+		r__1 = -rwork[i__];
+		r__2 = one / (real) (*m);
+		clascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &y[i__ * 
+			y_dim1 + 1], ldy, &info2);
+/* LAPACK C */
+	    } else if (c_abs(&y[icamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ 
+		    * y_dim1]) != zero) {
+/*               X(:,i) is zero vector. For consistency, */
+/*               Y(:,i) should also be zero. If Y(:,i) is not */
+/*               zero, then the data might be inconsistent or */
+/*               corrupted. If JOBS == 'C', Y(:,i) is set to */
+/*               zero and a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+		if (lsame_(jobs, "C")) {
+		    csscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1);
+		}
+/* BLAS CALL */
+	    }
+	}
+    }
+
+    if (sccoly) {
+/* The columns of Y will be normalized. */
+/* To prevent overflows, the column norms of Y are */
+/* carefully computed using CLASSQ. */
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* RWORK(i) = SCNRM2( M, Y(1,i), 1 ) */
+	    scale = zero;
+	    classq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum);
+	    if (sisnan_(&scale) || sisnan_(&ssum)) {
+		*k = 0;
+		*info = -10;
+		i__2 = -(*info);
+		xerbla_("CGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of Y(:,i) overflows. First, Y(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */
+/*                 Next, the norm of Y(:,i) is stored without */
+/*                 overflow as RWORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of Y(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    r__1 = one / rootsc;
+		    clascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &y[
+			    i__ * y_dim1 + 1], ldy, &info2);
+		    rwork[i__] = -scale * (rootsc / (real) (*m));
+		} else {
+/*                 Y(:,i) will be scaled to unit 2-norm */
+		    rwork[i__] = scale * rootsc;
+		    clascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, &
+			    y[i__ * y_dim1 + 1], ldy, &info2);
+/*                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC */
+/* LAPA */
+		}
+	    } else {
+		rwork[i__] = zero;
+	    }
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of X. */
+	    if (rwork[i__] > zero) {
+		r__1 = one / rwork[i__];
+		csscal_(m, &r__1, &x[i__ * x_dim1 + 1], &c__1);
+/*               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (rwork[i__] < zero) {
+		r__1 = -rwork[i__];
+		r__2 = one / (real) (*m);
+		clascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &x[i__ * 
+			x_dim1 + 1], ldx, &info2);
+/* LAPACK */
+	    } else if (c_abs(&x[icamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ 
+		    * x_dim1]) != zero) {
+/*               Y(:,i) is zero vector.  If X(:,i) is not */
+/*               zero, then a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+	    }
+	}
+    }
+
+/*     <2> SVD of the data snapshot matrix X. */
+/*     ===================================== */
+/*     The left singular vectors are stored in the array X. */
+/*     The right singular vectors are in the array W. */
+/*     The array W will later on contain the eigenvectors */
+/*     of a Rayleigh quotient. */
+    numrnk = *n;
+/*      SELECT CASE ( WHTSVD ) */
+    if (*whtsvd == 1) {
+	cgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], 
+		ldb, &w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &
+		info1);
+/* LA */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 2) {
+	cgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], ldb, &
+		w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &iwork[1]
+		, &info1);
+/* LAP */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 3) {
+	i__1 = *lrwork - *n;
+	cgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[1], 
+		&z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], 
+		liwork, &zwork[1], lzwork, &rwork[*n + 1], &i__1, &info1);
+/* LAPACK CA */
+	clacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK C */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 4) {
+	i__1 = *lrwork - *n;
+	cgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, &
+		rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[1], 
+		lzwork, &rwork[*n + 1], &i__1, &iwork[1], &info1);
+	clacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK CALL */
+	*(unsigned char *)t_or_n__ = 'N';
+	xscl1 = rwork[*n + 1];
+	xscl2 = rwork[*n + 2];
+	if (xscl1 != xscl2) {
+/* This is an exceptional situation. If the */
+/* data matrices are not scaled and the */
+/* largest singular value of X overflows. */
+/* In that case CGEJSV can return the SVD */
+/* in scaled form. The scaling factor can be used */
+/* to rescale the data (X and Y). */
+	    clascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], 
+		    ldy, &info2);
+	}
+/*      END SELECT */
+    }
+
+    if (info1 > 0) {
+/* The SVD selected subroutine did not converge. */
+/* Return with an error code. */
+	*info = 2;
+	return 0;
+    }
+
+    if (rwork[1] == zero) {
+/* The largest computed singular value of (scaled) */
+/* X is zero. Return error code -8 */
+/* (the 8th input variable had an illegal value). */
+	*k = 0;
+	*info = -8;
+	i__1 = -(*info);
+	xerbla_("CGEDMD", &i__1);
+	return 0;
+    }
+
+/* <3> Determine the numerical rank of the data */
+/*    snapshots matrix X. This depends on the */
+/*    parameters NRNK and TOL. */
+/*      SELECT CASE ( NRNK ) */
+    if (*nrnk == -1) {
+	*k = 1;
+	i__1 = numrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (rwork[i__] <= rwork[1] * *tol || rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else if (*nrnk == -2) {
+	*k = 1;
+	i__1 = numrnk - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (rwork[i__ + 1] <= rwork[i__] * *tol || rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else {
+	*k = 1;
+	i__1 = *nrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+/*          END SELECT */
+    }
+/*   Now, U = X(1:M,1:K) is the SVD/POD basis for the */
+/*   snapshot data in the input matrix X. */
+/* <4> Compute the Rayleigh quotient S = U^H * A * U. */
+/*    Depending on the requested outputs, the computation */
+/*    is organized to compute additional auxiliary */
+/*    matrices (for the residuals and refinements). */
+
+/*    In all formulas below, we need V_k*Sigma_k^(-1) */
+/*    where either V_k is in W(1:N,1:K), or V_k^H is in */
+/*    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */
+    if (lsame_(t_or_n__, "N")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    r__1 = one / rwork[i__];
+	    csscal_(n, &r__1, &w[i__ * w_dim1 + 1], &c__1);
+/* W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC */
+/* BLAS CALL */
+	}
+    } else {
+/* This non-unit stride access is due to the fact */
+/* that CGESVD, CGESVDQ and CGESDD return the */
+/* adjoint matrix of the right singular vectors. */
+/* DO i = 1, K */
+/* CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL */
+/* ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC */
+/* END DO */
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    rwork[*n + i__] = one / rwork[i__];
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *k;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * w_dim1;
+		i__4 = *n + i__;
+		q__2.r = rwork[i__4], q__2.i = zero;
+		i__5 = i__ + j * w_dim1;
+		q__1.r = q__2.r * w[i__5].r - q__2.i * w[i__5].i, q__1.i = 
+			q__2.r * w[i__5].i + q__2.i * w[i__5].r;
+		w[i__3].r = q__1.r, w[i__3].i = q__1.i;
+	    }
+	}
+    }
+
+    if (wntref) {
+
+/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */
+/* for computing the refined Ritz vectors */
+/* (optionally, outside CGEDMD). */
+	cgemm_("N", t_or_n__, m, k, n, &zone, &y[y_offset], ldy, &w[w_offset],
+		 ldw, &zzero, &z__[z_offset], ldz);
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRI */
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRI */
+
+/* At this point Z contains */
+/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */
+/* this is needed for computing the residuals. */
+/* This matrix is  returned in the array B and */
+/* it can be used to compute refined Ritz vectors. */
+/* BLAS */
+	clacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+/* B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC */
+/* BLAS CALL */
+	cgemm_("C", "N", k, k, m, &zone, &x[x_offset], ldx, &z__[z_offset], 
+		ldz, &zzero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */
+/* At this point S = U^H * A * U is the Rayleigh quotient. */
+/* BLAS */
+    } else {
+/* A * U(:,1:K) is not explicitly needed and the */
+/* computation is organized differently. The Rayleigh */
+/* quotient is computed more efficiently. */
+	cgemm_("C", "N", k, n, m, &zone, &x[x_offset], ldx, &y[y_offset], ldy,
+		 &zzero, &z__[z_offset], ldz);
+/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! IN */
+
+/* B */
+	cgemm_("N", t_or_n__, k, k, n, &zone, &z__[z_offset], ldz, &w[
+		w_offset], ldw, &zzero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRIN */
+/* At this point S = U^H * A * U is the Rayleigh quotient. */
+/* If the residuals are requested, save scaled V_k into Z. */
+/* Recall that V_k or V_k^H is stored in W. */
+/* BLAS */
+	if (wntres || wntex) {
+	    if (lsame_(t_or_n__, "N")) {
+		clacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    } else {
+		clacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    }
+	}
+    }
+
+/* <5> Compute the Ritz values and (if requested) the */
+/*   right eigenvectors of the Rayleigh quotient. */
+
+    cgeev_("N", jobzl, k, &s[s_offset], lds, &eigs[1], &w[w_offset], ldw, &w[
+	    w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &info1);
+
+/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */
+/* quotient.  See the description of Z. */
+/* Also, see the description of CGEEV. */
+/* LAPACK CA */
+    if (info1 > 0) {
+/* CGEEV failed to compute the eigenvalues and */
+/* eigenvectors of the Rayleigh quotient. */
+	*info = 3;
+	return 0;
+    }
+
+/* <6> Compute the eigenvectors (if requested) and, */
+/* the residuals (if requested). */
+
+    if (wntvec || wntex) {
+	if (wntres) {
+	    if (wntref) {
+/* Here, if the refinement is requested, we have */
+/* A*U(:,1:K) already computed and stored in Z. */
+/* For the residuals, need Y = A * U(:,1;K) * W. */
+		cgemm_("N", "N", m, k, k, &zone, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zzero, &y[y_offset], ldy);
+/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC */
+/* This frees Z; Y contains A * U(:,1:K) * W. */
+/* BLAS CALL */
+	    } else {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) (or its adjoint) is stored in Z */
+		cgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zzero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+		cgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[
+			s_offset], lds, &zzero, &z__[z_offset], ldz);
+/* Save a copy of Z into Y and free Z for holding */
+/* the Ritz vectors. */
+		clacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy);
+		if (wntex) {
+		    clacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+		}
+	    }
+	} else if (wntex) {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+	    cgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[
+		    w_offset], ldw, &zzero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+	    cgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[s_offset], 
+		    lds, &zzero, &b[b_offset], ldb);
+/* The above call replaces the following two calls */
+/* that were used in the developing-testing phase. */
+/* CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & */
+/*           LDS, ZZERO, Z, LDZ) */
+/* Save a copy of Z into Y and free Z for holding */
+/* the Ritz vectors. */
+/* CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) */
+	}
+
+/* Compute the Ritz vectors */
+	if (wntvec) {
+	    cgemm_("N", "N", m, k, k, &zone, &x[x_offset], ldx, &w[w_offset], 
+		    ldw, &zzero, &z__[z_offset], ldz);
+	}
+/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRIN */
+
+/* BLAS CALL */
+	if (wntres) {
+	    i__1 = *k;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = i__;
+		q__1.r = -eigs[i__2].r, q__1.i = -eigs[i__2].i;
+		caxpy_(m, &q__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * 
+			y_dim1 + 1], &c__1);
+/* Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! */
+
+		res[i__] = scnrm2_(m, &y[i__ * y_dim1 + 1], &c__1);
+
+	    }
+	}
+    }
+
+    if (*whtsvd == 4) {
+	rwork[*n + 1] = xscl1;
+	rwork[*n + 2] = xscl2;
+    }
+
+/*     Successful exit. */
+    if (! badxy) {
+	*info = 0;
+    } else {
+/* A warning on possible data inconsistency. */
+/* This should be a rare event. */
+	*info = 4;
+    }
+/* ............................................................ */
+    return 0;
+/*     ...... */
+} /* cgedmd_ */
+
diff --git a/lapack-netlib/SRC/cgedmdq.c b/lapack-netlib/SRC/cgedmdq.c
index 447b23014..6e3a1faca 100644
--- a/lapack-netlib/SRC/cgedmdq.c
+++ b/lapack-netlib/SRC/cgedmdq.c
@@ -509,3 +509,781 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+
+/* Subroutine */ int cgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, 
+	char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, 
+	complex *f, integer *ldf, complex *x, integer *ldx, complex *y, 
+	integer *ldy, integer *nrnk, real *tol, integer *k, complex *eigs, 
+	complex *z__, integer *ldz, real *res, complex *b, integer *ldb, 
+	complex *v, integer *ldv, complex *s, integer *lds, complex *zwork, 
+	integer *lzwork, real *work, integer *lwork, integer *iwork, integer *
+	liwork, integer *info)
+{
+    /* System generated locals */
+    integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, 
+	    z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, 
+	    i__1, i__2;
+
+    /* Local variables */
+    real zero;
+    integer info1;
+    extern logical lsame_(char *, char *);
+    char jobvl[1];
+    integer minmn;
+    logical wantq;
+    integer mlwqr, olwqr;
+    logical wntex;
+    complex zzero;
+    extern /* Subroutine */ int cgedmd_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, complex *, integer *, complex *, 
+	    integer *, integer *, real *, integer *, complex *, complex *, 
+	    integer *, real *, complex *, integer *, complex *, integer *, 
+	    complex *, integer *, complex *, integer *, real *, integer *, 
+	    integer *, integer *, integer *), 
+	    cgeqrf_(integer *, integer *, complex *, integer *, complex *, 
+	    complex *, integer *, integer *), clacpy_(char *, integer *, 
+	    integer *, complex *, integer *, complex *, integer *), 
+	    claset_(char *, integer *, integer *, complex *, complex *, 
+	    complex *, integer *), xerbla_(char *, integer *);
+    integer mlwdmd, olwdmd;
+    logical sccolx, sccoly;
+    extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, 
+	    complex *, integer *, complex *, complex *, integer *, integer *);
+    integer iminwr;
+    logical wntvec, wntvcf;
+    integer mlwgqr;
+    logical wntref;
+    integer mlwork, olwgqr, olwork, mlrwrk, mlwmqr, olwmqr;
+    logical lquery, wntres, wnttrf, wntvcq;
+    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
+	    integer *, complex *, integer *, complex *, complex *, integer *, 
+	    complex *, integer *, integer *);
+    real one;
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real32 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ..... */
+/*     Purpose */
+/*     ======= */
+/*     CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices, using a QR factorization */
+/*     based compression of the data. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, CGEDMDQ computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, CGEDMDQ returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office. */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. The data snapshots are the columns */
+/*     of F. The leading N-1 columns of F are denoted X and the */
+/*     trailing N-1 columns are denoted Y. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Z*V, where Z */
+/*            is orthonormal and V contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of F, V, Z. */
+/*     'Q' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Q*Z, where Z */
+/*            contains the eigenvectors of the compression of the */
+/*            underlying discretised operator onto the span of */
+/*            the data snapshots. See the descriptions of F, V, Z. */
+/*            Q is from the inital QR facorization. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will */
+/*            be computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBQ (input) CHARACTER*1 */
+/*     Specifies whether to explicitly compute and return the */
+/*     unitary matrix from the QR factorization. */
+/*     'Q' :: The matrix Q of the QR factorization of the data */
+/*            snapshot matrix is computed and stored in the */
+/*            array F. See the description of F. */
+/*     'N' :: The matrix Q is not explicitly computed. */
+/* ..... */
+/*     JOBT (input) CHARACTER*1 */
+/*     Specifies whether to return the upper triangular factor */
+/*     from the QR factorization. */
+/*     'R' :: The matrix R of the QR factorization of the data */
+/*            snapshot matrix F is returned in the array Y. */
+/*            See the description of Y and Further details. */
+/*     'N' :: The matrix R is not returned. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/*     To be useful on exit, this option needs JOBQ='Q'. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: CGESVD (the QR SVD algorithm) */
+/*     2 :: CGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M >= 0 */
+/*     The state space dimension (the number of rows of F). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshots from a single trajectory, */
+/*     taken at equidistant discrete times. This is the */
+/*     number of columns of F. */
+/* ..... */
+/*     F (input/output) COMPLEX(KIND=WP) M-by-N array */
+/*     > On entry, */
+/*     the columns of F are the sequence of data snapshots */
+/*     from a single trajectory, taken at equidistant discrete */
+/*     times. It is assumed that the column norms of F are */
+/*     in the range of the normalized floating point numbers. */
+/*     < On exit, */
+/*     If JOBQ == 'Q', the array F contains the orthogonal */
+/*     matrix/factor of the QR factorization of the initial */
+/*     data snapshots matrix F. See the description of JOBQ. */
+/*     If JOBQ == 'N', the entries in F strictly below the main */
+/*     diagonal contain, column-wise, the information on the */
+/*     Householder vectors, as returned by CGEQRF. The */
+/*     remaining information to restore the orthogonal matrix */
+/*     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). */
+/*     See the description of ZWORK. */
+/* ..... */
+/*     LDF (input) INTEGER, LDF >= M */
+/*     The leading dimension of the array F. */
+/* ..... */
+/*     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     X is used as workspace to hold representations of the */
+/*     leading N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, the leading K columns of X contain the leading */
+/*     K left singular vectors of the above described content */
+/*     of X. To lift them to the space of the left singular */
+/*     vectors U(:,1:K) of the input data, pre-multiply with the */
+/*     Q factor from the initial QR factorization. */
+/*     See the descriptions of F, K, V  and Z. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= N */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array */
+/*     Y is used as workspace to hold representations of the */
+/*     trailing N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, */
+/*     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */
+/*     triangular factor from the QR factorization of the data */
+/*     snapshot matrix F. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= N */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N-1 :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the description of K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the SVD/POD basis for the leading N-1 */
+/*     data snapshots (columns of F) and the number of the */
+/*     computed Ritz pairs. The value of K is determined */
+/*     according to the rule set by the parameters NRNK and */
+/*     TOL. See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<=N-1) entries of EIGS contain */
+/*     the computed eigenvalues (Ritz values). */
+/*     See the descriptions of K, and Z. */
+/* ..... */
+/*     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array */
+/*     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */
+/*     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */
+/*     Z*V, where Z contains orthonormal matrix (the product of */
+/*     Q from the initial QR factorization and the SVD/POD_basis */
+/*     returned by CGEDMD in X) and the second factor (the */
+/*     eigenvectors of the Rayleigh quotient) is in the array V, */
+/*     as returned by CGEDMD. That is,  X(:,1:K)*V(:,i) */
+/*     is an eigenvector corresponding to EIGS(i). The columns */
+/*     of V(1:K,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. */
+/*     See the descriptions of EIGS, X and V. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs, */
+/*     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */
+/*     See the description of EIGS and Z. */
+/* ..... */
+/*     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array. */
+/*     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:N,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     In both cases, the content of B can be lifted to the */
+/*     original dimension of the input data by pre-multiplying */
+/*     with the Q factor from the initial QR factorization. */
+/*     Here A denotes a compression of the underlying operator. */
+/*     See the descriptions of F and X. */
+/*     If JOBF =='N', then B is not referenced. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= MIN(M,N) */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */
+/*     On exit, V(1:K,1:K) V contains the K eigenvectors of */
+/*     the Rayleigh quotient. The Ritz vectors */
+/*     (returned in Z) are the product of Q from the initial QR */
+/*     factorization (see the description of F) X (see the */
+/*     description of X) and V. */
+/* ..... */
+/*     LDV (input) INTEGER, LDV >= N-1 */
+/*     The leading dimension of the array V. */
+/* ..... */
+/*     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by CGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N-1 */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     ZWORK(1:MIN(M,N)) contains the scalar factors of the */
+/*     elementary reflectors as returned by CGEQRF of the */
+/*     M-by-N input matrix F. */
+/*     If the call to CGEDMDQ is only workspace query, then */
+/*     ZWORK(1) contains the minimal complex workspace length and */
+/*     ZWORK(2) is the optimal complex workspace length. */
+/*     Hence, the length of work is at least 2. */
+/*     See the description of LZWORK. */
+/* ..... */
+/*     LZWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector ZWORK. */
+/*     LZWORK is calculated as follows: */
+/*     Let MLWQR  = N (minimal workspace for CGEQRF[M,N]) */
+/*         MLWDMD = minimal workspace for CGEDMD (see the */
+/*                  description of LWORK in CGEDMD) */
+/*         MLWMQR = N (minimal workspace for */
+/*                    ZUNMQR['L','N',M,N,N]) */
+/*         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) */
+/*         MINMN  = MIN(M,N) */
+/*     Then */
+/*     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) */
+/*     is further updated as follows: */
+/*        if   JOBZ == 'V' or JOBZ == 'F' THEN */
+/*             LZWORK = MAX( LZWORK, MINMN+MLWMQR ) */
+/*        if   JOBQ == 'Q' THEN */
+/*             LZWORK = MAX( ZLWORK, MINMN+MLWGQR) */
+
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     WORK(1:N-1) contains the singular values of */
+/*     the input submatrix F(1:M,1:N-1). */
+/*     If the call to CGEDMDQ is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. hence, the */
+/*     length of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector WORK. */
+/*     LWORK is the same as in CGEDMD, because in CGEDMDQ */
+/*     only CGEDMD requires real workspace for snapshots */
+/*     of dimensions MIN(M,N)-by-(N-1). */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     Let M1=MIN(M,N), N1=N-1. Then */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP ) */
+
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     External subroutines */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* .......................................................... */
+    /* Parameter adjustments */
+    f_dim1 = *ldf;
+    f_offset = 1 + f_dim1 * 1;
+    f -= f_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --eigs;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --zwork;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    one = 1.f;
+    zero = 0.f;
+    zzero.r = 0.f, zzero.i = 0.f;
+
+/*    Test the input arguments */
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntvcf = lsame_(jobz, "F");
+    wntvcq = lsame_(jobz, "Q");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    wantq = lsame_(jobq, "Q");
+    wnttrf = lsame_(jobt, "R");
+    minmn = f2cmin(*m,*n);
+    *info = 0;
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && lsame_(jobz, "N")) {
+	*info = -3;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -4;
+    } else if (! (wnttrf || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -6;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -7;
+    } else if (*m < 0) {
+	*info = -8;
+    } else if (*n < 0 || *n > *m + 1) {
+	*info = -9;
+    } else if (*ldf < *m) {
+	*info = -11;
+    } else if (*ldx < minmn) {
+	*info = -13;
+    } else if (*ldy < minmn) {
+	*info = -15;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -16;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -17;
+    } else if (*ldz < *m) {
+	*info = -21;
+    } else if ((wntref || wntex) && *ldb < minmn) {
+	*info = -24;
+    } else if (*ldv < *n - 1) {
+	*info = -26;
+    } else if (*lds < *n - 1) {
+	*info = -28;
+    }
+
+    if (wntvec || wntvcf || wntvcq) {
+	*(unsigned char *)jobvl = 'V';
+    } else {
+	*(unsigned char *)jobvl = 'N';
+    }
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0 || *n == 1) {
+/* All output except K is void. INFO=1 signals */
+/* the void input. In case of a workspace query, */
+/* the minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		work[1] = 2.f;
+		work[2] = 2.f;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlrwrk = 2;
+	mlwork = 2;
+	olwork = 2;
+	iminwr = 1;
+	mlwqr = f2cmax(1,*n);
+/* Minimal workspace length for CGEQRF. */
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwqr;
+	mlwork = f2cmax(i__1,i__2);
+	if (lquery) {
+	    cgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[1], &c_n1, &
+		    info1);
+	    olwqr = (integer) zwork[1].r;
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwqr;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	i__1 = *n - 1;
+	cgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], 
+		ldx, &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset]
+		, ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[
+		s_offset], lds, &zwork[1], lzwork, &work[1], &c_n1, &iwork[1],
+		 liwork, &info1);
+	mlwdmd = (integer) zwork[1].r;
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwdmd;
+	mlwork = f2cmax(i__1,i__2);
+/* Computing MAX */
+	i__1 = mlrwrk, i__2 = (integer) work[1];
+	mlrwrk = f2cmax(i__1,i__2);
+	iminwr = f2cmax(iminwr,iwork[1]);
+	if (lquery) {
+	    olwdmd = (integer) zwork[2].r;
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwdmd;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	if (wntvec || wntvcf) {
+	    mlwmqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + mlwmqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		cunmqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &zwork[1], 
+			&z__[z_offset], ldz, &zwork[1], &c_n1, &info1);
+		olwmqr = (integer) zwork[1].r;
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + olwmqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (wantq) {
+	    mlwgqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + mlwgqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		cungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &
+			zwork[1], &c_n1, &info1);
+		olwgqr = (integer) zwork[1].r;
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + olwgqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -34;
+	}
+	if (*lwork < mlrwrk && ! lquery) {
+	    *info = -32;
+	}
+	if (*lzwork < mlwork && ! lquery) {
+	    *info = -30;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEDMDQ", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	zwork[1].r = (real) mlwork, zwork[1].i = 0.f;
+	zwork[2].r = (real) olwork, zwork[2].i = 0.f;
+	work[1] = (real) mlrwrk;
+	work[2] = (real) mlrwrk;
+	return 0;
+    }
+/* ..... */
+/*     Initial QR factorization that is used to represent the */
+/*     snapshots as elements of lower dimensional subspace. */
+/*     For large scale computation with M >>N , at this place */
+/*     one can use an out of core QRF. */
+
+    i__1 = *lzwork - minmn;
+    cgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[minmn + 1], &i__1, &
+	    info1);
+
+/*     Define X and Y as the snapshots representations in the */
+/*     orthogonal basis computed in the QR factorization. */
+/*     X corresponds to the leading N-1 and Y to the trailing */
+/*     N-1 snapshots. */
+    i__1 = *n - 1;
+    claset_("L", &minmn, &i__1, &zzero, &zzero, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    clacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    clacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy);
+    if (*m >= 3) {
+	i__1 = minmn - 2;
+	i__2 = *n - 2;
+	claset_("L", &i__1, &i__2, &zzero, &zzero, &y[y_dim1 + 3], ldy);
+    }
+
+/*     Compute the DMD of the projected snapshot pairs (X,Y) */
+    i__1 = *n - 1;
+    i__2 = *lzwork - minmn;
+    cgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx,
+	     &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset], ldz, &
+	    res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[s_offset], lds, &
+	    zwork[minmn + 1], &i__2, &work[1], lwork, &iwork[1], liwork, &
+	    info1);
+    if (info1 == 2 || info1 == 3) {
+/* Return with error code. See CGEDMD for details. */
+	*info = info1;
+	return 0;
+    } else {
+	*info = info1;
+    }
+
+/*     The Ritz vectors (Koopman modes) can be explicitly */
+/*     formed or returned in factored form. */
+    if (wntvec) {
+/* Compute the eigenvectors explicitly. */
+	if (*m > minmn) {
+	    i__1 = *m - minmn;
+	    claset_("A", &i__1, k, &zzero, &zzero, &z__[minmn + 1 + z_dim1], 
+		    ldz);
+	}
+	i__1 = *lzwork - minmn;
+	cunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[
+		z_offset], ldz, &zwork[minmn + 1], &i__1, &info1);
+    } else if (wntvcf) {
+/*   Return the Ritz vectors (eigenvectors) in factored */
+/*   form Z*V, where Z contains orthonormal matrix (the */
+/*   product of Q from the initial QR factorization and */
+/*   the SVD/POD_basis returned by CGEDMD in X) and the */
+/*   second factor (the eigenvectors of the Rayleigh */
+/*   quotient) is in the array V, as returned by CGEDMD. */
+	clacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz);
+	if (*m > *n) {
+	    i__1 = *m - *n;
+	    claset_("A", &i__1, k, &zzero, &zzero, &z__[*n + 1 + z_dim1], ldz);
+	}
+	i__1 = *lzwork - minmn;
+	cunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[
+		z_offset], ldz, &zwork[minmn + 1], &i__1, &info1);
+    }
+
+/*     Some optional output variables: */
+
+/*     The upper triangular factor R in the initial QR */
+/*     factorization is optionally returned in the array Y. */
+/*     This is useful if this call to CGEDMDQ is to be */
+/*     followed by a streaming DMD that is implemented in a */
+/*     QR compressed form. */
+    if (wnttrf) {
+/* Return the upper triangular R in Y */
+	claset_("A", &minmn, n, &zzero, &zzero, &y[y_offset], ldy);
+	clacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy);
+    }
+
+/*     The orthonormal/unitary factor Q in the initial QR */
+/*     factorization is optionally returned in the array F. */
+/*     Same as with the triangular factor above, this is */
+/*     useful in a streaming DMD. */
+    if (wantq) {
+/* Q overwrites F */
+	i__1 = *lzwork - minmn;
+	cungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &zwork[minmn 
+		+ 1], &i__1, &info1);
+    }
+
+    return 0;
+
+} /* cgedmdq_ */
+
diff --git a/lapack-netlib/SRC/dgedmd.c b/lapack-netlib/SRC/dgedmd.c
index 447b23014..66b4d5da6 100644
--- a/lapack-netlib/SRC/dgedmd.c
+++ b/lapack-netlib/SRC/dgedmd.c
@@ -509,3 +509,1245 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c__2 = 2;
+
+/* Subroutine */ int dgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, 
+	integer *whtsvd, integer *m, integer *n, doublereal *x, integer *ldx, 
+	doublereal *y, integer *ldy, integer *nrnk, doublereal *tol, integer *
+	k, doublereal *reig, doublereal *imeig, doublereal *z__, integer *ldz,
+	 doublereal *res, doublereal *b, integer *ldb, doublereal *w, integer 
+	*ldw, doublereal *s, integer *lds, doublereal *work, integer *lwork, 
+	integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, 
+	    b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal zero, ssum;
+    integer info1, info2;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    doublereal xscl1, xscl2;
+    integer i__, j;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    doublereal scale;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *),
+	     dgeev_(char *, char *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, doublereal *, integer *, integer *);
+    extern logical lsame_(char *, char *);
+    logical badxy;
+    doublereal small;
+    char jobzl[1];
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    logical wntex;
+    doublereal ab[4]	/* was [2][2] */;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgesdd_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *, 
+	    integer *), dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern logical disnan_(doublereal *);
+    extern /* Subroutine */ int dgesvd_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dlacpy_(char *, integer *, integer *, doublereal 
+	    *, integer *, doublereal *, integer *), xerbla_(char *, 
+	    integer *);
+    char t_or_n__[1];
+    extern /* Subroutine */ int dgejsv_(char *, char *, char *, char *, char *
+	    , char *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *, integer *), dlassq_(integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *);
+    logical sccolx, sccoly;
+    integer lwrsdd, mwrsdd, iminwr;
+    logical wntref, wntvec;
+    doublereal rootsc;
+    integer lwrkev, mlwork, mwrkev, numrnk, olwork;
+    doublereal rdummy[2];
+    integer lwrsvd, mwrsvd;
+    logical lquery, wntres;
+    char jsvopt[1];
+    extern /* Subroutine */ int mecago_();
+    integer mwrsvj, lwrsvq, mwrsvq;
+    doublereal rdummy2[2], ofl, one;
+    extern /* Subroutine */ int dgesvdq_(char *, char *, char *, char *, char 
+	    *, integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *);
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real64 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ............................................................ */
+/*     Purpose */
+/*     ======= */
+/*     DGEDMD computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, DGEDMD computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, DGEDMD returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/* ...................................................................... */
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ............................................................ */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product X(:,1:K)*W, where X */
+/*            contains a POD basis (leading left singular vectors */
+/*            of the data matrix X) and W contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of K, X, W, Z. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will be */
+/*            computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: DGESVD (the QR SVD algorithm) */
+/*     2 :: DGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M>= 0 */
+/*     The state space dimension (the row dimension of X, Y). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshot pairs */
+/*     (the number of columns of X and Y). */
+/* ..... */
+/*     X (input/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, X contains the data snapshot matrix X. It is */
+/*     assumed that the column norms of X are in the range of */
+/*     the normalized floating point numbers. */
+/*     < On exit, the leading K columns of X contain a POD basis, */
+/*     i.e. the leading K left singular vectors of the input */
+/*     data matrix X, U(:,1:K). All N columns of X contain all */
+/*     left singular vectors of the input matrix X. */
+/*     See the descriptions of K, Z and W. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= M */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (input/workspace/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, Y contains the data snapshot matrix Y */
+/*     < On exit, */
+/*     If JOBR == 'R', the leading K columns of Y  contain */
+/*     the residual vectors for the computed Ritz pairs. */
+/*     See the description of RES. */
+/*     If JOBR == 'N', Y contains the original input data, */
+/*                     scaled according to the value of JOBS. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= M */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1). */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the descriptions of TOL and  K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the POD basis for the data snapshot */
+/*     matrix X and the number of the computed Ritz pairs. */
+/*     The value of K is determined according to the rule set */
+/*     by the parameters NRNK and TOL. */
+/*     See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     REIG (output) REAL(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of REIG contain */
+/*     the real parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     See the descriptions of K, IMEIG, and Z. */
+/* ..... */
+/*     IMEIG (output) REAL(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of IMEIG contain */
+/*     the imaginary parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     The eigenvalues are determined as follows: */
+/*     If IMEIG(i) == 0, then the corresponding eigenvalue is */
+/*     real, LAMBDA(i) = REIG(i). */
+/*     If IMEIG(i)>0, then the corresponding complex */
+/*     conjugate pair of eigenvalues reads */
+/*     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i) */
+/*     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */
+/*     That is, complex conjugate pairs have consecutive */
+/*     indices (i,i+1), with the positive imaginary part */
+/*     listed first. */
+/*     See the descriptions of K, REIG, and Z. */
+/* ..... */
+/*     Z (workspace/output) REAL(KIND=WP)  M-by-N array */
+/*     If JOBZ =='V' then */
+/*        Z contains real Ritz vectors as follows: */
+/*        If IMEIG(i)=0, then Z(:,i) is an eigenvector of */
+/*        the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */
+/*        [Z(:,i) Z(:,i+1)] span an invariant subspace and */
+/*        the Ritz values extracted from this subspace are */
+/*        REIG(i) + sqrt(-1)*IMEIG(i) and */
+/*        REIG(i) - sqrt(-1)*IMEIG(i). */
+/*        The corresponding eigenvectors are */
+/*        Z(:,i) + sqrt(-1)*Z(:,i+1) and */
+/*        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */
+/*        || Z(:,i:i+1)||_F = 1. */
+/*     If JOBZ == 'F', then the above descriptions hold for */
+/*     the columns of X(:,1:K)*W(1:K,1:K), where the columns */
+/*     of W(1:k,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. The columns of W(1:K,1:K) */
+/*     are similarly structured: If IMEIG(i) == 0 then */
+/*     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 */
+/*     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and */
+/*          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) */
+/*     are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */
+/*     See the descriptions of REIG, IMEIG, X and W. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) N-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs. */
+/*     If LAMBDA(i) is real, then */
+/*        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */
+/*     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */
+/*     then */
+/*     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */
+/*     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */
+/*               [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */
+/*     It holds that */
+/*     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2 */
+/*     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */
+/*     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1) */
+/*           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1) */
+/*     See the description of REIG, IMEIG and Z. */
+/* ..... */
+/*     B (output) REAL(KIND=WP)  M-by-N array. */
+/*     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:M,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     If JOBF =='N', then B is not referenced. */
+/*     See the descriptions of X, W, K. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= M */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     W (workspace/output) REAL(KIND=WP) N-by-N array */
+/*     On exit, W(1:K,1:K) contains the K computed */
+/*     eigenvectors of the matrix Rayleigh quotient (real and */
+/*     imaginary parts for each complex conjugate pair of the */
+/*     eigenvalues). The Ritz vectors (returned in Z) are the */
+/*     product of X (containing a POD basis for the input */
+/*     matrix X) and W. See the descriptions of K, S, X and Z. */
+/*     W is also used as a workspace to temporarily store the */
+/*     right singular vectors of X. */
+/* ..... */
+/*     LDW (input) INTEGER, LDW >= N */
+/*     The leading dimension of the array W. */
+/* ..... */
+/*     S (workspace/output) REAL(KIND=WP) N-by-N array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by DGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, WORK(1:N) contains the singular values of */
+/*     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */
+/*     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain */
+/*     scaling factor WORK(N+2)/WORK(N+1) used to scale X */
+/*     and Y to avoid overflow in the SVD of X. */
+/*     This may be of interest if the scaling option is off */
+/*     and as many as possible smallest eigenvalues are */
+/*     desired to the highest feasible accuracy. */
+/*     If the call to DGEDMD is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. Hence, the */
+/*     leng of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the workspace vector WORK. */
+/*     LWORK is calculated as follows: */
+/*     If WHTSVD == 1 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). */
+/*        If JOBZ == 'N'  then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). */
+/*        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal */
+/*        workspace length of DGESVD. */
+/*     If WHTSVD == 2 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the */
+/*        minimal workspace length of DGESDD. */
+/*     If WHTSVD == 3 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = N+M+MAX(3*N+1, */
+/*                        MAX(1,3*N+M,5*N),MAX(1,N)) */
+/*        is the minimal workspace length of DGESVDQ. */
+/*     If WHTSVD == 4 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the */
+/*        minimal workspace length of DGEJSV. */
+/*     The above expressions are not simplified in order to */
+/*     make the usage of WORK more transparent, and for */
+/*     easier checking. In any case, LWORK >= 2. */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+/*     Local arrays */
+/*     ~~~~~~~~~~~~ */
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* ............................................................ */
+    /* Parameter adjustments */
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --reig;
+    --imeig;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1 * 1;
+    w -= w_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    one = 1.f;
+    zero = 0.f;
+
+/*    Test the input arguments */
+
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    *info = 0;
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || lsame_(jobz, "N") || lsame_(
+	    jobz, "F"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && ! wntvec) {
+	*info = -3;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -4;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*ldx < *m) {
+	*info = -9;
+    } else if (*ldy < *m) {
+	*info = -11;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -12;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -13;
+    } else if (*ldz < *m) {
+	*info = -18;
+    } else if ((wntref || wntex) && *ldb < *m) {
+	*info = -21;
+    } else if (*ldw < *n) {
+	*info = -23;
+    } else if (*lds < *n) {
+	*info = -25;
+    }
+
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0) {
+/* Quick return. All output except K is void. */
+/* INFO=1 signals the void input. */
+/* In case of a workspace query, the default */
+/* minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		work[1] = 2.;
+		work[2] = 2.;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlwork = f2cmax(2,*n);
+	olwork = f2cmax(2,*n);
+	iminwr = 1;
+/*         SELECT CASE ( WHTSVD ) */
+	if (*whtsvd == 1) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of DGESVD: */
+/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */
+/* Computing MAX */
+	    i__1 = 1, i__2 = f2cmin(*m,*n) * 3 + f2cmax(*m,*n), i__1 = f2cmax(i__1,
+		    i__2), i__2 = f2cmin(*m,*n) * 5;
+	    mwrsvd = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvd;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		dgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[
+			b_offset], ldb, &w[w_offset], ldw, rdummy, &c_n1, &
+			info1);
+/* Computing MAX */
+		i__1 = mwrsvd, i__2 = (integer) rdummy[0];
+		lwrsvd = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsvd;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 2) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of DGESDD: */
+/* MWRSDD = 3*MIN(M,N)*MIN(M,N) + */
+/* MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) */
+/* IMINWR = 8*MIN(M,N) */
+/* Computing MAX */
+	    i__1 = f2cmax(*m,*n), i__2 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + (f2cmin(*m,*
+		    n) << 2);
+	    mwrsdd = f2cmin(*m,*n) * 3 * f2cmin(*m,*n) + f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsdd;
+	    mlwork = f2cmax(i__1,i__2);
+	    iminwr = f2cmin(*m,*n) << 3;
+	    if (lquery) {
+		dgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], 
+			ldb, &w[w_offset], ldw, rdummy, &c_n1, &iwork[1], &
+			info1);
+/* Computing MAX */
+		i__1 = mwrsdd, i__2 = (integer) rdummy[0];
+		lwrsdd = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsdd;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 3) {
+/* LWQP3 = 3*N+1 */
+/* LWORQ = MAX(N, 1) */
+/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */
+/* MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2) */
+/* MLWORK = N +  MWRSVQ */
+/* IMINWR = M+N-1 */
+	    dgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[
+		    1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &
+		    iwork[1], liwork, rdummy, &c_n1, rdummy2, &c_n1, &info1);
+	    iminwr = iwork[1];
+	    mwrsvq = (integer) rdummy[1];
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvq + (integer) rdummy2[0];
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+/* Computing MAX */
+		i__1 = mwrsvq, i__2 = (integer) rdummy[0];
+		lwrsvq = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsvq + (integer) rdummy2[0];
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 4) {
+	    *(unsigned char *)jsvopt = 'J';
+/* MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V' */
+/* Computing MAX */
+	    i__1 = 7, i__2 = (*m << 1) + *n, i__1 = f2cmax(i__1,i__2), i__2 = (*
+		    n << 2) + *n * *n, i__1 = f2cmax(i__1,i__2), i__2 = (*n << 1)
+		     + *n * *n + 6;
+	    mwrsvj = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvj;
+	    mlwork = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = 3, i__2 = *m + *n * 3;
+	    iminwr = f2cmax(i__1,i__2);
+	    if (lquery) {
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + mwrsvj;
+		olwork = f2cmax(i__1,i__2);
+	    }
+/*         END SELECT */
+	}
+	if (wntvec || wntex || lsame_(jobz, "F")) {
+	    *(unsigned char *)jobzl = 'V';
+	} else {
+	    *(unsigned char *)jobzl = 'N';
+	}
+/* Workspace calculation to the DGEEV call */
+	if (lsame_(jobzl, "V")) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n << 2;
+	    mwrkev = f2cmax(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * 3;
+	    mwrkev = f2cmax(i__1,i__2);
+	}
+/* Computing MAX */
+	i__1 = mlwork, i__2 = *n + mwrkev;
+	mlwork = f2cmax(i__1,i__2);
+	if (lquery) {
+	    dgeev_("N", jobzl, n, &s[s_offset], lds, &reig[1], &imeig[1], &w[
+		    w_offset], ldw, &w[w_offset], ldw, rdummy, &c_n1, &info1);
+/* Computing MAX */
+	    i__1 = mwrkev, i__2 = (integer) rdummy[0];
+	    lwrkev = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = olwork, i__2 = *n + lwrkev;
+	    olwork = f2cmax(i__1,i__2);
+	}
+
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -29;
+	}
+	if (*lwork < mlwork && ! lquery) {
+	    *info = -27;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEDMD", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	work[1] = (doublereal) mlwork;
+	work[2] = (doublereal) olwork;
+	return 0;
+    }
+/* ............................................................ */
+
+    ofl = dlamch_("O");
+    small = dlamch_("S");
+    badxy = FALSE_;
+
+/*     <1> Optional scaling of the snapshots (columns of X, Y) */
+/*     ========================================================== */
+    if (sccolx) {
+/* The columns of X will be normalized. */
+/* To prevent overflows, the column norms of X are */
+/* carefully computed using DLASSQ. */
+	*k = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = DNRM2( M, X(1,i), 1 ) */
+	    scale = zero;
+	    dlassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum);
+	    if (disnan_(&scale) || disnan_(&ssum)) {
+		*k = 0;
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("DGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of X(:,i) overflows. First, X(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */
+/*                 Next, the norm of X(:,i) is stored without */
+/*                 overflow as WORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of X(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    d__1 = one / rootsc;
+		    dlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &x[
+			    i__ * x_dim1 + 1], m, &info2);
+		    work[i__] = -scale * (rootsc / (doublereal) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    work[i__] = scale * rootsc;
+		    dlascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &x[
+			    i__ * x_dim1 + 1], m, &info2);
+/*                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC */
+/* LAPACK */
+		}
+	    } else {
+		work[i__] = zero;
+		++(*k);
+	    }
+	}
+	if (*k == *n) {
+/* All columns of X are zero. Return error code -8. */
+/* (the 8th input variable had an illegal value) */
+	    *k = 0;
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("DGEDMD", &i__1);
+	    return 0;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of Y. */
+	    if (work[i__] > zero) {
+		d__1 = one / work[i__];
+		dscal_(m, &d__1, &y[i__ * y_dim1 + 1], &c__1);
+/*               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (work[i__] < zero) {
+		d__1 = -work[i__];
+		d__2 = one / (doublereal) (*m);
+		dlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &y[i__ * 
+			y_dim1 + 1], m, &info2);
+/* LAPACK CAL */
+	    } else if (y[idamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ * 
+		    y_dim1] != zero) {
+/*               X(:,i) is zero vector. For consistency, */
+/*               Y(:,i) should also be zero. If Y(:,i) is not */
+/*               zero, then the data might be inconsistent or */
+/*               corrupted. If JOBS == 'C', Y(:,i) is set to */
+/*               zero and a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+		if (lsame_(jobs, "C")) {
+		    dscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1);
+		}
+/* BLAS CALL */
+	    }
+	}
+    }
+
+    if (sccoly) {
+/* The columns of Y will be normalized. */
+/* To prevent overflows, the column norms of Y are */
+/* carefully computed using DLASSQ. */
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = DNRM2( M, Y(1,i), 1 ) */
+	    scale = zero;
+	    dlassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum);
+	    if (disnan_(&scale) || disnan_(&ssum)) {
+		*k = 0;
+		*info = -10;
+		i__2 = -(*info);
+		xerbla_("DGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of Y(:,i) overflows. First, Y(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */
+/*                 Next, the norm of Y(:,i) is stored without */
+/*                 overflow as WORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of Y(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    d__1 = one / rootsc;
+		    dlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &y[
+			    i__ * y_dim1 + 1], m, &info2);
+		    work[i__] = -scale * (rootsc / (doublereal) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    work[i__] = scale * rootsc;
+		    dlascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &y[
+			    i__ * y_dim1 + 1], m, &info2);
+/*                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC */
+/* LAPACK */
+		}
+	    } else {
+		work[i__] = zero;
+	    }
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of X. */
+	    if (work[i__] > zero) {
+		d__1 = one / work[i__];
+		dscal_(m, &d__1, &x[i__ * x_dim1 + 1], &c__1);
+/*               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (work[i__] < zero) {
+		d__1 = -work[i__];
+		d__2 = one / (doublereal) (*m);
+		dlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &x[i__ * 
+			x_dim1 + 1], m, &info2);
+/* LAPACK CAL */
+	    } else if (x[idamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ * 
+		    x_dim1] != zero) {
+/*               Y(:,i) is zero vector.  If X(:,i) is not */
+/*               zero, then a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+	    }
+	}
+    }
+
+/*     <2> SVD of the data snapshot matrix X. */
+/*     ===================================== */
+/*     The left singular vectors are stored in the array X. */
+/*     The right singular vectors are in the array W. */
+/*     The array W will later on contain the eigenvectors */
+/*     of a Rayleigh quotient. */
+    numrnk = *n;
+/*      SELECT CASE ( WHTSVD ) */
+    if (*whtsvd == 1) {
+	i__1 = *lwork - *n;
+	dgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], 
+		ldb, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1);
+/* LAPACK CAL */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 2) {
+	i__1 = *lwork - *n;
+	dgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], ldb, &w[
+		w_offset], ldw, &work[*n + 1], &i__1, &iwork[1], &info1);
+/* LAPACK CAL */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 3) {
+	i__1 = *lwork - *n - f2cmax(2,*m);
+	i__2 = f2cmax(2,*m);
+	dgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[1], &
+		z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], 
+		liwork, &work[*n + f2cmax(2,*m) + 1], &i__1, &work[*n + 1], &
+		i__2, &info1);
+/* L */
+	dlacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK C */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 4) {
+	i__1 = *lwork - *n;
+	dgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, &
+		work[1], &z__[z_offset], ldz, &w[w_offset], ldw, &work[*n + 1]
+		, &i__1, &iwork[1], &info1);
+/* LAPACK CALL */
+	dlacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK CALL */
+	*(unsigned char *)t_or_n__ = 'N';
+	xscl1 = work[*n + 1];
+	xscl2 = work[*n + 2];
+	if (xscl1 != xscl2) {
+/* This is an exceptional situation. If the */
+/* data matrices are not scaled and the */
+/* largest singular value of X overflows. */
+/* In that case DGEJSV can return the SVD */
+/* in scaled form. The scaling factor can be used */
+/* to rescale the data (X and Y). */
+	    dlascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], 
+		    ldy, &info2);
+	}
+/*      END SELECT */
+    }
+
+    if (info1 > 0) {
+/* The SVD selected subroutine did not converge. */
+/* Return with an error code. */
+	*info = 2;
+	return 0;
+    }
+
+    if (work[1] == zero) {
+/* The largest computed singular value of (scaled) */
+/* X is zero. Return error code -8 */
+/* (the 8th input variable had an illegal value). */
+	*k = 0;
+	*info = -8;
+	i__1 = -(*info);
+	xerbla_("DGEDMD", &i__1);
+	return 0;
+    }
+
+/* <3> Determine the numerical rank of the data */
+/*    snapshots matrix X. This depends on the */
+/*    parameters NRNK and TOL. */
+/*      SELECT CASE ( NRNK ) */
+    if (*nrnk == -1) {
+	*k = 1;
+	i__1 = numrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (work[i__] <= work[1] * *tol || work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else if (*nrnk == -2) {
+	*k = 1;
+	i__1 = numrnk - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (work[i__ + 1] <= work[i__] * *tol || work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else {
+	*k = 1;
+	i__1 = *nrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+/*          END SELECT */
+    }
+/*   Now, U = X(1:M,1:K) is the SVD/POD basis for the */
+/*   snapshot data in the input matrix X. */
+/* <4> Compute the Rayleigh quotient S = U^T * A * U. */
+/*    Depending on the requested outputs, the computation */
+/*    is organized to compute additional auxiliary */
+/*    matrices (for the residuals and refinements). */
+
+/*    In all formulas below, we need V_k*Sigma_k^(-1) */
+/*    where either V_k is in W(1:N,1:K), or V_k^T is in */
+/*    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */
+    if (lsame_(t_or_n__, "N")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d__1 = one / work[i__];
+	    dscal_(n, &d__1, &w[i__ * w_dim1 + 1], &c__1);
+/* W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC */
+/* BLAS CALL */
+	}
+    } else {
+/* This non-unit stride access is due to the fact */
+/* that DGESVD, DGESVDQ and DGESDD return the */
+/* transposed matrix of the right singular vectors. */
+/* DO i = 1, K */
+/* CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL */
+/* ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC */
+/* END DO */
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[*n + i__] = one / work[i__];
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *k;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		w[i__ + j * w_dim1] = work[*n + i__] * w[i__ + j * w_dim1];
+	    }
+	}
+    }
+
+    if (wntref) {
+
+/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */
+/* for computing the refined Ritz vectors */
+/* (optionally, outside DGEDMD). */
+	dgemm_("N", t_or_n__, m, k, n, &one, &y[y_offset], ldy, &w[w_offset], 
+		ldw, &zero, &z__[z_offset], ldz);
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRI */
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRI */
+
+/* At this point Z contains */
+/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */
+/* this is needed for computing the residuals. */
+/* This matrix is  returned in the array B and */
+/* it can be used to compute refined Ritz vectors. */
+/* BLAS */
+	dlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+/* B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC */
+/* BLAS CALL */
+	dgemm_("T", "N", k, k, m, &one, &x[x_offset], ldx, &z__[z_offset], 
+		ldz, &zero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */
+/* At this point S = U^T * A * U is the Rayleigh quotient. */
+/* BLAS */
+    } else {
+/* A * U(:,1:K) is not explicitly needed and the */
+/* computation is organized differently. The Rayleigh */
+/* quotient is computed more efficiently. */
+	dgemm_("T", "N", k, n, m, &one, &x[x_offset], ldx, &y[y_offset], ldy, 
+		&zero, &z__[z_offset], ldz);
+/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! IN */
+/* In the two DGEMM calls here, can use K for LDZ. */
+/* B */
+	dgemm_("N", t_or_n__, k, k, n, &one, &z__[z_offset], ldz, &w[w_offset]
+		, ldw, &zero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRIN */
+/* At this point S = U^T * A * U is the Rayleigh quotient. */
+/* If the residuals are requested, save scaled V_k into Z. */
+/* Recall that V_k or V_k^T is stored in W. */
+/* BLAS */
+	if (wntres || wntex) {
+	    if (lsame_(t_or_n__, "N")) {
+		dlacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    } else {
+		dlacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    }
+	}
+    }
+
+/* <5> Compute the Ritz values and (if requested) the */
+/*   right eigenvectors of the Rayleigh quotient. */
+
+    i__1 = *lwork - *n;
+    dgeev_("N", jobzl, k, &s[s_offset], lds, &reig[1], &imeig[1], &w[w_offset]
+	    , ldw, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1);
+
+/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */
+/* quotient. Even in the case of complex spectrum, all */
+/* computation is done in real arithmetic. REIG and */
+/* IMEIG are the real and the imaginary parts of the */
+/* eigenvalues, so that the spectrum is given as */
+/* REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs */
+/* are listed at consecutive positions. For such a */
+/* complex conjugate pair of the eigenvalues, the */
+/* corresponding eigenvectors are also a complex */
+/* conjugate pair with the real and imaginary parts */
+/* stored column-wise in W at the corresponding */
+/* consecutive column indices. See the description of Z. */
+/* Also, see the description of DGEEV. */
+/* LAPACK C */
+    if (info1 > 0) {
+/* DGEEV failed to compute the eigenvalues and */
+/* eigenvectors of the Rayleigh quotient. */
+	*info = 3;
+	return 0;
+    }
+
+/* <6> Compute the eigenvectors (if requested) and, */
+/* the residuals (if requested). */
+
+    if (wntvec || wntex) {
+	if (wntres) {
+	    if (wntref) {
+/* Here, if the refinement is requested, we have */
+/* A*U(:,1:K) already computed and stored in Z. */
+/* For the residuals, need Y = A * U(:,1;K) * W. */
+		dgemm_("N", "N", m, k, k, &one, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zero, &y[y_offset], ldy);
+/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC */
+/* This frees Z; Y contains A * U(:,1:K) * W. */
+/* BLAS CALL */
+	    } else {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+		dgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+		dgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[
+			s_offset], lds, &zero, &z__[z_offset], ldz);
+/* Save a copy of Z into Y and free Z for holding */
+/* the Ritz vectors. */
+		dlacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy);
+		if (wntex) {
+		    dlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+		}
+	    }
+	} else if (wntex) {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+	    dgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[
+		    w_offset], ldw, &zero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+	    dgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[s_offset], 
+		    lds, &zero, &b[b_offset], ldb);
+/* The above call replaces the following two calls */
+/* that were used in the developing-testing phase. */
+/* CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & */
+/*           LDS, ZERO, Z, LDZ) */
+/* Save a copy of Z into B and free Z for holding */
+/* the Ritz vectors. */
+/* CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) */
+	}
+
+/* Compute the real form of the Ritz vectors */
+	if (wntvec) {
+	    dgemm_("N", "N", m, k, k, &one, &x[x_offset], ldx, &w[w_offset], 
+		    ldw, &zero, &z__[z_offset], ldz);
+	}
+/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC */
+
+/* BLAS CALL */
+	if (wntres) {
+	    i__ = 1;
+	    while(i__ <= *k) {
+		if (imeig[i__] == zero) {
+/* have a real eigenvalue with real eigenvector */
+		    d__1 = -reig[i__];
+		    daxpy_(m, &d__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * 
+			    y_dim1 + 1], &c__1);
+/* Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! */
+
+		    res[i__] = dnrm2_(m, &y[i__ * y_dim1 + 1], &c__1);
+
+		    ++i__;
+		} else {
+/* Have a complex conjugate pair */
+/* REIG(i) +- sqrt(-1)*IMEIG(i). */
+/* Since all computation is done in real */
+/* arithmetic, the formula for the residual */
+/* is recast for real representation of the */
+/* complex conjugate eigenpair. See the */
+/* description of RES. */
+		    ab[0] = reig[i__];
+		    ab[1] = -imeig[i__];
+		    ab[2] = imeig[i__];
+		    ab[3] = reig[i__];
+		    d__1 = -one;
+		    dgemm_("N", "N", m, &c__2, &c__2, &d__1, &z__[i__ * 
+			    z_dim1 + 1], ldz, ab, &c__2, &one, &y[i__ * 
+			    y_dim1 + 1], ldy);
+/* Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INT */
+/* BL */
+		    res[i__] = dlange_("F", m, &c__2, &y[i__ * y_dim1 + 1], 
+			    ldy, &work[*n + 1]);
+/* LA */
+		    res[i__ + 1] = res[i__];
+		    i__ += 2;
+		}
+	    }
+	}
+    }
+
+    if (*whtsvd == 4) {
+	work[*n + 1] = xscl1;
+	work[*n + 2] = xscl2;
+    }
+
+/*     Successful exit. */
+    if (! badxy) {
+	*info = 0;
+    } else {
+/* A warning on possible data inconsistency. */
+/* This should be a rare event. */
+	*info = 4;
+    }
+/* ............................................................ */
+    return 0;
+/*     ...... */
+} /* dgedmd_ */
+
diff --git a/lapack-netlib/SRC/dgedmdq.c b/lapack-netlib/SRC/dgedmdq.c
index 447b23014..a743a3156 100644
--- a/lapack-netlib/SRC/dgedmdq.c
+++ b/lapack-netlib/SRC/dgedmdq.c
@@ -509,3 +509,792 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+
+/* Subroutine */ int dgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, 
+	char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, 
+	doublereal *f, integer *ldf, doublereal *x, integer *ldx, doublereal *
+	y, integer *ldy, integer *nrnk, doublereal *tol, integer *k, 
+	doublereal *reig, doublereal *imeig, doublereal *z__, integer *ldz, 
+	doublereal *res, doublereal *b, integer *ldb, doublereal *v, integer *
+	ldv, doublereal *s, integer *lds, doublereal *work, integer *lwork, 
+	integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, 
+	    z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, 
+	    i__1, i__2;
+
+    /* Local variables */
+    doublereal zero;
+    integer info1;
+    extern logical lsame_(char *, char *);
+    char jobvl[1];
+    integer minmn;
+    logical wantq;
+    integer mlwqr, olwqr;
+    logical wntex;
+    extern /* Subroutine */ int dgedmd_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *, integer *, integer 
+	    *), dgeqrf_(integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dlacpy_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *), dlaset_(char *, 
+	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    integer *), xerbla_(char *, integer *);
+    integer mlwdmd, olwdmd;
+    logical sccolx, sccoly;
+    extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dormqr_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+	     doublereal *, integer *, integer *);
+    integer iminwr;
+    logical wntvec, wntvcf;
+    integer mlwgqr;
+    logical wntref;
+    integer mlwork, olwgqr, olwork;
+    doublereal rdummy[2];
+    integer mlwmqr, olwmqr;
+    logical lquery, wntres, wnttrf, wntvcq;
+    doublereal one;
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real64 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ..... */
+/*     Purpose */
+/*     ======= */
+/*     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices, using a QR factorization */
+/*     based compression of the data. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, DGEDMDQ computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, DGEDMDQ returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office. */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. The data snapshots are the columns */
+/*     of F. The leading N-1 columns of F are denoted X and the */
+/*     trailing N-1 columns are denoted Y. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Z*V, where Z */
+/*            is orthonormal and V contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of F, V, Z. */
+/*     'Q' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Q*Z, where Z */
+/*            contains the eigenvectors of the compression of the */
+/*            underlying discretized operator onto the span of */
+/*            the data snapshots. See the descriptions of F, V, Z. */
+/*            Q is from the initial QR factorization. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will */
+/*            be computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBQ (input) CHARACTER*1 */
+/*     Specifies whether to explicitly compute and return the */
+/*     orthogonal matrix from the QR factorization. */
+/*     'Q' :: The matrix Q of the QR factorization of the data */
+/*            snapshot matrix is computed and stored in the */
+/*            array F. See the description of F. */
+/*     'N' :: The matrix Q is not explicitly computed. */
+/* ..... */
+/*     JOBT (input) CHARACTER*1 */
+/*     Specifies whether to return the upper triangular factor */
+/*     from the QR factorization. */
+/*     'R' :: The matrix R of the QR factorization of the data */
+/*            snapshot matrix F is returned in the array Y. */
+/*            See the description of Y and Further details. */
+/*     'N' :: The matrix R is not returned. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/*     To be useful on exit, this option needs JOBQ='Q'. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: DGESVD (the QR SVD algorithm) */
+/*     2 :: DGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M >= 0 */
+/*     The state space dimension (the number of rows of F). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshots from a single trajectory, */
+/*     taken at equidistant discrete times. This is the */
+/*     number of columns of F. */
+/* ..... */
+/*     F (input/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, */
+/*     the columns of F are the sequence of data snapshots */
+/*     from a single trajectory, taken at equidistant discrete */
+/*     times. It is assumed that the column norms of F are */
+/*     in the range of the normalized floating point numbers. */
+/*     < On exit, */
+/*     If JOBQ == 'Q', the array F contains the orthogonal */
+/*     matrix/factor of the QR factorization of the initial */
+/*     data snapshots matrix F. See the description of JOBQ. */
+/*     If JOBQ == 'N', the entries in F strictly below the main */
+/*     diagonal contain, column-wise, the information on the */
+/*     Householder vectors, as returned by DGEQRF. The */
+/*     remaining information to restore the orthogonal matrix */
+/*     of the initial QR factorization is stored in WORK(1:N). */
+/*     See the description of WORK. */
+/* ..... */
+/*     LDF (input) INTEGER, LDF >= M */
+/*     The leading dimension of the array F. */
+/* ..... */
+/*     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     X is used as workspace to hold representations of the */
+/*     leading N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, the leading K columns of X contain the leading */
+/*     K left singular vectors of the above described content */
+/*     of X. To lift them to the space of the left singular */
+/*     vectors U(:,1:K)of the input data, pre-multiply with the */
+/*     Q factor from the initial QR factorization. */
+/*     See the descriptions of F, K, V  and Z. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= N */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     Y is used as workspace to hold representations of the */
+/*     trailing N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, */
+/*     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */
+/*     triangular factor from the QR factorization of the data */
+/*     snapshot matrix F. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= N */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N-1 :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the description of K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the SVD/POD basis for the leading N-1 */
+/*     data snapshots (columns of F) and the number of the */
+/*     computed Ritz pairs. The value of K is determined */
+/*     according to the rule set by the parameters NRNK and */
+/*     TOL. See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     REIG (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<=N) entries of REIG contain */
+/*     the real parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     See the descriptions of K, IMEIG, Z. */
+/* ..... */
+/*     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<N) entries of REIG contain */
+/*     the imaginary parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     The eigenvalues are determined as follows: */
+/*     If IMEIG(i) == 0, then the corresponding eigenvalue is */
+/*     real, LAMBDA(i) = REIG(i). */
+/*     If IMEIG(i)>0, then the corresponding complex */
+/*     conjugate pair of eigenvalues reads */
+/*     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i) */
+/*     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */
+/*     That is, complex conjugate pairs have consequtive */
+/*     indices (i,i+1), with the positive imaginary part */
+/*     listed first. */
+/*     See the descriptions of K, REIG, Z. */
+/* ..... */
+/*     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array */
+/*     If JOBZ =='V' then */
+/*        Z contains real Ritz vectors as follows: */
+/*        If IMEIG(i)=0, then Z(:,i) is an eigenvector of */
+/*        the i-th Ritz value. */
+/*        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */
+/*        [Z(:,i) Z(:,i+1)] span an invariant subspace and */
+/*        the Ritz values extracted from this subspace are */
+/*        REIG(i) + sqrt(-1)*IMEIG(i) and */
+/*        REIG(i) - sqrt(-1)*IMEIG(i). */
+/*        The corresponding eigenvectors are */
+/*        Z(:,i) + sqrt(-1)*Z(:,i+1) and */
+/*        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */
+/*     If JOBZ == 'F', then the above descriptions hold for */
+/*     the columns of Z*V, where the columns of V are the */
+/*     eigenvectors of the K-by-K Rayleigh quotient, and Z is */
+/*     orthonormal. The columns of V are similarly structured: */
+/*     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if */
+/*     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and */
+/*                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) */
+/*     are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */
+/*     See the descriptions of REIG, IMEIG, X and V. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs. */
+/*     If LAMBDA(i) is real, then */
+/*        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */
+/*     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */
+/*     then */
+/*     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */
+/*     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */
+/*               [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */
+/*     It holds that */
+/*     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2 */
+/*     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */
+/*     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1) */
+/*           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1) */
+/*     See the description of Z. */
+/* ..... */
+/*     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array. */
+/*     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:N,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     In both cases, the content of B can be lifted to the */
+/*     original dimension of the input data by pre-multiplying */
+/*     with the Q factor from the initial QR factorization. */
+/*     Here A denotes a compression of the underlying operator. */
+/*     See the descriptions of F and X. */
+/*     If JOBF =='N', then B is not referenced. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= MIN(M,N) */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array */
+/*     On exit, V(1:K,1:K) contains the K eigenvectors of */
+/*     the Rayleigh quotient. The eigenvectors of a complex */
+/*     conjugate pair of eigenvalues are returned in real form */
+/*     as explained in the description of Z. The Ritz vectors */
+/*     (returned in Z) are the product of X and V; see */
+/*     the descriptions of X and Z. */
+/* ..... */
+/*     LDV (input) INTEGER, LDV >= N-1 */
+/*     The leading dimension of the array V. */
+/* ..... */
+/*     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by DGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N-1 */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     WORK(1:MIN(M,N)) contains the scalar factors of the */
+/*     elementary reflectors as returned by DGEQRF of the */
+/*     M-by-N input matrix F. */
+/*     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of */
+/*     the input submatrix F(1:M,1:N-1). */
+/*     If the call to DGEDMDQ is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. Hence, the */
+/*     length of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector WORK. */
+/*     LWORK is calculated as follows: */
+/*     Let MLWQR  = N (minimal workspace for DGEQRF[M,N]) */
+/*         MLWDMD = minimal workspace for DGEDMD (see the */
+/*                  description of LWORK in DGEDMD) for */
+/*                  snapshots of dimensions MIN(M,N)-by-(N-1) */
+/*         MLWMQR = N (minimal workspace for */
+/*                    DORMQR['L','N',M,N,N]) */
+/*         MLWGQR = N (minimal workspace for DORGQR[M,N,N]) */
+/*     Then */
+/*     LWORK = MAX(N+MLWQR, N+MLWDMD) */
+/*     is updated as follows: */
+/*        if   JOBZ == 'V' or JOBZ == 'F' THEN */
+/*             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR ) */
+/*        if   JOBQ == 'Q' THEN */
+/*             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR) */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     Let M1=MIN(M,N), N1=N-1. Then */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     Local array */
+/*     ~~~~~~~~~~~ */
+
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     External subroutines */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* .......................................................... */
+    /* Parameter adjustments */
+    f_dim1 = *ldf;
+    f_offset = 1 + f_dim1 * 1;
+    f -= f_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --reig;
+    --imeig;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    zero = 0.f;
+    one = 1.f;
+
+/*    Test the input arguments */
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntvcf = lsame_(jobz, "F");
+    wntvcq = lsame_(jobz, "Q");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    wantq = lsame_(jobq, "Q");
+    wnttrf = lsame_(jobt, "R");
+    minmn = f2cmin(*m,*n);
+    *info = 0;
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && lsame_(jobz, "N")) {
+	*info = -3;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -4;
+    } else if (! (wnttrf || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -6;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -7;
+    } else if (*m < 0) {
+	*info = -8;
+    } else if (*n < 0 || *n > *m + 1) {
+	*info = -9;
+    } else if (*ldf < *m) {
+	*info = -11;
+    } else if (*ldx < minmn) {
+	*info = -13;
+    } else if (*ldy < minmn) {
+	*info = -15;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -16;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -17;
+    } else if (*ldz < *m) {
+	*info = -22;
+    } else if ((wntref || wntex) && *ldb < minmn) {
+	*info = -25;
+    } else if (*ldv < *n - 1) {
+	*info = -27;
+    } else if (*lds < *n - 1) {
+	*info = -29;
+    }
+
+    if (wntvec || wntvcf || wntvcq) {
+	*(unsigned char *)jobvl = 'V';
+    } else {
+	*(unsigned char *)jobvl = 'N';
+    }
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0 || *n == 1) {
+/* All output except K is void. INFO=1 signals */
+/* the void input. In case of a workspace query, */
+/* the minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		work[1] = 2.;
+		work[2] = 2.;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlwqr = f2cmax(1,*n);
+/* Minimal workspace length for DGEQRF. */
+	mlwork = minmn + mlwqr;
+	if (lquery) {
+	    dgeqrf_(m, n, &f[f_offset], ldf, &work[1], rdummy, &c_n1, &info1);
+	    olwqr = (integer) rdummy[0];
+	    olwork = f2cmin(*m,*n) + olwqr;
+	}
+	i__1 = *n - 1;
+	dgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], 
+		ldx, &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &
+		z__[z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], 
+		ldv, &s[s_offset], lds, &work[1], &c_n1, &iwork[1], liwork, &
+		info1);
+	mlwdmd = (integer) work[1];
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwdmd;
+	mlwork = f2cmax(i__1,i__2);
+	iminwr = iwork[1];
+	if (lquery) {
+	    olwdmd = (integer) work[2];
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwdmd;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	if (wntvec || wntvcf) {
+	    mlwmqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + *n - 1 + mlwmqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		dormqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &work[1], &
+			z__[z_offset], ldz, &work[1], &c_n1, &info1);
+		olwmqr = (integer) work[1];
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + *n - 1 + olwmqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (wantq) {
+	    mlwgqr = *n;
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + *n - 1 + mlwgqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		dorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[
+			1], &c_n1, &info1);
+		olwgqr = (integer) work[1];
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + *n - 1 + olwgqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	iminwr = f2cmax(1,iminwr);
+	mlwork = f2cmax(2,mlwork);
+	if (*lwork < mlwork && ! lquery) {
+	    *info = -31;
+	}
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -33;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEDMDQ", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	work[1] = (doublereal) mlwork;
+	work[2] = (doublereal) olwork;
+	return 0;
+    }
+/* ..... */
+/*     Initial QR factorization that is used to represent the */
+/*     snapshots as elements of lower dimensional subspace. */
+/*     For large scale computation with M >>N , at this place */
+/*     one can use an out of core QRF. */
+
+    i__1 = *lwork - minmn;
+    dgeqrf_(m, n, &f[f_offset], ldf, &work[1], &work[minmn + 1], &i__1, &
+	    info1);
+
+/*     Define X and Y as the snapshots representations in the */
+/*     orthogonal basis computed in the QR factorization. */
+/*     X corresponds to the leading N-1 and Y to the trailing */
+/*     N-1 snapshots. */
+    i__1 = *n - 1;
+    dlaset_("L", &minmn, &i__1, &zero, &zero, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    dlacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    dlacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy);
+    if (*m >= 3) {
+	i__1 = minmn - 2;
+	i__2 = *n - 2;
+	dlaset_("L", &i__1, &i__2, &zero, &zero, &y[y_dim1 + 3], ldy);
+    }
+
+/*     Compute the DMD of the projected snapshot pairs (X,Y) */
+    i__1 = *n - 1;
+    i__2 = *lwork - minmn;
+    dgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx,
+	     &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &z__[
+	    z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[
+	    s_offset], lds, &work[minmn + 1], &i__2, &iwork[1], liwork, &
+	    info1);
+    if (info1 == 2 || info1 == 3) {
+/* Return with error code. See DGEDMD for details. */
+	*info = info1;
+	return 0;
+    } else {
+	*info = info1;
+    }
+
+/*     The Ritz vectors (Koopman modes) can be explicitly */
+/*     formed or returned in factored form. */
+    if (wntvec) {
+/* Compute the eigenvectors explicitly. */
+	if (*m > minmn) {
+	    i__1 = *m - minmn;
+	    dlaset_("A", &i__1, k, &zero, &zero, &z__[minmn + 1 + z_dim1], 
+		    ldz);
+	}
+	i__1 = *lwork - (minmn + *n - 1);
+	dormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[
+		z_offset], ldz, &work[minmn + *n], &i__1, &info1);
+    } else if (wntvcf) {
+/*   Return the Ritz vectors (eigenvectors) in factored */
+/*   form Z*V, where Z contains orthonormal matrix (the */
+/*   product of Q from the initial QR factorization and */
+/*   the SVD/POD_basis returned by DGEDMD in X) and the */
+/*   second factor (the eigenvectors of the Rayleigh */
+/*   quotient) is in the array V, as returned by DGEDMD. */
+	dlacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz);
+	if (*m > *n) {
+	    i__1 = *m - *n;
+	    dlaset_("A", &i__1, k, &zero, &zero, &z__[*n + 1 + z_dim1], ldz);
+	}
+	i__1 = *lwork - (minmn + *n - 1);
+	dormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[
+		z_offset], ldz, &work[minmn + *n], &i__1, &info1);
+    }
+
+/*     Some optional output variables: */
+
+/*     The upper triangular factor R in the initial QR */
+/*     factorization is optionally returned in the array Y. */
+/*     This is useful if this call to DGEDMDQ is to be */
+/*     followed by a streaming DMD that is implemented in a */
+/*     QR compressed form. */
+    if (wnttrf) {
+/* Return the upper triangular R in Y */
+	dlaset_("A", &minmn, n, &zero, &zero, &y[y_offset], ldy);
+	dlacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy);
+    }
+
+/*     The orthonormal/orthogonal factor Q in the initial QR */
+/*     factorization is optionally returned in the array F. */
+/*     Same as with the triangular factor above, this is */
+/*     useful in a streaming DMD. */
+    if (wantq) {
+/* Q overwrites F */
+	i__1 = *lwork - (minmn + *n - 1);
+	dorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[minmn + 
+		*n], &i__1, &info1);
+    }
+
+    return 0;
+
+} /* dgedmdq_ */
+
diff --git a/lapack-netlib/SRC/sgedmd.c b/lapack-netlib/SRC/sgedmd.c
index 447b23014..c8f3a5964 100644
--- a/lapack-netlib/SRC/sgedmd.c
+++ b/lapack-netlib/SRC/sgedmd.c
@@ -509,3 +509,1238 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c__2 = 2;
+
+/* Subroutine */ int sgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, 
+	integer *whtsvd, integer *m, integer *n, real *x, integer *ldx, real *
+	y, integer *ldy, integer *nrnk, real *tol, integer *k, real *reig, 
+	real *imeig, real *z__, integer *ldz, real *res, real *b, integer *
+	ldb, real *w, integer *ldw, real *s, integer *lds, real *work, 
+	integer *lwork, integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, 
+	    b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    real zero, ssum;
+    integer info1, info2;
+    real xscl1, xscl2;
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, j;
+    real scale;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    logical badxy;
+    real small;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), sgeev_(char *, char *, 
+	    integer *, real *, integer *, real *, real *, real *, integer *, 
+	    real *, integer *, real *, integer *, integer *);
+    char jobzl[1];
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
+	    real *, integer *);
+    logical wntex;
+    real ab[4]	/* was [2][2] */;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int sgesdd_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *, integer *), xerbla_(char *, integer 
+	    *);
+    char t_or_n__[1];
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    logical sccolx, sccoly;
+    extern logical sisnan_(real *);
+    extern /* Subroutine */ int sgesvd_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *, real *, integer *, 
+	    real *, integer *, integer *);
+    integer lwrsdd, mwrsdd;
+    extern /* Subroutine */ int sgejsv_(char *, char *, char *, char *, char *
+	    , char *, integer *, integer *, real *, integer *, real *, real *,
+	     integer *, real *, integer *, real *, integer *, integer *, 
+	    integer *), 
+	    slacpy_(char *, integer *, integer *, real *, integer *, real *, 
+	    integer *);
+    integer iminwr;
+    logical wntref, wntvec;
+    real rootsc;
+    integer lwrkev, mlwork, mwrkev, numrnk, olwork;
+    real rdummy[2];
+    integer lwrsvd, mwrsvd;
+    logical lquery, wntres;
+    char jsvopt[1];
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
+	    real *), mecago_();
+    integer mwrsvj, lwrsvq, mwrsvq;
+    real rdummy2[2], ofl, one;
+    extern /* Subroutine */ int sgesvdq_(char *, char *, char *, char *, char 
+	    *, integer *, integer *, real *, integer *, real *, real *, 
+	    integer *, real *, integer *, integer *, integer *, integer *, 
+	    real *, integer *, real *, integer *, integer *);
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real32 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ............................................................ */
+/*     Purpose */
+/*     ======= */
+/*     SGEDMD computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, SGEDMD computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, SGEDMD returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/* ...................................................................... */
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product X(:,1:K)*W, where X */
+/*            contains a POD basis (leading left singular vectors */
+/*            of the data matrix X) and W contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of K, X, W, Z. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will be */
+/*            computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: SGESVD (the QR SVD algorithm) */
+/*     2 :: SGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M>= 0 */
+/*     The state space dimension (the row dimension of X, Y). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshot pairs */
+/*     (the number of columns of X and Y). */
+/* ..... */
+/*     X (input/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, X contains the data snapshot matrix X. It is */
+/*     assumed that the column norms of X are in the range of */
+/*     the normalized floating point numbers. */
+/*     < On exit, the leading K columns of X contain a POD basis, */
+/*     i.e. the leading K left singular vectors of the input */
+/*     data matrix X, U(:,1:K). All N columns of X contain all */
+/*     left singular vectors of the input matrix X. */
+/*     See the descriptions of K, Z and W. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= M */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (input/workspace/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, Y contains the data snapshot matrix Y */
+/*     < On exit, */
+/*     If JOBR == 'R', the leading K columns of Y  contain */
+/*     the residual vectors for the computed Ritz pairs. */
+/*     See the description of RES. */
+/*     If JOBR == 'N', Y contains the original input data, */
+/*                     scaled according to the value of JOBS. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= M */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the descriptions of TOL and  K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the POD basis for the data snapshot */
+/*     matrix X and the number of the computed Ritz pairs. */
+/*     The value of K is determined according to the rule set */
+/*     by the parameters NRNK and TOL. */
+/*     See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     REIG (output) REAL(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of REIG contain */
+/*     the real parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     See the descriptions of K, IMEIG, and Z. */
+/* ..... */
+/*     IMEIG (output) REAL(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of IMEIG contain */
+/*     the imaginary parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     The eigenvalues are determined as follows: */
+/*     If IMEIG(i) == 0, then the corresponding eigenvalue is */
+/*     real, LAMBDA(i) = REIG(i). */
+/*     If IMEIG(i)>0, then the corresponding complex */
+/*     conjugate pair of eigenvalues reads */
+/*     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i) */
+/*     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */
+/*     That is, complex conjugate pairs have consecutive */
+/*     indices (i,i+1), with the positive imaginary part */
+/*     listed first. */
+/*     See the descriptions of K, REIG, and Z. */
+/* ..... */
+/*     Z (workspace/output) REAL(KIND=WP)  M-by-N array */
+/*     If JOBZ =='V' then */
+/*        Z contains real Ritz vectors as follows: */
+/*        If IMEIG(i)=0, then Z(:,i) is an eigenvector of */
+/*        the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */
+/*        [Z(:,i) Z(:,i+1)] span an invariant subspace and */
+/*        the Ritz values extracted from this subspace are */
+/*        REIG(i) + sqrt(-1)*IMEIG(i) and */
+/*        REIG(i) - sqrt(-1)*IMEIG(i). */
+/*        The corresponding eigenvectors are */
+/*        Z(:,i) + sqrt(-1)*Z(:,i+1) and */
+/*        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */
+/*        || Z(:,i:i+1)||_F = 1. */
+/*     If JOBZ == 'F', then the above descriptions hold for */
+/*     the columns of X(:,1:K)*W(1:K,1:K), where the columns */
+/*     of W(1:k,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. The columns of W(1:K,1:K) */
+/*     are similarly structured: If IMEIG(i) == 0 then */
+/*     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0 */
+/*     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and */
+/*          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1) */
+/*     are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */
+/*     See the descriptions of REIG, IMEIG, X and W. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) N-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs. */
+/*     If LAMBDA(i) is real, then */
+/*        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */
+/*     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */
+/*     then */
+/*     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */
+/*     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */
+/*               [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */
+/*     It holds that */
+/*     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2 */
+/*     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */
+/*     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1) */
+/*           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1) */
+/*     See the description of REIG, IMEIG and Z. */
+/* ..... */
+/*     B (output) REAL(KIND=WP)  M-by-N array. */
+/*     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:M,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     If JOBF =='N', then B is not referenced. */
+/*     See the descriptions of X, W, K. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= M */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     W (workspace/output) REAL(KIND=WP) N-by-N array */
+/*     On exit, W(1:K,1:K) contains the K computed */
+/*     eigenvectors of the matrix Rayleigh quotient (real and */
+/*     imaginary parts for each complex conjugate pair of the */
+/*     eigenvalues). The Ritz vectors (returned in Z) are the */
+/*     product of X (containing a POD basis for the input */
+/*     matrix X) and W. See the descriptions of K, S, X and Z. */
+/*     W is also used as a workspace to temporarily store the */
+/*     left singular vectors of X. */
+/* ..... */
+/*     LDW (input) INTEGER, LDW >= N */
+/*     The leading dimension of the array W. */
+/* ..... */
+/*     S (workspace/output) REAL(KIND=WP) N-by-N array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by SGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, WORK(1:N) contains the singular values of */
+/*     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */
+/*     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain */
+/*     scaling factor WORK(N+2)/WORK(N+1) used to scale X */
+/*     and Y to avoid overflow in the SVD of X. */
+/*     This may be of interest if the scaling option is off */
+/*     and as many as possible smallest eigenvalues are */
+/*     desired to the highest feasible accuracy. */
+/*     If the call to SGEDMD is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. Hence, the */
+/*     length of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the workspace vector WORK. */
+/*     LWORK is calculated as follows: */
+/*     If WHTSVD == 1 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)). */
+/*        If JOBZ == 'N'  then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)). */
+/*        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal */
+/*        workspace length of SGESVD. */
+/*     If WHTSVD == 2 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the */
+/*        minimal workspace length of SGESDD. */
+/*     If WHTSVD == 3 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = N+M+MAX(3*N+1, */
+/*                        MAX(1,3*N+M,5*N),MAX(1,N)) */
+/*        is the minimal workspace length of SGESVDQ. */
+/*     If WHTSVD == 4 :: */
+/*        If JOBZ == 'V', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N)) */
+/*        If JOBZ == 'N', then */
+/*        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N)) */
+/*        Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the */
+/*        minimal workspace length of SGEJSV. */
+/*     The above expressions are not simplified in order to */
+/*     make the usage of WORK more transparent, and for */
+/*     easier checking. In any case, LWORK >= 2. */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+/*     Local arrays */
+/*     ~~~~~~~~~~~~ */
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* ............................................................ */
+    /* Parameter adjustments */
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --reig;
+    --imeig;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1 * 1;
+    w -= w_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    one = 1.f;
+    zero = 0.f;
+
+/*    Test the input arguments */
+
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    *info = 0;
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || lsame_(jobz, "N") || lsame_(
+	    jobz, "F"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && ! wntvec) {
+	*info = -3;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -4;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*ldx < *m) {
+	*info = -9;
+    } else if (*ldy < *m) {
+	*info = -11;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -12;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -13;
+    } else if (*ldz < *m) {
+	*info = -18;
+    } else if ((wntref || wntex) && *ldb < *m) {
+	*info = -21;
+    } else if (*ldw < *n) {
+	*info = -23;
+    } else if (*lds < *n) {
+	*info = -25;
+    }
+
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0) {
+/* Quick return. All output except K is void. */
+/* INFO=1 signals the void input. */
+/* In case of a workspace query, the default */
+/* minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		work[1] = 2.f;
+		work[2] = 2.f;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlwork = f2cmax(2,*n);
+	olwork = f2cmax(2,*n);
+	iminwr = 1;
+/*         SELECT CASE ( WHTSVD ) */
+	if (*whtsvd == 1) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of SGESVD: */
+/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */
+/* Computing MAX */
+	    i__1 = 1, i__2 = f2cmin(*m,*n) * 3 + f2cmax(*m,*n), i__1 = f2cmax(i__1,
+		    i__2), i__2 = f2cmin(*m,*n) * 5;
+	    mwrsvd = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvd;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		sgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[
+			b_offset], ldb, &w[w_offset], ldw, rdummy, &c_n1, &
+			info1);
+/* Computing MAX */
+		i__1 = mwrsvd, i__2 = (integer) rdummy[0];
+		lwrsvd = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsvd;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 2) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of SGESDD: */
+/* MWRSDD = 3*MIN(M,N)*MIN(M,N) + */
+/* MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) */
+/* IMINWR = 8*MIN(M,N) */
+/* Computing MAX */
+	    i__1 = f2cmax(*m,*n), i__2 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + (f2cmin(*m,*
+		    n) << 2);
+	    mwrsdd = f2cmin(*m,*n) * 3 * f2cmin(*m,*n) + f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsdd;
+	    mlwork = f2cmax(i__1,i__2);
+	    iminwr = f2cmin(*m,*n) << 3;
+	    if (lquery) {
+		sgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], 
+			ldb, &w[w_offset], ldw, rdummy, &c_n1, &iwork[1], &
+			info1);
+/* Computing MAX */
+		i__1 = mwrsdd, i__2 = (integer) rdummy[0];
+		lwrsdd = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsdd;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 3) {
+/* LWQP3 = 3*N+1 */
+/* LWORQ = MAX(N, 1) */
+/* MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) */
+/* MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2) */
+/* MLWORK = N + MWRSVQ */
+/* IMINWR = M+N-1 */
+	    sgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[
+		    1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &
+		    iwork[1], &c_n1, rdummy, &c_n1, rdummy2, &c_n1, &info1);
+	    iminwr = iwork[1];
+	    mwrsvq = (integer) rdummy[1];
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvq + (integer) rdummy2[0];
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		lwrsvq = (integer) rdummy[0];
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + lwrsvq + (integer) rdummy2[0];
+		olwork = f2cmax(i__1,i__2);
+	    }
+	} else if (*whtsvd == 4) {
+	    *(unsigned char *)jsvopt = 'J';
+/* MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V' */
+/* Computing MAX */
+	    i__1 = 7, i__2 = (*m << 1) + *n, i__1 = f2cmax(i__1,i__2), i__2 = (*
+		    n << 2) + *n * *n, i__1 = f2cmax(i__1,i__2), i__2 = (*n << 1)
+		     + *n * *n + 6;
+	    mwrsvj = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = *n + mwrsvj;
+	    mlwork = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = 3, i__2 = *m + *n * 3;
+	    iminwr = f2cmax(i__1,i__2);
+	    if (lquery) {
+/* Computing MAX */
+		i__1 = olwork, i__2 = *n + mwrsvj;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+/*         END SELECT */
+	if (wntvec || wntex || lsame_(jobz, "F")) {
+	    *(unsigned char *)jobzl = 'V';
+	} else {
+	    *(unsigned char *)jobzl = 'N';
+	}
+/* Workspace calculation to the SGEEV call */
+	if (lsame_(jobzl, "V")) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n << 2;
+	    mwrkev = f2cmax(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * 3;
+	    mwrkev = f2cmax(i__1,i__2);
+	}
+/* Computing MAX */
+	i__1 = mlwork, i__2 = *n + mwrkev;
+	mlwork = f2cmax(i__1,i__2);
+	if (lquery) {
+	    sgeev_("N", jobzl, n, &s[s_offset], lds, &reig[1], &imeig[1], &w[
+		    w_offset], ldw, &w[w_offset], ldw, rdummy, &c_n1, &info1);
+/* Computing MAX */
+	    i__1 = mwrkev, i__2 = (integer) rdummy[0];
+	    lwrkev = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = olwork, i__2 = *n + lwrkev;
+	    olwork = f2cmax(i__1,i__2);
+	}
+
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -29;
+	}
+	if (*lwork < mlwork && ! lquery) {
+	    *info = -27;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEDMD", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	work[1] = (real) mlwork;
+	work[2] = (real) olwork;
+	return 0;
+    }
+/* ............................................................ */
+
+    ofl = slamch_("O");
+    small = slamch_("S");
+    badxy = FALSE_;
+
+/*     <1> Optional scaling of the snapshots (columns of X, Y) */
+/*     ========================================================== */
+    if (sccolx) {
+/* The columns of X will be normalized. */
+/* To prevent overflows, the column norms of X are */
+/* carefully computed using SLASSQ. */
+	*k = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = DNRM2( M, X(1,i), 1 ) */
+	    scale = zero;
+	    slassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum);
+	    if (sisnan_(&scale) || sisnan_(&ssum)) {
+		*k = 0;
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("SGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of X(:,i) overflows. First, X(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */
+/*                 Next, the norm of X(:,i) is stored without */
+/*                 overflow as WORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of X(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    r__1 = one / rootsc;
+		    slascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &x[
+			    i__ * x_dim1 + 1], m, &info2);
+		    work[i__] = -scale * (rootsc / (real) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    work[i__] = scale * rootsc;
+		    slascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &x[
+			    i__ * x_dim1 + 1], m, &info2);
+/*                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC */
+/* LAPACK */
+		}
+	    } else {
+		work[i__] = zero;
+		++(*k);
+	    }
+	}
+	if (*k == *n) {
+/* All columns of X are zero. Return error code -8. */
+/* (the 8th input variable had an illegal value) */
+	    *k = 0;
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("SGEDMD", &i__1);
+	    return 0;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of Y. */
+	    if (work[i__] > zero) {
+		r__1 = one / work[i__];
+		sscal_(m, &r__1, &y[i__ * y_dim1 + 1], &c__1);
+/*               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (work[i__] < zero) {
+		r__1 = -work[i__];
+		r__2 = one / (real) (*m);
+		slascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &y[i__ * 
+			y_dim1 + 1], m, &info2);
+/* LAPACK CA */
+	    } else if (y[isamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ * 
+		    y_dim1] != zero) {
+/*               X(:,i) is zero vector. For consistency, */
+/*               Y(:,i) should also be zero. If Y(:,i) is not */
+/*               zero, then the data might be inconsistent or */
+/*               corrupted. If JOBS == 'C', Y(:,i) is set to */
+/*               zero and a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+		if (lsame_(jobs, "C")) {
+		    sscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1);
+		}
+/* BLAS CALL */
+	    }
+	}
+    }
+
+    if (sccoly) {
+/* The columns of Y will be normalized. */
+/* To prevent overflows, the column norms of Y are */
+/* carefully computed using SLASSQ. */
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = DNRM2( M, Y(1,i), 1 ) */
+	    scale = zero;
+	    slassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum);
+	    if (sisnan_(&scale) || sisnan_(&ssum)) {
+		*k = 0;
+		*info = -10;
+		i__2 = -(*info);
+		xerbla_("SGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of Y(:,i) overflows. First, Y(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */
+/*                 Next, the norm of Y(:,i) is stored without */
+/*                 overflow as WORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of Y(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    r__1 = one / rootsc;
+		    slascl_("G", &c__0, &c__0, &scale, &r__1, m, &c__1, &y[
+			    i__ * y_dim1 + 1], m, &info2);
+		    work[i__] = -scale * (rootsc / (real) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    work[i__] = scale * rootsc;
+		    slascl_("G", &c__0, &c__0, &work[i__], &one, m, &c__1, &y[
+			    i__ * y_dim1 + 1], m, &info2);
+/*                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC */
+/* LAPACK */
+		}
+	    } else {
+		work[i__] = zero;
+	    }
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of X. */
+	    if (work[i__] > zero) {
+		r__1 = one / work[i__];
+		sscal_(m, &r__1, &x[i__ * x_dim1 + 1], &c__1);
+/*               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (work[i__] < zero) {
+		r__1 = -work[i__];
+		r__2 = one / (real) (*m);
+		slascl_("G", &c__0, &c__0, &r__1, &r__2, m, &c__1, &x[i__ * 
+			x_dim1 + 1], m, &info2);
+/* LAPACK CA */
+	    } else if (x[isamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ * 
+		    x_dim1] != zero) {
+/*               Y(:,i) is zero vector.  If X(:,i) is not */
+/*               zero, then a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+	    }
+	}
+    }
+
+/*     <2> SVD of the data snapshot matrix X. */
+/*     ===================================== */
+/*     The left singular vectors are stored in the array X. */
+/*     The right singular vectors are in the array W. */
+/*     The array W will later on contain the eigenvectors */
+/*     of a Rayleigh quotient. */
+    numrnk = *n;
+/*      SELECT CASE ( WHTSVD ) */
+    if (*whtsvd == 1) {
+	i__1 = *lwork - *n;
+	sgesvd_("O", "S", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], 
+		ldb, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1);
+/* LAPACK CAL */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 2) {
+	i__1 = *lwork - *n;
+	sgesdd_("O", m, n, &x[x_offset], ldx, &work[1], &b[b_offset], ldb, &w[
+		w_offset], ldw, &work[*n + 1], &i__1, &iwork[1], &info1);
+/* LAPACK CAL */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 3) {
+	i__1 = *lwork - *n - f2cmax(2,*m);
+	i__2 = f2cmax(2,*m);
+	sgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &work[1], &
+		z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], 
+		liwork, &work[*n + f2cmax(2,*m) + 1], &i__1, &work[*n + 1], &
+		i__2, &info1);
+
+	slacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK C */
+	*(unsigned char *)t_or_n__ = 'T';
+    } else if (*whtsvd == 4) {
+	i__1 = *lwork - *n;
+	sgejsv_("F", "U", jsvopt, "N", "N", "P", m, n, &x[x_offset], ldx, &
+		work[1], &z__[z_offset], ldz, &w[w_offset], ldw, &work[*n + 1]
+		, &i__1, &iwork[1], &info1);
+/* LAPACK CALL */
+	slacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK CALL */
+	*(unsigned char *)t_or_n__ = 'N';
+	xscl1 = work[*n + 1];
+	xscl2 = work[*n + 2];
+	if (xscl1 != xscl2) {
+/* This is an exceptional situation. If the */
+/* data matrices are not scaled and the */
+/* largest singular value of X overflows. */
+/* In that case SGEJSV can return the SVD */
+/* in scaled form. The scaling factor can be used */
+/* to rescale the data (X and Y). */
+	    slascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], 
+		    ldy, &info2);
+	}
+/*      END SELECT */
+    }
+
+    if (info1 > 0) {
+/* The SVD selected subroutine did not converge. */
+/* Return with an error code. */
+	*info = 2;
+	return 0;
+    }
+
+    if (work[1] == zero) {
+/* The largest computed singular value of (scaled) */
+/* X is zero. Return error code -8 */
+/* (the 8th input variable had an illegal value). */
+	*k = 0;
+	*info = -8;
+	i__1 = -(*info);
+	xerbla_("SGEDMD", &i__1);
+	return 0;
+    }
+
+/* <3> Determine the numerical rank of the data */
+/*    snapshots matrix X. This depends on the */
+/*    parameters NRNK and TOL. */
+/*      SELECT CASE ( NRNK ) */
+    if (*nrnk == -1) {
+	*k = 1;
+	i__1 = numrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (work[i__] <= work[1] * *tol || work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else if (*nrnk == -2) {
+	*k = 1;
+	i__1 = numrnk - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (work[i__ + 1] <= work[i__] * *tol || work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else {
+	*k = 1;
+	i__1 = *nrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (work[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+/*          END SELECT */
+    }
+/*   Now, U = X(1:M,1:K) is the SVD/POD basis for the */
+/*   snapshot data in the input matrix X. */
+/* <4> Compute the Rayleigh quotient S = U^T * A * U. */
+/*    Depending on the requested outputs, the computation */
+/*    is organized to compute additional auxiliary */
+/*    matrices (for the residuals and refinements). */
+
+/*    In all formulas below, we need V_k*Sigma_k^(-1) */
+/*    where either V_k is in W(1:N,1:K), or V_k^T is in */
+/*    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */
+    if (lsame_(t_or_n__, "N")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    r__1 = one / work[i__];
+	    sscal_(n, &r__1, &w[i__ * w_dim1 + 1], &c__1);
+/* W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC */
+/* BLAS CALL */
+	}
+    } else {
+/* This non-unit stride access is due to the fact */
+/* that SGESVD, SGESVDQ and SGESDD return the */
+/* transposed matrix of the right singular vectors. */
+/* DO i = 1, K */
+/* CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL */
+/* ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC */
+/* END DO */
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[*n + i__] = one / work[i__];
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *k;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		w[i__ + j * w_dim1] = work[*n + i__] * w[i__ + j * w_dim1];
+	    }
+	}
+    }
+
+    if (wntref) {
+
+/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */
+/* for computing the refined Ritz vectors */
+/* (optionally, outside SGEDMD). */
+	sgemm_("N", t_or_n__, m, k, n, &one, &y[y_offset], ldy, &w[w_offset], 
+		ldw, &zero, &z__[z_offset], ldz);
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRI */
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRI */
+
+/* At this point Z contains */
+/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */
+/* this is needed for computing the residuals. */
+/* This matrix is  returned in the array B and */
+/* it can be used to compute refined Ritz vectors. */
+/* BLAS */
+	slacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+/* B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC */
+/* BLAS CALL */
+	sgemm_("T", "N", k, k, m, &one, &x[x_offset], ldx, &z__[z_offset], 
+		ldz, &zero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRI */
+/* At this point S = U^T * A * U is the Rayleigh quotient. */
+/* BLAS */
+    } else {
+/* A * U(:,1:K) is not explicitly needed and the */
+/* computation is organized differently. The Rayleigh */
+/* quotient is computed more efficiently. */
+	sgemm_("T", "N", k, n, m, &one, &x[x_offset], ldx, &y[y_offset], ldy, 
+		&zero, &z__[z_offset], ldz);
+/* Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! IN */
+/* In the two SGEMM calls here, can use K for LDZ */
+/* B */
+	sgemm_("N", t_or_n__, k, k, n, &one, &z__[z_offset], ldz, &w[w_offset]
+		, ldw, &zero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRIN */
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRIN */
+/* At this point S = U^T * A * U is the Rayleigh quotient. */
+/* If the residuals are requested, save scaled V_k into Z. */
+/* Recall that V_k or V_k^T is stored in W. */
+/* BLAS */
+	if (wntres || wntex) {
+	    if (lsame_(t_or_n__, "N")) {
+		slacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    } else {
+		slacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    }
+	}
+    }
+
+/* <5> Compute the Ritz values and (if requested) the */
+/*   right eigenvectors of the Rayleigh quotient. */
+
+    i__1 = *lwork - *n;
+    sgeev_("N", jobzl, k, &s[s_offset], lds, &reig[1], &imeig[1], &w[w_offset]
+	    , ldw, &w[w_offset], ldw, &work[*n + 1], &i__1, &info1);
+
+/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */
+/* quotient. Even in the case of complex spectrum, all */
+/* computation is done in real arithmetic. REIG and */
+/* IMEIG are the real and the imaginary parts of the */
+/* eigenvalues, so that the spectrum is given as */
+/* REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs */
+/* are listed at consecutive positions. For such a */
+/* complex conjugate pair of the eigenvalues, the */
+/* corresponding eigenvectors are also a complex */
+/* conjugate pair with the real and imaginary parts */
+/* stored column-wise in W at the corresponding */
+/* consecutive column indices. See the description of Z. */
+/* Also, see the description of SGEEV. */
+/* LAPACK C */
+    if (info1 > 0) {
+/* SGEEV failed to compute the eigenvalues and */
+/* eigenvectors of the Rayleigh quotient. */
+	*info = 3;
+	return 0;
+    }
+
+/* <6> Compute the eigenvectors (if requested) and, */
+/* the residuals (if requested). */
+
+    if (wntvec || wntex) {
+	if (wntres) {
+	    if (wntref) {
+/* Here, if the refinement is requested, we have */
+/* A*U(:,1:K) already computed and stored in Z. */
+/* For the residuals, need Y = A * U(:,1;K) * W. */
+		sgemm_("N", "N", m, k, k, &one, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zero, &y[y_offset], ldy);
+/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC */
+/* This frees Z; Y contains A * U(:,1:K) * W. */
+/* BLAS CALL */
+	    } else {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+		sgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+		sgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[
+			s_offset], lds, &zero, &z__[z_offset], ldz);
+/* Save a copy of Z into Y and free Z for holding */
+/* the Ritz vectors. */
+		slacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy);
+		if (wntex) {
+		    slacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+		}
+	    }
+	} else if (wntex) {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+	    sgemm_(t_or_n__, "N", n, k, k, &one, &z__[z_offset], ldz, &w[
+		    w_offset], ldw, &zero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+	    sgemm_("N", "N", m, k, n, &one, &y[y_offset], ldy, &s[s_offset], 
+		    lds, &zero, &b[b_offset], ldb);
+/* The above call replaces the following two calls */
+/* that were used in the developing-testing phase. */
+/* CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & */
+/*           LDS, ZERO, Z, LDZ) */
+/* Save a copy of Z into B and free Z for holding */
+/* the Ritz vectors. */
+/* CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) */
+	}
+
+/* Compute the real form of the Ritz vectors */
+	if (wntvec) {
+	    sgemm_("N", "N", m, k, k, &one, &x[x_offset], ldx, &w[w_offset], 
+		    ldw, &zero, &z__[z_offset], ldz);
+	}
+/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC */
+
+/* BLAS CALL */
+	if (wntres) {
+	    i__ = 1;
+	    while(i__ <= *k) {
+		if (imeig[i__] == zero) {
+/* have a real eigenvalue with real eigenvector */
+		    r__1 = -reig[i__];
+		    saxpy_(m, &r__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * 
+			    y_dim1 + 1], &c__1);
+/* Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! */
+
+		    res[i__] = snrm2_(m, &y[i__ * y_dim1 + 1], &c__1);
+		    ++i__;
+		} else {
+/* Have a complex conjugate pair */
+/* REIG(i) +- sqrt(-1)*IMEIG(i). */
+/* Since all computation is done in real */
+/* arithmetic, the formula for the residual */
+/* is recast for real representation of the */
+/* complex conjugate eigenpair. See the */
+/* description of RES. */
+		    ab[0] = reig[i__];
+		    ab[1] = -imeig[i__];
+		    ab[2] = imeig[i__];
+		    ab[3] = reig[i__];
+		    r__1 = -one;
+		    sgemm_("N", "N", m, &c__2, &c__2, &r__1, &z__[i__ * 
+			    z_dim1 + 1], ldz, ab, &c__2, &one, &y[i__ * 
+			    y_dim1 + 1], ldy);
+/* Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INT */
+/* BL */
+		    res[i__] = slange_("F", m, &c__2, &y[i__ * y_dim1 + 1], 
+			    ldy, &work[*n + 1]);
+/* LA */
+		    res[i__ + 1] = res[i__];
+		    i__ += 2;
+		}
+	    }
+	}
+    }
+
+    if (*whtsvd == 4) {
+	work[*n + 1] = xscl1;
+	work[*n + 2] = xscl2;
+    }
+
+/*     Successful exit. */
+    if (! badxy) {
+	*info = 0;
+    } else {
+/* A warning on possible data inconsistency. */
+/* This should be a rare event. */
+	*info = 4;
+    }
+/* ............................................................ */
+    return 0;
+/*     ...... */
+} /* sgedmd_ */
+
diff --git a/lapack-netlib/SRC/sgedmdq.c b/lapack-netlib/SRC/sgedmdq.c
index 447b23014..0adf3bda3 100644
--- a/lapack-netlib/SRC/sgedmdq.c
+++ b/lapack-netlib/SRC/sgedmdq.c
@@ -509,3 +509,788 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+
+/* Subroutine */ int sgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, 
+	char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, real 
+	*f, integer *ldf, real *x, integer *ldx, real *y, integer *ldy, 
+	integer *nrnk, real *tol, integer *k, real *reig, real *imeig, real *
+	z__, integer *ldz, real *res, real *b, integer *ldb, real *v, integer 
+	*ldv, real *s, integer *lds, real *work, integer *lwork, integer *
+	iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, 
+	    z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, 
+	    i__1, i__2;
+
+    /* Local variables */
+    real zero;
+    integer info1;
+    extern logical lsame_(char *, char *);
+    char jobvl[1];
+    integer minmn;
+    logical wantq;
+    integer mlwqr, olwqr;
+    logical wntex;
+    extern /* Subroutine */ int sgedmd_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, integer *, real *, integer *, real *, real *, real *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *);
+    integer mlwdmd, olwdmd;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    logical sccolx, sccoly;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *);
+    integer iminwr;
+    logical wntvec, wntvcf;
+    integer mlwgqr;
+    logical wntref;
+    integer mlwork, olwgqr, olwork;
+    real rdummy[2];
+    integer mlwmqr, olwmqr;
+    logical lquery, wntres, wnttrf, wntvcq;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), sormqr_(char 
+	    *, char *, integer *, integer *, integer *, real *, integer *, 
+	    real *, real *, integer *, real *, integer *, integer *);
+    real one;
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real32 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ..... */
+/*     Purpose */
+/*     ======= */
+/*     SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices, using a QR factorization */
+/*     based compression of the data. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, SGEDMDQ computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, SGEDMDQ returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office. */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. The data snapshots are the columns */
+/*     of F. The leading N-1 columns of F are denoted X and the */
+/*     trailing N-1 columns are denoted Y. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Z*V, where Z */
+/*            is orthonormal and V contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of F, V, Z. */
+/*     'Q' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Q*Z, where Z */
+/*            contains the eigenvectors of the compression of the */
+/*            underlying discretized operator onto the span of */
+/*            the data snapshots. See the descriptions of F, V, Z. */
+/*            Q is from the initial QR factorization. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will */
+/*            be computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBQ (input) CHARACTER*1 */
+/*     Specifies whether to explicitly compute and return the */
+/*     orthogonal matrix from the QR factorization. */
+/*     'Q' :: The matrix Q of the QR factorization of the data */
+/*            snapshot matrix is computed and stored in the */
+/*            array F. See the description of F. */
+/*     'N' :: The matrix Q is not explicitly computed. */
+/* ..... */
+/*     JOBT (input) CHARACTER*1 */
+/*     Specifies whether to return the upper triangular factor */
+/*     from the QR factorization. */
+/*     'R' :: The matrix R of the QR factorization of the data */
+/*            snapshot matrix F is returned in the array Y. */
+/*            See the description of Y and Further details. */
+/*     'N' :: The matrix R is not returned. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/*     To be useful on exit, this option needs JOBQ='Q'. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: SGESVD (the QR SVD algorithm) */
+/*     2 :: SGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M >= 0 */
+/*     The state space dimension (the number of rows of F) */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshots from a single trajectory, */
+/*     taken at equidistant discrete times. This is the */
+/*     number of columns of F. */
+/* ..... */
+/*     F (input/output) REAL(KIND=WP) M-by-N array */
+/*     > On entry, */
+/*     the columns of F are the sequence of data snapshots */
+/*     from a single trajectory, taken at equidistant discrete */
+/*     times. It is assumed that the column norms of F are */
+/*     in the range of the normalized floating point numbers. */
+/*     < On exit, */
+/*     If JOBQ == 'Q', the array F contains the orthogonal */
+/*     matrix/factor of the QR factorization of the initial */
+/*     data snapshots matrix F. See the description of JOBQ. */
+/*     If JOBQ == 'N', the entries in F strictly below the main */
+/*     diagonal contain, column-wise, the information on the */
+/*     Householder vectors, as returned by SGEQRF. The */
+/*     remaining information to restore the orthogonal matrix */
+/*     of the initial QR factorization is stored in WORK(1:N). */
+/*     See the description of WORK. */
+/* ..... */
+/*     LDF (input) INTEGER, LDF >= M */
+/*     The leading dimension of the array F. */
+/* ..... */
+/*     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     X is used as workspace to hold representations of the */
+/*     leading N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, the leading K columns of X contain the leading */
+/*     K left singular vectors of the above described content */
+/*     of X. To lift them to the space of the left singular */
+/*     vectors U(:,1:K)of the input data, pre-multiply with the */
+/*     Q factor from the initial QR factorization. */
+/*     See the descriptions of F, K, V  and Z. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= N */
+/*     The leading dimension of the array X */
+/* ..... */
+/*     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     Y is used as workspace to hold representations of the */
+/*     trailing N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, */
+/*     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */
+/*     triangular factor from the QR factorization of the data */
+/*     snapshot matrix F. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= N */
+/*     The leading dimension of the array Y */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N-1 :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the description of K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the SVD/POD basis for the leading N-1 */
+/*     data snapshots (columns of F) and the number of the */
+/*     computed Ritz pairs. The value of K is determined */
+/*     according to the rule set by the parameters NRNK and */
+/*     TOL. See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     REIG (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<=N) entries of REIG contain */
+/*     the real parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     See the descriptions of K, IMEIG, Z. */
+/* ..... */
+/*     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<N) entries of REIG contain */
+/*     the imaginary parts of the computed eigenvalues */
+/*     REIG(1:K) + sqrt(-1)*IMEIG(1:K). */
+/*     The eigenvalues are determined as follows: */
+/*     If IMEIG(i) == 0, then the corresponding eigenvalue is */
+/*     real, LAMBDA(i) = REIG(i). */
+/*     If IMEIG(i)>0, then the corresponding complex */
+/*     conjugate pair of eigenvalues reads */
+/*     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i) */
+/*     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i) */
+/*     That is, complex conjugate pairs have consecutive */
+/*     indices (i,i+1), with the positive imaginary part */
+/*     listed first. */
+/*     See the descriptions of K, REIG, Z. */
+/* ..... */
+/*     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array */
+/*     If JOBZ =='V' then */
+/*        Z contains real Ritz vectors as follows: */
+/*        If IMEIG(i)=0, then Z(:,i) is an eigenvector of */
+/*        the i-th Ritz value. */
+/*        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then */
+/*        [Z(:,i) Z(:,i+1)] span an invariant subspace and */
+/*        the Ritz values extracted from this subspace are */
+/*        REIG(i) + sqrt(-1)*IMEIG(i) and */
+/*        REIG(i) - sqrt(-1)*IMEIG(i). */
+/*        The corresponding eigenvectors are */
+/*        Z(:,i) + sqrt(-1)*Z(:,i+1) and */
+/*        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively. */
+/*     If JOBZ == 'F', then the above descriptions hold for */
+/*     the columns of Z*V, where the columns of V are the */
+/*     eigenvectors of the K-by-K Rayleigh quotient, and Z is */
+/*     orthonormal. The columns of V are similarly structured: */
+/*     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if */
+/*     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and */
+/*                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1) */
+/*     are the eigenvectors of LAMBDA(i), LAMBDA(i+1). */
+/*     See the descriptions of REIG, IMEIG, X and V. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs. */
+/*     If LAMBDA(i) is real, then */
+/*        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2. */
+/*     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair */
+/*     then */
+/*     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F */
+/*     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ] */
+/*               [-imag(LAMBDA(i)) real(LAMBDA(i)) ]. */
+/*     It holds that */
+/*     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2 */
+/*     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2 */
+/*     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1) */
+/*           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1) */
+/*     See the description of Z. */
+/* ..... */
+/*     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array. */
+/*     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:N,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     In both cases, the content of B can be lifted to the */
+/*     original dimension of the input data by pre-multiplying */
+/*     with the Q factor from the initial QR factorization. */
+/*     Here A denotes a compression of the underlying operator. */
+/*     See the descriptions of F and X. */
+/*     If JOBF =='N', then B is not referenced. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= MIN(M,N) */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array */
+/*     On exit, V(1:K,1:K) contains the K eigenvectors of */
+/*     the Rayleigh quotient. The eigenvectors of a complex */
+/*     conjugate pair of eigenvalues are returned in real form */
+/*     as explained in the description of Z. The Ritz vectors */
+/*     (returned in Z) are the product of X and V; see */
+/*     the descriptions of X and Z. */
+/* ..... */
+/*     LDV (input) INTEGER, LDV >= N-1 */
+/*     The leading dimension of the array V. */
+/* ..... */
+/*     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by SGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N-1 */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     WORK(1:MIN(M,N)) contains the scalar factors of the */
+/*     elementary reflectors as returned by SGEQRF of the */
+/*     M-by-N input matrix F. */
+/*     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of */
+/*     the input submatrix F(1:M,1:N-1). */
+/*     If the call to SGEDMDQ is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. Hence, the */
+/*     length of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector WORK. */
+/*     LWORK is calculated as follows: */
+/*     Let MLWQR  = N (minimal workspace for SGEQRF[M,N]) */
+/*         MLWDMD = minimal workspace for SGEDMD (see the */
+/*                  description of LWORK in SGEDMD) for */
+/*                  snapshots of dimensions MIN(M,N)-by-(N-1) */
+/*         MLWMQR = N (minimal workspace for */
+/*                    SORMQR['L','N',M,N,N]) */
+/*         MLWGQR = N (minimal workspace for SORGQR[M,N,N]) */
+/*     Then */
+/*     LWORK = MAX(N+MLWQR, N+MLWDMD) */
+/*     is updated as follows: */
+/*        if   JOBZ == 'V' or JOBZ == 'F' THEN */
+/*             LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR ) */
+/*        if   JOBQ == 'Q' THEN */
+/*             LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR) */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     Let M1=MIN(M,N), N1=N-1. Then */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */
+/*     If on entry LIWORK = -1, then a worskpace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     Local array */
+/*     ~~~~~~~~~~~ */
+
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     External subroutines */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+    /* Parameter adjustments */
+    f_dim1 = *ldf;
+    f_offset = 1 + f_dim1 * 1;
+    f -= f_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --reig;
+    --imeig;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    one = 1.f;
+    zero = 0.f;
+/* .......................................................... */
+
+/*    Test the input arguments */
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntvcf = lsame_(jobz, "F");
+    wntvcq = lsame_(jobz, "Q");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    wantq = lsame_(jobq, "Q");
+    wnttrf = lsame_(jobt, "R");
+    minmn = f2cmin(*m,*n);
+    *info = 0;
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && lsame_(jobz, "N")) {
+	*info = -3;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -4;
+    } else if (! (wnttrf || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -6;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -7;
+    } else if (*m < 0) {
+	*info = -8;
+    } else if (*n < 0 || *n > *m + 1) {
+	*info = -9;
+    } else if (*ldf < *m) {
+	*info = -11;
+    } else if (*ldx < minmn) {
+	*info = -13;
+    } else if (*ldy < minmn) {
+	*info = -15;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -16;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -17;
+    } else if (*ldz < *m) {
+	*info = -22;
+    } else if ((wntref || wntex) && *ldb < minmn) {
+	*info = -25;
+    } else if (*ldv < *n - 1) {
+	*info = -27;
+    } else if (*lds < *n - 1) {
+	*info = -29;
+    }
+
+    if (wntvec || wntvcf) {
+	*(unsigned char *)jobvl = 'V';
+    } else {
+	*(unsigned char *)jobvl = 'N';
+    }
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0 || *n == 1) {
+/* All output except K is void. INFO=1 signals */
+/* the void input. In case of a workspace query, */
+/* the minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		work[1] = 2.f;
+		work[2] = 2.f;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlwqr = f2cmax(1,*n);
+/* Minimal workspace length for SGEQRF. */
+	mlwork = f2cmin(*m,*n) + mlwqr;
+	if (lquery) {
+	    sgeqrf_(m, n, &f[f_offset], ldf, &work[1], rdummy, &c_n1, &info1);
+	    olwqr = (integer) rdummy[0];
+	    olwork = f2cmin(*m,*n) + olwqr;
+	}
+	i__1 = *n - 1;
+	sgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], 
+		ldx, &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &
+		z__[z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], 
+		ldv, &s[s_offset], lds, &work[1], &c_n1, &iwork[1], liwork, &
+		info1);
+	mlwdmd = (integer) work[1];
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwdmd;
+	mlwork = f2cmax(i__1,i__2);
+	iminwr = iwork[1];
+	if (lquery) {
+	    olwdmd = (integer) work[2];
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwdmd;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	if (wntvec || wntvcf) {
+	    mlwmqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + *n - 1 + mlwmqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		sormqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &work[1], &
+			z__[z_offset], ldz, &work[1], &c_n1, &info1);
+		olwmqr = (integer) work[1];
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + *n - 1 + olwmqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (wantq) {
+	    mlwgqr = *n;
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + *n - 1 + mlwgqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		sorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[
+			1], &c_n1, &info1);
+		olwgqr = (integer) work[1];
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + *n - 1 + olwgqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	iminwr = f2cmax(1,iminwr);
+	mlwork = f2cmax(2,mlwork);
+	if (*lwork < mlwork && ! lquery) {
+	    *info = -31;
+	}
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -33;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEDMDQ", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	work[1] = (real) mlwork;
+	work[2] = (real) olwork;
+	return 0;
+    }
+/* ..... */
+/*     Initial QR factorization that is used to represent the */
+/*     snapshots as elements of lower dimensional subspace. */
+/*     For large scale computation with M >>N , at this place */
+/*     one can use an out of core QRF. */
+
+    i__1 = *lwork - minmn;
+    sgeqrf_(m, n, &f[f_offset], ldf, &work[1], &work[minmn + 1], &i__1, &
+	    info1);
+
+/*     Define X and Y as the snapshots representations in the */
+/*     orthogonal basis computed in the QR factorization. */
+/*     X corresponds to the leading N-1 and Y to the trailing */
+/*     N-1 snapshots. */
+    i__1 = *n - 1;
+    slaset_("L", &minmn, &i__1, &zero, &zero, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    slacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    slacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy);
+    if (*m >= 3) {
+	i__1 = minmn - 2;
+	i__2 = *n - 2;
+	slaset_("L", &i__1, &i__2, &zero, &zero, &y[y_dim1 + 3], ldy);
+    }
+
+/*     Compute the DMD of the projected snapshot pairs (X,Y) */
+    i__1 = *n - 1;
+    i__2 = *lwork - minmn;
+    sgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx,
+	     &y[y_offset], ldy, nrnk, tol, k, &reig[1], &imeig[1], &z__[
+	    z_offset], ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[
+	    s_offset], lds, &work[minmn + 1], &i__2, &iwork[1], liwork, &
+	    info1);
+    if (info1 == 2 || info1 == 3) {
+/* Return with error code. */
+	*info = info1;
+	return 0;
+    } else {
+	*info = info1;
+    }
+
+/*     The Ritz vectors (Koopman modes) can be explicitly */
+/*     formed or returned in factored form. */
+    if (wntvec) {
+/* Compute the eigenvectors explicitly. */
+	if (*m > minmn) {
+	    i__1 = *m - minmn;
+	    slaset_("A", &i__1, k, &zero, &zero, &z__[minmn + 1 + z_dim1], 
+		    ldz);
+	}
+	i__1 = *lwork - (minmn + *n - 1);
+	sormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[
+		z_offset], ldz, &work[minmn + *n], &i__1, &info1);
+    } else if (wntvcf) {
+/*   Return the Ritz vectors (eigenvectors) in factored */
+/*   form Z*V, where Z contains orthonormal matrix (the */
+/*   product of Q from the initial QR factorization and */
+/*   the SVD/POD_basis returned by SGEDMD in X) and the */
+/*   second factor (the eigenvectors of the Rayleigh */
+/*   quotient) is in the array V, as returned by SGEDMD. */
+	slacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz);
+	if (*m > *n) {
+	    i__1 = *m - *n;
+	    slaset_("A", &i__1, k, &zero, &zero, &z__[*n + 1 + z_dim1], ldz);
+	}
+	i__1 = *lwork - (minmn + *n - 1);
+	sormqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &work[1], &z__[
+		z_offset], ldz, &work[minmn + *n], &i__1, &info1);
+    }
+
+/*     Some optional output variables: */
+
+/*     The upper triangular factor in the initial QR */
+/*     factorization is optionally returned in the array Y. */
+/*     This is useful if this call to SGEDMDQ is to be */
+/*     followed by a streaming DMD that is implemented in a */
+/*     QR compressed form. */
+    if (wnttrf) {
+/* Return the upper triangular R in Y */
+	slaset_("A", &minmn, n, &zero, &zero, &y[y_offset], ldy);
+	slacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy);
+    }
+
+/*     The orthonormal/orthogonal factor in the initial QR */
+/*     factorization is optionally returned in the array F. */
+/*     Same as with the triangular factor above, this is */
+/*     useful in a streaming DMD. */
+    if (wantq) {
+/* Q overwrites F */
+	i__1 = *lwork - (minmn + *n - 1);
+	sorgqr_(m, &minmn, &minmn, &f[f_offset], ldf, &work[1], &work[minmn + 
+		*n], &i__1, &info1);
+    }
+
+    return 0;
+
+} /* sgedmdq_ */
+
diff --git a/lapack-netlib/SRC/zgedmd.c b/lapack-netlib/SRC/zgedmd.c
index 447b23014..c1b39ba3e 100644
--- a/lapack-netlib/SRC/zgedmd.c
+++ b/lapack-netlib/SRC/zgedmd.c
@@ -509,3 +509,1168 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+
+/* Subroutine */ int zgedmd_(char *jobs, char *jobz, char *jobr, char *jobf, 
+	integer *whtsvd, integer *m, integer *n, doublecomplex *x, integer *
+	ldx, doublecomplex *y, integer *ldy, integer *nrnk, doublereal *tol, 
+	integer *k, doublecomplex *eigs, doublecomplex *z__, integer *ldz, 
+	doublereal *res, doublecomplex *b, integer *ldb, doublecomplex *w, 
+	integer *ldw, doublecomplex *s, integer *lds, doublecomplex *zwork, 
+	integer *lzwork, doublereal *rwork, integer *lrwork, integer *iwork, 
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer x_dim1, x_offset, y_dim1, y_offset, z_dim1, z_offset, b_dim1, 
+	    b_offset, w_dim1, w_offset, s_dim1, s_offset, i__1, i__2, i__3, 
+	    i__4, i__5;
+    doublereal d__1, d__2;
+    doublecomplex z__1, z__2;
+
+    /* Local variables */
+    doublecomplex zone;
+    doublereal zero, ssum;
+    integer info1, info2;
+    doublereal xscl1, xscl2;
+    integer i__, j;
+    doublereal scale;
+    extern logical lsame_(char *, char *);
+    logical badxy;
+    doublereal small;
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublecomplex *, doublecomplex *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
+	    integer *);
+    char jobzl[1];
+    extern /* Subroutine */ int zgeev_(char *, char *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+	     doublereal *, integer *);
+    logical wntex;
+    doublecomplex zzero;
+    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
+	    char *);
+    extern logical disnan_(doublereal *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    char t_or_n__[1];
+    extern /* Subroutine */ int zdscal_(integer *, doublereal *, 
+	    doublecomplex *, integer *), zgesdd_(char *, integer *, integer *,
+	     doublecomplex *, integer *, doublereal *, doublecomplex *, 
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+	     doublereal *, integer *, integer *), zlascl_(char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *, doublecomplex *, integer *, integer *);
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+    logical sccolx, sccoly;
+    integer lwrsdd, mwrsdd;
+    extern /* Subroutine */ int zgesvd_(char *, char *, integer *, integer *, 
+	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+	     doublereal *, integer *), zlacpy_(char *, 
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     integer *);
+    integer iminwr;
+    logical wntref, wntvec;
+    doublereal rootsc;
+    integer lwrkev, mlwork, mwrkev, numrnk, olwork, lwrsvd, mwrsvd, mlrwrk;
+    logical lquery, wntres;
+    char jsvopt[1];
+    integer lwrsvj, mwrsvj;
+    doublereal rdummy[2];
+    extern /* Subroutine */ int zgejsv_(char *, char *, char *, char *, char *
+	    , char *, integer *, integer *, doublecomplex *, integer *, 
+	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
+	    integer *, doublecomplex *, integer *, doublereal *, integer *, 
+	    integer *, integer *), zlassq_(integer *, doublecomplex *, integer *, 
+	    doublereal *, doublereal *), mecago_();
+    integer lwrsvq, mwrsvq;
+    doublereal ofl, one;
+    extern /* Subroutine */ int zgesvdq_(char *, char *, char *, char *, char 
+	    *, integer *, integer *, doublecomplex *, integer *, doublereal *,
+	     doublecomplex *, integer *, doublecomplex *, integer *, integer *
+	    , integer *, integer *, doublecomplex *, integer *, doublereal *, 
+	    integer *, integer *);
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real64 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ............................................................ */
+/*     Purpose */
+/*     ======= */
+/*     ZGEDMD computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, ZGEDMD computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, ZGEDMD returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/* ...................................................................... */
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ............................................................ */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product X(:,1:K)*W, where X */
+/*            contains a POD basis (leading left singular vectors */
+/*            of the data matrix X) and W contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of K, X, W, Z. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will be */
+/*            computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: ZGESVD (the QR SVD algorithm) */
+/*     2 :: ZGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M>= 0 */
+/*     The state space dimension (the row dimension of X, Y). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshot pairs */
+/*     (the number of columns of X and Y). */
+/* ..... */
+/*     X (input/output) COMPLEX(KIND=WP) M-by-N array */
+/*   > On entry, X contains the data snapshot matrix X. It is */
+/*     assumed that the column norms of X are in the range of */
+/*     the normalized floating point numbers. */
+/*   < On exit, the leading K columns of X contain a POD basis, */
+/*     i.e. the leading K left singular vectors of the input */
+/*     data matrix X, U(:,1:K). All N columns of X contain all */
+/*     left singular vectors of the input matrix X. */
+/*     See the descriptions of K, Z and W. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= M */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array */
+/*   > On entry, Y contains the data snapshot matrix Y */
+/*   < On exit, */
+/*     If JOBR == 'R', the leading K columns of Y  contain */
+/*     the residual vectors for the computed Ritz pairs. */
+/*     See the description of RES. */
+/*     If JOBR == 'N', Y contains the original input data, */
+/*                     scaled according to the value of JOBS. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= M */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the descriptions of TOL and  K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the POD basis for the data snapshot */
+/*     matrix X and the number of the computed Ritz pairs. */
+/*     The value of K is determined according to the rule set */
+/*     by the parameters NRNK and TOL. */
+/*     See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     EIGS (output) COMPLEX(KIND=WP) N-by-1 array */
+/*     The leading K (K<=N) entries of EIGS contain */
+/*     the computed eigenvalues (Ritz values). */
+/*     See the descriptions of K, and Z. */
+/* ..... */
+/*     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array */
+/*     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i) */
+/*     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */
+/*     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i) */
+/*     is an eigenvector corresponding to EIGS(i). The columns */
+/*     of W(1:k,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. */
+/*     See the descriptions of EIGS, X and W. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) N-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs, */
+/*     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */
+/*     See the description of EIGS and Z. */
+/* ..... */
+/*     B (output) COMPLEX(KIND=WP)  M-by-N array. */
+/*     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:M,1:K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     If JOBF =='N', then B is not referenced. */
+/*     See the descriptions of X, W, K. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= M */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     W (workspace/output) COMPLEX(KIND=WP) N-by-N array */
+/*     On exit, W(1:K,1:K) contains the K computed */
+/*     eigenvectors of the matrix Rayleigh quotient. */
+/*     The Ritz vectors (returned in Z) are the */
+/*     product of X (containing a POD basis for the input */
+/*     matrix X) and W. See the descriptions of K, S, X and Z. */
+/*     W is also used as a workspace to temporarily store the */
+/*     right singular vectors of X. */
+/* ..... */
+/*     LDW (input) INTEGER, LDW >= N */
+/*     The leading dimension of the array W. */
+/* ..... */
+/*     S (workspace/output) COMPLEX(KIND=WP) N-by-N array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by ZGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array */
+/*     ZWORK is used as complex workspace in the complex SVD, as */
+/*     specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing */
+/*     the eigenvalues of a Rayleigh quotient. */
+/*     If the call to ZGEDMD is only workspace query, then */
+/*     ZWORK(1) contains the minimal complex workspace length and */
+/*     ZWORK(2) is the optimal complex workspace length. */
+/*     Hence, the length of work is at least 2. */
+/*     See the description of LZWORK. */
+/* ..... */
+/*     LZWORK (input) INTEGER */
+/*     The minimal length of the workspace vector ZWORK. */
+/*     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV), */
+/*     where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal */
+/*     LZWORK_SVD is calculated as follows */
+/*     If WHTSVD == 1 :: ZGESVD :: */
+/*        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */
+/*     If WHTSVD == 2 :: ZGESDD :: */
+/*        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) */
+/*     If WHTSVD == 3 :: ZGESVDQ :: */
+/*        LZWORK_SVD = obtainable by a query */
+/*     If WHTSVD == 4 :: ZGEJSV :: */
+/*        LZWORK_SVD = obtainable by a query */
+/*     If on entry LZWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths and returns them in */
+/*     LZWORK(1) and LZWORK(2), respectively. */
+/* ..... */
+/*     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array */
+/*     On exit, RWORK(1:N) contains the singular values of */
+/*     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). */
+/*     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain */
+/*     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X */
+/*     and Y to avoid overflow in the SVD of X. */
+/*     This may be of interest if the scaling option is off */
+/*     and as many as possible smallest eigenvalues are */
+/*     desired to the highest feasible accuracy. */
+/*     If the call to ZGEDMD is only workspace query, then */
+/*     RWORK(1) contains the minimal workspace length. */
+/*     See the description of LRWORK. */
+/* ..... */
+/*     LRWORK (input) INTEGER */
+/*     The minimal length of the workspace vector RWORK. */
+/*     LRWORK is calculated as follows: */
+/*     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where */
+/*     LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace */
+/*     for the SVD subroutine determined by the input parameter */
+/*     WHTSVD. */
+/*     If WHTSVD == 1 :: ZGESVD :: */
+/*        LRWORK_SVD = 5*MIN(M,N) */
+/*     If WHTSVD == 2 :: ZGESDD :: */
+/*        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), */
+/*        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) */
+/*     If WHTSVD == 3 :: ZGESVDQ :: */
+/*        LRWORK_SVD = obtainable by a query */
+/*     If WHTSVD == 4 :: ZGEJSV :: */
+/*        LRWORK_SVD = obtainable by a query */
+/*     If on entry LRWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     real workspace length and returns it in RWORK(1). */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for  ZWORK, RWORK and */
+/*     IWORK. See the descriptions of ZWORK, RWORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     Local arrays */
+/*     ~~~~~~~~~~~~ */
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* ............................................................ */
+    /* Parameter adjustments */
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --eigs;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1 * 1;
+    w -= w_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --zwork;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    zero = 0.f;
+    one = 1.f;
+    zzero.r = 0.f, zzero.i = 0.f;
+    zone.r = 1.f, zone.i = 0.f;
+
+/*    Test the input arguments */
+
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    *info = 0;
+    lquery = *lzwork == -1 || *liwork == -1 || *lrwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || lsame_(jobz, "N") || lsame_(
+	    jobz, "F"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && ! wntvec) {
+	*info = -3;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -4;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*ldx < *m) {
+	*info = -9;
+    } else if (*ldy < *m) {
+	*info = -11;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -12;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -13;
+    } else if (*ldz < *m) {
+	*info = -17;
+    } else if ((wntref || wntex) && *ldb < *m) {
+	*info = -20;
+    } else if (*ldw < *n) {
+	*info = -22;
+    } else if (*lds < *n) {
+	*info = -24;
+    }
+
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0) {
+/* Quick return. All output except K is void. */
+/* INFO=1 signals the void input. */
+/* In case of a workspace query, the default */
+/* minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		rwork[1] = 1.;
+		zwork[1].r = 2., zwork[1].i = 0.;
+		zwork[2].r = 2., zwork[2].i = 0.;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	iminwr = 1;
+	mlrwrk = f2cmax(1,*n);
+	mlwork = 2;
+	olwork = 2;
+/*         SELECT CASE ( WHTSVD ) */
+	if (*whtsvd == 1) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of ZGESVD: */
+/* MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) */
+/* Computing MAX */
+	    i__1 = 1, i__2 = (f2cmin(*m,*n) << 1) + f2cmax(*m,*n);
+	    mwrsvd = f2cmax(i__1,i__2);
+	    mlwork = f2cmax(mlwork,mwrsvd);
+/* Computing MAX */
+	    i__1 = mlrwrk, i__2 = *n + f2cmin(*m,*n) * 5;
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		zgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[
+			b_offset], ldb, &w[w_offset], ldw, &zwork[1], &c_n1, 
+			rdummy, &info1);
+		lwrsvd = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvd);
+	    }
+	} else if (*whtsvd == 2) {
+/* The following is specified as the minimal */
+/* length of WORK in the definition of ZGESDD: */
+/* MWRSDD = 2*f2cmin(M,N)*f2cmin(M,N)+2*f2cmin(M,N)+f2cmax(M,N). */
+/* RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) */
+/* In LAPACK 3.10.1 RWORK is defined differently. */
+/* Below we take f2cmax over the two versions. */
+/* IMINWR = 8*MIN(M,N) */
+	    mwrsdd = (f2cmin(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n) << 1) + f2cmax(
+		    *m,*n);
+	    mlwork = f2cmax(mlwork,mwrsdd);
+	    iminwr = f2cmin(*m,*n) << 3;
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = f2cmin(*m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 7, i__4 = f2cmin(*
+		    m,*n) * 5 * f2cmin(*m,*n) + f2cmin(*m,*n) * 5, i__3 = f2cmax(i__3,
+		    i__4), i__4 = (f2cmax(*m,*n) << 1) * f2cmin(*m,*n) + (f2cmin(*m,*n)
+		     << 1) * f2cmin(*m,*n) + f2cmin(*m,*n);
+	    i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4);
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		zgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset],
+			 ldb, &w[w_offset], ldw, &zwork[1], &c_n1, rdummy, &
+			iwork[1], &info1);
+/* Computing MAX */
+		i__1 = mwrsdd, i__2 = (integer) zwork[1].r;
+		lwrsdd = f2cmax(i__1,i__2);
+/* Possible bug in ZGESDD optimal workspace size. */
+		olwork = f2cmax(olwork,lwrsdd);
+	    }
+	} else if (*whtsvd == 3) {
+	    zgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[
+		    1], &z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &
+		    iwork[1], &c_n1, &zwork[1], &c_n1, rdummy, &c_n1, &info1);
+	    iminwr = iwork[1];
+	    mwrsvq = (integer) zwork[2].r;
+	    mlwork = f2cmax(mlwork,mwrsvq);
+/* Computing MAX */
+	    i__1 = mlrwrk, i__2 = *n + (integer) rdummy[0];
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		lwrsvq = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvq);
+	    }
+	} else if (*whtsvd == 4) {
+	    *(unsigned char *)jsvopt = 'J';
+	    zgejsv_("F", "U", jsvopt, "R", "N", "P", m, n, &x[x_offset], ldx, 
+		    &rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[
+		    1], &c_n1, rdummy, &c_n1, &iwork[1], &info1);
+	    iminwr = iwork[1];
+	    mwrsvj = (integer) zwork[2].r;
+	    mlwork = f2cmax(mlwork,mwrsvj);
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = 7, i__4 = (integer) rdummy[0];
+	    i__1 = mlrwrk, i__2 = *n + f2cmax(i__3,i__4);
+	    mlrwrk = f2cmax(i__1,i__2);
+	    if (lquery) {
+		lwrsvj = (integer) zwork[1].r;
+		olwork = f2cmax(olwork,lwrsvj);
+	    }
+/*         END SELECT */
+	}
+	if (wntvec || wntex || lsame_(jobz, "F")) {
+	    *(unsigned char *)jobzl = 'V';
+	} else {
+	    *(unsigned char *)jobzl = 'N';
+	}
+/* Workspace calculation to the ZGEEV call */
+/* Computing MAX */
+	i__1 = 1, i__2 = *n << 1;
+	mwrkev = f2cmax(i__1,i__2);
+	mlwork = f2cmax(mlwork,mwrkev);
+/* Computing MAX */
+	i__1 = mlrwrk, i__2 = *n + (*n << 1);
+	mlrwrk = f2cmax(i__1,i__2);
+	if (lquery) {
+	    zgeev_("N", jobzl, n, &s[s_offset], lds, &eigs[1], &w[w_offset], 
+		    ldw, &w[w_offset], ldw, &zwork[1], &c_n1, &rwork[1], &
+		    info1);
+	    lwrkev = (integer) zwork[1].r;
+	    olwork = f2cmax(olwork,lwrkev);
+	}
+
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -30;
+	}
+	if (*lrwork < mlrwrk && ! lquery) {
+	    *info = -28;
+	}
+	if (*lzwork < mlwork && ! lquery) {
+	    *info = -26;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEDMD", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	rwork[1] = (doublereal) mlrwrk;
+	zwork[1].r = (doublereal) mlwork, zwork[1].i = 0.;
+	zwork[2].r = (doublereal) olwork, zwork[2].i = 0.;
+	return 0;
+    }
+/* ............................................................ */
+
+    ofl = dlamch_("O");
+    small = dlamch_("S");
+    badxy = FALSE_;
+
+/*     <1> Optional scaling of the snapshots (columns of X, Y) */
+/*     ========================================================== */
+    if (sccolx) {
+/* The columns of X will be normalized. */
+/* To prevent overflows, the column norms of X are */
+/* carefully computed using ZLASSQ. */
+	*k = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* WORK(i) = DZNRM2( M, X(1,i), 1 ) */
+	    scale = zero;
+	    zlassq_(m, &x[i__ * x_dim1 + 1], &c__1, &scale, &ssum);
+	    if (disnan_(&scale) || disnan_(&ssum)) {
+		*k = 0;
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("ZGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of X(:,i) overflows. First, X(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. */
+/*                 Next, the norm of X(:,i) is stored without */
+/*                 overflow as RWORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of X(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    d__1 = one / rootsc;
+		    zlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &x[
+			    i__ * x_dim1 + 1], ldx, &info2);
+		    rwork[i__] = -scale * (rootsc / (doublereal) (*m));
+		} else {
+/*                 X(:,i) will be scaled to unit 2-norm */
+		    rwork[i__] = scale * rootsc;
+		    zlascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, &
+			    x[i__ * x_dim1 + 1], ldx, &info2);
+/*                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC */
+/* LAPACK CALL */
+		}
+	    } else {
+		rwork[i__] = zero;
+		++(*k);
+	    }
+	}
+	if (*k == *n) {
+/* All columns of X are zero. Return error code -8. */
+/* (the 8th input variable had an illegal value) */
+	    *k = 0;
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("ZGEDMD", &i__1);
+	    return 0;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of Y. */
+	    if (rwork[i__] > zero) {
+		d__1 = one / rwork[i__];
+		zdscal_(m, &d__1, &y[i__ * y_dim1 + 1], &c__1);
+/*               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (rwork[i__] < zero) {
+		d__1 = -rwork[i__];
+		d__2 = one / (doublereal) (*m);
+		zlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &y[i__ * 
+			y_dim1 + 1], ldy, &info2);
+/* LAPACK C */
+	    } else if (z_abs(&y[izamax_(m, &y[i__ * y_dim1 + 1], &c__1) + i__ 
+		    * y_dim1]) != zero) {
+/*               X(:,i) is zero vector. For consistency, */
+/*               Y(:,i) should also be zero. If Y(:,i) is not */
+/*               zero, then the data might be inconsistent or */
+/*               corrupted. If JOBS == 'C', Y(:,i) is set to */
+/*               zero and a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+		if (lsame_(jobs, "C")) {
+		    zdscal_(m, &zero, &y[i__ * y_dim1 + 1], &c__1);
+		}
+/* BLAS CALL */
+	    }
+	}
+    }
+
+    if (sccoly) {
+/* The columns of Y will be normalized. */
+/* To prevent overflows, the column norms of Y are */
+/* carefully computed using ZLASSQ. */
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* RWORK(i) = DZNRM2( M, Y(1,i), 1 ) */
+	    scale = zero;
+	    zlassq_(m, &y[i__ * y_dim1 + 1], &c__1, &scale, &ssum);
+	    if (disnan_(&scale) || disnan_(&ssum)) {
+		*k = 0;
+		*info = -10;
+		i__2 = -(*info);
+		xerbla_("ZGEDMD", &i__2);
+	    }
+	    if (scale != zero && ssum != zero) {
+		rootsc = sqrt(ssum);
+		if (scale >= ofl / rootsc) {
+/*                 Norm of Y(:,i) overflows. First, Y(:,i) */
+/*                 is scaled by */
+/*                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. */
+/*                 Next, the norm of Y(:,i) is stored without */
+/*                 overflow as RWORK(i) = - SCALE * (ROOTSC/M), */
+/*                 the minus sign indicating the 1/M factor. */
+/*                 Scaling is performed without overflow, and */
+/*                 underflow may occur in the smallest entries */
+/*                 of Y(:,i). The relative backward and forward */
+/*                 errors are small in the ell_2 norm. */
+		    d__1 = one / rootsc;
+		    zlascl_("G", &c__0, &c__0, &scale, &d__1, m, &c__1, &y[
+			    i__ * y_dim1 + 1], ldy, &info2);
+		    rwork[i__] = -scale * (rootsc / (doublereal) (*m));
+		} else {
+/*                 Y(:,i) will be scaled to unit 2-norm */
+		    rwork[i__] = scale * rootsc;
+		    zlascl_("G", &c__0, &c__0, &rwork[i__], &one, m, &c__1, &
+			    y[i__ * y_dim1 + 1], ldy, &info2);
+/*                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC */
+/* LAPAC */
+		}
+	    } else {
+		rwork[i__] = zero;
+	    }
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/*           Now, apply the same scaling to the columns of X. */
+	    if (rwork[i__] > zero) {
+		d__1 = one / rwork[i__];
+		zdscal_(m, &d__1, &x[i__ * x_dim1 + 1], &c__1);
+/*               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC */
+/* BLAS CALL */
+	    } else if (rwork[i__] < zero) {
+		d__1 = -rwork[i__];
+		d__2 = one / (doublereal) (*m);
+		zlascl_("G", &c__0, &c__0, &d__1, &d__2, m, &c__1, &x[i__ * 
+			x_dim1 + 1], ldx, &info2);
+/* LAPACK C */
+	    } else if (z_abs(&x[izamax_(m, &x[i__ * x_dim1 + 1], &c__1) + i__ 
+		    * x_dim1]) != zero) {
+/*               Y(:,i) is zero vector.  If X(:,i) is not */
+/*               zero, then a warning flag is raised. */
+/*               The computation continues but the */
+/*               situation will be reported in the output. */
+		badxy = TRUE_;
+	    }
+	}
+    }
+
+/*     <2> SVD of the data snapshot matrix X. */
+/*     ===================================== */
+/*     The left singular vectors are stored in the array X. */
+/*     The right singular vectors are in the array W. */
+/*     The array W will later on contain the eigenvectors */
+/*     of a Rayleigh quotient. */
+    numrnk = *n;
+/*      SELECT CASE ( WHTSVD ) */
+    if (*whtsvd == 1) {
+	zgesvd_("O", "S", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], 
+		ldb, &w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &
+		info1);
+/* LA */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 2) {
+	zgesdd_("O", m, n, &x[x_offset], ldx, &rwork[1], &b[b_offset], ldb, &
+		w[w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &iwork[1]
+		, &info1);
+/* LAP */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 3) {
+	i__1 = *lrwork - *n;
+	zgesvdq_("H", "P", "N", "R", "R", m, n, &x[x_offset], ldx, &rwork[1], 
+		&z__[z_offset], ldz, &w[w_offset], ldw, &numrnk, &iwork[1], 
+		liwork, &zwork[1], lzwork, &rwork[*n + 1], &i__1, &info1);
+/* LAPACK CA */
+	zlacpy_("A", m, &numrnk, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK C */
+	*(unsigned char *)t_or_n__ = 'C';
+    } else if (*whtsvd == 4) {
+	i__1 = *lrwork - *n;
+	zgejsv_("F", "U", jsvopt, "R", "N", "P", m, n, &x[x_offset], ldx, &
+		rwork[1], &z__[z_offset], ldz, &w[w_offset], ldw, &zwork[1], 
+		lzwork, &rwork[*n + 1], &i__1, &iwork[1], &info1);
+	zlacpy_("A", m, n, &z__[z_offset], ldz, &x[x_offset], ldx);
+/* LAPACK CALL */
+	*(unsigned char *)t_or_n__ = 'N';
+	xscl1 = rwork[*n + 1];
+	xscl2 = rwork[*n + 2];
+	if (xscl1 != xscl2) {
+/* This is an exceptional situation. If the */
+/* data matrices are not scaled and the */
+/* largest singular value of X overflows. */
+/* In that case ZGEJSV can return the SVD */
+/* in scaled form. The scaling factor can be used */
+/* to rescale the data (X and Y). */
+	    zlascl_("G", &c__0, &c__0, &xscl1, &xscl2, m, n, &y[y_offset], 
+		    ldy, &info2);
+	}
+/*      END SELECT */
+    }
+
+    if (info1 > 0) {
+/* The SVD selected subroutine did not converge. */
+/* Return with an error code. */
+	*info = 2;
+	return 0;
+    }
+
+    if (rwork[1] == zero) {
+/* The largest computed singular value of (scaled) */
+/* X is zero. Return error code -8 */
+/* (the 8th input variable had an illegal value). */
+	*k = 0;
+	*info = -8;
+	i__1 = -(*info);
+	xerbla_("ZGEDMD", &i__1);
+	return 0;
+    }
+
+/* <3> Determine the numerical rank of the data */
+/*    snapshots matrix X. This depends on the */
+/*    parameters NRNK and TOL. */
+/*      SELECT CASE ( NRNK ) */
+    if (*nrnk == -1) {
+	*k = 1;
+	i__1 = numrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (rwork[i__] <= rwork[1] * *tol || rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else if (*nrnk == -2) {
+	*k = 1;
+	i__1 = numrnk - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (rwork[i__ + 1] <= rwork[i__] * *tol || rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+    } else {
+	*k = 1;
+	i__1 = *nrnk;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    if (rwork[i__] <= small) {
+		myexit_();
+	    }
+	    ++(*k);
+	}
+/*          END SELECT */
+    }
+/*   Now, U = X(1:M,1:K) is the SVD/POD basis for the */
+/*   snapshot data in the input matrix X. */
+/* <4> Compute the Rayleigh quotient S = U^H * A * U. */
+/*    Depending on the requested outputs, the computation */
+/*    is organized to compute additional auxiliary */
+/*    matrices (for the residuals and refinements). */
+
+/*    In all formulas below, we need V_k*Sigma_k^(-1) */
+/*    where either V_k is in W(1:N,1:K), or V_k^H is in */
+/*    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). */
+    if (lsame_(t_or_n__, "N")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d__1 = one / rwork[i__];
+	    zdscal_(n, &d__1, &w[i__ * w_dim1 + 1], &c__1);
+/* W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC */
+/* BLAS CALL */
+	}
+    } else {
+/* This non-unit stride access is due to the fact */
+/* that ZGESVD, ZGESVDQ and ZGESDD return the */
+/* adjoint matrix of the right singular vectors. */
+/* DO i = 1, K */
+/* CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL */
+/* ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC */
+/* END DO */
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    rwork[*n + i__] = one / rwork[i__];
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *k;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * w_dim1;
+		i__4 = *n + i__;
+		z__2.r = rwork[i__4], z__2.i = zero;
+		i__5 = i__ + j * w_dim1;
+		z__1.r = z__2.r * w[i__5].r - z__2.i * w[i__5].i, z__1.i = 
+			z__2.r * w[i__5].i + z__2.i * w[i__5].r;
+		w[i__3].r = z__1.r, w[i__3].i = z__1.i;
+	    }
+	}
+    }
+
+    if (wntref) {
+
+/* Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) */
+/* for computing the refined Ritz vectors */
+/* (optionally, outside ZGEDMD). */
+	zgemm_("N", t_or_n__, m, k, n, &zone, &y[y_offset], ldy, &w[w_offset],
+		 ldw, &zzero, &z__[z_offset], ldz);
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! */
+/* Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! */
+
+/* At this point Z contains */
+/* A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and */
+/* this is needed for computing the residuals. */
+/* This matrix is  returned in the array B and */
+/* it can be used to compute refined Ritz vectors. */
+/* BLA */
+	zlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+/* B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC */
+/* BLAS CALL */
+	zgemm_("C", "N", k, k, m, &zone, &x[x_offset], ldx, &z__[z_offset], 
+		ldz, &zzero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) */
+/* At this point S = U^H * A * U is the Rayleigh quotient. */
+/* BLA */
+    } else {
+/* A * U(:,1:K) is not explicitly needed and the */
+/* computation is organized differently. The Rayleigh */
+/* quotient is computed more efficiently. */
+	zgemm_("C", "N", k, n, m, &zone, &x[x_offset], ldx, &y[y_offset], ldy,
+		 &zzero, &z__[z_offset], ldz);
+/* Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) */
+
+	zgemm_("N", t_or_n__, k, k, n, &zone, &z__[z_offset], ldz, &w[
+		w_offset], ldw, &zzero, &s[s_offset], lds);
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! */
+/* S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! */
+/* At this point S = U^H * A * U is the Rayleigh quotient. */
+/* If the residuals are requested, save scaled V_k into Z. */
+/* Recall that V_k or V_k^H is stored in W. */
+/* BLAS */
+	if (wntres || wntex) {
+	    if (lsame_(t_or_n__, "N")) {
+		zlacpy_("A", n, k, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    } else {
+		zlacpy_("A", k, n, &w[w_offset], ldw, &z__[z_offset], ldz);
+	    }
+	}
+    }
+
+/* <5> Compute the Ritz values and (if requested) the */
+/*   right eigenvectors of the Rayleigh quotient. */
+
+    zgeev_("N", jobzl, k, &s[s_offset], lds, &eigs[1], &w[w_offset], ldw, &w[
+	    w_offset], ldw, &zwork[1], lzwork, &rwork[*n + 1], &info1);
+
+/* W(1:K,1:K) contains the eigenvectors of the Rayleigh */
+/* quotient.  See the description of Z. */
+/* Also, see the description of ZGEEV. */
+/* LAPACK CALL */
+    if (info1 > 0) {
+/* ZGEEV failed to compute the eigenvalues and */
+/* eigenvectors of the Rayleigh quotient. */
+	*info = 3;
+	return 0;
+    }
+
+/* <6> Compute the eigenvectors (if requested) and, */
+/* the residuals (if requested). */
+
+    if (wntvec || wntex) {
+	if (wntres) {
+	    if (wntref) {
+/* Here, if the refinement is requested, we have */
+/* A*U(:,1:K) already computed and stored in Z. */
+/* For the residuals, need Y = A * U(:,1;K) * W. */
+		zgemm_("N", "N", m, k, k, &zone, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zzero, &y[y_offset], ldy);
+/* Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC */
+/* This frees Z; Y contains A * U(:,1:K) * W. */
+/* BLAS CALL */
+	    } else {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) (or its adjoint) is stored in Z */
+		zgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[
+			w_offset], ldw, &zzero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+		zgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[
+			s_offset], lds, &zzero, &z__[z_offset], ldz);
+/* Save a copy of Z into Y and free Z for holding */
+/* the Ritz vectors. */
+		zlacpy_("A", m, k, &z__[z_offset], ldz, &y[y_offset], ldy);
+		if (wntex) {
+		    zlacpy_("A", m, k, &z__[z_offset], ldz, &b[b_offset], ldb);
+		}
+	    }
+	} else if (wntex) {
+/* Compute S = V_k * Sigma_k^(-1) * W, where */
+/* V_k * Sigma_k^(-1) is stored in Z */
+	    zgemm_(t_or_n__, "N", n, k, k, &zone, &z__[z_offset], ldz, &w[
+		    w_offset], ldw, &zzero, &s[s_offset], lds);
+/* Then, compute Z = Y * S = */
+/* = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = */
+/* = A * U(:,1:K) * W(1:K,1:K) */
+	    zgemm_("N", "N", m, k, n, &zone, &y[y_offset], ldy, &s[s_offset], 
+		    lds, &zzero, &b[b_offset], ldb);
+/* The above call replaces the following two calls */
+/* that were used in the developing-testing phase. */
+/* CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & */
+/*           LDS, ZZERO, Z, LDZ) */
+/* Save a copy of Z into B and free Z for holding */
+/* the Ritz vectors. */
+/* CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) */
+	}
+
+/* Compute the Ritz vectors */
+	if (wntvec) {
+	    zgemm_("N", "N", m, k, k, &zone, &x[x_offset], ldx, &w[w_offset], 
+		    ldw, &zzero, &z__[z_offset], ldz);
+	}
+/* Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC */
+
+/* BLAS CALL */
+	if (wntres) {
+	    i__1 = *k;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = i__;
+		z__1.r = -eigs[i__2].r, z__1.i = -eigs[i__2].i;
+		zaxpy_(m, &z__1, &z__[i__ * z_dim1 + 1], &c__1, &y[i__ * 
+			y_dim1 + 1], &c__1);
+/* Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTR */
+/* BLAS */
+		res[i__] = dznrm2_(m, &y[i__ * y_dim1 + 1], &c__1);
+/* BLAS */
+	    }
+	}
+    }
+
+    if (*whtsvd == 4) {
+	rwork[*n + 1] = xscl1;
+	rwork[*n + 2] = xscl2;
+    }
+
+/*     Successful exit. */
+    if (! badxy) {
+	*info = 0;
+    } else {
+/* A warning on possible data inconsistency. */
+/* This should be a rare event. */
+	*info = 4;
+    }
+/* ............................................................ */
+    return 0;
+/*     ...... */
+} /* zgedmd_ */
+
diff --git a/lapack-netlib/SRC/zgedmdq.c b/lapack-netlib/SRC/zgedmdq.c
index 447b23014..1815f0814 100644
--- a/lapack-netlib/SRC/zgedmdq.c
+++ b/lapack-netlib/SRC/zgedmdq.c
@@ -509,3 +509,785 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 
 
 
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+
+/* Subroutine */ int zgedmdq_(char *jobs, char *jobz, char *jobr, char *jobq, 
+	char *jobt, char *jobf, integer *whtsvd, integer *m, integer *n, 
+	doublecomplex *f, integer *ldf, doublecomplex *x, integer *ldx, 
+	doublecomplex *y, integer *ldy, integer *nrnk, doublereal *tol, 
+	integer *k, doublecomplex *eigs, doublecomplex *z__, integer *ldz, 
+	doublereal *res, doublecomplex *b, integer *ldb, doublecomplex *v, 
+	integer *ldv, doublecomplex *s, integer *lds, doublecomplex *zwork, 
+	integer *lzwork, doublereal *work, integer *lwork, integer *iwork, 
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer f_dim1, f_offset, x_dim1, x_offset, y_dim1, y_offset, z_dim1, 
+	    z_offset, b_dim1, b_offset, v_dim1, v_offset, s_dim1, s_offset, 
+	    i__1, i__2;
+
+    /* Local variables */
+    doublereal zero;
+    integer info1;
+    extern logical lsame_(char *, char *);
+    char jobvl[1];
+    integer minmn;
+    logical wantq;
+    integer mlwqr, olwqr;
+    logical wntex;
+    doublecomplex zzero;
+    extern /* Subroutine */ int zgedmd_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublecomplex *, integer *, 
+	    doublecomplex *, integer *, integer *, doublereal *, integer *, 
+	    doublecomplex *, doublecomplex *, integer *, doublereal *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *, 
+	    doublereal *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *);
+    integer mlwdmd, olwdmd;
+    logical sccolx, sccoly;
+    extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
+	    ), zlacpy_(char *, integer *, integer *, doublecomplex *, integer 
+	    *, doublecomplex *, integer *), zlaset_(char *, integer *,
+	     integer *, doublecomplex *, doublecomplex *, doublecomplex *, 
+	    integer *);
+    integer iminwr;
+    logical wntvec, wntvcf;
+    integer mlwgqr;
+    logical wntref;
+    integer mlwork, olwgqr, olwork, mlrwrk, mlwmqr, olwmqr;
+    logical lquery, wntres, wnttrf, wntvcq;
+    extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
+	    integer *, integer *), zunmqr_(char *, char *, integer *, integer 
+	    *, integer *, doublecomplex *, integer *, doublecomplex *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
+    doublereal one;
+
+/* March 2023 */
+/* ..... */
+/*      USE                   iso_fortran_env */
+/*      INTEGER, PARAMETER :: WP = real64 */
+/* ..... */
+/*     Scalar arguments */
+/*     Array arguments */
+/* ..... */
+/*     Purpose */
+/*     ======= */
+/*     ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for */
+/*     a pair of data snapshot matrices, using a QR factorization */
+/*     based compression of the data. For the input matrices */
+/*     X and Y such that Y = A*X with an unaccessible matrix */
+/*     A, ZGEDMDQ computes a certain number of Ritz pairs of A using */
+/*     the standard Rayleigh-Ritz extraction from a subspace of */
+/*     range(X) that is determined using the leading left singular */
+/*     vectors of X. Optionally, ZGEDMDQ returns the residuals */
+/*     of the computed Ritz pairs, the information needed for */
+/*     a refinement of the Ritz vectors, or the eigenvectors of */
+/*     the Exact DMD. */
+/*     For further details see the references listed */
+/*     below. For more details of the implementation see [3]. */
+
+/*     References */
+/*     ========== */
+/*     [1] P. Schmid: Dynamic mode decomposition of numerical */
+/*         and experimental data, */
+/*         Journal of Fluid Mechanics 656, 5-28, 2010. */
+/*     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal */
+/*         decompositions: analysis and enhancements, */
+/*         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. */
+/*     [3] Z. Drmac: A LAPACK implementation of the Dynamic */
+/*         Mode Decomposition I. Technical report. AIMDyn Inc. */
+/*         and LAPACK Working Note 298. */
+/*     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. */
+/*         Brunton, N. Kutz: On Dynamic Mode Decomposition: */
+/*         Theory and Applications, Journal of Computational */
+/*         Dynamics 1(2), 391 -421, 2014. */
+
+/*     Developed and supported by: */
+/*     =========================== */
+/*     Developed and coded by Zlatko Drmac, Faculty of Science, */
+/*     University of Zagreb;  drmac@math.hr */
+/*     In cooperation with */
+/*     AIMdyn Inc., Santa Barbara, CA. */
+/*     and supported by */
+/*     - DARPA SBIR project "Koopman Operator-Based Forecasting */
+/*     for Nonstationary Processes from Near-Term, Limited */
+/*     Observational Data" Contract No: W31P4Q-21-C-0007 */
+/*     - DARPA PAI project "Physics-Informed Machine Learning */
+/*     Methodologies" Contract No: HR0011-18-9-0033 */
+/*     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic */
+/*     Framework for Space-Time Analysis of Process Dynamics" */
+/*     Contract No: HR0011-16-C-0116 */
+/*     Any opinions, findings and conclusions or recommendations */
+/*     expressed in this material are those of the author and */
+/*     do not necessarily reflect the views of the DARPA SBIR */
+/*     Program Office. */
+/* ============================================================ */
+/*     Distribution Statement A: */
+/*     Approved for Public Release, Distribution Unlimited. */
+/*     Cleared by DARPA on September 29, 2022 */
+/* ============================================================ */
+/* ...................................................................... */
+/*     Arguments */
+/*     ========= */
+/*     JOBS (input) CHARACTER*1 */
+/*     Determines whether the initial data snapshots are scaled */
+/*     by a diagonal matrix. The data snapshots are the columns */
+/*     of F. The leading N-1 columns of F are denoted X and the */
+/*     trailing N-1 columns are denoted Y. */
+/*     'S' :: The data snapshots matrices X and Y are multiplied */
+/*            with a diagonal matrix D so that X*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'C' :: The snapshots are scaled as with the 'S' option. */
+/*            If it is found that an i-th column of X is zero */
+/*            vector and the corresponding i-th column of Y is */
+/*            non-zero, then the i-th column of Y is set to */
+/*            zero and a warning flag is raised. */
+/*     'Y' :: The data snapshots matrices X and Y are multiplied */
+/*            by a diagonal matrix D so that Y*D has unit */
+/*            nonzero columns (in the Euclidean 2-norm) */
+/*     'N' :: No data scaling. */
+/* ..... */
+/*     JOBZ (input) CHARACTER*1 */
+/*     Determines whether the eigenvectors (Koopman modes) will */
+/*     be computed. */
+/*     'V' :: The eigenvectors (Koopman modes) will be computed */
+/*            and returned in the matrix Z. */
+/*            See the description of Z. */
+/*     'F' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Z*V, where Z */
+/*            is orthonormal and V contains the eigenvectors */
+/*            of the corresponding Rayleigh quotient. */
+/*            See the descriptions of F, V, Z. */
+/*     'Q' :: The eigenvectors (Koopman modes) will be returned */
+/*            in factored form as the product Q*Z, where Z */
+/*            contains the eigenvectors of the compression of the */
+/*            underlying discretized operator onto the span of */
+/*            the data snapshots. See the descriptions of F, V, Z. */
+/*            Q is from the initial QR factorization. */
+/*     'N' :: The eigenvectors are not computed. */
+/* ..... */
+/*     JOBR (input) CHARACTER*1 */
+/*     Determines whether to compute the residuals. */
+/*     'R' :: The residuals for the computed eigenpairs will */
+/*            be computed and stored in the array RES. */
+/*            See the description of RES. */
+/*            For this option to be legal, JOBZ must be 'V'. */
+/*     'N' :: The residuals are not computed. */
+/* ..... */
+/*     JOBQ (input) CHARACTER*1 */
+/*     Specifies whether to explicitly compute and return the */
+/*     unitary matrix from the QR factorization. */
+/*     'Q' :: The matrix Q of the QR factorization of the data */
+/*            snapshot matrix is computed and stored in the */
+/*            array F. See the description of F. */
+/*     'N' :: The matrix Q is not explicitly computed. */
+/* ..... */
+/*     JOBT (input) CHARACTER*1 */
+/*     Specifies whether to return the upper triangular factor */
+/*     from the QR factorization. */
+/*     'R' :: The matrix R of the QR factorization of the data */
+/*            snapshot matrix F is returned in the array Y. */
+/*            See the description of Y and Further details. */
+/*     'N' :: The matrix R is not returned. */
+/* ..... */
+/*     JOBF (input) CHARACTER*1 */
+/*     Specifies whether to store information needed for post- */
+/*     processing (e.g. computing refined Ritz vectors) */
+/*     'R' :: The matrix needed for the refinement of the Ritz */
+/*            vectors is computed and stored in the array B. */
+/*            See the description of B. */
+/*     'E' :: The unscaled eigenvectors of the Exact DMD are */
+/*            computed and returned in the array B. See the */
+/*            description of B. */
+/*     'N' :: No eigenvector refinement data is computed. */
+/*     To be useful on exit, this option needs JOBQ='Q'. */
+/* ..... */
+/*     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } */
+/*     Allows for a selection of the SVD algorithm from the */
+/*     LAPACK library. */
+/*     1 :: ZGESVD (the QR SVD algorithm) */
+/*     2 :: ZGESDD (the Divide and Conquer algorithm; if enough */
+/*          workspace available, this is the fastest option) */
+/*     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4 */
+/*          are the most accurate options) */
+/*     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 */
+/*          are the most accurate options) */
+/*     For the four methods above, a significant difference in */
+/*     the accuracy of small singular values is possible if */
+/*     the snapshots vary in norm so that X is severely */
+/*     ill-conditioned. If small (smaller than EPS*||X||) */
+/*     singular values are of interest and JOBS=='N',  then */
+/*     the options (3, 4) give the most accurate results, where */
+/*     the option 4 is slightly better and with stronger */
+/*     theoretical background. */
+/*     If JOBS=='S', i.e. the columns of X will be normalized, */
+/*     then all methods give nearly equally accurate results. */
+/* ..... */
+/*     M (input) INTEGER, M >= 0 */
+/*     The state space dimension (the number of rows of F). */
+/* ..... */
+/*     N (input) INTEGER, 0 <= N <= M */
+/*     The number of data snapshots from a single trajectory, */
+/*     taken at equidistant discrete times. This is the */
+/*     number of columns of F. */
+/* ..... */
+/*     F (input/output) COMPLEX(KIND=WP) M-by-N array */
+/*     > On entry, */
+/*     the columns of F are the sequence of data snapshots */
+/*     from a single trajectory, taken at equidistant discrete */
+/*     times. It is assumed that the column norms of F are */
+/*     in the range of the normalized floating point numbers. */
+/*     < On exit, */
+/*     If JOBQ == 'Q', the array F contains the orthogonal */
+/*     matrix/factor of the QR factorization of the initial */
+/*     data snapshots matrix F. See the description of JOBQ. */
+/*     If JOBQ == 'N', the entries in F strictly below the main */
+/*     diagonal contain, column-wise, the information on the */
+/*     Householder vectors, as returned by ZGEQRF. The */
+/*     remaining information to restore the orthogonal matrix */
+/*     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). */
+/*     See the description of ZWORK. */
+/* ..... */
+/*     LDF (input) INTEGER, LDF >= M */
+/*     The leading dimension of the array F. */
+/* ..... */
+/*     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array */
+/*     X is used as workspace to hold representations of the */
+/*     leading N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, the leading K columns of X contain the leading */
+/*     K left singular vectors of the above described content */
+/*     of X. To lift them to the space of the left singular */
+/*     vectors U(:,1:K) of the input data, pre-multiply with the */
+/*     Q factor from the initial QR factorization. */
+/*     See the descriptions of F, K, V  and Z. */
+/* ..... */
+/*     LDX (input) INTEGER, LDX >= N */
+/*     The leading dimension of the array X. */
+/* ..... */
+/*     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array */
+/*     Y is used as workspace to hold representations of the */
+/*     trailing N-1 snapshots in the orthonormal basis computed */
+/*     in the QR factorization of F. */
+/*     On exit, */
+/*     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper */
+/*     triangular factor from the QR factorization of the data */
+/*     snapshot matrix F. */
+/* ..... */
+/*     LDY (input) INTEGER , LDY >= N */
+/*     The leading dimension of the array Y. */
+/* ..... */
+/*     NRNK (input) INTEGER */
+/*     Determines the mode how to compute the numerical rank, */
+/*     i.e. how to truncate small singular values of the input */
+/*     matrix X. On input, if */
+/*     NRNK = -1 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(1) */
+/*                  This option is recommended. */
+/*     NRNK = -2 :: i-th singular value sigma(i) is truncated */
+/*                  if sigma(i) <= TOL*sigma(i-1) */
+/*                  This option is included for R&D purposes. */
+/*                  It requires highly accurate SVD, which */
+/*                  may not be feasible. */
+/*     The numerical rank can be enforced by using positive */
+/*     value of NRNK as follows: */
+/*     0 < NRNK <= N-1 :: at most NRNK largest singular values */
+/*     will be used. If the number of the computed nonzero */
+/*     singular values is less than NRNK, then only those */
+/*     nonzero values will be used and the actually used */
+/*     dimension is less than NRNK. The actual number of */
+/*     the nonzero singular values is returned in the variable */
+/*     K. See the description of K. */
+/* ..... */
+/*     TOL (input) REAL(KIND=WP), 0 <= TOL < 1 */
+/*     The tolerance for truncating small singular values. */
+/*     See the description of NRNK. */
+/* ..... */
+/*     K (output) INTEGER,  0 <= K <= N */
+/*     The dimension of the SVD/POD basis for the leading N-1 */
+/*     data snapshots (columns of F) and the number of the */
+/*     computed Ritz pairs. The value of K is determined */
+/*     according to the rule set by the parameters NRNK and */
+/*     TOL. See the descriptions of NRNK and TOL. */
+/* ..... */
+/*     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array */
+/*     The leading K (K<=N-1) entries of EIGS contain */
+/*     the computed eigenvalues (Ritz values). */
+/*     See the descriptions of K, and Z. */
+/* ..... */
+/*     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array */
+/*     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i) */
+/*     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1. */
+/*     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as */
+/*     Z*V, where Z contains orthonormal matrix (the product of */
+/*     Q from the initial QR factorization and the SVD/POD_basis */
+/*     returned by ZGEDMD in X) and the second factor (the */
+/*     eigenvectors of the Rayleigh quotient) is in the array V, */
+/*     as returned by ZGEDMD. That is,  X(:,1:K)*V(:,i) */
+/*     is an eigenvector corresponding to EIGS(i). The columns */
+/*     of V(1:K,1:K) are the computed eigenvectors of the */
+/*     K-by-K Rayleigh quotient. */
+/*     See the descriptions of EIGS, X and V. */
+/* ..... */
+/*     LDZ (input) INTEGER , LDZ >= M */
+/*     The leading dimension of the array Z. */
+/* ..... */
+/*     RES (output) REAL(KIND=WP) (N-1)-by-1 array */
+/*     RES(1:K) contains the residuals for the K computed */
+/*     Ritz pairs, */
+/*     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. */
+/*     See the description of EIGS and Z. */
+/* ..... */
+/*     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array. */
+/*     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can */
+/*     be used for computing the refined vectors; see further */
+/*     details in the provided references. */
+/*     If JOBF == 'E', B(1:N,1;K) contains */
+/*     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the */
+/*     Exact DMD, up to scaling by the inverse eigenvalues. */
+/*     In both cases, the content of B can be lifted to the */
+/*     original dimension of the input data by pre-multiplying */
+/*     with the Q factor from the initial QR factorization. */
+/*     Here A denotes a compression of the underlying operator. */
+/*     See the descriptions of F and X. */
+/*     If JOBF =='N', then B is not referenced. */
+/* ..... */
+/*     LDB (input) INTEGER, LDB >= MIN(M,N) */
+/*     The leading dimension of the array B. */
+/* ..... */
+/*     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */
+/*     On exit, V(1:K,1:K) V contains the K eigenvectors of */
+/*     the Rayleigh quotient. The Ritz vectors */
+/*     (returned in Z) are the product of Q from the initial QR */
+/*     factorization (see the description of F) X (see the */
+/*     description of X) and V. */
+/* ..... */
+/*     LDV (input) INTEGER, LDV >= N-1 */
+/*     The leading dimension of the array V. */
+/* ..... */
+/*     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array */
+/*     The array S(1:K,1:K) is used for the matrix Rayleigh */
+/*     quotient. This content is overwritten during */
+/*     the eigenvalue decomposition by ZGEEV. */
+/*     See the description of K. */
+/* ..... */
+/*     LDS (input) INTEGER, LDS >= N-1 */
+/*     The leading dimension of the array S. */
+/* ..... */
+/*     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     ZWORK(1:MIN(M,N)) contains the scalar factors of the */
+/*     elementary reflectors as returned by ZGEQRF of the */
+/*     M-by-N input matrix F. */
+/*     If the call to ZGEDMDQ is only workspace query, then */
+/*     ZWORK(1) contains the minimal complex workspace length and */
+/*     ZWORK(2) is the optimal complex workspace length. */
+/*     Hence, the length of work is at least 2. */
+/*     See the description of LZWORK. */
+/* ..... */
+/*     LZWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector ZWORK. */
+/*     LZWORK is calculated as follows: */
+/*     Let MLWQR  = N (minimal workspace for ZGEQRF[M,N]) */
+/*         MLWDMD = minimal workspace for ZGEDMD (see the */
+/*                  description of LWORK in ZGEDMD) */
+/*         MLWMQR = N (minimal workspace for */
+/*                    ZUNMQR['L','N',M,N,N]) */
+/*         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) */
+/*         MINMN  = MIN(M,N) */
+/*     Then */
+/*     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) */
+/*     is further updated as follows: */
+/*        if   JOBZ == 'V' or JOBZ == 'F' THEN */
+/*             LZWORK = MAX(LZWORK, MINMN+MLWMQR) */
+/*        if   JOBQ == 'Q' THEN */
+/*             LZWORK = MAX(ZLWORK, MINMN+MLWGQR) */
+
+/* ..... */
+/*     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array */
+/*     On exit, */
+/*     WORK(1:N-1) contains the singular values of */
+/*     the input submatrix F(1:M,1:N-1). */
+/*     If the call to ZGEDMDQ is only workspace query, then */
+/*     WORK(1) contains the minimal workspace length and */
+/*     WORK(2) is the optimal workspace length. hence, the */
+/*     length of work is at least 2. */
+/*     See the description of LWORK. */
+/* ..... */
+/*     LWORK (input) INTEGER */
+/*     The minimal length of the  workspace vector WORK. */
+/*     LWORK is the same as in ZGEDMD, because in ZGEDMDQ */
+/*     only ZGEDMD requires real workspace for snapshots */
+/*     of dimensions MIN(M,N)-by-(N-1). */
+/*     If on entry LWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace length for WORK. */
+/* ..... */
+/*     IWORK (workspace/output) INTEGER LIWORK-by-1 array */
+/*     Workspace that is required only if WHTSVD equals */
+/*     2 , 3 or 4. (See the description of WHTSVD). */
+/*     If on entry LWORK =-1 or LIWORK=-1, then the */
+/*     minimal length of IWORK is computed and returned in */
+/*     IWORK(1). See the description of LIWORK. */
+/* ..... */
+/*     LIWORK (input) INTEGER */
+/*     The minimal length of the workspace vector IWORK. */
+/*     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 */
+/*     Let M1=MIN(M,N), N1=N-1. Then */
+/*     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1)) */
+/*     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1) */
+/*     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1) */
+/*     If on entry LIWORK = -1, then a workspace query is */
+/*     assumed and the procedure only computes the minimal */
+/*     and the optimal workspace lengths for both WORK and */
+/*     IWORK. See the descriptions of WORK and IWORK. */
+/* ..... */
+/*     INFO (output) INTEGER */
+/*     -i < 0 :: On entry, the i-th argument had an */
+/*               illegal value */
+/*        = 0 :: Successful return. */
+/*        = 1 :: Void input. Quick exit (M=0 or N=0). */
+/*        = 2 :: The SVD computation of X did not converge. */
+/*               Suggestion: Check the input data and/or */
+/*               repeat with different WHTSVD. */
+/*        = 3 :: The computation of the eigenvalues did not */
+/*               converge. */
+/*        = 4 :: If data scaling was requested on input and */
+/*               the procedure found inconsistency in the data */
+/*               such that for some column index i, */
+/*               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set */
+/*               to zero if JOBS=='C'. The computation proceeds */
+/*               with original or modified data and warning */
+/*               flag is set with INFO=4. */
+/* ............................................................. */
+/* ............................................................. */
+/*     Parameters */
+/*     ~~~~~~~~~~ */
+/*     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP ) */
+
+/*     Local scalars */
+/*     ~~~~~~~~~~~~~ */
+
+/*     External functions (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~ */
+
+/*     External subroutines (BLAS and LAPACK) */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     External subroutines */
+/*     ~~~~~~~~~~~~~~~~~~~~ */
+/*     Intrinsic functions */
+/*     ~~~~~~~~~~~~~~~~~~~ */
+/* .......................................................... */
+    /* Parameter adjustments */
+    f_dim1 = *ldf;
+    f_offset = 1 + f_dim1 * 1;
+    f -= f_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --eigs;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --res;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1 * 1;
+    s -= s_offset;
+    --zwork;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    one = 1.f;
+    zero = 0.f;
+    zzero.r = 0.f, zzero.i = 0.f;
+
+/*    Test the input arguments */
+    wntres = lsame_(jobr, "R");
+    sccolx = lsame_(jobs, "S") || lsame_(jobs, "C");
+    sccoly = lsame_(jobs, "Y");
+    wntvec = lsame_(jobz, "V");
+    wntvcf = lsame_(jobz, "F");
+    wntvcq = lsame_(jobz, "Q");
+    wntref = lsame_(jobf, "R");
+    wntex = lsame_(jobf, "E");
+    wantq = lsame_(jobq, "Q");
+    wnttrf = lsame_(jobt, "R");
+    minmn = f2cmin(*m,*n);
+    *info = 0;
+    lquery = *lzwork == -1 || *lwork == -1 || *liwork == -1;
+
+    if (! (sccolx || sccoly || lsame_(jobs, "N"))) {
+	*info = -1;
+    } else if (! (wntvec || wntvcf || wntvcq || lsame_(jobz, "N"))) {
+	*info = -2;
+    } else if (! (wntres || lsame_(jobr, "N")) || 
+	    wntres && lsame_(jobz, "N")) {
+	*info = -3;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -4;
+    } else if (! (wnttrf || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (wntref || wntex || lsame_(jobf, "N")))
+	     {
+	*info = -6;
+    } else if (! (*whtsvd == 1 || *whtsvd == 2 || *whtsvd == 3 || *whtsvd == 
+	    4)) {
+	*info = -7;
+    } else if (*m < 0) {
+	*info = -8;
+    } else if (*n < 0 || *n > *m + 1) {
+	*info = -9;
+    } else if (*ldf < *m) {
+	*info = -11;
+    } else if (*ldx < minmn) {
+	*info = -13;
+    } else if (*ldy < minmn) {
+	*info = -15;
+    } else if (! (*nrnk == -2 || *nrnk == -1 || *nrnk >= 1 && *nrnk <= *n)) {
+	*info = -16;
+    } else if (*tol < zero || *tol >= one) {
+	*info = -17;
+    } else if (*ldz < *m) {
+	*info = -21;
+    } else if ((wntref || wntex) && *ldb < minmn) {
+	*info = -24;
+    } else if (*ldv < *n - 1) {
+	*info = -26;
+    } else if (*lds < *n - 1) {
+	*info = -28;
+    }
+
+    if (wntvec || wntvcf || wntvcq) {
+	*(unsigned char *)jobvl = 'V';
+    } else {
+	*(unsigned char *)jobvl = 'N';
+    }
+    if (*info == 0) {
+/* Compute the minimal and the optimal workspace */
+/* requirements. Simulate running the code and */
+/* determine minimal and optimal sizes of the */
+/* workspace at any moment of the run. */
+	if (*n == 0 || *n == 1) {
+/* All output except K is void. INFO=1 signals */
+/* the void input. In case of a workspace query, */
+/* the minimal workspace lengths are returned. */
+	    if (lquery) {
+		iwork[1] = 1;
+		zwork[1].r = 2., zwork[1].i = 0.;
+		zwork[2].r = 2., zwork[2].i = 0.;
+		work[1] = 2.;
+		work[2] = 2.;
+	    } else {
+		*k = 0;
+	    }
+	    *info = 1;
+	    return 0;
+	}
+	mlrwrk = 2;
+	mlwork = 2;
+	olwork = 2;
+	iminwr = 1;
+	mlwqr = f2cmax(1,*n);
+/* Minimal workspace length for ZGEQRF. */
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwqr;
+	mlwork = f2cmax(i__1,i__2);
+	if (lquery) {
+	    zgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[1], &c_n1, &
+		    info1);
+	    olwqr = (integer) zwork[1].r;
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwqr;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	i__1 = *n - 1;
+	zgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], 
+		ldx, &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset]
+		, ldz, &res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[
+		s_offset], lds, &zwork[1], &c_n1, &work[1], &c_n1, &iwork[1], 
+		&c_n1, &info1);
+	mlwdmd = (integer) zwork[1].r;
+/* Computing MAX */
+	i__1 = mlwork, i__2 = minmn + mlwdmd;
+	mlwork = f2cmax(i__1,i__2);
+/* Computing MAX */
+	i__1 = mlrwrk, i__2 = (integer) work[1];
+	mlrwrk = f2cmax(i__1,i__2);
+	iminwr = f2cmax(iminwr,iwork[1]);
+	if (lquery) {
+	    olwdmd = (integer) zwork[2].r;
+/* Computing MAX */
+	    i__1 = olwork, i__2 = minmn + olwdmd;
+	    olwork = f2cmax(i__1,i__2);
+	}
+	if (wntvec || wntvcf) {
+	    mlwmqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + mlwmqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		zunmqr_("L", "N", m, n, &minmn, &f[f_offset], ldf, &zwork[1], 
+			&z__[z_offset], ldz, &zwork[1], &c_n1, &info1);
+		olwmqr = (integer) zwork[1].r;
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + olwmqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (wantq) {
+	    mlwgqr = f2cmax(1,*n);
+/* Computing MAX */
+	    i__1 = mlwork, i__2 = minmn + mlwgqr;
+	    mlwork = f2cmax(i__1,i__2);
+	    if (lquery) {
+		zungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &
+			zwork[1], &c_n1, &info1);
+		olwgqr = (integer) zwork[1].r;
+/* Computing MAX */
+		i__1 = olwork, i__2 = minmn + olwgqr;
+		olwork = f2cmax(i__1,i__2);
+	    }
+	}
+	if (*liwork < iminwr && ! lquery) {
+	    *info = -34;
+	}
+	if (*lwork < mlrwrk && ! lquery) {
+	    *info = -32;
+	}
+	if (*lzwork < mlwork && ! lquery) {
+	    *info = -30;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEDMDQ", &i__1);
+	return 0;
+    } else if (lquery) {
+/*     Return minimal and optimal workspace sizes */
+	iwork[1] = iminwr;
+	zwork[1].r = (doublereal) mlwork, zwork[1].i = 0.;
+	zwork[2].r = (doublereal) olwork, zwork[2].i = 0.;
+	work[1] = (doublereal) mlrwrk;
+	work[2] = (doublereal) mlrwrk;
+	return 0;
+    }
+/* ..... */
+/*     Initial QR factorization that is used to represent the */
+/*     snapshots as elements of lower dimensional subspace. */
+/*     For large scale computation with M >> N, at this place */
+/*     one can use an out of core QRF. */
+
+    i__1 = *lzwork - minmn;
+    zgeqrf_(m, n, &f[f_offset], ldf, &zwork[1], &zwork[minmn + 1], &i__1, &
+	    info1);
+
+/*     Define X and Y as the snapshots representations in the */
+/*     orthogonal basis computed in the QR factorization. */
+/*     X corresponds to the leading N-1 and Y to the trailing */
+/*     N-1 snapshots. */
+    i__1 = *n - 1;
+    zlaset_("L", &minmn, &i__1, &zzero, &zzero, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    zlacpy_("U", &minmn, &i__1, &f[f_offset], ldf, &x[x_offset], ldx);
+    i__1 = *n - 1;
+    zlacpy_("A", &minmn, &i__1, &f[(f_dim1 << 1) + 1], ldf, &y[y_offset], ldy);
+    if (*m >= 3) {
+	i__1 = minmn - 2;
+	i__2 = *n - 2;
+	zlaset_("L", &i__1, &i__2, &zzero, &zzero, &y[y_dim1 + 3], ldy);
+    }
+
+/*     Compute the DMD of the projected snapshot pairs (X,Y) */
+    i__1 = *n - 1;
+    i__2 = *lzwork - minmn;
+    zgedmd_(jobs, jobvl, jobr, jobf, whtsvd, &minmn, &i__1, &x[x_offset], ldx,
+	     &y[y_offset], ldy, nrnk, tol, k, &eigs[1], &z__[z_offset], ldz, &
+	    res[1], &b[b_offset], ldb, &v[v_offset], ldv, &s[s_offset], lds, &
+	    zwork[minmn + 1], &i__2, &work[1], lwork, &iwork[1], liwork, &
+	    info1);
+    if (info1 == 2 || info1 == 3) {
+/* Return with error code. See ZGEDMD for details. */
+	*info = info1;
+	return 0;
+    } else {
+	*info = info1;
+    }
+
+/*     The Ritz vectors (Koopman modes) can be explicitly */
+/*     formed or returned in factored form. */
+    if (wntvec) {
+/* Compute the eigenvectors explicitly. */
+	if (*m > minmn) {
+	    i__1 = *m - minmn;
+	    zlaset_("A", &i__1, k, &zzero, &zzero, &z__[minmn + 1 + z_dim1], 
+		    ldz);
+	}
+	i__1 = *lzwork - minmn;
+	zunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[
+		z_offset], ldz, &zwork[minmn + 1], &i__1, &info1);
+    } else if (wntvcf) {
+/*   Return the Ritz vectors (eigenvectors) in factored */
+/*   form Z*V, where Z contains orthonormal matrix (the */
+/*   product of Q from the initial QR factorization and */
+/*   the SVD/POD_basis returned by ZGEDMD in X) and the */
+/*   second factor (the eigenvectors of the Rayleigh */
+/*   quotient) is in the array V, as returned by ZGEDMD. */
+	zlacpy_("A", n, k, &x[x_offset], ldx, &z__[z_offset], ldz);
+	if (*m > *n) {
+	    i__1 = *m - *n;
+	    zlaset_("A", &i__1, k, &zzero, &zzero, &z__[*n + 1 + z_dim1], ldz);
+	}
+	i__1 = *lzwork - minmn;
+	zunmqr_("L", "N", m, k, &minmn, &f[f_offset], ldf, &zwork[1], &z__[
+		z_offset], ldz, &zwork[minmn + 1], &i__1, &info1);
+    }
+
+/*     Some optional output variables: */
+
+/*     The upper triangular factor R in the initial QR */
+/*     factorization is optionally returned in the array Y. */
+/*     This is useful if this call to ZGEDMDQ is to be */
+/*     followed by a streaming DMD that is implemented in a */
+/*     QR compressed form. */
+    if (wnttrf) {
+/* Return the upper triangular R in Y */
+	zlaset_("A", &minmn, n, &zzero, &zzero, &y[y_offset], ldy);
+	zlacpy_("U", &minmn, n, &f[f_offset], ldf, &y[y_offset], ldy);
+    }
+
+/*     The orthonormal/unitary factor Q in the initial QR */
+/*     factorization is optionally returned in the array F. */
+/*     Same as with the triangular factor above, this is */
+/*     useful in a streaming DMD. */
+    if (wantq) {
+/* Q overwrites F */
+	i__1 = *lzwork - minmn;
+	zungqr_(m, &minmn, &minmn, &f[f_offset], ldf, &zwork[1], &zwork[minmn 
+		+ 1], &i__1, &info1);
+    }
+
+    return 0;
+
+} /* zgedmdq_ */
+

From fa6d06359a28ddba5996d9e25120d9907d83a1dd Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 21 Jun 2023 17:17:31 +0200
Subject: [PATCH 14/15] correct list placement of zgedmd/zgedmdq

---
 cmake/lapack.cmake | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake
index 12127531d..d339f0ce9 100644
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -915,7 +915,8 @@ set(ZLASRC
    zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
    zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
    zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
-   zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c)
+   zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c
+   zgedmd.c zgedmdq.c)
 
 set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
    zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c
@@ -925,7 +926,7 @@ set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
    zla_gbrfsx_extended.c zla_gbamv.c zla_gbrcond_c.c zla_gbrcond_x.c
    zla_gbrpvgrw.c zhesvxx.c zherfsx.c zla_herfsx_extended.c
    zla_heamv.c zla_hercond_c.c zla_hercond_x.c zla_herpvgrw.c
-   zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c zgedmd.c zgedmdq.c)
+   zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c)
 
 
 if(USE_XBLAS)

From 42fd3f4ec7d091519eafa60e4e08849aa19af98d Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 21 Jun 2023 22:52:31 +0200
Subject: [PATCH 15/15] Add standard module path for Windows flang

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 16b9da4f5..65ef538e9 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -115,7 +115,7 @@ jobs:
       mkdir build 
       cd build
       call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-      cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
+      cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
       cmake --build . --config Release
       ctest