From 79a0785334ef3886b98d84bbc33b8289f74f5290 Mon Sep 17 00:00:00 2001
From: Kyle Guinn <elyk03@gmail.com>
Date: Tue, 16 Jan 2024 21:46:01 -0600
Subject: [PATCH 1/4] Normalize line endings

sed -i 's/\s\+$//' SRC/*dmd* TESTING/EIG/*dmd*
Plus manually stripping empty lines at EOF.
---
 SRC/cgedmd.f90          | 2301 +++++++++++++++++++------------------
 SRC/cgedmdq.f90         | 1705 ++++++++++++++-------------
 SRC/dgedmd.f90          | 2412 +++++++++++++++++++--------------------
 SRC/dgedmdq.f90         | 1727 ++++++++++++++--------------
 SRC/sgedmd.f90          | 2411 +++++++++++++++++++-------------------
 SRC/sgedmdq.f90         | 1725 ++++++++++++++--------------
 SRC/zgedmd.f90          | 2295 +++++++++++++++++++------------------
 SRC/zgedmdq.f90         | 1703 ++++++++++++++-------------
 TESTING/EIG/cchkdmd.f90 | 1442 +++++++++++------------
 TESTING/EIG/dchkdmd.f90 | 1626 +++++++++++++-------------
 TESTING/EIG/schkdmd.f90 | 1584 ++++++++++++-------------
 TESTING/EIG/zchkdmd.f90 | 1490 ++++++++++++------------
 12 files changed, 11207 insertions(+), 11214 deletions(-)

diff --git a/SRC/cgedmd.f90 b/SRC/cgedmd.f90
index 1413130ec3..87e92eeb4c 100644
--- a/SRC/cgedmd.f90
+++ b/SRC/cgedmd.f90
@@ -1,1151 +1,1150 @@
-!> \brief \b CGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
-!                        M, N, X, LDX, Y, LDY, NRNK, TOL,   &
-!                        K, EIGS, Z, LDZ, RES, B,    LDB,   &
-!                        W, LDW,  S, LDS, ZWORK,  LZWORK,   &
-!                        RWORK, LRWORK, IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real32
-!
-!.....
-!     Scalar arguments
-!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-!                                NRNK, LDZ, LDB, LDW,  LDS, &
-!                                LIWORK, LRWORK, LZWORK
-!     INTEGER,       INTENT(OUT)  :: K, INFO
-!     REAL(KIND=WP), INTENT(IN)   ::    TOL
-!     Array arguments
-!     COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-!                                        W(LDW,*), S(LDS,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-!     REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
-!     REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
-!     INTEGER,          INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    CGEDMD computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, CGEDMD computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular
-!>    vectors of X. Optionally, CGEDMD returns the residuals
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations
-!>    expressed in this material are those of the author and
-!>    do not necessarily reflect the views of the DARPA SBIR
-!>    Program Office
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!......................................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix.
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'N' :: No data scaling.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product X(:,1:K)*W, where X
-!>           contains a POD basis (leading left singular vectors
-!>           of the data matrix X) and W contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of K, X, W, Z.
-!>    'N' :: The eigenvectors are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will be
-!>           computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: CGESVD (the QR SVD algorithm)
-!>    2 :: CGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M>= 0
-!>    The state space dimension (the row dimension of X, Y).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshot pairs
-!>    (the number of columns of X and Y).
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (input/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry, X contains the data snapshot matrix X. It is
-!>    assumed that the column norms of X are in the range of
-!>    the normalized floating point numbers.
-!>    < On exit, the leading K columns of X contain a POD basis,
-!>    i.e. the leading K left singular vectors of the input
-!>    data matrix X, U(:,1:K). All N columns of X contain all
-!>    left singular vectors of the input matrix X.
-!>    See the descriptions of K, Z and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= M
-!>    The leading dimension of the array X.
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry, Y contains the data snapshot matrix Y
-!>    < On exit,
-!>    If JOBR == 'R', the leading K columns of Y  contain
-!>    the residual vectors for the computed Ritz pairs.
-!>    See the description of RES.
-!>    If JOBR == 'N', Y contains the original input data,
-!>                    scaled according to the value of JOBS.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= M
-!>    The leading dimension of the array Y.
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.
-!>    The numerical rank can be enforced by using positive
-!>    value of NRNK as follows:
-!>    0 < NRNK <= N :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the descriptions of TOL and  K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N
-!>    The dimension of the POD basis for the data snapshot
-!>    matrix X and the number of the computed Ritz pairs.
-!>    The value of K is determined according to the rule set
-!>    by the parameters NRNK and TOL.
-!>    See the descriptions of NRNK and TOL.
-!>    \endverbatim
-!.....
-!>    \param[out] EIGS
-!>    \verbatim
-!>    EIGS (output) COMPLEX(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of EIGS contain
-!>    the computed eigenvalues (Ritz values).
-!>    See the descriptions of K, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
-!>    If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
-!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
-!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
-!>    the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
-!>    is an eigenvector corresponding to EIGS(i). The columns
-!>    of W(1:k,1:K) are the computed eigenvectors of the
-!>    K-by-K Rayleigh quotient.
-!>    See the descriptions of EIGS, X and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) N-by-1 array
-!>    RES(1:K) contains the residuals for the K computed
-!>    Ritz pairs,
-!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
-!>    See the description of EIGS and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) COMPLEX(KIND=WP)  M-by-N array.
-!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further
-!>    details in the provided references.
-!>    If JOBF == 'E', B(1:M,1:K) contains
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.
-!>    If JOBF =='N', then B is not referenced.
-!>    See the descriptions of X, W, K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= M
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] W
-!>    \verbatim
-!>    W (workspace/output) COMPLEX(KIND=WP) N-by-N array
-!>    On exit, W(1:K,1:K) contains the K computed
-!>    eigenvectors of the matrix Rayleigh quotient.
-!>    The Ritz vectors (returned in Z) are the
-!>    product of X (containing a POD basis for the input
-!>    matrix X) and W. See the descriptions of K, S, X and Z.
-!>    W is also used as a workspace to temporarily store the
-!>    right singular vectors of X.
-!>    \endverbatim
-!.....
-!>    \param[in] LDW
-!>    \verbatim
-!>    LDW (input) INTEGER, LDW >= N
-!>    The leading dimension of the array W.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (workspace/output) COMPLEX(KIND=WP) N-by-N array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by CGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] ZWORK
-!>    \verbatim
-!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
-!>    ZWORK is used as complex workspace in the complex SVD, as
-!>    specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing
-!>    the eigenvalues of a Rayleigh quotient.
-!>    If the call to CGEDMD is only workspace query, then
-!>    ZWORK(1) contains the minimal complex workspace length and
-!>    ZWORK(2) is the optimal complex workspace length.
-!>    Hence, the length of work is at least 2.
-!>    See the description of LZWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LZWORK
-!>    \verbatim
-!>    LZWORK (input) INTEGER
-!>    The minimal length of the workspace vector ZWORK.
-!>    LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV),
-!>    where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal
-!>    LZWORK_SVD is calculated as follows
-!>    If WHTSVD == 1 :: CGESVD ::
-!>       LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-!>    If WHTSVD == 2 :: CGESDD ::
-!>       LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
-!>    If WHTSVD == 3 :: CGESVDQ ::
-!>       LZWORK_SVD = obtainable by a query
-!>    If WHTSVD == 4 :: CGEJSV ::
-!>       LZWORK_SVD = obtainable by a query
-!>    If on entry LZWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths and returns them in
-!>    LZWORK(1) and LZWORK(2), respectively.
-!>    \endverbatim
-!.....
-!>    \param[out] RWORK
-!>    \verbatim
-!>    RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
-!>    On exit, RWORK(1:N) contains the singular values of
-!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
-!>    If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
-!>    scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
-!>    and Y to avoid overflow in the SVD of X.
-!>    This may be of interest if the scaling option is off
-!>    and as many as possible smallest eigenvalues are
-!>    desired to the highest feasible accuracy.
-!>    If the call to CGEDMD is only workspace query, then
-!>    RWORK(1) contains the minimal workspace length.
-!>    See the description of LRWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LRWORK
-!>    \verbatim
-!>    LRWORK (input) INTEGER
-!>    The minimal length of the workspace vector RWORK.
-!>    LRWORK is calculated as follows:
-!>    LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where
-!>    LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
-!>    for the SVD subroutine determined by the input parameter
-!>    WHTSVD.
-!>    If WHTSVD == 1 :: CGESVD ::
-!>       LRWORK_SVD = 5*MIN(M,N)
-!>    If WHTSVD == 2 :: CGESDD ::
-!>       LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
-!>       2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
-!>    If WHTSVD == 3 :: CGESVDQ ::
-!>       LRWORK_SVD = obtainable by a query
-!>    If WHTSVD == 4 :: CGEJSV ::
-!>       LRWORK_SVD = obtainable by a query
-!>    If on entry LRWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    real workspace length and returns it in RWORK(1).
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for  ZWORK, RWORK and
-!>    IWORK. See the descriptions of ZWORK, RWORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-      SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
-                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
-                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
-                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
-                         RWORK, LRWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real32
-!
-!     Scalar arguments
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-                                 NRNK, LDZ, LDB, LDW,  LDS, &
-                                 LIWORK, LRWORK, LZWORK
-      INTEGER,       INTENT(OUT)  :: K, INFO
-      REAL(KIND=WP), INTENT(IN)   ::    TOL
-!
-!     Array arguments
-!     ~~~~~~~~~~~~~~~
-      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-                                         W(LDW,*), S(LDS,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
-      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
-      INTEGER,          INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
-!
-!     Local scalars
-!     ~~~~~~~~~~~~~
-      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
-                       SSUM,  XSCL1,  XSCL2
-      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,   &
-                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
-                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
-                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
-                       OLWORK, MLRWRK
-      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
-                        WNTEX, WNTREF, WNTRES, WNTVEC
-      CHARACTER     ::  JOBZL, T_OR_N
-      CHARACTER     ::  JSVOPT
-!
-!     Local arrays
-!     ~~~~~~~~~~~~
-      REAL(KIND=WP) :: RDUMMY(2)
-!
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2
-      EXTERNAL      CLANGE, SLAMCH, SCNRM2, ICAMAX
-      INTEGER                               ICAMAX
-      LOGICAL       SISNAN, LSAME
-      EXTERNAL      SISNAN, LSAME
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      CAXPY,  CGEMM,  CSSCAL
-      EXTERNAL      CGEEV,  CGEJSV, CGESDD, CGESVD, CGESVDQ, &
-                    CLACPY, CLASCL, CLASSQ, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC     FLOAT, INT, MAX, SQRT
-!............................................................
-!
-!    Test the input arguments
-!
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTREF = LSAME(JOBF,'R')
-      WNTEX  = LSAME(JOBF,'E')
-      INFO   = 0
-      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
-                                  .OR. ( LRWORK == -1 ) )
-!
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
-                                  LSAME(JOBS,'N')) )   THEN
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
-                              .OR. LSAME(JOBZ,'F')) )  THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
-                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -4
-      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
-                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
-          INFO = -5
-      ELSE IF ( M < 0 )   THEN
-          INFO = -6
-      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
-          INFO = -7
-      ELSE IF ( LDX < M ) THEN
-          INFO = -9
-      ELSE IF ( LDY < M ) THEN
-          INFO = -11
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
-                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
-          INFO = -12
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
-          INFO = -13
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -17
-      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
-          INFO = -20
-      ELSE IF ( LDW < N ) THEN
-          INFO = -22
-      ELSE IF ( LDS < N ) THEN
-          INFO = -24
-      END IF
-!
-      IF ( INFO == 0 ) THEN
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and
-          ! determine minimal and optimal sizes of the
-          ! workspace at any moment of the run.
-         IF ( N == 0 ) THEN
-             ! Quick return. All output except K is void.
-             ! INFO=1 signals the void input.
-             ! In case of a workspace query, the default
-             ! minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN
-                IWORK(1) = 1
-                RWORK(1) = 1
-                ZWORK(1) = 2
-                ZWORK(2) = 2
-            ELSE
-               K   =  0
-            END IF
-            INFO = 1
-            RETURN
-         END IF
-
-         IMINWR = 1
-         MLRWRK = MAX(1,N)
-         MLWORK = 2
-         OLWORK = 2
-         SELECT CASE ( WHTSVD )
-         CASE (1)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of CGESVD:
-             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-             MLWORK = MAX(MLWORK,MWRSVD)
-             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
-             IF ( LQUERY ) THEN
-                CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
-                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
-                LWRSVD = INT( ZWORK(1) )
-                OLWORK = MAX(OLWORK,LWRSVD)
-             END IF
-         CASE (2)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of CGESDD:
-             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
-             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
-             ! In LAPACK 3.10.1 RWORK is defined differently.
-             ! Below we take max over the two versions.
-             ! IMINWR = 8*MIN(M,N)
-             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
-             MLWORK = MAX(MLWORK,MWRSDD)
-             IMINWR = 8*MIN(M,N)
-             MLRWRK = MAX( MLRWRK,  N +                    &
-                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
-                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
-                           2*MAX(M,N)*MIN(M,N)+            &
-                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
-             IF ( LQUERY ) THEN
-                CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,     &
-                     LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
-                LWRSDD = MAX(MWRSDD,INT( ZWORK(1) ))
-                OLWORK = MAX(OLWORK,LWRSDD)
-             END IF
-         CASE (3)
-             CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
-                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVQ = INT(ZWORK(2))
-             MLWORK = MAX(MLWORK,MWRSVQ)
-             MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1)))
-             IF ( LQUERY ) THEN
-                LWRSVQ = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK,LWRSVQ)
-             END IF
-         CASE (4)
-             JSVOPT = 'J'
-             CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
-                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
-                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVJ = INT(ZWORK(2))
-             MLWORK = MAX(MLWORK,MWRSVJ)
-             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
-             IF ( LQUERY ) THEN
-                LWRSVJ = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK,LWRSVJ)
-             END IF
-         END SELECT
-         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
-             JOBZL = 'V'
-         ELSE
-             JOBZL = 'N'
-         END IF
-         ! Workspace calculation to the CGEEV call
-         MWRKEV = MAX( 1, 2*N )
-         MLWORK = MAX(MLWORK,MWRKEV)
-         MLRWRK = MAX(MLRWRK,N+2*N)
-         IF ( LQUERY ) THEN
-             CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
-              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL
-                LWRKEV = INT(ZWORK(1))
-                OLWORK = MAX( OLWORK, LWRKEV )
-                OLWORK = MAX( 2, OLWORK )
-         END IF
-!
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
-         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
-         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
-
-      END IF
-!
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'CGEDMD', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          RWORK(1) = MLRWRK
-          ZWORK(1) = MLWORK
-          ZWORK(2) = OLWORK
-          RETURN
-      END IF
-!............................................................
-!
-      OFL   = SLAMCH('O')*SLAMCH('P')
-      SMALL = SLAMCH('S')
-      BADXY = .FALSE.
-!
-!     <1> Optional scaling of the snapshots (columns of X, Y)
-!     ==========================================================
-      IF ( SCCOLX ) THEN
-          ! The columns of X will be normalized.
-          ! To prevent overflows, the column norms of X are
-          ! carefully computed using CLASSQ.
-          K = 0
-          DO i = 1, N
-            !WORK(i) = SCNRM2( M, X(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM )
-            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -8
-                CALL XERBLA('CGEDMD',-INFO)
-            END IF
-            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of X(:,i) overflows. First, X(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
-!                 Next, the norm of X(:,i) is stored without
-!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of X(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, X(1,i), LDX, INFO2 )
-                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  RWORK(i) =   SCALE * ROOTSC
-                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
-                               X(1,i), LDX, INFO2 )             ! LAPACK CALL
-!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               RWORK(i) = ZERO
-               K = K + 1
-            END IF
-          END DO
-          IF ( K == N ) THEN
-          ! All columns of X are zero. Return error code -8.
-          ! (the 8th input variable had an illegal value)
-          K = 0
-          INFO = -8
-          CALL XERBLA('CGEDMD',-INFO)
-          RETURN
-          END IF
-          DO i = 1, N
-!           Now, apply the same scaling to the columns of Y.
-            IF ( RWORK(i) >  ZERO ) THEN
-                CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL
-!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC
-            ELSE IF ( RWORK(i) < ZERO ) THEN
-                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
-                     ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
-            ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i ))  &
-                                            /= ZERO ) THEN
-!               X(:,i) is zero vector. For consistency,
-!               Y(:,i) should also be zero. If Y(:,i) is not
-!               zero, then the data might be inconsistent or
-!               corrupted. If JOBS == 'C', Y(:,i) is set to
-!               zero and a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-                IF ( LSAME(JOBS,'C')) &
-                CALL CSSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
-            END IF
-          END DO
-      END IF
-  !
-      IF ( SCCOLY ) THEN
-          ! The columns of Y will be normalized.
-          ! To prevent overflows, the column norms of Y are
-          ! carefully computed using CLASSQ.
-          DO i = 1, N
-            !RWORK(i) = SCNRM2( M, Y(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM )
-            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -10
-                CALL XERBLA('CGEDMD',-INFO)
-            END IF
-            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of Y(:,i) overflows. First, Y(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
-!                 Next, the norm of Y(:,i) is stored without
-!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of Y(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, Y(1,i), LDY, INFO2 )
-                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
-               ELSE
-!                 Y(:,i) will be scaled to unit 2-norm
-                  RWORK(i) =   SCALE * ROOTSC
-                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
-                               Y(1,i), LDY, INFO2 )              ! LAPACK CALL
-!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               RWORK(i) = ZERO
-            END IF
-         END DO
-         DO i = 1, N
-!           Now, apply the same scaling to the columns of X.
-            IF ( RWORK(i) >  ZERO ) THEN
-                CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 )  ! BLAS CALL
-!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
-            ELSE IF ( RWORK(i) < ZERO ) THEN
-                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
-                     ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
-            ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i ))  &
-                                           /= ZERO ) THEN
-!               Y(:,i) is zero vector.  If X(:,i) is not
-!               zero, then a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-            END IF
-         END DO
-       END IF
-!
-!     <2> SVD of the data snapshot matrix X.
-!     =====================================
-!     The left singular vectors are stored in the array X.
-!     The right singular vectors are in the array W.
-!     The array W will later on contain the eigenvectors
-!     of a Rayleigh quotient.
-      NUMRNK = N
-      SELECT CASE ( WHTSVD )
-         CASE (1)
-             CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
-                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
-             T_OR_N = 'C'
-         CASE (2)
-            CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
-                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
-            T_OR_N = 'C'
-         CASE (3)
-              CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                   X, LDX, RWORK, Z, LDZ, W, LDW, &
-                   NUMRNK, IWORK, LIWORK, ZWORK,     &
-                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
-              CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
-         T_OR_N = 'C'
-         CASE (4)
-              CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
-                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
-                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
-              CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
-              T_OR_N = 'N'
-              XSCL1 = RWORK(N+1)
-              XSCL2 = RWORK(N+2)
-              IF ( XSCL1 /=  XSCL2 ) THEN
-                 ! This is an exceptional situation. If the
-                 ! data matrices are not scaled and the
-                 ! largest singular value of X overflows.
-                 ! In that case CGEJSV can return the SVD
-                 ! in scaled form. The scaling factor can be used
-                 ! to rescale the data (X and Y).
-                 CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
-              END IF
-      END SELECT
-!
-      IF ( INFO1 > 0 ) THEN
-         ! The SVD selected subroutine did not converge.
-         ! Return with an error code.
-         INFO = 2
-         RETURN
-      END IF
-!
-      IF ( RWORK(1) == ZERO ) THEN
-          ! The largest computed singular value of (scaled)
-          ! X is zero. Return error code -8
-          ! (the 8th input variable had an illegal value).
-          K = 0
-          INFO = -8
-          CALL XERBLA('CGEDMD',-INFO)
-          RETURN
-      END IF
-!
-      !<3> Determine the numerical rank of the data
-      !    snapshots matrix X. This depends on the
-      !    parameters NRNK and TOL.
-
-      SELECT CASE ( NRNK )
-          CASE ( -1 )
-               K = 1
-               DO i = 2, NUMRNK
-                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
-                      ( RWORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE ( -2 )
-               K = 1
-               DO i = 1, NUMRNK-1
-                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
-                      ( RWORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE DEFAULT
-               K = 1
-               DO i = 2, NRNK
-                  IF ( RWORK(i) <= SMALL ) EXIT
-                  K = K + 1
-               END DO
-          END SELECT
-      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
-      !   snapshot data in the input matrix X.
-
-      !<4> Compute the Rayleigh quotient S = U^H * A * U.
-      !    Depending on the requested outputs, the computation
-      !    is organized to compute additional auxiliary
-      !    matrices (for the residuals and refinements).
-      !
-      !    In all formulas below, we need V_k*Sigma_k^(-1)
-      !    where either V_k is in W(1:N,1:K), or V_k^H is in
-      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
-      IF ( LSAME(T_OR_N, 'N') ) THEN
-          DO i = 1, K
-           CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 )   ! BLAS CALL
-           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
-          END DO
-      ELSE
-          ! This non-unit stride access is due to the fact
-          ! that CGESVD, CGESVDQ and CGESDD return the
-          ! adjoint matrix of the right singular vectors.
-          !DO i = 1, K
-          ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL
-          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
-          !END DO
-          DO i = 1, K
-              RWORK(N+i) = ONE/RWORK(i)
-          END DO
-          DO j = 1, N
-             DO i = 1, K
-                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
-             END DO
-          END DO
-      END IF
-!
-      IF ( WNTREF ) THEN
-         !
-         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
-         ! for computing the refined Ritz vectors
-         ! (optionally, outside CGEDMD).
-          CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
-                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
-          !
-          ! At this point Z contains
-          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
-          ! this is needed for computing the residuals.
-          ! This matrix is  returned in the array B and
-          ! it can be used to compute refined Ritz vectors.
-          CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
-          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
-
-          CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
-                      LDZ, ZZERO, S, LDS )                       ! BLAS CALL
-          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
-          ! At this point S = U^H * A * U is the Rayleigh quotient.
-      ELSE
-        ! A * U(:,1:K) is not explicitly needed and the
-        ! computation is organized differently. The Rayleigh
-        ! quotient is computed more efficiently.
-        CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
-                   ZZERO, Z, LDZ )                                  ! BLAS CALL
-        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
-        !
-        CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
-                    LDW, ZZERO, S, LDS )                        ! BLAS CALL
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
-        ! At this point S = U^H * A * U is the Rayleigh quotient.
-        ! If the residuals are requested, save scaled V_k into Z.
-        ! Recall that V_k or V_k^H is stored in W.
-        IF ( WNTRES .OR. WNTEX ) THEN
-          IF ( LSAME(T_OR_N, 'N') ) THEN
-              CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ )
-          ELSE
-              CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ )
-          END IF
-        END IF
-      END IF
-!
-      !<5> Compute the Ritz values and (if requested) the
-      !   right eigenvectors of the Rayleigh quotient.
-      !
-      CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, &
-           LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
-      !
-      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
-      ! quotient.  See the description of Z.
-      ! Also, see the description of CGEEV.
-      IF ( INFO1 > 0 ) THEN
-         ! CGEEV failed to compute the eigenvalues and
-         ! eigenvectors of the Rayleigh quotient.
-         INFO = 3
-         RETURN
-      END IF
-!
-      ! <6> Compute the eigenvectors (if requested) and,
-      ! the residuals (if requested).
-      !
-      IF ( WNTVEC .OR. WNTEX ) THEN
-          IF ( WNTRES ) THEN
-              IF ( WNTREF ) THEN
-                ! Here, if the refinement is requested, we have
-                ! A*U(:,1:K) already computed and stored in Z.
-                ! For the residuals, need Y = A * U(:,1;K) * W.
-                CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
-                           LDW, ZZERO, Y, LDY )              ! BLAS CALL
-                ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
-                ! This frees Z; Y contains A * U(:,1:K) * W.
-              ELSE
-                ! Compute S = V_k * Sigma_k^(-1) * W, where
-                ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
-                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
-                           W, LDW, ZZERO, S, LDS)
-                ! Then, compute Z = Y * S =
-                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-                ! = A * U(:,1:K) * W(1:K,1:K)
-                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-                           LDS, ZZERO, Z, LDZ)
-                ! Save a copy of Z into Y and free Z for holding
-                ! the Ritz vectors.
-                CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY )
-                IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
-              END IF
-          ELSE IF ( WNTEX ) THEN
-              ! Compute S = V_k * Sigma_k^(-1) * W, where
-                ! V_k * Sigma_k^(-1) is stored in Z
-                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
-                           W, LDW, ZZERO, S, LDS)
-                ! Then, compute Z = Y * S =
-                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-                ! = A * U(:,1:K) * W(1:K,1:K)
-                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-                           LDS, ZZERO, B, LDB)
-                ! The above call replaces the following two calls
-                ! that were used in the developing-testing phase.
-                ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-                !           LDS, ZZERO, Z, LDZ)
-                ! Save a copy of Z into Y and free Z for holding
-                ! the Ritz vectors.
-                ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
-          END IF
-!
-          ! Compute the Ritz vectors
-          IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
-                       ZZERO, Z, LDZ )                          ! BLAS CALL
-          ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
-!
-          IF ( WNTRES ) THEN
-             DO i = 1, K
-                CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
-                ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
-                RES(i) = SCNRM2( M, Y(1,i), 1)                        ! BLAS CALL
-             END DO
-          END IF
-      END IF
-!
-      IF ( WHTSVD == 4 ) THEN
-          RWORK(N+1) = XSCL1
-          RWORK(N+2) = XSCL2
-      END IF
-!
-!     Successful exit.
-      IF ( .NOT. BADXY ) THEN
-         INFO = 0
-      ELSE
-         ! A warning on possible data inconsistency.
-         ! This should be a rare event.
-         INFO = 4
-      END IF
-!............................................................
-      RETURN
-!     ......
-      END SUBROUTINE CGEDMD
-
+!> \brief \b CGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+!                        M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+!                        K, EIGS, Z, LDZ, RES, B,    LDB,   &
+!                        W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+!                        RWORK, LRWORK, IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real32
+!
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+!                                NRNK, LDZ, LDB, LDW,  LDS, &
+!                                LIWORK, LRWORK, LZWORK
+!     INTEGER,       INTENT(OUT)  :: K, INFO
+!     REAL(KIND=WP), INTENT(IN)   ::    TOL
+!     Array arguments
+!     COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+!                                        W(LDW,*), S(LDS,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+!     REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+!     REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+!     INTEGER,          INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    CGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, CGEDMD computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, CGEDMD returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!......................................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product X(:,1:K)*W, where X
+!>           contains a POD basis (leading left singular vectors
+!>           of the data matrix X) and W contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of K, X, W, Z.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will be
+!>           computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: CGESVD (the QR SVD algorithm)
+!>    2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M>= 0
+!>    The state space dimension (the row dimension of X, Y).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshot pairs
+!>    (the number of columns of X and Y).
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (input/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry, X contains the data snapshot matrix X. It is
+!>    assumed that the column norms of X are in the range of
+!>    the normalized floating point numbers.
+!>    < On exit, the leading K columns of X contain a POD basis,
+!>    i.e. the leading K left singular vectors of the input
+!>    data matrix X, U(:,1:K). All N columns of X contain all
+!>    left singular vectors of the input matrix X.
+!>    See the descriptions of K, Z and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= M
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry, Y contains the data snapshot matrix Y
+!>    < On exit,
+!>    If JOBR == 'R', the leading K columns of Y  contain
+!>    the residual vectors for the computed Ritz pairs.
+!>    See the description of RES.
+!>    If JOBR == 'N', Y contains the original input data,
+!>                    scaled according to the value of JOBS.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= M
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the descriptions of TOL and  K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the POD basis for the data snapshot
+!>    matrix X and the number of the computed Ritz pairs.
+!>    The value of K is determined according to the rule set
+!>    by the parameters NRNK and TOL.
+!>    See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] EIGS
+!>    \verbatim
+!>    EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of EIGS contain
+!>    the computed eigenvalues (Ritz values).
+!>    See the descriptions of K, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!>    If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!>    the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!>    is an eigenvector corresponding to EIGS(i). The columns
+!>    of W(1:k,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient.
+!>    See the descriptions of EIGS, X and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) N-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs,
+!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!>    See the description of EIGS and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) COMPLEX(KIND=WP)  M-by-N array.
+!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:M,1:K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    If JOBF =='N', then B is not referenced.
+!>    See the descriptions of X, W, K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= M
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] W
+!>    \verbatim
+!>    W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!>    On exit, W(1:K,1:K) contains the K computed
+!>    eigenvectors of the matrix Rayleigh quotient.
+!>    The Ritz vectors (returned in Z) are the
+!>    product of X (containing a POD basis for the input
+!>    matrix X) and W. See the descriptions of K, S, X and Z.
+!>    W is also used as a workspace to temporarily store the
+!>    right singular vectors of X.
+!>    \endverbatim
+!.....
+!>    \param[in] LDW
+!>    \verbatim
+!>    LDW (input) INTEGER, LDW >= N
+!>    The leading dimension of the array W.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by CGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] ZWORK
+!>    \verbatim
+!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!>    ZWORK is used as complex workspace in the complex SVD, as
+!>    specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing
+!>    the eigenvalues of a Rayleigh quotient.
+!>    If the call to CGEDMD is only workspace query, then
+!>    ZWORK(1) contains the minimal complex workspace length and
+!>    ZWORK(2) is the optimal complex workspace length.
+!>    Hence, the length of work is at least 2.
+!>    See the description of LZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LZWORK
+!>    \verbatim
+!>    LZWORK (input) INTEGER
+!>    The minimal length of the workspace vector ZWORK.
+!>    LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV),
+!>    where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal
+!>    LZWORK_SVD is calculated as follows
+!>    If WHTSVD == 1 :: CGESVD ::
+!>       LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!>    If WHTSVD == 2 :: CGESDD ::
+!>       LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!>    If WHTSVD == 3 :: CGESVDQ ::
+!>       LZWORK_SVD = obtainable by a query
+!>    If WHTSVD == 4 :: CGEJSV ::
+!>       LZWORK_SVD = obtainable by a query
+!>    If on entry LZWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths and returns them in
+!>    LZWORK(1) and LZWORK(2), respectively.
+!>    \endverbatim
+!.....
+!>    \param[out] RWORK
+!>    \verbatim
+!>    RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!>    On exit, RWORK(1:N) contains the singular values of
+!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!>    If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!>    scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!>    and Y to avoid overflow in the SVD of X.
+!>    This may be of interest if the scaling option is off
+!>    and as many as possible smallest eigenvalues are
+!>    desired to the highest feasible accuracy.
+!>    If the call to CGEDMD is only workspace query, then
+!>    RWORK(1) contains the minimal workspace length.
+!>    See the description of LRWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LRWORK
+!>    \verbatim
+!>    LRWORK (input) INTEGER
+!>    The minimal length of the workspace vector RWORK.
+!>    LRWORK is calculated as follows:
+!>    LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where
+!>    LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
+!>    for the SVD subroutine determined by the input parameter
+!>    WHTSVD.
+!>    If WHTSVD == 1 :: CGESVD ::
+!>       LRWORK_SVD = 5*MIN(M,N)
+!>    If WHTSVD == 2 :: CGESDD ::
+!>       LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!>       2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+!>    If WHTSVD == 3 :: CGESVDQ ::
+!>       LRWORK_SVD = obtainable by a query
+!>    If WHTSVD == 4 :: CGEJSV ::
+!>       LRWORK_SVD = obtainable by a query
+!>    If on entry LRWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    real workspace length and returns it in RWORK(1).
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for  ZWORK, RWORK and
+!>    IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+      SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,   &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2
+      EXTERNAL      CLANGE, SLAMCH, SCNRM2, ICAMAX
+      INTEGER                               ICAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CAXPY,  CGEMM,  CSSCAL
+      EXTERNAL      CGEEV,  CGEJSV, CGESDD, CGESVD, CGESVDQ, &
+                    CLACPY, CLASCL, CLASSQ, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     FLOAT, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,     &
+                     LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX(MWRSDD,INT( ZWORK(1) ))
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the CGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+                OLWORK = MAX( 2, OLWORK )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')*SLAMCH('P')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using CLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = SCNRM2( M, X(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )             ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL CSSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using CLASSQ.
+          DO i = 1, N
+            !RWORK(i) = SCNRM2( M, Y(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case CGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 )   ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that CGESVD, CGESVDQ and CGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside CGEDMD).
+          CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                       ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                  ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                        ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, &
+           LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of CGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! CGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+          IF ( WNTRES ) THEN
+              IF ( WNTREF ) THEN
+                ! Here, if the refinement is requested, we have
+                ! A*U(:,1:K) already computed and stored in Z.
+                ! For the residuals, need Y = A * U(:,1;K) * W.
+                CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                           LDW, ZZERO, Y, LDY )              ! BLAS CALL
+                ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+                ! This frees Z; Y contains A * U(:,1:K) * W.
+              ELSE
+                ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+                IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+              END IF
+          ELSE IF ( WNTEX ) THEN
+              ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, B, LDB)
+                ! The above call replaces the following two calls
+                ! that were used in the developing-testing phase.
+                ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                !           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+!
+          ! Compute the Ritz vectors
+          IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                       ZZERO, Z, LDZ )                          ! BLAS CALL
+          ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+          IF ( WNTRES ) THEN
+             DO i = 1, K
+                CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SCNRM2( M, Y(1,i), 1)                        ! BLAS CALL
+             END DO
+          END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE CGEDMD
diff --git a/SRC/cgedmdq.f90 b/SRC/cgedmdq.f90
index 180563e513..b4eebee5dc 100644
--- a/SRC/cgedmdq.f90
+++ b/SRC/cgedmdq.f90
@@ -1,853 +1,852 @@
-!> \brief \b CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-!                   WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-!                   LDY,   NRNK,  TOL,   K,  EIGS,         &
-!                   Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-!                   S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
-!                   IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE 
-!     INTEGER, PARAMETER :: WP = real32
-!.....      
-!     Scalar arguments       
-!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-!                               JOBT, JOBF
-!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-!                               LDY, NRNK, LDZ, LDB, LDV,  &
-!                               LDS, LZWORK,  LWORK, LIWORK
-!     INTEGER,   INTENT(OUT) :: INFO,   K      
-!     REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!     Array arguments      
-!     COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
-!                                        Z(LDZ,*), B(LDB,*), &
-!                                        V(LDV,*), S(LDS,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices, using a QR factorization
-!>    based compression of the data. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, CGEDMDQ computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular 
-!>    vectors of X. Optionally, CGEDMDQ returns the residuals 
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].      
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.      
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations 
-!>    expressed in this material are those of the author and 
-!>    do not necessarily reflect the views of the DARPA SBIR 
-!>    Program Office.      
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022      
-!>    \endverbatim
-!......................................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix. The data snapshots are the columns
-!>    of F. The leading N-1 columns of F are denoted X and the
-!>    trailing N-1 columns are denoted Y. 
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)    
-!>    'N' :: No data scaling.   
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Z*V, where Z
-!>           is orthonormal and V contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of F, V, Z.
-!>    'Q' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Q*Z, where Z
-!>           contains the eigenvectors of the compression of the
-!>           underlying discretised operator onto the span of
-!>           the data snapshots. See the descriptions of F, V, Z.   
-!>           Q is from the inital QR facorization.    
-!>    'N' :: The eigenvectors are not computed.  
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1 
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will
-!>           be computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBQ
-!>    \verbatim
-!>    JOBQ (input) CHARACTER*1 
-!>    Specifies whether to explicitly compute and return the
-!>    unitary matrix from the QR factorization.
-!>    'Q' :: The matrix Q of the QR factorization of the data
-!>           snapshot matrix is computed and stored in the
-!>           array F. See the description of F.       
-!>    'N' :: The matrix Q is not explicitly computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBT
-!>    \verbatim
-!>    JOBT (input) CHARACTER*1 
-!>    Specifies whether to return the upper triangular factor
-!>    from the QR factorization.
-!>    'R' :: The matrix R of the QR factorization of the data 
-!>           snapshot matrix F is returned in the array Y.
-!>           See the description of Y and Further details.       
-!>    'N' :: The matrix R is not returned. 
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are 
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.   
-!>    To be useful on exit, this option needs JOBQ='Q'.    
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: CGESVD (the QR SVD algorithm)
-!>    2 :: CGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger 
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M >= 0 
-!>    The state space dimension (the number of rows of F).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshots from a single trajectory,
-!>    taken at equidistant discrete times. This is the 
-!>    number of columns of F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] F
-!>    \verbatim
-!>    F (input/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry,
-!>    the columns of F are the sequence of data snapshots 
-!>    from a single trajectory, taken at equidistant discrete
-!>    times. It is assumed that the column norms of F are 
-!>    in the range of the normalized floating point numbers. 
-!>    < On exit,
-!>    If JOBQ == 'Q', the array F contains the orthogonal 
-!>    matrix/factor of the QR factorization of the initial 
-!>    data snapshots matrix F. See the description of JOBQ. 
-!>    If JOBQ == 'N', the entries in F strictly below the main
-!>    diagonal contain, column-wise, the information on the 
-!>    Householder vectors, as returned by CGEQRF. The 
-!>    remaining information to restore the orthogonal matrix
-!>    of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
-!>    See the description of ZWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LDF
-!>    \verbatim
-!>    LDF (input) INTEGER, LDF >= M 
-!>    The leading dimension of the array F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    X is used as workspace to hold representations of the
-!>    leading N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, the leading K columns of X contain the leading
-!>    K left singular vectors of the above described content
-!>    of X. To lift them to the space of the left singular
-!>    vectors U(:,1:K) of the input data, pre-multiply with the 
-!>    Q factor from the initial QR factorization. 
-!>    See the descriptions of F, K, V  and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= N  
-!>    The leading dimension of the array X. 
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
-!>    Y is used as workspace to hold representations of the
-!>    trailing N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, 
-!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
-!>    triangular factor from the QR factorization of the data
-!>    snapshot matrix F.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= N
-!>    The leading dimension of the array Y.   
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.      
-!>    The numerical rank can be enforced by using positive 
-!>    value of NRNK as follows: 
-!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.  
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N 
-!>    The dimension of the SVD/POD basis for the leading N-1
-!>    data snapshots (columns of F) and the number of the 
-!>    computed Ritz pairs. The value of K is determined
-!>    according to the rule set by the parameters NRNK and 
-!>    TOL. See the descriptions of NRNK and TOL. 
-!>    \endverbatim
-!.....
-!>    \param[out] EIGS
-!>    \verbatim
-!>    EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<=N-1) entries of EIGS contain
-!>    the computed eigenvalues (Ritz values).
-!>    See the descriptions of K, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
-!>    If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
-!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
-!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
-!>    Z*V, where Z contains orthonormal matrix (the product of
-!>    Q from the initial QR factorization and the SVD/POD_basis
-!>    returned by CGEDMD in X) and the second factor (the 
-!>    eigenvectors of the Rayleigh quotient) is in the array V, 
-!>    as returned by CGEDMD. That is,  X(:,1:K)*V(:,i)
-!>    is an eigenvector corresponding to EIGS(i). The columns 
-!>    of V(1:K,1:K) are the computed eigenvectors of the 
-!>    K-by-K Rayleigh quotient.  
-!>    See the descriptions of EIGS, X and V.      
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    RES(1:K) contains the residuals for the K computed 
-!>    Ritz pairs, 
-!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
-!>    See the description of EIGS and Z.      
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
-!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further 
-!>    details in the provided references. 
-!>    If JOBF == 'E', B(1:N,1;K) contains 
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.   
-!>    In both cases, the content of B can be lifted to the 
-!>    original dimension of the input data by pre-multiplying
-!>    with the Q factor from the initial QR factorization. 
-!>    Here A denotes a compression of the underlying operator.      
-!>    See the descriptions of F and X.
-!>    If JOBF =='N', then B is not referenced.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= MIN(M,N)
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] V
-!>    \verbatim
-!>    V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
-!>    On exit, V(1:K,1:K) V contains the K eigenvectors of
-!>    the Rayleigh quotient. The Ritz vectors
-!>    (returned in Z) are the product of Q from the initial QR
-!>    factorization (see the description of F) X (see the 
-!>    description of X) and V.
-!>    \endverbatim
-!.....
-!>    \param[in] LDV
-!>    \verbatim
-!>    LDV (input) INTEGER, LDV >= N-1
-!>    The leading dimension of the array V.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by CGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N-1        
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] LZWORK
-!>    \verbatim
-!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
-!>    On exit, 
-!>    ZWORK(1:MIN(M,N)) contains the scalar factors of the 
-!>    elementary reflectors as returned by CGEQRF of the 
-!>    M-by-N input matrix F.   
-!>    If the call to CGEDMDQ is only workspace query, then
-!>    ZWORK(1) contains the minimal complex workspace length and
-!>    ZWORK(2) is the optimal complex workspace length. 
-!>    Hence, the length of work is at least 2.
-!>    See the description of LZWORK.      
-!>    \endverbatim
-!.....
-!>    \param[in] LZWORK
-!>    \verbatim
-!>    LZWORK (input) INTEGER
-!>    The minimal length of the  workspace vector ZWORK.
-!>    LZWORK is calculated as follows:
-!>    Let MLWQR  = N (minimal workspace for CGEQRF[M,N])
-!>        MLWDMD = minimal workspace for CGEDMD (see the
-!>                 description of LWORK in CGEDMD)
-!>        MLWMQR = N (minimal workspace for 
-!>                   ZUNMQR['L','N',M,N,N])
-!>        MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
-!>        MINMN  = MIN(M,N)      
-!>    Then
-!>    LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
-!>    is further updated as follows:
-!>       if   JOBZ == 'V' or JOBZ == 'F' THEN 
-!>            LZWORK = MAX( LZWORK, MINMN+MLWMQR )
-!>       if   JOBQ == 'Q' THEN
-!>            LZWORK = MAX( ZLWORK, MINMN+MLWGQR)      
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit,
-!>    WORK(1:N-1) contains the singular values of 
-!>    the input submatrix F(1:M,1:N-1).
-!>    If the call to CGEDMDQ is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. hence, the
-!>    length of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the  workspace vector WORK.
-!>    LWORK is the same as in CGEDMD, because in CGEDMDQ
-!>    only CGEDMD requires real workspace for snapshots
-!>    of dimensions MIN(M,N)-by-(N-1).
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.          
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    Let M1=MIN(M,N), N1=N-1. Then      
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.  
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-                    LDY,   NRNK,  TOL,   K,  EIGS,         &
-                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
-                    IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE 
-      INTEGER, PARAMETER :: WP = real32
-!
-!     Scalar arguments       
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-                                JOBT, JOBF
-      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-                                LDY, NRNK, LDZ, LDB, LDV,  &
-                                LDS, LZWORK,  LWORK, LIWORK
-      INTEGER,   INTENT(OUT) :: INFO,   K      
-      REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!
-!     Array arguments      
-!     ~~~~~~~~~~~~~~~
-      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
-                                         Z(LDZ,*), B(LDB,*), &
-                                         V(LDV,*), S(LDS,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~      
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
-!      
-!     Local scalars      
-!     ~~~~~~~~~~~~~
-      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
-                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
-                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
-                           OLWORK, OLWQR
-      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
-                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
-                           WNTVCQ, WNTREF, WNTEX
-      CHARACTER(LEN=1)  :: JOBVL
-!      
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      LOGICAL       LSAME
-      EXTERNAL      LSAME 
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      CGEDMD, CGEQRF, CLACPY, CLASET, CUNGQR, & 
-                    CUNMQR, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC      MAX, MIN, INT         
-!..........................................................  
-!
-!     Test the input arguments    
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTVCF = LSAME(JOBZ,'F')
-      WNTVCQ = LSAME(JOBZ,'Q')
-      WNTREF = LSAME(JOBF,'R') 
-      WNTEX  = LSAME(JOBF,'E')
-      WANTQ  = LSAME(JOBQ,'Q')
-      WNTTRF = LSAME(JOBT,'R')     
-      MINMN  = MIN(M,N)
-      INFO = 0 
-      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
-!       
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
-                                  LSAME(JOBS,'N')) )  THEN 
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
-                              .OR. LSAME(JOBZ,'N')) ) THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
-          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
-           INFO = -4                 
-      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
-          INFO = -5
-       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
-                LSAME(JOBF,'N') ) )                     THEN
-          INFO = -6    
-      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
-                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
-          INFO = -7
-      ELSE IF ( M < 0 ) THEN
-          INFO = -8
-      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
-          INFO = -9
-      ELSE IF ( LDF < M ) THEN
-          INFO = -11
-      ELSE IF ( LDX < MINMN ) THEN
-          INFO = -13
-      ELSE IF ( LDY < MINMN ) THEN
-          INFO = -15
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
-                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
-          INFO = -16
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
-          INFO = -17
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -21
-      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
-          INFO = -24
-      ELSE IF ( LDV < N-1 ) THEN
-          INFO = -26
-      ELSE IF ( LDS < N-1 ) THEN
-          INFO = -28
-      END IF
-!      
-      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
-          JOBVL = 'V'
-      ELSE
-          JOBVL = 'N'
-      END IF     
-      IF ( INFO == 0 ) THEN  
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and 
-          ! determine minimal and optimal sizes of the 
-          ! workspace at any moment of the run.         
-         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
-             ! All output except K is void. INFO=1 signals
-             ! the void input. In case of a workspace query,
-             ! the minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN  
-               IWORK(1) = 1
-                WORK(1) = 2
-                WORK(2) = 2
-            ELSE                
-               K = 0
-            END IF             
-            INFO = 1  
-            RETURN
-         END IF     
-         
-         MLRWRK = 2
-         MLWORK = 2
-         OLWORK = 2 
-         IMINWR = 1
-         MLWQR  = MAX(1,N)  ! Minimal workspace length for CGEQRF.
-         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
-
-         IF ( LQUERY ) THEN 
-             CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
-                          INFO1 )
-             OLWQR  = INT(ZWORK(1))
-             OLWORK = MAX(OLWORK,MINMN + OLWQR)           
-         END IF
-         CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
-                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
-                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
-                      S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,&
-                      LIWORK, INFO1 )
-         MLWDMD = INT(ZWORK(1))
-         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
-         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
-         IMINWR = MAX(IMINWR, IWORK(1))
-         IF ( LQUERY ) THEN 
-             OLWDMD = INT(ZWORK(2))
-             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
-         END IF
-         IF ( WNTVEC .OR. WNTVCF ) THEN
-            MLWMQR = MAX(1,N) 
-            MLWORK = MAX(MLWORK, MINMN+MLWMQR)
-            IF ( LQUERY ) THEN
-               CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
-                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
-               OLWMQR = INT(ZWORK(1))
-               OLWORK = MAX(OLWORK, MINMN+OLWMQR)
-            END IF
-         END IF  
-         IF ( WANTQ ) THEN
-            MLWGQR = MAX(1,N)
-            MLWORK = MAX(MLWORK, MINMN+MLWGQR)
-            IF ( LQUERY ) THEN 
-                CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
-                             ZWORK, -1, INFO1 )               
-                OLWGQR = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK, MINMN+OLWGQR)
-            END IF            
-         END IF          
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
-         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
-         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
-      END IF  
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'CGEDMDQ', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          ZWORK(1) = MLWORK
-          ZWORK(2) = OLWORK
-          WORK(1)  = MLRWRK
-          WORK(2)  = MLRWRK
-          RETURN
-      END IF   
-!.....	  
-!     Initial QR factorization that is used to represent the
-!     snapshots as elements of lower dimensional subspace.
-!     For large scale computation with M >>N , at this place 
-!     one can use an out of core QRF.
-!   
-      CALL CGEQRF( M, N, F, LDF, ZWORK,                & 
-                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-!      
-!     Define X and Y as the snapshots representations in the
-!     orthogonal basis computed in the QR factorization.
-!     X corresponds to the leading N-1 and Y to the trailing
-!     N-1 snapshots.
-      CALL CLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
-      CALL CLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
-      CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
-      IF ( M >= 3 ) THEN
-          CALL CLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
-                       Y(3,1), LDY )  
-      END IF
-!
-!     Compute the DMD of the projected snapshot pairs (X,Y)   
-      CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
-                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
-                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
-                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN,    & 
-                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
-      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
-          ! Return with error code. See CGEDMD for details.
-          INFO = INFO1
-          RETURN
-      ELSE
-          INFO = INFO1
-      END IF    
-!      
-!     The Ritz vectors (Koopman modes) can be explicitly 
-!     formed or returned in factored form.
-      IF ( WNTVEC ) THEN
-        ! Compute the eigenvectors explicitly.  
-        IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, &
-                                     ZZERO, Z(MINMN+1,1), LDZ )
-        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
-             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-      ELSE IF ( WNTVCF ) THEN   
-        !   Return the Ritz vectors (eigenvectors) in factored
-        !   form Z*V, where Z contains orthonormal matrix (the
-        !   product of Q from the initial QR factorization and 
-        !   the SVD/POD_basis returned by CGEDMD in X) and the 
-        !   second factor (the eigenvectors of the Rayleigh 
-        !   quotient) is in the array V, as returned by CGEDMD.
-        CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ )
-        IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & 
-                                 Z(N+1,1), LDZ )
-        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
-                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-      END IF
-!     
-!     Some optional output variables:
-!
-!     The upper triangular factor R in the initial QR 
-!     factorization is optionally returned in the array Y.
-!     This is useful if this call to CGEDMDQ is to be 
-      
-!     followed by a streaming DMD that is implemented in a 
-!     QR compressed form.
-      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
-         CALL CLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
-         CALL CLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
-      END IF    
-!
-!     The orthonormal/unitary factor Q in the initial QR 
-!     factorization is optionally returned in the array F. 
-!     Same as with the triangular factor above, this is 
-!     useful in a streaming DMD.
-      IF ( WANTQ ) THEN                   ! Q overwrites F 
-         CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
-                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
-      END IF
-!      
-      RETURN
-!      
-      END SUBROUTINE CGEDMDQ
-    
+!> \brief \b CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+!                   WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+!                   LDY,   NRNK,  TOL,   K,  EIGS,         &
+!                   Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+!                   S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+!                   IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+!                               JOBT, JOBF
+!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+!                               LDY, NRNK, LDZ, LDB, LDV,  &
+!                               LDS, LZWORK,  LWORK, LIWORK
+!     INTEGER,   INTENT(OUT) :: INFO,   K
+!     REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+!     COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+!                                        Z(LDZ,*), B(LDB,*), &
+!                                        V(LDV,*), S(LDS,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices, using a QR factorization
+!>    based compression of the data. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, CGEDMDQ computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, CGEDMDQ returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!......................................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix. The data snapshots are the columns
+!>    of F. The leading N-1 columns of F are denoted X and the
+!>    trailing N-1 columns are denoted Y.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Z*V, where Z
+!>           is orthonormal and V contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of F, V, Z.
+!>    'Q' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Q*Z, where Z
+!>           contains the eigenvectors of the compression of the
+!>           underlying discretised operator onto the span of
+!>           the data snapshots. See the descriptions of F, V, Z.
+!>           Q is from the inital QR facorization.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will
+!>           be computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBQ
+!>    \verbatim
+!>    JOBQ (input) CHARACTER*1
+!>    Specifies whether to explicitly compute and return the
+!>    unitary matrix from the QR factorization.
+!>    'Q' :: The matrix Q of the QR factorization of the data
+!>           snapshot matrix is computed and stored in the
+!>           array F. See the description of F.
+!>    'N' :: The matrix Q is not explicitly computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBT
+!>    \verbatim
+!>    JOBT (input) CHARACTER*1
+!>    Specifies whether to return the upper triangular factor
+!>    from the QR factorization.
+!>    'R' :: The matrix R of the QR factorization of the data
+!>           snapshot matrix F is returned in the array Y.
+!>           See the description of Y and Further details.
+!>    'N' :: The matrix R is not returned.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    To be useful on exit, this option needs JOBQ='Q'.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: CGESVD (the QR SVD algorithm)
+!>    2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M >= 0
+!>    The state space dimension (the number of rows of F).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshots from a single trajectory,
+!>    taken at equidistant discrete times. This is the
+!>    number of columns of F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] F
+!>    \verbatim
+!>    F (input/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry,
+!>    the columns of F are the sequence of data snapshots
+!>    from a single trajectory, taken at equidistant discrete
+!>    times. It is assumed that the column norms of F are
+!>    in the range of the normalized floating point numbers.
+!>    < On exit,
+!>    If JOBQ == 'Q', the array F contains the orthogonal
+!>    matrix/factor of the QR factorization of the initial
+!>    data snapshots matrix F. See the description of JOBQ.
+!>    If JOBQ == 'N', the entries in F strictly below the main
+!>    diagonal contain, column-wise, the information on the
+!>    Householder vectors, as returned by CGEQRF. The
+!>    remaining information to restore the orthogonal matrix
+!>    of the initial QR factorization is stored in ZWORK(1:MIN(M,N)).
+!>    See the description of ZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LDF
+!>    \verbatim
+!>    LDF (input) INTEGER, LDF >= M
+!>    The leading dimension of the array F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    X is used as workspace to hold representations of the
+!>    leading N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit, the leading K columns of X contain the leading
+!>    K left singular vectors of the above described content
+!>    of X. To lift them to the space of the left singular
+!>    vectors U(:,1:K) of the input data, pre-multiply with the
+!>    Q factor from the initial QR factorization.
+!>    See the descriptions of F, K, V  and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= N
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!>    Y is used as workspace to hold representations of the
+!>    trailing N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit,
+!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!>    triangular factor from the QR factorization of the data
+!>    snapshot matrix F.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= N
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the SVD/POD basis for the leading N-1
+!>    data snapshots (columns of F) and the number of the
+!>    computed Ritz pairs. The value of K is determined
+!>    according to the rule set by the parameters NRNK and
+!>    TOL. See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] EIGS
+!>    \verbatim
+!>    EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<=N-1) entries of EIGS contain
+!>    the computed eigenvalues (Ritz values).
+!>    See the descriptions of K, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!>    If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!>    Z*V, where Z contains orthonormal matrix (the product of
+!>    Q from the initial QR factorization and the SVD/POD_basis
+!>    returned by CGEDMD in X) and the second factor (the
+!>    eigenvectors of the Rayleigh quotient) is in the array V,
+!>    as returned by CGEDMD. That is,  X(:,1:K)*V(:,i)
+!>    is an eigenvector corresponding to EIGS(i). The columns
+!>    of V(1:K,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient.
+!>    See the descriptions of EIGS, X and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs,
+!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!>    See the description of EIGS and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:N,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    In both cases, the content of B can be lifted to the
+!>    original dimension of the input data by pre-multiplying
+!>    with the Q factor from the initial QR factorization.
+!>    Here A denotes a compression of the underlying operator.
+!>    See the descriptions of F and X.
+!>    If JOBF =='N', then B is not referenced.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= MIN(M,N)
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] V
+!>    \verbatim
+!>    V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!>    On exit, V(1:K,1:K) V contains the K eigenvectors of
+!>    the Rayleigh quotient. The Ritz vectors
+!>    (returned in Z) are the product of Q from the initial QR
+!>    factorization (see the description of F) X (see the
+!>    description of X) and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDV
+!>    \verbatim
+!>    LDV (input) INTEGER, LDV >= N-1
+!>    The leading dimension of the array V.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by CGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N-1
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] LZWORK
+!>    \verbatim
+!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    ZWORK(1:MIN(M,N)) contains the scalar factors of the
+!>    elementary reflectors as returned by CGEQRF of the
+!>    M-by-N input matrix F.
+!>    If the call to CGEDMDQ is only workspace query, then
+!>    ZWORK(1) contains the minimal complex workspace length and
+!>    ZWORK(2) is the optimal complex workspace length.
+!>    Hence, the length of work is at least 2.
+!>    See the description of LZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LZWORK
+!>    \verbatim
+!>    LZWORK (input) INTEGER
+!>    The minimal length of the  workspace vector ZWORK.
+!>    LZWORK is calculated as follows:
+!>    Let MLWQR  = N (minimal workspace for CGEQRF[M,N])
+!>        MLWDMD = minimal workspace for CGEDMD (see the
+!>                 description of LWORK in CGEDMD)
+!>        MLWMQR = N (minimal workspace for
+!>                   ZUNMQR['L','N',M,N,N])
+!>        MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!>        MINMN  = MIN(M,N)
+!>    Then
+!>    LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!>    is further updated as follows:
+!>       if   JOBZ == 'V' or JOBZ == 'F' THEN
+!>            LZWORK = MAX( LZWORK, MINMN+MLWMQR )
+!>       if   JOBQ == 'Q' THEN
+!>            LZWORK = MAX( ZLWORK, MINMN+MLWGQR)
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    WORK(1:N-1) contains the singular values of
+!>    the input submatrix F(1:M,1:N-1).
+!>    If the call to CGEDMDQ is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. hence, the
+!>    length of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the  workspace vector WORK.
+!>    LWORK is the same as in CGEDMD, because in CGEDMDQ
+!>    only CGEDMD requires real workspace for snapshots
+!>    of dimensions MIN(M,N)-by-(N-1).
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    Let M1=MIN(M,N), N1=N-1. Then
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  &
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CGEDMD, CGEQRF, CLACPY, CLASET, CUNGQR, &
+                    CUNMQR, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+!..........................................................
+!
+!     Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+           INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for CGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR)
+
+         IF ( LQUERY ) THEN
+             CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)
+         END IF
+         CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    &
+                      S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,&
+                      LIWORK, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK, MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK, MINMN+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK, MINMN+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK, MINMN+OLWGQR)
+            END IF
+         END IF
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place
+!     one can use an out of core QRF.
+!
+      CALL CGEQRF( M, N, F, LDF, ZWORK,                &
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL CLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL CLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL CLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN,    &
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See CGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and
+        !   the SVD/POD_basis returned by CGEDMD in X) and the
+        !   second factor (the eigenvectors of the Rayleigh
+        !   quotient) is in the array V, as returned by CGEDMD.
+        CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, &
+                                 Z(N+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to CGEDMDQ is to be
+
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL CLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL CLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF
+!
+!     The orthonormal/unitary factor Q in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F
+         CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE CGEDMDQ
diff --git a/SRC/dgedmd.f90 b/SRC/dgedmd.f90
index 15df48fe91..9c4afd182d 100644
--- a/SRC/dgedmd.f90
+++ b/SRC/dgedmd.f90
@@ -1,1206 +1,1206 @@
-!> \brief \b DGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
-!                        M, N, X, LDX, Y, LDY, NRNK, TOL,  &
-!                        K, REIG,  IMEIG,   Z, LDZ,  RES,  &
-!                        B, LDB, W,  LDW,   S, LDS,        &
-!                        WORK, LWORK, IWORK, LIWORK, INFO )
-!
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real64
-!.....
-!     Scalar arguments
-!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-!                                NRNK, LDZ, LDB, LDW,  LDS, &
-!                                LWORK,  LIWORK
-!     INTEGER,   INTENT(OUT)  :: K, INFO
-!     REAL(KIND=WP), INTENT(IN)  :: TOL
-!     Array arguments
-!     REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-!                                     W(LDW,*), S(LDS,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
-!                                     RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    DGEDMD computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, DGEDMD computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular
-!>    vectors of X. Optionally, DGEDMD returns the residuals
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations
-!>    expressed in this material are those of the author and
-!>    do not necessarily reflect the views of the DARPA SBIR
-!>    Program Office
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!......................................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) is CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix.
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'N' :: No data scaling.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product X(:,1:K)*W, where X
-!>           contains a POD basis (leading left singular vectors
-!>           of the data matrix X) and W contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of K, X, W, Z.
-!>    'N' :: The eigenvectors are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will be
-!>           computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: DGESVD (the QR SVD algorithm)
-!>    2 :: DGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M>= 0
-!>    The state space dimension (the row dimension of X, Y).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshot pairs
-!>    (the number of columns of X and Y).
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (input/output) REAL(KIND=WP) M-by-N array
-!>    > On entry, X contains the data snapshot matrix X. It is
-!>    assumed that the column norms of X are in the range of
-!>    the normalized floating point numbers.
-!>    < On exit, the leading K columns of X contain a POD basis,
-!>    i.e. the leading K left singular vectors of the input
-!>    data matrix X, U(:,1:K). All N columns of X contain all
-!>    left singular vectors of the input matrix X.
-!>    See the descriptions of K, Z and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= M
-!>    The leading dimension of the array X.
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (input/workspace/output) REAL(KIND=WP) M-by-N array
-!>    > On entry, Y contains the data snapshot matrix Y
-!>    < On exit,
-!>    If JOBR == 'R', the leading K columns of Y  contain
-!>    the residual vectors for the computed Ritz pairs.
-!>    See the description of RES.
-!>    If JOBR == 'N', Y contains the original input data,
-!>                    scaled according to the value of JOBS.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= M
-!>    The leading dimension of the array Y.
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1).
-!>                 This option is recommended.
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.
-!>
-!>    The numerical rank can be enforced by using positive
-!>    value of NRNK as follows:
-!>    0 < NRNK <= N :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the descriptions of TOL and  K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N
-!>    The dimension of the POD basis for the data snapshot
-!>    matrix X and the number of the computed Ritz pairs.
-!>    The value of K is determined according to the rule set
-!>    by the parameters NRNK and TOL.
-!>    See the descriptions of NRNK and TOL.
-!>    \endverbatim
-!.....
-!>    \param[out] REIG
-!>    \verbatim
-!>    REIG (output) REAL(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of REIG contain
-!>    the real parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    See the descriptions of K, IMEIG, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] IMEIG
-!>    \verbatim
-!>    IMEIG (output) REAL(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of IMEIG contain
-!>    the imaginary parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    The eigenvalues are determined as follows:
-!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
-!>    real, LAMBDA(i) = REIG(i).
-!>    If IMEIG(i)>0, then the corresponding complex
-!>    conjugate pair of eigenvalues reads
-!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
-!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
-!>    That is, complex conjugate pairs have consecutive
-!>    indices (i,i+1), with the positive imaginary part
-!>    listed first.
-!>    See the descriptions of K, REIG, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) REAL(KIND=WP)  M-by-N array
-!>    If JOBZ =='V' then
-!>       Z contains real Ritz vectors as follows:
-!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
-!>       the i-th Ritz value; ||Z(:,i)||_2=1.
-!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
-!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
-!>       the Ritz values extracted from this subspace are
-!>       REIG(i) + sqrt(-1)*IMEIG(i) and
-!>       REIG(i) - sqrt(-1)*IMEIG(i).
-!>       The corresponding eigenvectors are
-!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
-!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
-!>       || Z(:,i:i+1)||_F = 1.
-!>    If JOBZ == 'F', then the above descriptions hold for
-!>    the columns of X(:,1:K)*W(1:K,1:K), where the columns
-!>    of W(1:k,1:K) are the computed eigenvectors of the
-!>    K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
-!>    are similarly structured: If IMEIG(i) == 0 then
-!>    X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
-!>    then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
-!>         X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
-!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
-!>    See the descriptions of REIG, IMEIG, X and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) N-by-1 array
-!>    RES(1:K) contains the residuals for the K computed
-!>    Ritz pairs.
-!>    If LAMBDA(i) is real, then
-!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
-!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
-!>    then
-!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
-!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
-!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
-!>    It holds that
-!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
-!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
-!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
-!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
-!>    See the description of REIG, IMEIG and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) REAL(KIND=WP)  M-by-N array.
-!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further
-!>    details in the provided references.
-!>    If JOBF == 'E', B(1:M,1;K) contains
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.
-!>    If JOBF =='N', then B is not referenced.
-!>    See the descriptions of X, W, K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= M
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] W
-!>    \verbatim
-!>    W (workspace/output) REAL(KIND=WP) N-by-N array
-!>    On exit, W(1:K,1:K) contains the K computed
-!>    eigenvectors of the matrix Rayleigh quotient (real and
-!>    imaginary parts for each complex conjugate pair of the
-!>    eigenvalues). The Ritz vectors (returned in Z) are the
-!>    product of X (containing a POD basis for the input
-!>    matrix X) and W. See the descriptions of K, S, X and Z.
-!>    W is also used as a workspace to temporarily store the
-!>    right singular vectors of X.
-!>    \endverbatim
-!.....
-!>    \param[in] LDW
-!>    \verbatim
-!>    LDW (input) INTEGER, LDW >= N
-!>    The leading dimension of the array W.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (workspace/output) REAL(KIND=WP) N-by-N array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by DGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit, WORK(1:N) contains the singular values of
-!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
-!>    If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
-!>    scaling factor WORK(N+2)/WORK(N+1) used to scale X
-!>    and Y to avoid overflow in the SVD of X.
-!>    This may be of interest if the scaling option is off
-!>    and as many as possible smallest eigenvalues are
-!>    desired to the highest feasible accuracy.
-!>    If the call to DGEDMD is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. Hence, the
-!>    leng of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the workspace vector WORK.
-!>    LWORK is calculated as follows:
-!>    If WHTSVD == 1 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
-!>       If JOBZ == 'N'  then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
-!>       Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
-!>       workspace length of DGESVD.
-!>    If WHTSVD == 2 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
-!>       Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
-!>       minimal workspace length of DGESDD.
-!>    If WHTSVD == 3 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
-!>       Here LWORK_SVD = N+M+MAX(3*N+1,
-!>                       MAX(1,3*N+M,5*N),MAX(1,N))
-!>       is the minimal workspace length of DGESVDQ.
-!>    If WHTSVD == 4 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
-!>       Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
-!>       minimal workspace length of DGEJSV.
-!>    The above expressions are not simplified in order to
-!>    make the usage of WORK more transparent, and for
-!>    easier checking. In any case, LWORK >= 2.
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-      SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
-                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
-                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
-                         B, LDB, W,  LDW,   S, LDS,        &
-                         WORK, LWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real64
-!
-!     Scalar arguments
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-                                 NRNK, LDZ, LDB, LDW,  LDS, &
-                                 LWORK,  LIWORK
-      INTEGER,   INTENT(OUT)  :: K, INFO
-      REAL(KIND=WP), INTENT(IN)  :: TOL
-!
-!     Array arguments
-!     ~~~~~~~~~~~~~~~
-      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-                                      W(LDW,*), S(LDS,*)
-      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
-                                      RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!
-!     Local scalars
-!     ~~~~~~~~~~~~~
-      REAL(KIND=WP) :: OFL,    ROOTSC, SCALE,  SMALL,  &
-                       SSUM,   XSCL1,  XSCL2
-      INTEGER       :: i,   j, IMINWR,  INFO1, INFO2,  &
-                       LWRKEV, LWRSDD, LWRSVD,         &
-                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
-                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
-                       OLWORK
-      LOGICAL       :: BADXY,  LQUERY, SCCOLX, SCCOLY, &
-                       WNTEX,  WNTREF, WNTRES, WNTVEC
-      CHARACTER     :: JOBZL,  T_OR_N
-      CHARACTER     :: JSVOPT
-!
-!     Local arrays
-!     ~~~~~~~~~~~~
-      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
-!
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      REAL(KIND=WP) DLANGE, DLAMCH, DNRM2
-      EXTERNAL      DLANGE, DLAMCH, DNRM2, IDAMAX
-      INTEGER       IDAMAX
-      LOGICAL       DISNAN, LSAME
-      EXTERNAL      DISNAN, LSAME
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      DAXPY,  DGEMM,  DSCAL
-      EXTERNAL      DGEEV,  DGEJSV, DGESDD, DGESVD, DGESVDQ, &
-                    DLACPY, DLASCL, DLASSQ, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC     DBLE, INT, MAX, SQRT
-!............................................................
-!
-!    Test the input arguments
-!
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTREF = LSAME(JOBF,'R')
-      WNTEX  = LSAME(JOBF,'E')
-      INFO   = 0
-      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
-!
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
-                                  LSAME(JOBS,'N')) )   THEN
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
-                              .OR. LSAME(JOBZ,'F')) )  THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
-                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -4
-      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
-                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
-          INFO = -5
-      ELSE IF ( M < 0 )   THEN
-          INFO = -6
-      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
-          INFO = -7
-      ELSE IF ( LDX < M ) THEN
-          INFO = -9
-      ELSE IF ( LDY < M ) THEN
-          INFO = -11
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
-                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
-          INFO = -12
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
-          INFO = -13
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -18
-      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
-          INFO = -21
-      ELSE IF ( LDW < N ) THEN
-          INFO = -23
-      ELSE IF ( LDS < N ) THEN
-          INFO = -25
-      END IF
-!
-      IF ( INFO == 0 ) THEN
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and
-          ! determine minimal and optimal sizes of the
-          ! workspace at any moment of the run.
-         IF ( N == 0 ) THEN
-             ! Quick return. All output except K is void.
-             ! INFO=1 signals the void input.
-             ! In case of a workspace query, the default
-             ! minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN
-                IWORK(1) = 1
-                WORK(1)  = 2
-                WORK(2)  = 2
-            ELSE
-               K = 0
-            END IF
-            INFO = 1
-            RETURN
-         END IF
-         MLWORK = MAX(2,N)
-         OLWORK = MAX(2,N)
-         IMINWR = 1
-         SELECT CASE ( WHTSVD )
-         CASE (1)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of DGESVD:
-             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             MLWORK = MAX(MLWORK,N + MWRSVD)
-             IF ( LQUERY ) THEN
-                CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, &
-                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
-                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
-                OLWORK = MAX(OLWORK,N + LWRSVD)
-             END IF
-         CASE (2)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of DGESDD:
-             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
-             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
-             ! IMINWR = 8*MIN(M,N)
-             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
-              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
-             MLWORK = MAX(MLWORK,N + MWRSDD)
-             IMINWR = 8*MIN(M,N)
-             IF ( LQUERY ) THEN
-                CALL DGESDD( 'O', M, N, X, LDX, WORK, B,     &
-                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
-                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
-                OLWORK = MAX(OLWORK,N + LWRSDD)
-             END IF
-         CASE (3)
-             !LWQP3 = 3*N+1
-             !LWORQ = MAX(N, 1)
-             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2)
-             !MLWORK = N +  MWRSVQ
-             !IMINWR = M+N-1
-             CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                             X, LDX, WORK, Z, LDZ, W, LDW,   &
-                             NUMRNK, IWORK, LIWORK, RDUMMY,  &
-                             -1, RDUMMY2, -1, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVQ = INT(RDUMMY(2))
-             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
-             IF ( LQUERY ) THEN
-                LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) )
-                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
-             END IF
-         CASE (4)
-             JSVOPT = 'J'
-             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V'
-             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
-             MLWORK = MAX(MLWORK,N+MWRSVJ)
-             IMINWR = MAX( 3, M+3*N )
-             IF ( LQUERY ) THEN
-                OLWORK =  MAX(OLWORK,N+MWRSVJ)
-             END IF
-         END SELECT
-         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
-             JOBZL = 'V'
-         ELSE
-             JOBZL = 'N'
-         END IF
-         ! Workspace calculation to the DGEEV call
-         IF ( LSAME(JOBZL,'V') ) THEN
-             MWRKEV = MAX( 1, 4*N )
-         ELSE
-             MWRKEV = MAX( 1, 3*N )
-         END IF
-         MLWORK = MAX(MLWORK,N+MWRKEV)
-         IF ( LQUERY ) THEN
-                CALL DGEEV( 'N', JOBZL, N, S, LDS, REIG, &
-                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
-                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
-                OLWORK = MAX( OLWORK, N+LWRKEV )
-         END IF
-!
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
-         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
-      END IF
-!
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'DGEDMD', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          WORK(1)  = MLWORK
-          WORK(2)  = OLWORK
-          RETURN
-      END IF
-!............................................................
-!
-      OFL   = DLAMCH('O')
-      SMALL = DLAMCH('S')
-      BADXY = .FALSE.
-!
-!     <1> Optional scaling of the snapshots (columns of X, Y)
-!     ==========================================================
-      IF ( SCCOLX ) THEN
-          ! The columns of X will be normalized.
-          ! To prevent overflows, the column norms of X are
-          ! carefully computed using DLASSQ.
-          K = 0
-          DO i = 1, N
-            !WORK(i) = DNRM2( M, X(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM )
-            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -8
-                CALL XERBLA('DGEDMD',-INFO)
-            END IF
-            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of X(:,i) overflows. First, X(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
-!                 Next, the norm of X(:,i) is stored without
-!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of X(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, X(1,i), M, INFO2 )
-                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  WORK(i) =   SCALE * ROOTSC
-                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
-                               X(1,i), M, INFO2 )              ! LAPACK CALL
-!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               WORK(i) = ZERO
-               K = K + 1
-            END IF
-          END DO
-          IF ( K == N ) THEN
-          ! All columns of X are zero. Return error code -8.
-          ! (the 8th input variable had an illegal value)
-          K = 0
-          INFO = -8
-          CALL XERBLA('DGEDMD',-INFO)
-          RETURN
-          END IF
-          DO i = 1, N
-!           Now, apply the same scaling to the columns of Y.
-            IF ( WORK(i) >  ZERO ) THEN
-                CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
-!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
-            ELSE IF ( WORK(i) < ZERO ) THEN
-                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
-                     ONE/DBLE(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
-            ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i )  &
-                                            /= ZERO ) THEN
-!               X(:,i) is zero vector. For consistency,
-!               Y(:,i) should also be zero. If Y(:,i) is not
-!               zero, then the data might be inconsistent or
-!               corrupted. If JOBS == 'C', Y(:,i) is set to
-!               zero and a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-                IF ( LSAME(JOBS,'C')) &
-                CALL DSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
-            END IF
-          END DO
-      END IF
-  !
-      IF ( SCCOLY ) THEN
-          ! The columns of Y will be normalized.
-          ! To prevent overflows, the column norms of Y are
-          ! carefully computed using DLASSQ.
-          DO i = 1, N
-            !WORK(i) = DNRM2( M, Y(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM )
-            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -10
-                CALL XERBLA('DGEDMD',-INFO)
-            END IF
-            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of Y(:,i) overflows. First, Y(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
-!                 Next, the norm of Y(:,i) is stored without
-!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of Y(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, Y(1,i), M, INFO2 )
-                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  WORK(i) =   SCALE * ROOTSC
-                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
-                               Y(1,i), M, INFO2 )              ! LAPACK CALL
-!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               WORK(i) = ZERO
-            END IF
-         END DO
-         DO i = 1, N
-!           Now, apply the same scaling to the columns of X.
-            IF ( WORK(i) >  ZERO ) THEN
-                CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
-!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
-            ELSE IF ( WORK(i) < ZERO ) THEN
-                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
-                     ONE/DBLE(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
-            ELSE IF ( X(IDAMAX(M, X(1,i),1),i )  &
-                                           /= ZERO ) THEN
-!               Y(:,i) is zero vector.  If X(:,i) is not
-!               zero, then a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-            END IF
-         END DO
-       END IF
-!
-!     <2> SVD of the data snapshot matrix X.
-!     =====================================
-!     The left singular vectors are stored in the array X.
-!     The right singular vectors are in the array W.
-!     The array W will later on contain the eigenvectors
-!     of a Rayleigh quotient.
-      NUMRNK = N
-      SELECT CASE ( WHTSVD )
-         CASE (1)
-             CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
-                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
-             T_OR_N = 'T'
-         CASE (2)
-            CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
-                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
-            T_OR_N = 'T'
-         CASE (3)
-              CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                   X, LDX, WORK, Z, LDZ, W, LDW, &
-                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
-                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
-              CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
-         T_OR_N = 'T'
-         CASE (4)
-              CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
-                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
-                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
-              CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
-              T_OR_N = 'N'
-              XSCL1 = WORK(N+1)
-              XSCL2 = WORK(N+2)
-              IF ( XSCL1 /=  XSCL2 ) THEN
-                 ! This is an exceptional situation. If the
-                 ! data matrices are not scaled and the
-                 ! largest singular value of X overflows.
-                 ! In that case DGEJSV can return the SVD
-                 ! in scaled form. The scaling factor can be used
-                 ! to rescale the data (X and Y).
-                 CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
-              END IF
-      END SELECT
-!
-      IF ( INFO1 > 0 ) THEN
-         ! The SVD selected subroutine did not converge.
-         ! Return with an error code.
-         INFO = 2
-         RETURN
-      END IF
-!
-      IF ( WORK(1) == ZERO ) THEN
-          ! The largest computed singular value of (scaled)
-          ! X is zero. Return error code -8
-          ! (the 8th input variable had an illegal value).
-          K = 0
-          INFO = -8
-          CALL XERBLA('DGEDMD',-INFO)
-          RETURN
-      END IF
-!
-      !<3> Determine the numerical rank of the data
-      !    snapshots matrix X. This depends on the
-      !    parameters NRNK and TOL.
-
-      SELECT CASE ( NRNK )
-          CASE ( -1 )
-               K = 1
-               DO i = 2, NUMRNK
-                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
-                      ( WORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE ( -2 )
-               K = 1
-               DO i = 1, NUMRNK-1
-                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
-                      ( WORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE DEFAULT
-               K = 1
-               DO i = 2, NRNK
-                  IF ( WORK(i) <= SMALL ) EXIT
-                  K = K + 1
-               END DO
-          END SELECT
-      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
-      !   snapshot data in the input matrix X.
-
-      !<4> Compute the Rayleigh quotient S = U^T * A * U.
-      !    Depending on the requested outputs, the computation
-      !    is organized to compute additional auxiliary
-      !    matrices (for the residuals and refinements).
-      !
-      !    In all formulas below, we need V_k*Sigma_k^(-1)
-      !    where either V_k is in W(1:N,1:K), or V_k^T is in
-      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
-      IF ( LSAME(T_OR_N, 'N') ) THEN
-          DO i = 1, K
-           CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
-           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
-          END DO
-      ELSE
-          ! This non-unit stride access is due to the fact
-          ! that DGESVD, DGESVDQ and DGESDD return the
-          ! transposed matrix of the right singular vectors.
-          !DO i = 1, K
-          ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
-          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
-          !END DO
-          DO i = 1, K
-              WORK(N+i) = ONE/WORK(i)
-          END DO
-          DO j = 1, N
-             DO i = 1, K
-                 W(i,j) = (WORK(N+i))*W(i,j)
-             END DO
-          END DO
-      END IF
-!
-      IF ( WNTREF ) THEN
-         !
-         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
-         ! for computing the refined Ritz vectors
-         ! (optionally, outside DGEDMD).
-          CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
-                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
-          !
-          ! At this point Z contains
-          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
-          ! this is needed for computing the residuals.
-          ! This matrix is  returned in the array B and
-          ! it can be used to compute refined Ritz vectors.
-          CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
-          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
-
-          CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
-                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
-          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
-          ! At this point S = U^T * A * U is the Rayleigh quotient.
-      ELSE
-        ! A * U(:,1:K) is not explicitly needed and the
-        ! computation is organized differently. The Rayleigh
-        ! quotient is computed more efficiently.
-        CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
-                   ZERO, Z, LDZ )                                   ! BLAS CALL
-        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
-        ! In the two DGEMM calls here, can use K for LDZ.
-        CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
-                    LDW, ZERO, S, LDS )                         ! BLAS CALL
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
-        ! At this point S = U^T * A * U is the Rayleigh quotient.
-        ! If the residuals are requested, save scaled V_k into Z.
-        ! Recall that V_k or V_k^T is stored in W.
-        IF ( WNTRES .OR. WNTEX ) THEN
-          IF ( LSAME(T_OR_N, 'N') ) THEN
-              CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ )
-          ELSE
-              CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ )
-          END IF
-        END IF
-      END IF
-!
-      !<5> Compute the Ritz values and (if requested) the
-      !   right eigenvectors of the Rayleigh quotient.
-      !
-      CALL DGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
-                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
-      !
-      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
-      ! quotient. Even in the case of complex spectrum, all
-      ! computation is done in real arithmetic. REIG and
-      ! IMEIG are the real and the imaginary parts of the
-      ! eigenvalues, so that the spectrum is given as
-      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
-      ! are listed at consecutive positions. For such a
-      ! complex conjugate pair of the eigenvalues, the
-      ! corresponding eigenvectors are also a complex
-      ! conjugate pair with the real and imaginary parts
-      ! stored column-wise in W at the corresponding
-      ! consecutive column indices. See the description of Z.
-      ! Also, see the description of DGEEV.
-      IF ( INFO1 > 0 ) THEN
-         ! DGEEV failed to compute the eigenvalues and
-         ! eigenvectors of the Rayleigh quotient.
-         INFO = 3
-         RETURN
-      END IF
-!
-      ! <6> Compute the eigenvectors (if requested) and,
-      ! the residuals (if requested).
-      !
-      IF ( WNTVEC .OR. WNTEX ) THEN
-      IF ( WNTRES ) THEN
-          IF ( WNTREF ) THEN
-            ! Here, if the refinement is requested, we have
-            ! A*U(:,1:K) already computed and stored in Z.
-            ! For the residuals, need Y = A * U(:,1;K) * W.
-            CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
-                       LDW, ZERO, Y, LDY )               ! BLAS CALL
-            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
-            ! This frees Z; Y contains A * U(:,1:K) * W.
-          ELSE
-            ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) is stored in Z
-            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
-                       W, LDW, ZERO, S, LDS)
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-                       LDS, ZERO, Z, LDZ)
-            ! Save a copy of Z into Y and free Z for holding
-            ! the Ritz vectors.
-            CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY )
-            IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
-          END IF
-      ELSE IF ( WNTEX ) THEN
-          ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) is stored in Z
-            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
-                       W, LDW, ZERO, S, LDS )
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-                       LDS, ZERO, B, LDB )
-            ! The above call replaces the following two calls
-            ! that were used in the developing-testing phase.
-            ! CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-            !           LDS, ZERO, Z, LDZ)
-            ! Save a copy of Z into B and free Z for holding
-            ! the Ritz vectors.
-            ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
-      END IF
-!
-      ! Compute the real form of the Ritz vectors
-      IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
-                   ZERO, Z, LDZ )                           ! BLAS CALL
-      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
-!
-      IF ( WNTRES ) THEN
-         i = 1
-         DO WHILE ( i <= K )
-            IF ( IMEIG(i) == ZERO ) THEN
-                ! have a real eigenvalue with real eigenvector
-                CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
-                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
-                RES(i) = DNRM2( M, Y(1,i), 1)                         ! BLAS CALL
-                i = i + 1
-            ELSE
-               ! Have a complex conjugate pair
-               ! REIG(i) +- sqrt(-1)*IMEIG(i).
-               ! Since all computation is done in real
-               ! arithmetic, the formula for the residual
-               ! is recast for real representation of the
-               ! complex conjugate eigenpair. See the
-               ! description of RES.
-               AB(1,1) =  REIG(i)
-               AB(2,1) = -IMEIG(i)
-               AB(1,2) =  IMEIG(i)
-               AB(2,2) =  REIG(i)
-               CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
-               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
-               RES(i)   = DLANGE( 'F', M, 2, Y(1,i), LDY, &
-                                  WORK(N+1) )                      ! LAPACK CALL
-               RES(i+1) = RES(i)
-               i = i + 2
-            END IF
-         END DO
-      END IF
-      END IF
-!
-      IF ( WHTSVD == 4 ) THEN
-          WORK(N+1) = XSCL1
-          WORK(N+2) = XSCL2
-      END IF
-!
-!     Successful exit.
-      IF ( .NOT. BADXY ) THEN
-         INFO = 0
-      ELSE
-         ! A warning on possible data inconsistency.
-         ! This should be a rare event.
-         INFO = 4
-      END IF
-!............................................................
-      RETURN
-!     ......
-      END SUBROUTINE DGEDMD
+!> \brief \b DGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+!                        M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+!                        K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+!                        B, LDB, W,  LDW,   S, LDS,        &
+!                        WORK, LWORK, IWORK, LIWORK, INFO )
+!
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+!                                NRNK, LDZ, LDB, LDW,  LDS, &
+!                                LWORK,  LIWORK
+!     INTEGER,   INTENT(OUT)  :: K, INFO
+!     REAL(KIND=WP), INTENT(IN)  :: TOL
+!     Array arguments
+!     REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+!                                     W(LDW,*), S(LDS,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+!                                     RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    DGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, DGEDMD computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, DGEDMD returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!......................................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) is CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product X(:,1:K)*W, where X
+!>           contains a POD basis (leading left singular vectors
+!>           of the data matrix X) and W contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of K, X, W, Z.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will be
+!>           computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: DGESVD (the QR SVD algorithm)
+!>    2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M>= 0
+!>    The state space dimension (the row dimension of X, Y).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshot pairs
+!>    (the number of columns of X and Y).
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (input/output) REAL(KIND=WP) M-by-N array
+!>    > On entry, X contains the data snapshot matrix X. It is
+!>    assumed that the column norms of X are in the range of
+!>    the normalized floating point numbers.
+!>    < On exit, the leading K columns of X contain a POD basis,
+!>    i.e. the leading K left singular vectors of the input
+!>    data matrix X, U(:,1:K). All N columns of X contain all
+!>    left singular vectors of the input matrix X.
+!>    See the descriptions of K, Z and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= M
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!>    > On entry, Y contains the data snapshot matrix Y
+!>    < On exit,
+!>    If JOBR == 'R', the leading K columns of Y  contain
+!>    the residual vectors for the computed Ritz pairs.
+!>    See the description of RES.
+!>    If JOBR == 'N', Y contains the original input data,
+!>                    scaled according to the value of JOBS.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= M
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1).
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the descriptions of TOL and  K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the POD basis for the data snapshot
+!>    matrix X and the number of the computed Ritz pairs.
+!>    The value of K is determined according to the rule set
+!>    by the parameters NRNK and TOL.
+!>    See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] REIG
+!>    \verbatim
+!>    REIG (output) REAL(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of REIG contain
+!>    the real parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    See the descriptions of K, IMEIG, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] IMEIG
+!>    \verbatim
+!>    IMEIG (output) REAL(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of IMEIG contain
+!>    the imaginary parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    The eigenvalues are determined as follows:
+!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
+!>    real, LAMBDA(i) = REIG(i).
+!>    If IMEIG(i)>0, then the corresponding complex
+!>    conjugate pair of eigenvalues reads
+!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!>    That is, complex conjugate pairs have consecutive
+!>    indices (i,i+1), with the positive imaginary part
+!>    listed first.
+!>    See the descriptions of K, REIG, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!>    If JOBZ =='V' then
+!>       Z contains real Ritz vectors as follows:
+!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!>       the i-th Ritz value; ||Z(:,i)||_2=1.
+!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!>       the Ritz values extracted from this subspace are
+!>       REIG(i) + sqrt(-1)*IMEIG(i) and
+!>       REIG(i) - sqrt(-1)*IMEIG(i).
+!>       The corresponding eigenvectors are
+!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!>       || Z(:,i:i+1)||_F = 1.
+!>    If JOBZ == 'F', then the above descriptions hold for
+!>    the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!>    of W(1:k,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!>    are similarly structured: If IMEIG(i) == 0 then
+!>    X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!>    then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!>         X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!>    See the descriptions of REIG, IMEIG, X and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) N-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs.
+!>    If LAMBDA(i) is real, then
+!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!>    then
+!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!>    It holds that
+!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!>    See the description of REIG, IMEIG and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) REAL(KIND=WP)  M-by-N array.
+!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:M,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    If JOBF =='N', then B is not referenced.
+!>    See the descriptions of X, W, K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= M
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] W
+!>    \verbatim
+!>    W (workspace/output) REAL(KIND=WP) N-by-N array
+!>    On exit, W(1:K,1:K) contains the K computed
+!>    eigenvectors of the matrix Rayleigh quotient (real and
+!>    imaginary parts for each complex conjugate pair of the
+!>    eigenvalues). The Ritz vectors (returned in Z) are the
+!>    product of X (containing a POD basis for the input
+!>    matrix X) and W. See the descriptions of K, S, X and Z.
+!>    W is also used as a workspace to temporarily store the
+!>    right singular vectors of X.
+!>    \endverbatim
+!.....
+!>    \param[in] LDW
+!>    \verbatim
+!>    LDW (input) INTEGER, LDW >= N
+!>    The leading dimension of the array W.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (workspace/output) REAL(KIND=WP) N-by-N array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by DGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit, WORK(1:N) contains the singular values of
+!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!>    If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!>    scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!>    and Y to avoid overflow in the SVD of X.
+!>    This may be of interest if the scaling option is off
+!>    and as many as possible smallest eigenvalues are
+!>    desired to the highest feasible accuracy.
+!>    If the call to DGEDMD is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. Hence, the
+!>    leng of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the workspace vector WORK.
+!>    LWORK is calculated as follows:
+!>    If WHTSVD == 1 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!>       If JOBZ == 'N'  then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!>       Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!>       workspace length of DGESVD.
+!>    If WHTSVD == 2 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!>       Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!>       minimal workspace length of DGESDD.
+!>    If WHTSVD == 3 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!>       Here LWORK_SVD = N+M+MAX(3*N+1,
+!>                       MAX(1,3*N+M,5*N),MAX(1,N))
+!>       is the minimal workspace length of DGESVDQ.
+!>    If WHTSVD == 4 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!>       Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
+!>       minimal workspace length of DGEJSV.
+!>    The above expressions are not simplified in order to
+!>    make the usage of WORK more transparent, and for
+!>    easier checking. In any case, LWORK >= 2.
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+      SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)  :: TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,    ROOTSC, SCALE,  SMALL,  &
+                       SSUM,   XSCL1,  XSCL2
+      INTEGER       :: i,   j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD,         &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       :: BADXY,  LQUERY, SCCOLX, SCCOLY, &
+                       WNTEX,  WNTREF, WNTRES, WNTVEC
+      CHARACTER     :: JOBZL,  T_OR_N
+      CHARACTER     :: JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) DLANGE, DLAMCH, DNRM2
+      EXTERNAL      DLANGE, DLAMCH, DNRM2, IDAMAX
+      INTEGER       IDAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DAXPY,  DGEMM,  DSCAL
+      EXTERNAL      DGEEV,  DGEJSV, DGESDD, DGESVD, DGESVDQ, &
+                    DLACPY, DLASCL, DLASSQ, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL DGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2)
+             !MLWORK = N +  MWRSVQ
+             !IMINWR = M+N-1
+             CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                             X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, LIWORK, RDUMMY,  &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) )
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK =  MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the DGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL DGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using DLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, X(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL DSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using DLASSQ.
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, Y(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(IDAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case DGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that DGESVD, DGESVDQ and DGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside DGEDMD).
+          CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two DGEMM calls here, can use K for LDZ.
+        CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL DGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of DGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! DGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS)
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = DNRM2( M, Y(1,i), 1)                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = DLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE DGEDMD
diff --git a/SRC/dgedmdq.f90 b/SRC/dgedmdq.f90
index 2bf939f489..b1fb62b44a 100644
--- a/SRC/dgedmdq.f90
+++ b/SRC/dgedmdq.f90
@@ -1,864 +1,863 @@
-!> \brief \b DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-!                         LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
-!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-!                         S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real64      
-!.....      
-!     Scalar arguments       
-!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-!                               JOBT, JOBF
-!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-!                               LDY, NRNK, LDZ, LDB, LDV,  &
-!                               LDS, LWORK,  LIWORK
-!     INTEGER,   INTENT(OUT) :: INFO,    K      
-!     REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!     Array arguments      
-!     REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
-!                                     Z(LDZ,*), B(LDB,*),  &
-!                                     V(LDV,*), S(LDS,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
-!                                     RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
-!>     a pair of data snapshot matrices, using a QR factorization
-!>     based compression of the data. For the input matrices
-!>     X and Y such that Y = A*X with an unaccessible matrix
-!>     A, DGEDMDQ computes a certain number of Ritz pairs of A using
-!>     the standard Rayleigh-Ritz extraction from a subspace of
-!>     range(X) that is determined using the leading left singular 
-!>     vectors of X. Optionally, DGEDMDQ returns the residuals 
-!>     of the computed Ritz pairs, the information needed for
-!>     a refinement of the Ritz vectors, or the eigenvectors of
-!>     the Exact DMD.
-!>     For further details see the references listed
-!>     below. For more details of the implementation see [3].      
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.      
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations 
-!>    expressed in this material are those of the author and 
-!>    do not necessarily reflect the views of the DARPA SBIR 
-!>    Program Office.      
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022  
-!>    \endverbatim
-!......................................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix. The data snapshots are the columns
-!>    of F. The leading N-1 columns of F are denoted X and the
-!>    trailing N-1 columns are denoted Y. 
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)    
-!>    'N' :: No data scaling.   
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Z*V, where Z
-!>           is orthonormal and V contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of F, V, Z.
-!>    'Q' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Q*Z, where Z
-!>           contains the eigenvectors of the compression of the
-!>           underlying discretized operator onto the span of
-!>           the data snapshots. See the descriptions of F, V, Z.  
-!>           Q is from the initial QR factorization.      
-!>    'N' :: The eigenvectors are not computed.  
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1 
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will
-!>           be computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBQ
-!>    \verbatim
-!>    JOBQ (input) CHARACTER*1 
-!>    Specifies whether to explicitly compute and return the
-!>    orthogonal matrix from the QR factorization.
-!>    'Q' :: The matrix Q of the QR factorization of the data
-!>           snapshot matrix is computed and stored in the
-!>           array F. See the description of F.       
-!>    'N' :: The matrix Q is not explicitly computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBT
-!>    \verbatim
-!>    JOBT (input) CHARACTER*1 
-!>    Specifies whether to return the upper triangular factor
-!>    from the QR factorization.
-!>    'R' :: The matrix R of the QR factorization of the data 
-!>           snapshot matrix F is returned in the array Y.
-!>           See the description of Y and Further details.       
-!>    'N' :: The matrix R is not returned.    
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are 
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.
-!>    To be useful on exit, this option needs JOBQ='Q'.
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: DGESVD (the QR SVD algorithm)
-!>    2 :: DGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger 
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M >= 0 
-!>    The state space dimension (the number of rows of F).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshots from a single trajectory,
-!>    taken at equidistant discrete times. This is the 
-!>    number of columns of F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] F
-!>    \verbatim
-!>    F (input/output) REAL(KIND=WP) M-by-N array
-!>    > On entry,
-!>    the columns of F are the sequence of data snapshots 
-!>    from a single trajectory, taken at equidistant discrete
-!>    times. It is assumed that the column norms of F are 
-!>    in the range of the normalized floating point numbers. 
-!>    < On exit,
-!>    If JOBQ == 'Q', the array F contains the orthogonal 
-!>    matrix/factor of the QR factorization of the initial 
-!>    data snapshots matrix F. See the description of JOBQ. 
-!>    If JOBQ == 'N', the entries in F strictly below the main
-!>    diagonal contain, column-wise, the information on the 
-!>    Householder vectors, as returned by DGEQRF. The 
-!>    remaining information to restore the orthogonal matrix
-!>    of the initial QR factorization is stored in WORK(1:N). 
-!>    See the description of WORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LDF
-!>    \verbatim
-!>    LDF (input) INTEGER, LDF >= M 
-!>    The leading dimension of the array F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    X is used as workspace to hold representations of the
-!>    leading N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, the leading K columns of X contain the leading
-!>    K left singular vectors of the above described content
-!>    of X. To lift them to the space of the left singular
-!>    vectors U(:,1:K)of the input data, pre-multiply with the 
-!>    Q factor from the initial QR factorization. 
-!>    See the descriptions of F, K, V  and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= N  
-!>    The leading dimension of the array X. 
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    Y is used as workspace to hold representations of the
-!>    trailing N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, 
-!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
-!>    triangular factor from the QR factorization of the data
-!>    snapshot matrix F.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= N
-!>    The leading dimension of the array Y.   
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.  
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.       
-!>    The numerical rank can be enforced by using positive 
-!>    value of NRNK as follows: 
-!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>     \verbatim
-!>     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>     The tolerance for truncating small singular values.
-!>     See the description of NRNK.  
-!>     \endverbatim
-!.....
-!>     \param[out] K
-!>     \verbatim
-!>     K (output) INTEGER,  0 <= K <= N 
-!>     The dimension of the SVD/POD basis for the leading N-1
-!>     data snapshots (columns of F) and the number of the 
-!>     computed Ritz pairs. The value of K is determined
-!>     according to the rule set by the parameters NRNK and 
-!>     TOL. See the descriptions of NRNK and TOL. 
-!>     \endverbatim
-!.....
-!>    \param[out] REIG
-!>    \verbatim
-!>    REIG (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<=N) entries of REIG contain
-!>    the real parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    See the descriptions of K, IMEIG, Z.
-!>    \endverbatim
-!.....
-!>    \param[out] IMEIG
-!>    \verbatim
-!>    IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<N) entries of REIG contain
-!>    the imaginary parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    The eigenvalues are determined as follows:
-!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
-!>    real, LAMBDA(i) = REIG(i).
-!>    If IMEIG(i)>0, then the corresponding complex
-!>    conjugate pair of eigenvalues reads
-!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
-!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
-!>    That is, complex conjugate pairs have consequtive
-!>    indices (i,i+1), with the positive imaginary part
-!>    listed first.
-!>    See the descriptions of K, REIG, Z.     
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
-!>    If JOBZ =='V' then
-!>       Z contains real Ritz vectors as follows:
-!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
-!>       the i-th Ritz value.
-!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
-!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
-!>       the Ritz values extracted from this subspace are
-!>       REIG(i) + sqrt(-1)*IMEIG(i) and
-!>       REIG(i) - sqrt(-1)*IMEIG(i).
-!>       The corresponding eigenvectors are
-!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
-!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
-!>    If JOBZ == 'F', then the above descriptions hold for
-!>    the columns of Z*V, where the columns of V are the
-!>    eigenvectors of the K-by-K Rayleigh quotient, and Z is
-!>    orthonormal. The columns of V are similarly structured:
-!>    If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
-!>    IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
-!>                      Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
-!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
-!>    See the descriptions of REIG, IMEIG, X and V.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    RES(1:K) contains the residuals for the K computed 
-!>    Ritz pairs.       
-!>    If LAMBDA(i) is real, then
-!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
-!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
-!>    then
-!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
-!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
-!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
-!>    It holds that
-!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
-!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
-!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
-!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
-!>    See the description of Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
-!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further 
-!>    details in the provided references. 
-!>    If JOBF == 'E', B(1:N,1;K) contains 
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.   
-!>    In both cases, the content of B can be lifted to the 
-!>    original dimension of the input data by pre-multiplying
-!>    with the Q factor from the initial QR factorization.
-!>    Here A denotes a compression of the underlying operator.
-!>    See the descriptions of F and X.
-!>    If JOBF =='N', then B is not referenced.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= MIN(M,N)
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] V
-!>    \verbatim
-!>    V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
-!>    On exit, V(1:K,1:K) contains the K eigenvectors of
-!>    the Rayleigh quotient. The eigenvectors of a complex
-!>    conjugate pair of eigenvalues are returned in real form
-!>    as explained in the description of Z. The Ritz vectors
-!>    (returned in Z) are the product of X and V; see
-!>    the descriptions of X and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDV
-!>    \verbatim
-!>    LDV (input) INTEGER, LDV >= N-1
-!>    The leading dimension of the array V.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by DGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N-1        
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit, 
-!>    WORK(1:MIN(M,N)) contains the scalar factors of the 
-!>    elementary reflectors as returned by DGEQRF of the 
-!>    M-by-N input matrix F.
-!>    WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
-!>    the input submatrix F(1:M,1:N-1).
-!>    If the call to DGEDMDQ is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. Hence, the
-!>    length of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the  workspace vector WORK.
-!>    LWORK is calculated as follows:
-!>    Let MLWQR  = N (minimal workspace for DGEQRF[M,N])
-!>        MLWDMD = minimal workspace for DGEDMD (see the
-!>                 description of LWORK in DGEDMD) for 
-!>                 snapshots of dimensions MIN(M,N)-by-(N-1)
-!>        MLWMQR = N (minimal workspace for 
-!>                   DORMQR['L','N',M,N,N])
-!>        MLWGQR = N (minimal workspace for DORGQR[M,N,N])
-!>    Then
-!>    LWORK = MAX(N+MLWQR, N+MLWDMD)
-!>    is updated as follows:
-!>       if   JOBZ == 'V' or JOBZ == 'F' THEN 
-!>            LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR )
-!>       if   JOBQ == 'Q' THEN
-!>            LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR)
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.          
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    Let M1=MIN(M,N), N1=N-1. Then    
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.  
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
-                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real64      
-!
-!     Scalar arguments       
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-                                JOBT, JOBF
-      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-                                LDY, NRNK, LDZ, LDB, LDV,  &
-                                LDS, LWORK,  LIWORK
-      INTEGER,   INTENT(OUT) :: INFO,    K      
-      REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!
-!     Array arguments      
-!     ~~~~~~~~~~~~~~~
-      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
-                                      Z(LDZ,*), B(LDB,*),  &
-                                      V(LDV,*), S(LDS,*)
-      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
-                                      RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~      
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!      
-!     Local scalars      
-!     ~~~~~~~~~~~~~
-      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
-                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
-                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
-                           OLWQR
-      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
-                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
-                           WNTVCQ, WNTREF, WNTEX
-      CHARACTER(LEN=1)  :: JOBVL
-!      
-!     Local array      
-!     ~~~~~~~~~~~      
-      REAL(KIND=WP) :: RDUMMY(2)
-!      
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      LOGICAL       LSAME
-      EXTERNAL      LSAME 
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      DGEMM 
-      EXTERNAL      DGEDMD, DGEQRF, DLACPY, DLASET, DORGQR, & 
-                    DORMQR, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC      MAX, MIN, INT         
- !..........................................................  
- !
- !    Test the input arguments    
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTVCF = LSAME(JOBZ,'F')
-      WNTVCQ = LSAME(JOBZ,'Q')
-      WNTREF = LSAME(JOBF,'R') 
-      WNTEX  = LSAME(JOBF,'E')
-      WANTQ  = LSAME(JOBQ,'Q')
-      WNTTRF = LSAME(JOBT,'R')     
-      MINMN  = MIN(M,N)
-      INFO = 0 
-      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
-!       
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
-                                  LSAME(JOBS,'N')) )  THEN 
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
-                              .OR. LSAME(JOBZ,'N')) ) THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
-          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
-          INFO = -4                 
-      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
-          INFO = -5
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -6    
-      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
-                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
-          INFO = -7
-      ELSE IF ( M < 0 ) THEN
-          INFO = -8
-      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
-          INFO = -9
-      ELSE IF ( LDF < M ) THEN
-          INFO = -11
-      ELSE IF ( LDX < MINMN ) THEN
-          INFO = -13
-      ELSE IF ( LDY < MINMN ) THEN
-          INFO = -15
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
-                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
-          INFO = -16
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
-          INFO = -17
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -22
-      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
-          INFO = -25
-      ELSE IF ( LDV < N-1 ) THEN
-          INFO = -27
-      ELSE IF ( LDS < N-1 ) THEN
-          INFO = -29
-      END IF
-!      
-      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
-          JOBVL = 'V'
-      ELSE
-          JOBVL = 'N'
-      END IF     
-      IF ( INFO == 0 ) THEN  
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and 
-          ! determine minimal and optimal sizes of the 
-          ! workspace at any moment of the run.         
-         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
-             ! All output except K is void. INFO=1 signals
-             ! the void input. In case of a workspace query,
-             ! the minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN  
-               IWORK(1) = 1
-                WORK(1) = 2
-                WORK(2) = 2
-            ELSE                
-               K = 0
-            END IF             
-            INFO = 1  
-            RETURN
-         END IF     
-         MLWQR  = MAX(1,N)  ! Minimal workspace length for DGEQRF.
-         MLWORK = MINMN + MLWQR 
-         IF ( LQUERY ) THEN 
-             CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
-                          INFO1 )
-             OLWQR  = INT(RDUMMY(1))
-             OLWORK = MIN(M,N) + OLWQR           
-         END IF
-         CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
-                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
-                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
-                      V, LDV, S, LDS, WORK, -1, IWORK,       &
-                      LIWORK, INFO1 )
-         MLWDMD = INT(WORK(1))
-         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
-         IMINWR = IWORK(1)
-         IF ( LQUERY ) THEN 
-             OLWDMD = INT(WORK(2))
-             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
-         END IF
-         IF ( WNTVEC .OR. WNTVCF ) THEN
-            MLWMQR = MAX(1,N) 
-            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
-            IF ( LQUERY ) THEN
-               CALL DORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
-                            WORK, Z, LDZ, WORK, -1, INFO1 )
-               OLWMQR = INT(WORK(1))
-               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
-            END IF
-         END IF  
-         IF ( WANTQ ) THEN
-            MLWGQR = N
-            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
-            IF ( LQUERY ) THEN 
-                CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
-                             WORK, -1, INFO1 )        
-                OLWGQR = INT(WORK(1))
-                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
-            END IF            
-         END IF   
-         IMINWR = MAX( 1, IMINWR )
-         MLWORK = MAX( 2, MLWORK )      
-         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
-      END IF  
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'DGEDMDQ', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          WORK(1)  = MLWORK
-          WORK(2)  = OLWORK
-          RETURN
-      END IF   
-!.....	  
-!     Initial QR factorization that is used to represent the
-!     snapshots as elements of lower dimensional subspace.
-!     For large scale computation with M >>N , at this place 
-!     one can use an out of core QRF.
-!   
-      CALL DGEQRF( M, N, F, LDF, WORK,               & 
-                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
-!      
-!     Define X and Y as the snapshots representations in the
-!     orthogonal basis computed in the QR factorization.
-!     X corresponds to the leading N-1 and Y to the trailing
-!     N-1 snapshots.
-      CALL DLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
-      CALL DLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
-      CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
-      IF ( M >= 3 ) THEN
-          CALL DLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
-                       Y(3,1), LDY )  
-      END IF
-!
-!     Compute the DMD of the projected snapshot pairs (X,Y)   
-      CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
-                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
-                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
-                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & 
-                   IWORK, LIWORK, INFO1 )
-      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
-          ! Return with error code. See DGEDMD for details.
-          INFO = INFO1
-          RETURN
-      ELSE
-          INFO = INFO1
-      END IF    
-!      
-!     The Ritz vectors (Koopman modes) can be explicitly 
-!     formed or returned in factored form.
-      IF ( WNTVEC ) THEN
-        ! Compute the eigenvectors explicitly.  
-        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
-                                     ZERO, Z(MINMN+1,1), LDZ )
-        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
-             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
-      ELSE IF ( WNTVCF ) THEN   
-        !   Return the Ritz vectors (eigenvectors) in factored
-        !   form Z*V, where Z contains orthonormal matrix (the
-        !   product of Q from the initial QR factorization and 
-        !   the SVD/POD_basis returned by DGEDMD in X) and the 
-        !   second factor (the eigenvectors of the Rayleigh 
-        !   quotient) is in the array V, as returned by DGEDMD.
-        CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ )
-        IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO,   & 
-                                  Z(N+1,1), LDZ )
-        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
-             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
-      END IF
-!     
-!     Some optional output variables:
-!
-!     The upper triangular factor R in the initial QR 
-!     factorization is optionally returned in the array Y.
-!     This is useful if this call to DGEDMDQ is to be 
-!     followed by a streaming DMD that is implemented in a 
-!     QR compressed form.
-      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
-         CALL DLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
-         CALL DLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
-      END IF    
-!
-!     The orthonormal/orthogonal factor Q in the initial QR 
-!     factorization is optionally returned in the array F. 
-!     Same as with the triangular factor above, this is 
-!     useful in a streaming DMD.
-      IF ( WANTQ ) THEN               ! Q overwrites F 
-         CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
-              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
-      END IF
-!      
-      RETURN
-!      
-      END SUBROUTINE DGEDMDQ
-    
+!> \brief \b DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+!                         LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+!                         S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+!                               JOBT, JOBF
+!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+!                               LDY, NRNK, LDZ, LDB, LDV,  &
+!                               LDS, LWORK,  LIWORK
+!     INTEGER,   INTENT(OUT) :: INFO,    K
+!     REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+!     REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+!                                     Z(LDZ,*), B(LDB,*),  &
+!                                     V(LDV,*), S(LDS,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+!                                     RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!>     a pair of data snapshot matrices, using a QR factorization
+!>     based compression of the data. For the input matrices
+!>     X and Y such that Y = A*X with an unaccessible matrix
+!>     A, DGEDMDQ computes a certain number of Ritz pairs of A using
+!>     the standard Rayleigh-Ritz extraction from a subspace of
+!>     range(X) that is determined using the leading left singular
+!>     vectors of X. Optionally, DGEDMDQ returns the residuals
+!>     of the computed Ritz pairs, the information needed for
+!>     a refinement of the Ritz vectors, or the eigenvectors of
+!>     the Exact DMD.
+!>     For further details see the references listed
+!>     below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office.
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!......................................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix. The data snapshots are the columns
+!>    of F. The leading N-1 columns of F are denoted X and the
+!>    trailing N-1 columns are denoted Y.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Z*V, where Z
+!>           is orthonormal and V contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of F, V, Z.
+!>    'Q' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Q*Z, where Z
+!>           contains the eigenvectors of the compression of the
+!>           underlying discretized operator onto the span of
+!>           the data snapshots. See the descriptions of F, V, Z.
+!>           Q is from the initial QR factorization.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will
+!>           be computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBQ
+!>    \verbatim
+!>    JOBQ (input) CHARACTER*1
+!>    Specifies whether to explicitly compute and return the
+!>    orthogonal matrix from the QR factorization.
+!>    'Q' :: The matrix Q of the QR factorization of the data
+!>           snapshot matrix is computed and stored in the
+!>           array F. See the description of F.
+!>    'N' :: The matrix Q is not explicitly computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBT
+!>    \verbatim
+!>    JOBT (input) CHARACTER*1
+!>    Specifies whether to return the upper triangular factor
+!>    from the QR factorization.
+!>    'R' :: The matrix R of the QR factorization of the data
+!>           snapshot matrix F is returned in the array Y.
+!>           See the description of Y and Further details.
+!>    'N' :: The matrix R is not returned.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    To be useful on exit, this option needs JOBQ='Q'.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: DGESVD (the QR SVD algorithm)
+!>    2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M >= 0
+!>    The state space dimension (the number of rows of F).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshots from a single trajectory,
+!>    taken at equidistant discrete times. This is the
+!>    number of columns of F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] F
+!>    \verbatim
+!>    F (input/output) REAL(KIND=WP) M-by-N array
+!>    > On entry,
+!>    the columns of F are the sequence of data snapshots
+!>    from a single trajectory, taken at equidistant discrete
+!>    times. It is assumed that the column norms of F are
+!>    in the range of the normalized floating point numbers.
+!>    < On exit,
+!>    If JOBQ == 'Q', the array F contains the orthogonal
+!>    matrix/factor of the QR factorization of the initial
+!>    data snapshots matrix F. See the description of JOBQ.
+!>    If JOBQ == 'N', the entries in F strictly below the main
+!>    diagonal contain, column-wise, the information on the
+!>    Householder vectors, as returned by DGEQRF. The
+!>    remaining information to restore the orthogonal matrix
+!>    of the initial QR factorization is stored in WORK(1:N).
+!>    See the description of WORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LDF
+!>    \verbatim
+!>    LDF (input) INTEGER, LDF >= M
+!>    The leading dimension of the array F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    X is used as workspace to hold representations of the
+!>    leading N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit, the leading K columns of X contain the leading
+!>    K left singular vectors of the above described content
+!>    of X. To lift them to the space of the left singular
+!>    vectors U(:,1:K)of the input data, pre-multiply with the
+!>    Q factor from the initial QR factorization.
+!>    See the descriptions of F, K, V  and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= N
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    Y is used as workspace to hold representations of the
+!>    trailing N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit,
+!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!>    triangular factor from the QR factorization of the data
+!>    snapshot matrix F.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= N
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>     \verbatim
+!>     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>     The tolerance for truncating small singular values.
+!>     See the description of NRNK.
+!>     \endverbatim
+!.....
+!>     \param[out] K
+!>     \verbatim
+!>     K (output) INTEGER,  0 <= K <= N
+!>     The dimension of the SVD/POD basis for the leading N-1
+!>     data snapshots (columns of F) and the number of the
+!>     computed Ritz pairs. The value of K is determined
+!>     according to the rule set by the parameters NRNK and
+!>     TOL. See the descriptions of NRNK and TOL.
+!>     \endverbatim
+!.....
+!>    \param[out] REIG
+!>    \verbatim
+!>    REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<=N) entries of REIG contain
+!>    the real parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    See the descriptions of K, IMEIG, Z.
+!>    \endverbatim
+!.....
+!>    \param[out] IMEIG
+!>    \verbatim
+!>    IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<N) entries of REIG contain
+!>    the imaginary parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    The eigenvalues are determined as follows:
+!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
+!>    real, LAMBDA(i) = REIG(i).
+!>    If IMEIG(i)>0, then the corresponding complex
+!>    conjugate pair of eigenvalues reads
+!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!>    That is, complex conjugate pairs have consequtive
+!>    indices (i,i+1), with the positive imaginary part
+!>    listed first.
+!>    See the descriptions of K, REIG, Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!>    If JOBZ =='V' then
+!>       Z contains real Ritz vectors as follows:
+!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!>       the i-th Ritz value.
+!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!>       the Ritz values extracted from this subspace are
+!>       REIG(i) + sqrt(-1)*IMEIG(i) and
+!>       REIG(i) - sqrt(-1)*IMEIG(i).
+!>       The corresponding eigenvectors are
+!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!>    If JOBZ == 'F', then the above descriptions hold for
+!>    the columns of Z*V, where the columns of V are the
+!>    eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!>    orthonormal. The columns of V are similarly structured:
+!>    If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if
+!>    IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!>                      Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!>    See the descriptions of REIG, IMEIG, X and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs.
+!>    If LAMBDA(i) is real, then
+!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!>    then
+!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!>    It holds that
+!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!>    See the description of Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:N,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    In both cases, the content of B can be lifted to the
+!>    original dimension of the input data by pre-multiplying
+!>    with the Q factor from the initial QR factorization.
+!>    Here A denotes a compression of the underlying operator.
+!>    See the descriptions of F and X.
+!>    If JOBF =='N', then B is not referenced.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= MIN(M,N)
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] V
+!>    \verbatim
+!>    V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!>    On exit, V(1:K,1:K) contains the K eigenvectors of
+!>    the Rayleigh quotient. The eigenvectors of a complex
+!>    conjugate pair of eigenvalues are returned in real form
+!>    as explained in the description of Z. The Ritz vectors
+!>    (returned in Z) are the product of X and V; see
+!>    the descriptions of X and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDV
+!>    \verbatim
+!>    LDV (input) INTEGER, LDV >= N-1
+!>    The leading dimension of the array V.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by DGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N-1
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    WORK(1:MIN(M,N)) contains the scalar factors of the
+!>    elementary reflectors as returned by DGEQRF of the
+!>    M-by-N input matrix F.
+!>    WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of
+!>    the input submatrix F(1:M,1:N-1).
+!>    If the call to DGEDMDQ is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. Hence, the
+!>    length of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the  workspace vector WORK.
+!>    LWORK is calculated as follows:
+!>    Let MLWQR  = N (minimal workspace for DGEQRF[M,N])
+!>        MLWDMD = minimal workspace for DGEDMD (see the
+!>                 description of LWORK in DGEDMD) for
+!>                 snapshots of dimensions MIN(M,N)-by-(N-1)
+!>        MLWMQR = N (minimal workspace for
+!>                   DORMQR['L','N',M,N,N])
+!>        MLWGQR = N (minimal workspace for DORGQR[M,N,N])
+!>    Then
+!>    LWORK = MAX(N+MLWQR, N+MLWDMD)
+!>    is updated as follows:
+!>       if   JOBZ == 'V' or JOBZ == 'F' THEN
+!>            LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR )
+!>       if   JOBQ == 'Q' THEN
+!>            LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR)
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    Let M1=MIN(M,N), N1=N-1. Then
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,    K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  &
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     Local array
+!     ~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DGEMM
+      EXTERNAL      DGEDMD, DGEQRF, DLACPY, DLASET, DORGQR, &
+                    DORMQR, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+ !..........................................................
+ !
+ !    Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for DGEQRF.
+         MLWORK = MINMN + MLWQR
+         IF ( LQUERY ) THEN
+             CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR  = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR
+         END IF
+         CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     &
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL DORMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF
+         END IF
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place
+!     one can use an out of core QRF.
+!
+      CALL DGEQRF( M, N, F, LDF, WORK,               &
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL DLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL DLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL DLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, &
+                   IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See DGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and
+        !   the SVD/POD_basis returned by DGEDMD in X) and the
+        !   second factor (the eigenvectors of the Rayleigh
+        !   quotient) is in the array V, as returned by DGEDMD.
+        CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO,   &
+                                  Z(N+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to DGEDMDQ is to be
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL DLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL DLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF
+!
+!     The orthonormal/orthogonal factor Q in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN               ! Q overwrites F
+         CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE DGEDMDQ
diff --git a/SRC/sgedmd.f90 b/SRC/sgedmd.f90
index 4860e88983..90d15c3360 100644
--- a/SRC/sgedmd.f90
+++ b/SRC/sgedmd.f90
@@ -1,1206 +1,1205 @@
-!> \brief \b SGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
-!                        M, N, X, LDX, Y, LDY, NRNK, TOL,  &
-!                        K, REIG,  IMEIG,   Z, LDZ,  RES,  &
-!                        B, LDB, W,  LDW,   S, LDS,        &
-!                        WORK, LWORK, IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real32
-!.....
-!     Scalar arguments
-!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-!                                NRNK, LDZ, LDB, LDW,  LDS, &
-!                                LWORK,  LIWORK
-!     INTEGER,   INTENT(OUT)  :: K, INFO
-!     REAL(KIND=WP), INTENT(IN) ::  TOL
-!     Array arguments
-!     REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-!                                     W(LDW,*), S(LDS,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
-!                                     RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    SGEDMD computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, SGEDMD computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular
-!>    vectors of X. Optionally, SGEDMD returns the residuals
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations
-!>    expressed in this material are those of the author and
-!>    do not necessarily reflect the views of the DARPA SBIR
-!>    Program Office
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Distribution Statement A:
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!============================================================
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix.
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'N' :: No data scaling.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product X(:,1:K)*W, where X
-!>           contains a POD basis (leading left singular vectors
-!>           of the data matrix X) and W contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of K, X, W, Z.
-!>    'N' :: The eigenvectors are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will be
-!>           computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: SGESVD (the QR SVD algorithm)
-!>    2 :: SGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M>= 0
-!>    The state space dimension (the row dimension of X, Y).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshot pairs
-!>    (the number of columns of X and Y).
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (input/output) REAL(KIND=WP) M-by-N array
-!>    > On entry, X contains the data snapshot matrix X. It is
-!>    assumed that the column norms of X are in the range of
-!>    the normalized floating point numbers.
-!>    < On exit, the leading K columns of X contain a POD basis,
-!>    i.e. the leading K left singular vectors of the input
-!>    data matrix X, U(:,1:K). All N columns of X contain all
-!>    left singular vectors of the input matrix X.
-!>    See the descriptions of K, Z and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= M
-!>    The leading dimension of the array X.
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (input/workspace/output) REAL(KIND=WP) M-by-N array
-!>    > On entry, Y contains the data snapshot matrix Y
-!>    < On exit,
-!>    If JOBR == 'R', the leading K columns of Y  contain
-!>    the residual vectors for the computed Ritz pairs.
-!>    See the description of RES.
-!>    If JOBR == 'N', Y contains the original input data,
-!>                    scaled according to the value of JOBS.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= M
-!>    The leading dimension of the array Y.
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.
-!>    The numerical rank can be enforced by using positive
-!>    value of NRNK as follows:
-!>    0 < NRNK <= N :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the descriptions of TOL and  K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N
-!>    The dimension of the POD basis for the data snapshot
-!>    matrix X and the number of the computed Ritz pairs.
-!>    The value of K is determined according to the rule set
-!>    by the parameters NRNK and TOL.
-!>    See the descriptions of NRNK and TOL.
-!>    \endverbatim
-!.....
-!>    \param[out] REIG
-!>    \verbatim
-!>    REIG (output) REAL(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of REIG contain
-!>    the real parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    See the descriptions of K, IMEIG, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] IMEIG
-!>    \verbatim
-!>    IMEIG (output) REAL(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of IMEIG contain
-!>    the imaginary parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    The eigenvalues are determined as follows:
-!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
-!>    real, LAMBDA(i) = REIG(i).
-!>    If IMEIG(i)>0, then the corresponding complex
-!>    conjugate pair of eigenvalues reads
-!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
-!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
-!>    That is, complex conjugate pairs have consecutive
-!>    indices (i,i+1), with the positive imaginary part
-!>    listed first.
-!>    See the descriptions of K, REIG, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) REAL(KIND=WP)  M-by-N array
-!>    If JOBZ =='V' then
-!>       Z contains real Ritz vectors as follows:
-!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
-!>       the i-th Ritz value; ||Z(:,i)||_2=1.
-!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
-!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
-!>       the Ritz values extracted from this subspace are
-!>       REIG(i) + sqrt(-1)*IMEIG(i) and
-!>       REIG(i) - sqrt(-1)*IMEIG(i).
-!>       The corresponding eigenvectors are
-!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
-!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
-!>       || Z(:,i:i+1)||_F = 1.
-!>    If JOBZ == 'F', then the above descriptions hold for
-!>    the columns of X(:,1:K)*W(1:K,1:K), where the columns
-!>    of W(1:k,1:K) are the computed eigenvectors of the
-!>    K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
-!>    are similarly structured: If IMEIG(i) == 0 then
-!>    X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
-!>    then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
-!>         X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
-!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
-!>    See the descriptions of REIG, IMEIG, X and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) N-by-1 array
-!>    RES(1:K) contains the residuals for the K computed
-!>    Ritz pairs.
-!>    If LAMBDA(i) is real, then
-!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
-!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
-!>    then
-!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
-!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
-!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
-!>    It holds that
-!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
-!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
-!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
-!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
-!>    See the description of REIG, IMEIG and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) REAL(KIND=WP)  M-by-N array.
-!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further
-!>    details in the provided references.
-!>    If JOBF == 'E', B(1:M,1;K) contains
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.
-!>    If JOBF =='N', then B is not referenced.
-!>    See the descriptions of X, W, K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= M
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] W
-!>    \verbatim
-!>    W (workspace/output) REAL(KIND=WP) N-by-N array
-!>    On exit, W(1:K,1:K) contains the K computed
-!>    eigenvectors of the matrix Rayleigh quotient (real and
-!>    imaginary parts for each complex conjugate pair of the
-!>    eigenvalues). The Ritz vectors (returned in Z) are the
-!>    product of X (containing a POD basis for the input
-!>    matrix X) and W. See the descriptions of K, S, X and Z.
-!>    W is also used as a workspace to temporarily store the
-!>    left singular vectors of X.
-!>    \endverbatim
-!.....
-!>    \param[in] LDW
-!>    \verbatim
-!>    LDW (input) INTEGER, LDW >= N
-!>    The leading dimension of the array W.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (workspace/output) REAL(KIND=WP) N-by-N array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by SGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit, WORK(1:N) contains the singular values of
-!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
-!>    If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
-!>    scaling factor WORK(N+2)/WORK(N+1) used to scale X
-!>    and Y to avoid overflow in the SVD of X.
-!>    This may be of interest if the scaling option is off
-!>    and as many as possible smallest eigenvalues are
-!>    desired to the highest feasible accuracy.
-!>    If the call to SGEDMD is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. Hence, the
-!>    length of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the workspace vector WORK.
-!>    LWORK is calculated as follows:
-!>    If WHTSVD == 1 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
-!>       If JOBZ == 'N'  then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
-!>       Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
-!>       workspace length of SGESVD.
-!>    If WHTSVD == 2 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
-!>       Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
-!>       minimal workspace length of SGESDD.
-!>    If WHTSVD == 3 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
-!>       Here LWORK_SVD = N+M+MAX(3*N+1,
-!>                       MAX(1,3*N+M,5*N),MAX(1,N))
-!>       is the minimal workspace length of SGESVDQ.
-!>    If WHTSVD == 4 ::
-!>       If JOBZ == 'V', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
-!>       If JOBZ == 'N', then
-!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
-!>       Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
-!>       minimal workspace length of SGEJSV.
-!>    The above expressions are not simplified in order to
-!>    make the usage of WORK more transparent, and for
-!>    easier checking. In any case, LWORK >= 2.
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-      SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
-                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
-                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
-                         B, LDB, W,  LDW,   S, LDS,        &
-                         WORK, LWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real32
-!
-!     Scalar arguments
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-                                 NRNK, LDZ, LDB, LDW,  LDS, &
-                                 LWORK,  LIWORK
-      INTEGER,   INTENT(OUT)  :: K, INFO
-      REAL(KIND=WP), INTENT(IN) ::  TOL
-!
-!     Array arguments
-!     ~~~~~~~~~~~~~~~
-      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-                                      W(LDW,*), S(LDS,*)
-      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
-                                      RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!
-!     Local scalars
-!     ~~~~~~~~~~~~~
-      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
-                       SSUM,  XSCL1,  XSCL2
-      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,  &
-                       LWRKEV, LWRSDD, LWRSVD, &
-                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
-                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
-                       OLWORK
-      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
-                        WNTEX, WNTREF, WNTRES, WNTVEC
-      CHARACTER     ::  JOBZL, T_OR_N
-      CHARACTER     ::  JSVOPT
-!
-!     Local arrays
-!     ~~~~~~~~~~~~
-      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
-!
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      REAL(KIND=WP) SLANGE, SLAMCH, SNRM2
-      EXTERNAL      SLANGE, SLAMCH, SNRM2, ISAMAX
-      INTEGER       ISAMAX
-      LOGICAL       SISNAN, LSAME
-      EXTERNAL      SISNAN, LSAME
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      SAXPY,  SGEMM,  SSCAL
-      EXTERNAL      SGEEV,  SGEJSV, SGESDD, SGESVD, SGESVDQ, &
-                    SLACPY, SLASCL, SLASSQ, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC     INT, FLOAT, MAX, SQRT
-!............................................................
-!
-!    Test the input arguments
-!
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTREF = LSAME(JOBF,'R')
-      WNTEX  = LSAME(JOBF,'E')
-      INFO   = 0
-      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
-!
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
-                                  LSAME(JOBS,'N')) )   THEN
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
-                              .OR. LSAME(JOBZ,'F')) )  THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
-                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -4
-      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
-                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
-          INFO = -5
-      ELSE IF ( M < 0 )   THEN
-          INFO = -6
-      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
-          INFO = -7
-      ELSE IF ( LDX < M ) THEN
-          INFO = -9
-      ELSE IF ( LDY < M ) THEN
-          INFO = -11
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
-                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
-          INFO = -12
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
-          INFO = -13
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -18
-      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
-          INFO = -21
-      ELSE IF ( LDW < N ) THEN
-          INFO = -23
-      ELSE IF ( LDS < N ) THEN
-          INFO = -25
-      END IF
-!
-      IF ( INFO == 0 ) THEN
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and
-          ! determine minimal and optimal sizes of the
-          ! workspace at any moment of the run.
-         IF ( N == 0 ) THEN
-             ! Quick return. All output except K is void.
-             ! INFO=1 signals the void input.
-             ! In case of a workspace query, the default
-             ! minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN
-                IWORK(1) = 1
-                WORK(1)  = 2
-                WORK(2)  = 2
-            ELSE
-               K = 0
-            END IF
-            INFO = 1
-            RETURN
-         END IF
-         MLWORK = MAX(2,N)
-         OLWORK = MAX(2,N)
-         IMINWR = 1
-         SELECT CASE ( WHTSVD )
-         CASE (1)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of SGESVD:
-             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             MLWORK = MAX(MLWORK,N + MWRSVD)
-             IF ( LQUERY ) THEN
-                CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, &
-                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
-                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
-                OLWORK = MAX(OLWORK,N + LWRSVD)
-             END IF
-         CASE (2)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of SGESDD:
-             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
-             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
-             ! IMINWR = 8*MIN(M,N)
-             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
-              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
-             MLWORK = MAX(MLWORK,N + MWRSDD)
-             IMINWR = 8*MIN(M,N)
-             IF ( LQUERY ) THEN
-                CALL SGESDD( 'O', M, N, X, LDX, WORK, B,     &
-                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
-                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
-                OLWORK = MAX(OLWORK,N + LWRSDD)
-             END IF
-         CASE (3)
-             !LWQP3 = 3*N+1
-             !LWORQ = MAX(N, 1)
-             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
-             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2)
-             !MLWORK = N + MWRSVQ
-             !IMINWR = M+N-1
-             CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                          X, LDX, WORK, Z, LDZ, W, LDW,   &
-                             NUMRNK, IWORK, -1, RDUMMY,   &
-                             -1, RDUMMY2, -1, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVQ = INT(RDUMMY(2))
-             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
-             IF ( LQUERY ) THEN
-                LWRSVQ = INT(RDUMMY(1))
-                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
-             END IF
-         CASE (4)
-             JSVOPT = 'J'
-             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V'
-             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
-             MLWORK = MAX(MLWORK,N+MWRSVJ)
-             IMINWR = MAX( 3, M+3*N )
-             IF ( LQUERY ) THEN
-                OLWORK = MAX(OLWORK,N+MWRSVJ)
-             END IF
-         END SELECT
-         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
-             JOBZL = 'V'
-         ELSE
-             JOBZL = 'N'
-         END IF
-         ! Workspace calculation to the SGEEV call
-         IF ( LSAME(JOBZL,'V') ) THEN
-             MWRKEV = MAX( 1, 4*N )
-         ELSE
-             MWRKEV = MAX( 1, 3*N )
-         END IF
-         MLWORK = MAX(MLWORK,N+MWRKEV)
-         IF ( LQUERY ) THEN
-                CALL SGEEV( 'N', JOBZL, N, S, LDS, REIG, &
-                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
-                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
-                OLWORK = MAX( OLWORK, N+LWRKEV )
-         END IF
-!
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
-         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
-      END IF
-!
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'SGEDMD', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          WORK(1)  = MLWORK
-          WORK(2)  = OLWORK
-          RETURN
-      END IF
-!............................................................
-!
-      OFL   = SLAMCH('O')
-      SMALL = SLAMCH('S')
-      BADXY = .FALSE.
-!
-!     <1> Optional scaling of the snapshots (columns of X, Y)
-!     ==========================================================
-      IF ( SCCOLX ) THEN
-          ! The columns of X will be normalized.
-          ! To prevent overflows, the column norms of X are
-          ! carefully computed using SLASSQ.
-          K = 0
-          DO i = 1, N
-            !WORK(i) = DNRM2( M, X(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM )
-            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -8
-                CALL XERBLA('SGEDMD',-INFO)
-            END IF
-            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of X(:,i) overflows. First, X(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
-!                 Next, the norm of X(:,i) is stored without
-!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of X(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, X(1,i), M, INFO2 )
-                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  WORK(i) =   SCALE * ROOTSC
-                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
-                               X(1,i), M, INFO2 )              ! LAPACK CALL
-!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               WORK(i) = ZERO
-               K = K + 1
-            END IF
-          END DO
-          IF ( K == N ) THEN
-          ! All columns of X are zero. Return error code -8.
-          ! (the 8th input variable had an illegal value)
-          K = 0
-          INFO = -8
-          CALL XERBLA('SGEDMD',-INFO)
-          RETURN
-          END IF
-          DO i = 1, N
-!           Now, apply the same scaling to the columns of Y.
-            IF ( WORK(i) >  ZERO ) THEN
-                CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
-!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
-            ELSE IF ( WORK(i) < ZERO ) THEN
-                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
-                     ONE/FLOAT(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
-            ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i )  &
-                                            /= ZERO ) THEN
-!               X(:,i) is zero vector. For consistency,
-!               Y(:,i) should also be zero. If Y(:,i) is not
-!               zero, then the data might be inconsistent or
-!               corrupted. If JOBS == 'C', Y(:,i) is set to
-!               zero and a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-                IF ( LSAME(JOBS,'C')) &
-                CALL SSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
-            END IF
-          END DO
-      END IF
-  !
-      IF ( SCCOLY ) THEN
-          ! The columns of Y will be normalized.
-          ! To prevent overflows, the column norms of Y are
-          ! carefully computed using SLASSQ.
-          DO i = 1, N
-            !WORK(i) = DNRM2( M, Y(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM )
-            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -10
-                CALL XERBLA('SGEDMD',-INFO)
-            END IF
-            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of Y(:,i) overflows. First, Y(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
-!                 Next, the norm of Y(:,i) is stored without
-!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of Y(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, Y(1,i), M, INFO2 )
-                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  WORK(i) =   SCALE * ROOTSC
-                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
-                               Y(1,i), M, INFO2 )              ! LAPACK CALL
-!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               WORK(i) = ZERO
-            END IF
-         END DO
-         DO i = 1, N
-!           Now, apply the same scaling to the columns of X.
-            IF ( WORK(i) >  ZERO ) THEN
-                CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
-!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
-            ELSE IF ( WORK(i) < ZERO ) THEN
-                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
-                     ONE/FLOAT(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
-            ELSE IF ( X(ISAMAX(M, X(1,i),1),i )  &
-                                           /= ZERO ) THEN
-!               Y(:,i) is zero vector.  If X(:,i) is not
-!               zero, then a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-            END IF
-         END DO
-       END IF
-!
-!     <2> SVD of the data snapshot matrix X.
-!     =====================================
-!     The left singular vectors are stored in the array X.
-!     The right singular vectors are in the array W.
-!     The array W will later on contain the eigenvectors
-!     of a Rayleigh quotient.
-      NUMRNK = N
-      SELECT CASE ( WHTSVD )
-         CASE (1)
-             CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
-                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
-             T_OR_N = 'T'
-         CASE (2)
-            CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
-                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
-            T_OR_N = 'T'
-         CASE (3)
-              CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                   X, LDX, WORK, Z, LDZ, W, LDW, &
-                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
-                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
-              CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
-         T_OR_N = 'T'
-         CASE (4)
-              CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
-                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
-                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
-              CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
-              T_OR_N = 'N'
-              XSCL1 = WORK(N+1)
-              XSCL2 = WORK(N+2)
-              IF ( XSCL1 /=  XSCL2 ) THEN
-                 ! This is an exceptional situation. If the
-                 ! data matrices are not scaled and the
-                 ! largest singular value of X overflows.
-                 ! In that case SGEJSV can return the SVD
-                 ! in scaled form. The scaling factor can be used
-                 ! to rescale the data (X and Y).
-                 CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
-              END IF
-      END SELECT
-!
-      IF ( INFO1 > 0 ) THEN
-         ! The SVD selected subroutine did not converge.
-         ! Return with an error code.
-         INFO = 2
-         RETURN
-      END IF
-!
-      IF ( WORK(1) == ZERO ) THEN
-          ! The largest computed singular value of (scaled)
-          ! X is zero. Return error code -8
-          ! (the 8th input variable had an illegal value).
-          K = 0
-          INFO = -8
-          CALL XERBLA('SGEDMD',-INFO)
-          RETURN
-      END IF
-!
-      !<3> Determine the numerical rank of the data
-      !    snapshots matrix X. This depends on the
-      !    parameters NRNK and TOL.
-
-      SELECT CASE ( NRNK )
-          CASE ( -1 )
-               K = 1
-               DO i = 2, NUMRNK
-                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
-                      ( WORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE ( -2 )
-               K = 1
-               DO i = 1, NUMRNK-1
-                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
-                      ( WORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE DEFAULT
-               K = 1
-               DO i = 2, NRNK
-                  IF ( WORK(i) <= SMALL ) EXIT
-                  K = K + 1
-               END DO
-          END SELECT
-      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
-      !   snapshot data in the input matrix X.
-
-      !<4> Compute the Rayleigh quotient S = U^T * A * U.
-      !    Depending on the requested outputs, the computation
-      !    is organized to compute additional auxiliary
-      !    matrices (for the residuals and refinements).
-      !
-      !    In all formulas below, we need V_k*Sigma_k^(-1)
-      !    where either V_k is in W(1:N,1:K), or V_k^T is in
-      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
-      IF ( LSAME(T_OR_N, 'N') ) THEN
-          DO i = 1, K
-           CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
-           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
-          END DO
-      ELSE
-          ! This non-unit stride access is due to the fact
-          ! that SGESVD, SGESVDQ and SGESDD return the
-          ! transposed matrix of the right singular vectors.
-          !DO i = 1, K
-          ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
-          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
-          !END DO
-          DO i = 1, K
-              WORK(N+i) = ONE/WORK(i)
-          END DO
-          DO j = 1, N
-             DO i = 1, K
-                 W(i,j) = (WORK(N+i))*W(i,j)
-             END DO
-          END DO
-      END IF
-!
-      IF ( WNTREF ) THEN
-         !
-         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
-         ! for computing the refined Ritz vectors
-         ! (optionally, outside SGEDMD).
-          CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
-                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
-          !
-          ! At this point Z contains
-          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
-          ! this is needed for computing the residuals.
-          ! This matrix is  returned in the array B and
-          ! it can be used to compute refined Ritz vectors.
-          CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
-          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
-
-          CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
-                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
-          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
-          ! At this point S = U^T * A * U is the Rayleigh quotient.
-      ELSE
-        ! A * U(:,1:K) is not explicitly needed and the
-        ! computation is organized differently. The Rayleigh
-        ! quotient is computed more efficiently.
-        CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
-                   ZERO, Z, LDZ )                                   ! BLAS CALL
-        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
-        ! In the two SGEMM calls here, can use K for LDZ
-        CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
-                    LDW, ZERO, S, LDS )                         ! BLAS CALL
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
-        ! At this point S = U^T * A * U is the Rayleigh quotient.
-        ! If the residuals are requested, save scaled V_k into Z.
-        ! Recall that V_k or V_k^T is stored in W.
-        IF ( WNTRES .OR. WNTEX ) THEN
-          IF ( LSAME(T_OR_N, 'N') ) THEN
-              CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ )
-          ELSE
-              CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ )
-          END IF
-        END IF
-      END IF
-!
-      !<5> Compute the Ritz values and (if requested) the
-      !   right eigenvectors of the Rayleigh quotient.
-      !
-      CALL SGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
-                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
-      !
-      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
-      ! quotient. Even in the case of complex spectrum, all
-      ! computation is done in real arithmetic. REIG and
-      ! IMEIG are the real and the imaginary parts of the
-      ! eigenvalues, so that the spectrum is given as
-      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
-      ! are listed at consecutive positions. For such a
-      ! complex conjugate pair of the eigenvalues, the
-      ! corresponding eigenvectors are also a complex
-      ! conjugate pair with the real and imaginary parts
-      ! stored column-wise in W at the corresponding
-      ! consecutive column indices. See the description of Z.
-      ! Also, see the description of SGEEV.
-      IF ( INFO1 > 0 ) THEN
-         ! SGEEV failed to compute the eigenvalues and
-         ! eigenvectors of the Rayleigh quotient.
-         INFO = 3
-         RETURN
-      END IF
-!
-      ! <6> Compute the eigenvectors (if requested) and,
-      ! the residuals (if requested).
-      !
-      IF ( WNTVEC .OR. WNTEX ) THEN
-      IF ( WNTRES ) THEN
-          IF ( WNTREF ) THEN
-            ! Here, if the refinement is requested, we have
-            ! A*U(:,1:K) already computed and stored in Z.
-            ! For the residuals, need Y = A * U(:,1;K) * W.
-            CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
-                       LDW, ZERO, Y, LDY )               ! BLAS CALL
-            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
-            ! This frees Z; Y contains A * U(:,1:K) * W.
-          ELSE
-            ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) is stored in Z
-            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
-                       W, LDW, ZERO, S, LDS )
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-                       LDS, ZERO, Z, LDZ )
-            ! Save a copy of Z into Y and free Z for holding
-            ! the Ritz vectors.
-            CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY )
-            IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
-          END IF
-      ELSE IF ( WNTEX ) THEN
-          ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) is stored in Z
-            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
-                       W, LDW, ZERO, S, LDS )
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-                       LDS, ZERO, B, LDB )
-            ! The above call replaces the following two calls
-            ! that were used in the developing-testing phase.
-            ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
-            !           LDS, ZERO, Z, LDZ)
-            ! Save a copy of Z into B and free Z for holding
-            ! the Ritz vectors.
-            ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
-      END IF
-!
-      ! Compute the real form of the Ritz vectors
-      IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
-                   ZERO, Z, LDZ )                           ! BLAS CALL
-      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
-!
-      IF ( WNTRES ) THEN
-         i = 1
-         DO WHILE ( i <= K )
-            IF ( IMEIG(i) == ZERO ) THEN
-                ! have a real eigenvalue with real eigenvector
-                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
-                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
-                RES(i) = SNRM2( M, Y(1,i), 1 )                         ! BLAS CALL
-                i = i + 1
-            ELSE
-               ! Have a complex conjugate pair
-               ! REIG(i) +- sqrt(-1)*IMEIG(i).
-               ! Since all computation is done in real
-               ! arithmetic, the formula for the residual
-               ! is recast for real representation of the
-               ! complex conjugate eigenpair. See the
-               ! description of RES.
-               AB(1,1) =  REIG(i)
-               AB(2,1) = -IMEIG(i)
-               AB(1,2) =  IMEIG(i)
-               AB(2,2) =  REIG(i)
-               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
-               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
-               RES(i)   = SLANGE( 'F', M, 2, Y(1,i), LDY, &
-                                  WORK(N+1) )                      ! LAPACK CALL
-               RES(i+1) = RES(i)
-               i = i + 2
-            END IF
-         END DO
-      END IF
-      END IF
-!
-      IF ( WHTSVD == 4 ) THEN
-          WORK(N+1) = XSCL1
-          WORK(N+2) = XSCL2
-      END IF
-!
-!     Successful exit.
-      IF ( .NOT. BADXY ) THEN
-         INFO = 0
-      ELSE
-         ! A warning on possible data inconsistency.
-         ! This should be a rare event.
-         INFO = 4
-      END IF
-!............................................................
-      RETURN
-!     ......
-      END SUBROUTINE SGEDMD
-
+!> \brief \b SGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+!                        M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+!                        K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+!                        B, LDB, W,  LDW,   S, LDS,        &
+!                        WORK, LWORK, IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+!     INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+!                                NRNK, LDZ, LDB, LDW,  LDS, &
+!                                LWORK,  LIWORK
+!     INTEGER,   INTENT(OUT)  :: K, INFO
+!     REAL(KIND=WP), INTENT(IN) ::  TOL
+!     Array arguments
+!     REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+!                                     W(LDW,*), S(LDS,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+!                                     RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    SGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, SGEDMD computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, SGEDMD returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Distribution Statement A:
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!============================================================
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product X(:,1:K)*W, where X
+!>           contains a POD basis (leading left singular vectors
+!>           of the data matrix X) and W contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of K, X, W, Z.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will be
+!>           computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: SGESVD (the QR SVD algorithm)
+!>    2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M>= 0
+!>    The state space dimension (the row dimension of X, Y).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshot pairs
+!>    (the number of columns of X and Y).
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (input/output) REAL(KIND=WP) M-by-N array
+!>    > On entry, X contains the data snapshot matrix X. It is
+!>    assumed that the column norms of X are in the range of
+!>    the normalized floating point numbers.
+!>    < On exit, the leading K columns of X contain a POD basis,
+!>    i.e. the leading K left singular vectors of the input
+!>    data matrix X, U(:,1:K). All N columns of X contain all
+!>    left singular vectors of the input matrix X.
+!>    See the descriptions of K, Z and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= M
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!>    > On entry, Y contains the data snapshot matrix Y
+!>    < On exit,
+!>    If JOBR == 'R', the leading K columns of Y  contain
+!>    the residual vectors for the computed Ritz pairs.
+!>    See the description of RES.
+!>    If JOBR == 'N', Y contains the original input data,
+!>                    scaled according to the value of JOBS.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= M
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the descriptions of TOL and  K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the POD basis for the data snapshot
+!>    matrix X and the number of the computed Ritz pairs.
+!>    The value of K is determined according to the rule set
+!>    by the parameters NRNK and TOL.
+!>    See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] REIG
+!>    \verbatim
+!>    REIG (output) REAL(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of REIG contain
+!>    the real parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    See the descriptions of K, IMEIG, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] IMEIG
+!>    \verbatim
+!>    IMEIG (output) REAL(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of IMEIG contain
+!>    the imaginary parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    The eigenvalues are determined as follows:
+!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
+!>    real, LAMBDA(i) = REIG(i).
+!>    If IMEIG(i)>0, then the corresponding complex
+!>    conjugate pair of eigenvalues reads
+!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!>    That is, complex conjugate pairs have consecutive
+!>    indices (i,i+1), with the positive imaginary part
+!>    listed first.
+!>    See the descriptions of K, REIG, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!>    If JOBZ =='V' then
+!>       Z contains real Ritz vectors as follows:
+!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!>       the i-th Ritz value; ||Z(:,i)||_2=1.
+!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!>       the Ritz values extracted from this subspace are
+!>       REIG(i) + sqrt(-1)*IMEIG(i) and
+!>       REIG(i) - sqrt(-1)*IMEIG(i).
+!>       The corresponding eigenvectors are
+!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!>       || Z(:,i:i+1)||_F = 1.
+!>    If JOBZ == 'F', then the above descriptions hold for
+!>    the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!>    of W(1:k,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!>    are similarly structured: If IMEIG(i) == 0 then
+!>    X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!>    then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!>         X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!>    See the descriptions of REIG, IMEIG, X and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) N-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs.
+!>    If LAMBDA(i) is real, then
+!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!>    then
+!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!>    It holds that
+!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!>    See the description of REIG, IMEIG and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) REAL(KIND=WP)  M-by-N array.
+!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:M,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    If JOBF =='N', then B is not referenced.
+!>    See the descriptions of X, W, K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= M
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] W
+!>    \verbatim
+!>    W (workspace/output) REAL(KIND=WP) N-by-N array
+!>    On exit, W(1:K,1:K) contains the K computed
+!>    eigenvectors of the matrix Rayleigh quotient (real and
+!>    imaginary parts for each complex conjugate pair of the
+!>    eigenvalues). The Ritz vectors (returned in Z) are the
+!>    product of X (containing a POD basis for the input
+!>    matrix X) and W. See the descriptions of K, S, X and Z.
+!>    W is also used as a workspace to temporarily store the
+!>    left singular vectors of X.
+!>    \endverbatim
+!.....
+!>    \param[in] LDW
+!>    \verbatim
+!>    LDW (input) INTEGER, LDW >= N
+!>    The leading dimension of the array W.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (workspace/output) REAL(KIND=WP) N-by-N array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by SGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit, WORK(1:N) contains the singular values of
+!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!>    If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!>    scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!>    and Y to avoid overflow in the SVD of X.
+!>    This may be of interest if the scaling option is off
+!>    and as many as possible smallest eigenvalues are
+!>    desired to the highest feasible accuracy.
+!>    If the call to SGEDMD is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. Hence, the
+!>    length of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the workspace vector WORK.
+!>    LWORK is calculated as follows:
+!>    If WHTSVD == 1 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!>       If JOBZ == 'N'  then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!>       Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!>       workspace length of SGESVD.
+!>    If WHTSVD == 2 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!>       Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!>       minimal workspace length of SGESDD.
+!>    If WHTSVD == 3 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!>       Here LWORK_SVD = N+M+MAX(3*N+1,
+!>                       MAX(1,3*N+M,5*N),MAX(1,N))
+!>       is the minimal workspace length of SGESVDQ.
+!>    If WHTSVD == 4 ::
+!>       If JOBZ == 'V', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!>       If JOBZ == 'N', then
+!>       LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!>       Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
+!>       minimal workspace length of SGEJSV.
+!>    The above expressions are not simplified in order to
+!>    make the usage of WORK more transparent, and for
+!>    easier checking. In any case, LWORK >= 2.
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+      SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN) ::  TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) SLANGE, SLAMCH, SNRM2
+      EXTERNAL      SLANGE, SLAMCH, SNRM2, ISAMAX
+      INTEGER       ISAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SAXPY,  SGEMM,  SSCAL
+      EXTERNAL      SGEEV,  SGEJSV, SGESDD, SGESVD, SGESVDQ, &
+                    SLACPY, SLASCL, SLASSQ, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     INT, FLOAT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL SGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2)
+             !MLWORK = N + MWRSVQ
+             !IMINWR = M+N-1
+             CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                          X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, -1, RDUMMY,   &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(RDUMMY(1))
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK = MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the SGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL SGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using SLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, X(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL SSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using SLASSQ.
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, Y(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(ISAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case SGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that SGESVD, SGESVDQ and SGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside SGEDMD).
+          CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two SGEMM calls here, can use K for LDZ
+        CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL SGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of SGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! SGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SNRM2( M, Y(1,i), 1 )                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = SLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE SGEDMD
diff --git a/SRC/sgedmdq.f90 b/SRC/sgedmdq.f90
index 5ee337b289..2506149cc7 100644
--- a/SRC/sgedmdq.f90
+++ b/SRC/sgedmdq.f90
@@ -1,863 +1,862 @@
-!> \brief \b SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-!                         LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
-!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-!                         S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env 
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real32     
-!.....      
-!     Scalar arguments       
-!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-!                               JOBT, JOBF
-!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-!                               LDY, NRNK, LDZ, LDB, LDV,  &
-!                               LDS, LWORK,  LIWORK
-!     INTEGER,   INTENT(OUT) :: INFO,   K      
-!     REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!     Array arguments      
-!     REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
-!                                     Z(LDZ,*), B(LDB,*),  &
-!                                     V(LDV,*), S(LDS,*)
-!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
-!                                     RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices, using a QR factorization
-!>    based compression of the data. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, SGEDMDQ computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular 
-!>    vectors of X. Optionally, SGEDMDQ returns the residuals 
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].      
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.      
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations 
-!>    expressed in this material are those of the author and 
-!>    do not necessarily reflect the views of the DARPA SBIR 
-!>    Program Office.      
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!......................................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix. The data snapshots are the columns
-!>    of F. The leading N-1 columns of F are denoted X and the
-!>    trailing N-1 columns are denoted Y. 
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)    
-!>    'N' :: No data scaling.   
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Z*V, where Z
-!>           is orthonormal and V contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of F, V, Z.
-!>    'Q' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Q*Z, where Z
-!>           contains the eigenvectors of the compression of the
-!>           underlying discretized operator onto the span of
-!>           the data snapshots. See the descriptions of F, V, Z. 
-!>           Q is from the initial QR factorization.  
-!>    'N' :: The eigenvectors are not computed.  
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1 
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will
-!>           be computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBQ
-!>    \verbatim
-!>    JOBQ (input) CHARACTER*1 
-!>    Specifies whether to explicitly compute and return the
-!>    orthogonal matrix from the QR factorization.
-!>    'Q' :: The matrix Q of the QR factorization of the data
-!>           snapshot matrix is computed and stored in the
-!>           array F. See the description of F.       
-!>    'N' :: The matrix Q is not explicitly computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBT
-!>    \verbatim
-!>    JOBT (input) CHARACTER*1 
-!>    Specifies whether to return the upper triangular factor
-!>    from the QR factorization.
-!>    'R' :: The matrix R of the QR factorization of the data 
-!>           snapshot matrix F is returned in the array Y.
-!>           See the description of Y and Further details.       
-!>    'N' :: The matrix R is not returned.    
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are 
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.   
-!>    To be useful on exit, this option needs JOBQ='Q'.      
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: SGESVD (the QR SVD algorithm)
-!>    2 :: SGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger 
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M >= 0 
-!>    The state space dimension (the number of rows of F)
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshots from a single trajectory,
-!>    taken at equidistant discrete times. This is the 
-!>    number of columns of F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] F
-!>    \verbatim
-!>    F (input/output) REAL(KIND=WP) M-by-N array
-!>    > On entry,
-!>    the columns of F are the sequence of data snapshots 
-!>    from a single trajectory, taken at equidistant discrete
-!>    times. It is assumed that the column norms of F are 
-!>    in the range of the normalized floating point numbers. 
-!>    < On exit,
-!>    If JOBQ == 'Q', the array F contains the orthogonal 
-!>    matrix/factor of the QR factorization of the initial 
-!>    data snapshots matrix F. See the description of JOBQ. 
-!>    If JOBQ == 'N', the entries in F strictly below the main
-!>    diagonal contain, column-wise, the information on the 
-!>    Householder vectors, as returned by SGEQRF. The 
-!>    remaining information to restore the orthogonal matrix
-!>    of the initial QR factorization is stored in WORK(1:N). 
-!>    See the description of WORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LDF
-!>    \verbatim
-!>    LDF (input) INTEGER, LDF >= M 
-!>    The leading dimension of the array F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    X is used as workspace to hold representations of the
-!>    leading N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, the leading K columns of X contain the leading
-!>    K left singular vectors of the above described content
-!>    of X. To lift them to the space of the left singular
-!>    vectors U(:,1:K)of the input data, pre-multiply with the 
-!>    Q factor from the initial QR factorization. 
-!>    See the descriptions of F, K, V  and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    LDX (input) INTEGER, LDX >= N  
-!>    The leading dimension of the array X 
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    Y is used as workspace to hold representations of the
-!>    trailing N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, 
-!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
-!>    triangular factor from the QR factorization of the data
-!>    snapshot matrix F.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= N
-!>    The leading dimension of the array Y   
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.   
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.     
-!>    The numerical rank can be enforced by using positive 
-!>    value of NRNK as follows: 
-!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.  
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N 
-!>    The dimension of the SVD/POD basis for the leading N-1
-!>    data snapshots (columns of F) and the number of the 
-!>    computed Ritz pairs. The value of K is determined
-!>    according to the rule set by the parameters NRNK and 
-!>    TOL. See the descriptions of NRNK and TOL. 
-!>    \endverbatim
-!.....
-!>    \param[out] REIG
-!>    \verbatim
-!>    REIG (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<=N) entries of REIG contain
-!>    the real parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    See the descriptions of K, IMEIG, Z.
-!>    \endverbatim
-!.....
-!>    \param[out] IMEIG
-!>    \verbatim
-!>    IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<N) entries of REIG contain
-!>    the imaginary parts of the computed eigenvalues
-!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
-!>    The eigenvalues are determined as follows:
-!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
-!>    real, LAMBDA(i) = REIG(i).
-!>    If IMEIG(i)>0, then the corresponding complex
-!>    conjugate pair of eigenvalues reads
-!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
-!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
-!>    That is, complex conjugate pairs have consecutive
-!>    indices (i,i+1), with the positive imaginary part
-!>    listed first.
-!>    See the descriptions of K, REIG, Z.     
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
-!>    If JOBZ =='V' then
-!>       Z contains real Ritz vectors as follows:
-!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
-!>       the i-th Ritz value.
-!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
-!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
-!>       the Ritz values extracted from this subspace are
-!>       REIG(i) + sqrt(-1)*IMEIG(i) and
-!>       REIG(i) - sqrt(-1)*IMEIG(i).
-!>       The corresponding eigenvectors are
-!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
-!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
-!>    If JOBZ == 'F', then the above descriptions hold for
-!>    the columns of Z*V, where the columns of V are the
-!>    eigenvectors of the K-by-K Rayleigh quotient, and Z is
-!>    orthonormal. The columns of V are similarly structured:
-!>    If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
-!>    IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
-!>                      Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
-!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
-!>    See the descriptions of REIG, IMEIG, X and V.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    RES(1:K) contains the residuals for the K computed 
-!>    Ritz pairs.       
-!>    If LAMBDA(i) is real, then
-!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
-!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
-!>    then
-!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
-!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
-!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
-!>    It holds that
-!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
-!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
-!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
-!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
-!>    See the description of Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
-!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further 
-!>    details in the provided references. 
-!>    If JOBF == 'E', B(1:N,1;K) contains 
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.   
-!>    In both cases, the content of B can be lifted to the 
-!>    original dimension of the input data by pre-multiplying
-!>    with the Q factor from the initial QR factorization.     
-!>    Here A denotes a compression of the underlying operator.      
-!>    See the descriptions of F and X.
-!>    If JOBF =='N', then B is not referenced.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= MIN(M,N)
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] V
-!>    \verbatim
-!>    V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
-!>    On exit, V(1:K,1:K) contains the K eigenvectors of
-!>    the Rayleigh quotient. The eigenvectors of a complex
-!>    conjugate pair of eigenvalues are returned in real form
-!>    as explained in the description of Z. The Ritz vectors
-!>    (returned in Z) are the product of X and V; see
-!>    the descriptions of X and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDV
-!>    \verbatim
-!>    LDV (input) INTEGER, LDV >= N-1
-!>    The leading dimension of the array V.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by SGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N-1        
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit, 
-!>    WORK(1:MIN(M,N)) contains the scalar factors of the 
-!>    elementary reflectors as returned by SGEQRF of the 
-!>    M-by-N input matrix F.
-!>    WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
-!>    the input submatrix F(1:M,1:N-1).
-!>    If the call to SGEDMDQ is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. Hence, the
-!>    length of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the  workspace vector WORK.
-!>    LWORK is calculated as follows:
-!>    Let MLWQR  = N (minimal workspace for SGEQRF[M,N])
-!>        MLWDMD = minimal workspace for SGEDMD (see the
-!>                 description of LWORK in SGEDMD) for 
-!>                 snapshots of dimensions MIN(M,N)-by-(N-1)
-!>        MLWMQR = N (minimal workspace for 
-!>                   SORMQR['L','N',M,N,N])
-!>        MLWGQR = N (minimal workspace for SORGQR[M,N,N])
-!>    Then
-!>    LWORK = MAX(N+MLWQR, N+MLWDMD)
-!>    is updated as follows:
-!>       if   JOBZ == 'V' or JOBZ == 'F' THEN 
-!>            LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR )
-!>       if   JOBQ == 'Q' THEN
-!>            LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR)
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.          
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    Let M1=MIN(M,N), N1=N-1. Then
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
-!>    If on entry LIWORK = -1, then a worskpace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.  
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
-                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env 
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real32     
-!
-!     Scalar arguments       
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-                                JOBT, JOBF
-      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-                                LDY, NRNK, LDZ, LDB, LDV,  &
-                                LDS, LWORK,  LIWORK
-      INTEGER,   INTENT(OUT) :: INFO,   K      
-      REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!
-!     Array arguments      
-!     ~~~~~~~~~~~~~~~~
-      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
-                                      Z(LDZ,*), B(LDB,*),  &
-                                      V(LDV,*), S(LDS,*)
-      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
-                                      RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~      
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!      
-!     Local scalars      
-!     ~~~~~~~~~~~~~
-      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
-                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
-                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
-                           OLWQR
-      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
-                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
-                           WNTVCQ, WNTREF, WNTEX
-      CHARACTER(LEN=1)  :: JOBVL
-!      
-!     Local array      
-!     ~~~~~~~~~~~      
-      REAL(KIND=WP) :: RDUMMY(2)
-!      
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      LOGICAL       LSAME
-      EXTERNAL      LSAME 
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      SGEMM 
-      EXTERNAL      SGEDMD, SGEQRF, SLACPY, SLASET, SORGQR, & 
-                    SORMQR, XERBLA
-!      
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC      MAX, MIN, INT         
-!..........................................................  
-!
-!     Test the input arguments    
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V') 
-      WNTVCF = LSAME(JOBZ,'F')
-      WNTVCQ = LSAME(JOBZ,'Q')
-      WNTREF = LSAME(JOBF,'R') 
-      WNTEX  = LSAME(JOBF,'E')
-      WANTQ  = LSAME(JOBQ,'Q')
-      WNTTRF = LSAME(JOBT,'R')     
-      MINMN  = MIN(M,N)
-      INFO = 0 
-      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
-!       
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) )  THEN 
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
-                              .OR. LSAME(JOBZ,'N')) ) THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
-          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
-          INFO = -4                 
-      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
-          INFO = -5
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -6    
-      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
-                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
-          INFO = -7
-      ELSE IF ( M < 0 ) THEN
-          INFO = -8
-      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
-          INFO = -9
-      ELSE IF ( LDF < M ) THEN
-          INFO = -11
-      ELSE IF ( LDX < MINMN ) THEN
-          INFO = -13
-      ELSE IF ( LDY < MINMN ) THEN
-          INFO = -15
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
-                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
-          INFO = -16
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
-          INFO = -17
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -22
-      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
-          INFO = -25
-      ELSE IF ( LDV < N-1 ) THEN
-          INFO = -27
-      ELSE IF ( LDS < N-1 ) THEN
-          INFO = -29
-      END IF
-!      
-      IF ( WNTVEC .OR. WNTVCF ) THEN
-          JOBVL = 'V'
-      ELSE
-          JOBVL = 'N'
-      END IF     
-      IF ( INFO == 0 ) THEN  
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and 
-          ! determine minimal and optimal sizes of the 
-          ! workspace at any moment of the run.         
-         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
-             ! All output except K is void. INFO=1 signals
-             ! the void input. In case of a workspace query,
-             ! the minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN  
-               IWORK(1) = 1
-                WORK(1) = 2
-                WORK(2) = 2
-            ELSE                
-               K = 0
-            END IF             
-            INFO = 1  
-            RETURN
-         END IF     
-         MLWQR  = MAX(1,N)  ! Minimal workspace length for SGEQRF.
-         MLWORK = MIN(M,N) + MLWQR 
-         IF ( LQUERY ) THEN 
-             CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
-                          INFO1 )
-             OLWQR = INT(RDUMMY(1))
-             OLWORK = MIN(M,N) + OLWQR           
-         END IF
-         CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
-                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
-                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
-                      V, LDV, S, LDS, WORK, -1, IWORK,       &
-                      LIWORK, INFO1 )
-         MLWDMD = INT(WORK(1))
-         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
-         IMINWR = IWORK(1)
-         IF ( LQUERY ) THEN 
-             OLWDMD = INT(WORK(2))
-             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
-         END IF
-         IF ( WNTVEC .OR. WNTVCF ) THEN
-            MLWMQR = MAX(1,N) 
-            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
-            IF ( LQUERY ) THEN
-               CALL SORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
-                            WORK, Z, LDZ, WORK, -1, INFO1 )
-               OLWMQR = INT(WORK(1))
-               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
-            END IF
-         END IF  
-         IF ( WANTQ ) THEN
-            MLWGQR = N
-            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
-            IF ( LQUERY ) THEN 
-                CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
-                             WORK, -1, INFO1 )        
-                OLWGQR = INT(WORK(1))
-                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
-            END IF            
-         END IF   
-         IMINWR = MAX( 1, IMINWR )
-         MLWORK = MAX( 2, MLWORK )      
-         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
-      END IF  
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'SGEDMDQ', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          WORK(1)  = MLWORK
-          WORK(2)  = OLWORK
-          RETURN
-      END IF   
-!.....	  
-!     Initial QR factorization that is used to represent the
-!     snapshots as elements of lower dimensional subspace.
-!     For large scale computation with M >>N , at this place 
-!     one can use an out of core QRF.
-!   
-      CALL SGEQRF( M, N, F, LDF, WORK,         & 
-                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
-!      
-!     Define X and Y as the snapshots representations in the
-!     orthogonal basis computed in the QR factorization.
-!     X corresponds to the leading N-1 and Y to the trailing
-!     N-1 snapshots.
-      CALL SLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
-      CALL SLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
-      CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
-      IF ( M >= 3 ) THEN
-          CALL SLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
-                       Y(3,1), LDY )  
-      END IF
-!
-!     Compute the DMD of the projected snapshot pairs (X,Y)   
-      CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
-                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
-                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
-                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK,  & 
-                   LIWORK, INFO1 )
-      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
-          ! Return with error code.
-          INFO = INFO1
-          RETURN
-      ELSE
-          INFO = INFO1
-      END IF    
-!      
-!     The Ritz vectors (Koopman modes) can be explicitly 
-!     formed or returned in factored form.
-      IF ( WNTVEC ) THEN
-        ! Compute the eigenvectors explicitly.  
-        IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, &
-                                     ZERO, Z(MINMN+1,1), LDZ )
-        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
-                     LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
-      ELSE IF ( WNTVCF ) THEN   
-        !   Return the Ritz vectors (eigenvectors) in factored
-        !   form Z*V, where Z contains orthonormal matrix (the
-        !   product of Q from the initial QR factorization and 
-        !   the SVD/POD_basis returned by SGEDMD in X) and the 
-        !   second factor (the eigenvectors of the Rayleigh 
-        !   quotient) is in the array V, as returned by SGEDMD.
-        CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ )
-        IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO,   & 
-                                  Z(N+1,1), LDZ )
-        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
-             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
-      END IF
-!     
-!     Some optional output variables:
-!
-!     The upper triangular factor in the initial QR 
-!     factorization is optionally returned in the array Y.
-!     This is useful if this call to SGEDMDQ is to be 
-!     followed by a streaming DMD that is implemented in a 
-!     QR compressed form.
-      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
-         CALL SLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
-         CALL SLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
-      END IF    
-!
-!     The orthonormal/orthogonal factor in the initial QR 
-!     factorization is optionally returned in the array F. 
-!     Same as with the triangular factor above, this is 
-!     useful in a streaming DMD.
-      IF ( WANTQ ) THEN  ! Q overwrites F 
-         CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
-              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
-      END IF
-!      
-      RETURN
-!      
-      END SUBROUTINE SGEDMDQ
-    
+!> \brief \b SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+!                         LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+!                         S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+!                               JOBT, JOBF
+!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+!                               LDY, NRNK, LDZ, LDB, LDV,  &
+!                               LDS, LWORK,  LIWORK
+!     INTEGER,   INTENT(OUT) :: INFO,   K
+!     REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+!     REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+!                                     Z(LDZ,*), B(LDB,*),  &
+!                                     V(LDV,*), S(LDS,*)
+!     REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+!                                     RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices, using a QR factorization
+!>    based compression of the data. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, SGEDMDQ computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, SGEDMDQ returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office.
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!......................................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix. The data snapshots are the columns
+!>    of F. The leading N-1 columns of F are denoted X and the
+!>    trailing N-1 columns are denoted Y.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Z*V, where Z
+!>           is orthonormal and V contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of F, V, Z.
+!>    'Q' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Q*Z, where Z
+!>           contains the eigenvectors of the compression of the
+!>           underlying discretized operator onto the span of
+!>           the data snapshots. See the descriptions of F, V, Z.
+!>           Q is from the initial QR factorization.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will
+!>           be computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBQ
+!>    \verbatim
+!>    JOBQ (input) CHARACTER*1
+!>    Specifies whether to explicitly compute and return the
+!>    orthogonal matrix from the QR factorization.
+!>    'Q' :: The matrix Q of the QR factorization of the data
+!>           snapshot matrix is computed and stored in the
+!>           array F. See the description of F.
+!>    'N' :: The matrix Q is not explicitly computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBT
+!>    \verbatim
+!>    JOBT (input) CHARACTER*1
+!>    Specifies whether to return the upper triangular factor
+!>    from the QR factorization.
+!>    'R' :: The matrix R of the QR factorization of the data
+!>           snapshot matrix F is returned in the array Y.
+!>           See the description of Y and Further details.
+!>    'N' :: The matrix R is not returned.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    To be useful on exit, this option needs JOBQ='Q'.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: SGESVD (the QR SVD algorithm)
+!>    2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M >= 0
+!>    The state space dimension (the number of rows of F)
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshots from a single trajectory,
+!>    taken at equidistant discrete times. This is the
+!>    number of columns of F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] F
+!>    \verbatim
+!>    F (input/output) REAL(KIND=WP) M-by-N array
+!>    > On entry,
+!>    the columns of F are the sequence of data snapshots
+!>    from a single trajectory, taken at equidistant discrete
+!>    times. It is assumed that the column norms of F are
+!>    in the range of the normalized floating point numbers.
+!>    < On exit,
+!>    If JOBQ == 'Q', the array F contains the orthogonal
+!>    matrix/factor of the QR factorization of the initial
+!>    data snapshots matrix F. See the description of JOBQ.
+!>    If JOBQ == 'N', the entries in F strictly below the main
+!>    diagonal contain, column-wise, the information on the
+!>    Householder vectors, as returned by SGEQRF. The
+!>    remaining information to restore the orthogonal matrix
+!>    of the initial QR factorization is stored in WORK(1:N).
+!>    See the description of WORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LDF
+!>    \verbatim
+!>    LDF (input) INTEGER, LDF >= M
+!>    The leading dimension of the array F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    X is used as workspace to hold representations of the
+!>    leading N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit, the leading K columns of X contain the leading
+!>    K left singular vectors of the above described content
+!>    of X. To lift them to the space of the left singular
+!>    vectors U(:,1:K)of the input data, pre-multiply with the
+!>    Q factor from the initial QR factorization.
+!>    See the descriptions of F, K, V  and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= N
+!>    The leading dimension of the array X
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    Y is used as workspace to hold representations of the
+!>    trailing N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit,
+!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!>    triangular factor from the QR factorization of the data
+!>    snapshot matrix F.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= N
+!>    The leading dimension of the array Y
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the SVD/POD basis for the leading N-1
+!>    data snapshots (columns of F) and the number of the
+!>    computed Ritz pairs. The value of K is determined
+!>    according to the rule set by the parameters NRNK and
+!>    TOL. See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] REIG
+!>    \verbatim
+!>    REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<=N) entries of REIG contain
+!>    the real parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    See the descriptions of K, IMEIG, Z.
+!>    \endverbatim
+!.....
+!>    \param[out] IMEIG
+!>    \verbatim
+!>    IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<N) entries of REIG contain
+!>    the imaginary parts of the computed eigenvalues
+!>    REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!>    The eigenvalues are determined as follows:
+!>    If IMEIG(i) == 0, then the corresponding eigenvalue is
+!>    real, LAMBDA(i) = REIG(i).
+!>    If IMEIG(i)>0, then the corresponding complex
+!>    conjugate pair of eigenvalues reads
+!>    LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
+!>    LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
+!>    That is, complex conjugate pairs have consecutive
+!>    indices (i,i+1), with the positive imaginary part
+!>    listed first.
+!>    See the descriptions of K, REIG, Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!>    If JOBZ =='V' then
+!>       Z contains real Ritz vectors as follows:
+!>       If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!>       the i-th Ritz value.
+!>       If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!>       [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!>       the Ritz values extracted from this subspace are
+!>       REIG(i) + sqrt(-1)*IMEIG(i) and
+!>       REIG(i) - sqrt(-1)*IMEIG(i).
+!>       The corresponding eigenvectors are
+!>       Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!>       Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!>    If JOBZ == 'F', then the above descriptions hold for
+!>    the columns of Z*V, where the columns of V are the
+!>    eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!>    orthonormal. The columns of V are similarly structured:
+!>    If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if
+!>    IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!>                      Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!>    are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!>    See the descriptions of REIG, IMEIG, X and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs.
+!>    If LAMBDA(i) is real, then
+!>       RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
+!>    If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!>    then
+!>    RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!>    where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!>              [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!>    It holds that
+!>    RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!>    RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!>    where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!>          ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!>    See the description of Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:N,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    In both cases, the content of B can be lifted to the
+!>    original dimension of the input data by pre-multiplying
+!>    with the Q factor from the initial QR factorization.
+!>    Here A denotes a compression of the underlying operator.
+!>    See the descriptions of F and X.
+!>    If JOBF =='N', then B is not referenced.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= MIN(M,N)
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] V
+!>    \verbatim
+!>    V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!>    On exit, V(1:K,1:K) contains the K eigenvectors of
+!>    the Rayleigh quotient. The eigenvectors of a complex
+!>    conjugate pair of eigenvalues are returned in real form
+!>    as explained in the description of Z. The Ritz vectors
+!>    (returned in Z) are the product of X and V; see
+!>    the descriptions of X and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDV
+!>    \verbatim
+!>    LDV (input) INTEGER, LDV >= N-1
+!>    The leading dimension of the array V.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by SGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N-1
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    WORK(1:MIN(M,N)) contains the scalar factors of the
+!>    elementary reflectors as returned by SGEQRF of the
+!>    M-by-N input matrix F.
+!>    WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of
+!>    the input submatrix F(1:M,1:N-1).
+!>    If the call to SGEDMDQ is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. Hence, the
+!>    length of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the  workspace vector WORK.
+!>    LWORK is calculated as follows:
+!>    Let MLWQR  = N (minimal workspace for SGEQRF[M,N])
+!>        MLWDMD = minimal workspace for SGEDMD (see the
+!>                 description of LWORK in SGEDMD) for
+!>                 snapshots of dimensions MIN(M,N)-by-(N-1)
+!>        MLWMQR = N (minimal workspace for
+!>                   SORMQR['L','N',M,N,N])
+!>        MLWGQR = N (minimal workspace for SORGQR[M,N,N])
+!>    Then
+!>    LWORK = MAX(N+MLWQR, N+MLWDMD)
+!>    is updated as follows:
+!>       if   JOBZ == 'V' or JOBZ == 'F' THEN
+!>            LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR )
+!>       if   JOBQ == 'Q' THEN
+!>            LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR)
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    Let M1=MIN(M,N), N1=N-1. Then
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!>    If on entry LIWORK = -1, then a worskpace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~~
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  &
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     Local array
+!     ~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SGEMM
+      EXTERNAL      SGEDMD, SGEQRF, SLACPY, SLASET, SORGQR, &
+                    SORMQR, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+!..........................................................
+!
+!     Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for SGEQRF.
+         MLWORK = MIN(M,N) + MLWQR
+         IF ( LQUERY ) THEN
+             CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR
+         END IF
+         CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     &
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL SORMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF
+         END IF
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place
+!     one can use an out of core QRF.
+!
+      CALL SGEQRF( M, N, F, LDF, WORK,         &
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL SLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL SLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL SLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK,  &
+                   LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+                     LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and
+        !   the SVD/POD_basis returned by SGEDMD in X) and the
+        !   second factor (the eigenvectors of the Rayleigh
+        !   quotient) is in the array V, as returned by SGEDMD.
+        CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO,   &
+                                  Z(N+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to SGEDMDQ is to be
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL SLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL SLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF
+!
+!     The orthonormal/orthogonal factor in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN  ! Q overwrites F
+         CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE SGEDMDQ
diff --git a/SRC/zgedmd.f90 b/SRC/zgedmd.f90
index 5045cb166c..385b82061f 100644
--- a/SRC/zgedmd.f90
+++ b/SRC/zgedmd.f90
@@ -1,1148 +1,1147 @@
-!> \brief \b ZGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
-!                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
-!                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
-!                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
-!                         RWORK, LRWORK, IWORK, LIWORK, INFO )
-!......
-!      USE                   iso_fortran_env
-!      IMPLICIT NONE
-!      INTEGER, PARAMETER :: WP = real64
-! 
-!......
-!      Scalar arguments
-!      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-!      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-!                                 NRNK, LDZ, LDB, LDW,  LDS, &
-!                                 LIWORK, LRWORK, LZWORK
-!      INTEGER,       INTENT(OUT)  :: K, INFO
-!      REAL(KIND=WP), INTENT(IN)   ::    TOL
-!      Array arguments
-!      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-!      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-!                                         W(LDW,*), S(LDS,*)
-!      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-!      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-!      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
-!      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
-!      INTEGER,          INTENT(OUT)   :: IWORK(*)
-!
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    ZGEDMD computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, ZGEDMD computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular
-!>    vectors of X. Optionally, ZGEDMD returns the residuals
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations
-!>    expressed in this material are those of the author and
-!>    do not necessarily reflect the views of the DARPA SBIR
-!>    Program Office
-!>    \endverbatim
-!......................................................................
-!>    \par Distribution Statement A:
-!     ==============================
-!>    \verbatim
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!............................................................
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix.
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'N' :: No data scaling.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product X(:,1:K)*W, where X
-!>           contains a POD basis (leading left singular vectors
-!>           of the data matrix X) and W contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of K, X, W, Z.
-!>    'N' :: The eigenvectors are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim
-!>    JOBR (input) CHARACTER*1
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will be
-!>           computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    \verbatim
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: ZGESVD (the QR SVD algorithm)
-!>    2 :: ZGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M>= 0
-!>    The state space dimension (the row dimension of X, Y).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshot pairs
-!>    (the number of columns of X and Y).
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim
-!>    X (input/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry, X contains the data snapshot matrix X. It is
-!>    assumed that the column norms of X are in the range of
-!>    the normalized floating point numbers.
-!>    < On exit, the leading K columns of X contain a POD basis,
-!>    i.e. the leading K left singular vectors of the input
-!>    data matrix X, U(:,1:K). All N columns of X contain all
-!>    left singular vectors of the input matrix X.
-!>    See the descriptions of K, Z and W.
-!.....
-!>    LDX (input) INTEGER, LDX >= M
-!>    The leading dimension of the array X.
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim
-!>    Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry, Y contains the data snapshot matrix Y
-!>    < On exit,
-!>    If JOBR == 'R', the leading K columns of Y  contain
-!>    the residual vectors for the computed Ritz pairs.
-!>    See the description of RES.
-!>    If JOBR == 'N', Y contains the original input data,
-!>                    scaled according to the value of JOBS.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim
-!>    LDY (input) INTEGER , LDY >= M
-!>    The leading dimension of the array Y.
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.
-!>    The numerical rank can be enforced by using positive
-!>    value of NRNK as follows:
-!>    0 < NRNK <= N :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the descriptions of TOL and  K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N
-!>    The dimension of the POD basis for the data snapshot
-!>    matrix X and the number of the computed Ritz pairs.
-!>    The value of K is determined according to the rule set
-!>    by the parameters NRNK and TOL.
-!>    See the descriptions of NRNK and TOL.
-!>    \endverbatim
-!.....
-!>    \param[out] EIGS
-!>    \verbatim
-!>    EIGS (output) COMPLEX(KIND=WP) N-by-1 array
-!>    The leading K (K<=N) entries of EIGS contain
-!>    the computed eigenvalues (Ritz values).
-!>    See the descriptions of K, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
-!>    If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
-!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
-!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
-!>    the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
-!>    is an eigenvector corresponding to EIGS(i). The columns
-!>    of W(1:k,1:K) are the computed eigenvectors of the
-!>    K-by-K Rayleigh quotient.
-!>    See the descriptions of EIGS, X and W.
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) N-by-1 array
-!>    RES(1:K) contains the residuals for the K computed
-!>    Ritz pairs,
-!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
-!>    See the description of EIGS and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) COMPLEX(KIND=WP)  M-by-N array.
-!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further
-!>    details in the provided references.
-!>    If JOBF == 'E', B(1:M,1:K) contains
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.
-!>    If JOBF =='N', then B is not referenced.
-!>    See the descriptions of X, W, K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= M
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] W
-!>    \verbatim
-!>    W (workspace/output) COMPLEX(KIND=WP) N-by-N array
-!>    On exit, W(1:K,1:K) contains the K computed
-!>    eigenvectors of the matrix Rayleigh quotient.
-!>    The Ritz vectors (returned in Z) are the
-!>    product of X (containing a POD basis for the input
-!>    matrix X) and W. See the descriptions of K, S, X and Z.
-!>    W is also used as a workspace to temporarily store the
-!>    right singular vectors of X.
-!>    \endverbatim
-!.....
-!>    \param[in] LDW
-!>    \verbatim
-!>    LDW (input) INTEGER, LDW >= N
-!>    The leading dimension of the array W.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (workspace/output) COMPLEX(KIND=WP) N-by-N array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by ZGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] ZWORK
-!>    \verbatim
-!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
-!>    ZWORK is used as complex workspace in the complex SVD, as
-!>    specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing
-!>    the eigenvalues of a Rayleigh quotient.
-!>    If the call to ZGEDMD is only workspace query, then
-!>    ZWORK(1) contains the minimal complex workspace length and
-!>    ZWORK(2) is the optimal complex workspace length.
-!>    Hence, the length of work is at least 2.
-!>    See the description of LZWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LZWORK
-!>    \verbatim
-!>    LZWORK (input) INTEGER
-!>    The minimal length of the workspace vector ZWORK.
-!>    LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV),
-!>    where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal
-!>    LZWORK_SVD is calculated as follows
-!>    If WHTSVD == 1 :: ZGESVD ::
-!>       LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-!>    If WHTSVD == 2 :: ZGESDD ::
-!>       LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
-!>    If WHTSVD == 3 :: ZGESVDQ ::
-!>       LZWORK_SVD = obtainable by a query
-!>    If WHTSVD == 4 :: ZGEJSV ::
-!>       LZWORK_SVD = obtainable by a query
-!>    If on entry LZWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths and returns them in
-!>    LZWORK(1) and LZWORK(2), respectively.
-!>    \endverbatim
-!.....
-!>    \param[out] RWORK
-!>    \verbatim
-!>    RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
-!>    On exit, RWORK(1:N) contains the singular values of
-!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
-!>    If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
-!>    scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
-!>    and Y to avoid overflow in the SVD of X.
-!>    This may be of interest if the scaling option is off
-!>    and as many as possible smallest eigenvalues are
-!>    desired to the highest feasible accuracy.
-!>    If the call to ZGEDMD is only workspace query, then
-!>    RWORK(1) contains the minimal workspace length.
-!>    See the description of LRWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LRWORK
-!>    \verbatim
-!>    LRWORK (input) INTEGER
-!>    The minimal length of the workspace vector RWORK.
-!>    LRWORK is calculated as follows:
-!>    LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where
-!>    LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
-!>    for the SVD subroutine determined by the input parameter
-!>    WHTSVD.
-!>    If WHTSVD == 1 :: ZGESVD ::
-!>       LRWORK_SVD = 5*MIN(M,N)
-!>    If WHTSVD == 2 :: ZGESDD ::
-!>       LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
-!>       2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
-!>    If WHTSVD == 3 :: ZGESVDQ ::
-!>       LRWORK_SVD = obtainable by a query
-!>    If WHTSVD == 4 :: ZGEJSV ::
-!>       LRWORK_SVD = obtainable by a query
-!>    If on entry LRWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    real workspace length and returns it in RWORK(1).
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for  ZWORK, RWORK and
-!>    IWORK. See the descriptions of ZWORK, RWORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
-                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
-                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
-                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
-                         RWORK, LRWORK, IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real64
-!
-!     Scalar arguments
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
-      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
-                                 NRNK, LDZ, LDB, LDW,  LDS, &
-                                 LIWORK, LRWORK, LZWORK
-      INTEGER,       INTENT(OUT)  :: K, INFO
-      REAL(KIND=WP), INTENT(IN)   ::    TOL
-!
-!     Array arguments
-!     ~~~~~~~~~~~~~~~
-      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
-                                         W(LDW,*), S(LDS,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
-      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
-      INTEGER,          INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~
-      REAL(KIND=WP),    PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP),    PARAMETER :: ZERO = 0.0_WP
-      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
-!
-!     Local scalars
-!     ~~~~~~~~~~~~~
-      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,    &
-                       SSUM,  XSCL1,  XSCL2
-      INTEGER       ::  i,  j,  IMINWR,  INFO1, INFO2,  &
-                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
-                        LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
-                        MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
-                        OLWORK, MLRWRK
-      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY,  &
-                        WNTEX, WNTREF, WNTRES, WNTVEC
-      CHARACTER     ::  JOBZL, T_OR_N
-      CHARACTER     ::  JSVOPT
-!
-!     Local arrays
-!     ~~~~~~~~~~~~
-      REAL(KIND=WP) :: RDUMMY(2)
-!
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2
-      EXTERNAL      ZLANGE, DLAMCH, DZNRM2, IZAMAX
-      INTEGER                               IZAMAX
-      LOGICAL       DISNAN, LSAME
-      EXTERNAL      DISNAN, LSAME
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      ZAXPY,  ZGEMM,  ZDSCAL
-      EXTERNAL      ZGEEV,  ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, &
-                    ZLACPY, ZLASCL, ZLASSQ, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC     DBLE, INT, MAX, SQRT
-!............................................................
-!
-!    Test the input arguments
-!
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTREF = LSAME(JOBF,'R')
-      WNTEX  = LSAME(JOBF,'E')
-      INFO   = 0
-      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
-                                  .OR. ( LRWORK == -1 ) )
-!
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
-                                  LSAME(JOBS,'N')) )   THEN
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
-                              .OR. LSAME(JOBZ,'F')) )  THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
-                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
-                LSAME(JOBF,'N') ) )                    THEN
-          INFO = -4
-      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
-                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
-          INFO = -5
-      ELSE IF ( M < 0 )   THEN
-          INFO = -6
-      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
-          INFO = -7
-      ELSE IF ( LDX < M ) THEN
-          INFO = -9
-      ELSE IF ( LDY < M ) THEN
-          INFO = -11
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
-                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
-          INFO = -12
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
-          INFO = -13
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -17
-      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
-          INFO = -20
-      ELSE IF ( LDW < N ) THEN
-          INFO = -22
-      ELSE IF ( LDS < N ) THEN
-          INFO = -24
-      END IF
-!
-      IF ( INFO == 0 ) THEN
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and
-          ! determine minimal and optimal sizes of the
-          ! workspace at any moment of the run.
-         IF ( N == 0 ) THEN
-             ! Quick return. All output except K is void.
-             ! INFO=1 signals the void input.
-             ! In case of a workspace query, the default
-             ! minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN
-                IWORK(1) = 1
-                RWORK(1) = 1
-                ZWORK(1) = 2
-                ZWORK(2) = 2
-            ELSE
-               K   =  0
-            END IF
-            INFO = 1
-            RETURN
-         END IF
-
-         IMINWR = 1
-         MLRWRK = MAX(1,N)
-         MLWORK = 2
-         OLWORK = 2
-         SELECT CASE ( WHTSVD )
-         CASE (1)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of ZGESVD:
-             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
-             MLWORK = MAX(MLWORK,MWRSVD)
-             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
-             IF ( LQUERY ) THEN
-                CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
-                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
-                LWRSVD = INT( ZWORK(1) )
-                OLWORK = MAX(OLWORK,LWRSVD)
-             END IF
-         CASE (2)
-             ! The following is specified as the minimal
-             ! length of WORK in the definition of ZGESDD:
-             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
-             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
-             ! In LAPACK 3.10.1 RWORK is defined differently.
-             ! Below we take max over the two versions.
-             ! IMINWR = 8*MIN(M,N)
-             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
-             MLWORK = MAX(MLWORK,MWRSDD)
-             IMINWR = 8*MIN(M,N)
-             MLRWRK = MAX( MLRWRK,  N +                    &
-                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
-                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
-                           2*MAX(M,N)*MIN(M,N)+            &
-                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
-             IF ( LQUERY ) THEN
-                CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,&
-                     W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
-                LWRSDD = MAX( MWRSDD,INT( ZWORK(1) ))
-                ! Possible bug in ZGESDD optimal workspace size.
-                OLWORK = MAX(OLWORK,LWRSDD)
-             END IF
-         CASE (3)
-             CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
-                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVQ = INT(ZWORK(2))
-             MLWORK = MAX(MLWORK,MWRSVQ)
-             MLRWRK  = MAX(MLRWRK,N + INT(RDUMMY(1)))
-             IF ( LQUERY ) THEN
-                LWRSVQ = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK,LWRSVQ)
-             END IF
-         CASE (4)
-             JSVOPT = 'J'
-             CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
-                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
-                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
-             IMINWR = IWORK(1)
-             MWRSVJ = INT(ZWORK(2))
-             MLWORK = MAX(MLWORK,MWRSVJ)
-             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
-             IF ( LQUERY ) THEN
-                LWRSVJ = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK,LWRSVJ)
-             END IF
-         END SELECT
-         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
-             JOBZL = 'V'
-         ELSE
-             JOBZL = 'N'
-         END IF
-         ! Workspace calculation to the ZGEEV call
-         MWRKEV = MAX( 1, 2*N )
-         MLWORK = MAX(MLWORK,MWRKEV)
-         MLRWRK = MAX(MLRWRK,N+2*N)
-         IF ( LQUERY ) THEN
-             CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
-              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 )
-                LWRKEV = INT(ZWORK(1))
-                OLWORK = MAX( OLWORK, LWRKEV )
-         END IF
-!
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
-         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
-         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
-
-      END IF
-!
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'ZGEDMD', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          RWORK(1) = MLRWRK
-          ZWORK(1) = MLWORK
-          ZWORK(2) = OLWORK
-          RETURN
-      END IF
-!............................................................
-!
-      OFL   = DLAMCH('O')
-      SMALL = DLAMCH('S')
-      BADXY = .FALSE.
-!
-!     <1> Optional scaling of the snapshots (columns of X, Y)
-!     ==========================================================
-      IF ( SCCOLX ) THEN
-          ! The columns of X will be normalized.
-          ! To prevent overflows, the column norms of X are
-          ! carefully computed using ZLASSQ.
-          K = 0
-          DO i = 1, N
-            !WORK(i) = DZNRM2( M, X(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM )
-            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -8
-                CALL XERBLA('ZGEDMD',-INFO)
-            END IF
-            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of X(:,i) overflows. First, X(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
-!                 Next, the norm of X(:,i) is stored without
-!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of X(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, X(1,i), LDX, INFO2 )
-                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
-               ELSE
-!                 X(:,i) will be scaled to unit 2-norm
-                  RWORK(i) =   SCALE * ROOTSC
-                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
-                               X(1,i), LDX, INFO2 )      ! LAPACK CALL
-!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC
-               END IF
-            ELSE
-               RWORK(i) = ZERO
-               K = K + 1
-            END IF
-          END DO
-          IF ( K == N ) THEN
-             ! All columns of X are zero. Return error code -8.
-             ! (the 8th input variable had an illegal value)
-             K = 0
-             INFO = -8
-             CALL XERBLA('ZGEDMD',-INFO)
-             RETURN
-          END IF
-          DO i = 1, N
-!           Now, apply the same scaling to the columns of Y.
-            IF ( RWORK(i) >  ZERO ) THEN
-                CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 )  ! BLAS CALL
-!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC
-            ELSE IF ( RWORK(i) < ZERO ) THEN
-                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
-                     ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
-            ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i ))  &
-                                            /= ZERO ) THEN
-!               X(:,i) is zero vector. For consistency,
-!               Y(:,i) should also be zero. If Y(:,i) is not
-!               zero, then the data might be inconsistent or
-!               corrupted. If JOBS == 'C', Y(:,i) is set to
-!               zero and a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-                IF ( LSAME(JOBS,'C')) &
-                CALL ZDSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
-            END IF
-          END DO
-      END IF
-  !
-      IF ( SCCOLY ) THEN
-          ! The columns of Y will be normalized.
-          ! To prevent overflows, the column norms of Y are
-          ! carefully computed using ZLASSQ.
-          DO i = 1, N
-            !RWORK(i) = DZNRM2( M, Y(1,i), 1 )
-            SSUM  = ONE
-            SCALE = ZERO
-            CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM )
-            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
-                K    =  0
-                INFO = -10
-                CALL XERBLA('ZGEDMD',-INFO)
-            END IF
-            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
-               ROOTSC = SQRT(SSUM)
-               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
-!                 Norm of Y(:,i) overflows. First, Y(:,i)
-!                 is scaled by
-!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
-!                 Next, the norm of Y(:,i) is stored without
-!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
-!                 the minus sign indicating the 1/M factor.
-!                 Scaling is performed without overflow, and
-!                 underflow may occur in the smallest entries
-!                 of Y(:,i). The relative backward and forward
-!                 errors are small in the ell_2 norm.
-                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
-                               M, 1, Y(1,i), LDY, INFO2 )
-                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
-               ELSE
-!                 Y(:,i) will be scaled to unit 2-norm
-                  RWORK(i) =   SCALE * ROOTSC
-                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
-                               Y(1,i), LDY, INFO2 )             ! LAPACK CALL
-!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
-               END IF
-            ELSE
-               RWORK(i) = ZERO
-            END IF
-         END DO
-         DO i = 1, N
-!           Now, apply the same scaling to the columns of X.
-            IF ( RWORK(i) >  ZERO ) THEN
-                CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL
-!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
-            ELSE IF ( RWORK(i) < ZERO ) THEN
-                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
-                     ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
-            ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i ))  &
-                                           /= ZERO ) THEN
-!               Y(:,i) is zero vector.  If X(:,i) is not
-!               zero, then a warning flag is raised.
-!               The computation continues but the
-!               situation will be reported in the output.
-                BADXY = .TRUE.
-            END IF
-         END DO
-       END IF
-!
-!     <2> SVD of the data snapshot matrix X.
-!     =====================================
-!     The left singular vectors are stored in the array X.
-!     The right singular vectors are in the array W.
-!     The array W will later on contain the eigenvectors
-!     of a Rayleigh quotient.
-      NUMRNK = N
-      SELECT CASE ( WHTSVD )
-         CASE (1)
-             CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
-                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
-             T_OR_N = 'C'
-         CASE (2)
-            CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
-                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
-            T_OR_N = 'C'
-         CASE (3)
-              CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
-                   X, LDX, RWORK, Z, LDZ, W, LDW, &
-                   NUMRNK, IWORK, LIWORK, ZWORK,     &
-                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
-              CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
-         T_OR_N = 'C'
-         CASE (4)
-              CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
-                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
-                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
-              CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
-              T_OR_N = 'N'
-              XSCL1 = RWORK(N+1)
-              XSCL2 = RWORK(N+2)
-              IF ( XSCL1 /=  XSCL2 ) THEN
-                 ! This is an exceptional situation. If the
-                 ! data matrices are not scaled and the
-                 ! largest singular value of X overflows.
-                 ! In that case ZGEJSV can return the SVD
-                 ! in scaled form. The scaling factor can be used
-                 ! to rescale the data (X and Y).
-                 CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
-              END IF
-      END SELECT
-!
-      IF ( INFO1 > 0 ) THEN
-         ! The SVD selected subroutine did not converge.
-         ! Return with an error code.
-         INFO = 2
-         RETURN
-      END IF
-!
-      IF ( RWORK(1) == ZERO ) THEN
-          ! The largest computed singular value of (scaled)
-          ! X is zero. Return error code -8
-          ! (the 8th input variable had an illegal value).
-          K = 0
-          INFO = -8
-          CALL XERBLA('ZGEDMD',-INFO)
-          RETURN
-      END IF
-!
-      !<3> Determine the numerical rank of the data
-      !    snapshots matrix X. This depends on the
-      !    parameters NRNK and TOL.
-
-      SELECT CASE ( NRNK )
-          CASE ( -1 )
-               K = 1
-               DO i = 2, NUMRNK
-                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
-                      ( RWORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE ( -2 )
-               K = 1
-               DO i = 1, NUMRNK-1
-                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
-                      ( RWORK(i) <= SMALL ) ) EXIT
-                 K = K + 1
-               END DO
-          CASE DEFAULT
-               K = 1
-               DO i = 2, NRNK
-                  IF ( RWORK(i) <= SMALL ) EXIT
-                  K = K + 1
-               END DO
-          END SELECT
-      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
-      !   snapshot data in the input matrix X.
-
-      !<4> Compute the Rayleigh quotient S = U^H * A * U.
-      !    Depending on the requested outputs, the computation
-      !    is organized to compute additional auxiliary
-      !    matrices (for the residuals and refinements).
-      !
-      !    In all formulas below, we need V_k*Sigma_k^(-1)
-      !    where either V_k is in W(1:N,1:K), or V_k^H is in
-      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
-      IF ( LSAME(T_OR_N, 'N') ) THEN
-          DO i = 1, K
-           CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 )    ! BLAS CALL
-           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
-          END DO
-      ELSE
-          ! This non-unit stride access is due to the fact
-          ! that ZGESVD, ZGESVDQ and ZGESDD return the
-          ! adjoint matrix of the right singular vectors.
-          !DO i = 1, K
-          ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL
-          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
-          !END DO
-          DO i = 1, K
-              RWORK(N+i) = ONE/RWORK(i)
-          END DO
-          DO j = 1, N
-             DO i = 1, K
-                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
-             END DO
-          END DO
-      END IF
-!
-      IF ( WNTREF ) THEN
-         !
-         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
-         ! for computing the refined Ritz vectors
-         ! (optionally, outside ZGEDMD).
-          CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
-                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
-          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! INTRINSIC, for T_OR_N=='N'
-          !
-          ! At this point Z contains
-          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
-          ! this is needed for computing the residuals.
-          ! This matrix is  returned in the array B and
-          ! it can be used to compute refined Ritz vectors.
-          CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
-          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
-
-          CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
-                      LDZ, ZZERO, S, LDS )                        ! BLAS CALL
-          ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
-          ! At this point S = U^H * A * U is the Rayleigh quotient.
-      ELSE
-        ! A * U(:,1:K) is not explicitly needed and the
-        ! computation is organized differently. The Rayleigh
-        ! quotient is computed more efficiently.
-        CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
-                   ZZERO, Z, LDZ )                                         ! BLAS CALL
-        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) )  ! INTRINSIC
-        !
-        CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
-                    LDW, ZZERO, S, LDS )                         ! BLAS CALL
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='T'
-        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! INTRINSIC, for T_OR_N=='N'
-        ! At this point S = U^H * A * U is the Rayleigh quotient.
-        ! If the residuals are requested, save scaled V_k into Z.
-        ! Recall that V_k or V_k^H is stored in W.
-        IF ( WNTRES .OR. WNTEX ) THEN
-          IF ( LSAME(T_OR_N, 'N') ) THEN
-              CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ )
-          ELSE
-              CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ )
-          END IF
-        END IF
-      END IF
-!
-      !<5> Compute the Ritz values and (if requested) the
-      !   right eigenvectors of the Rayleigh quotient.
-      !
-      CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, &
-            W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
-      !
-      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
-      ! quotient.  See the description of Z.
-      ! Also, see the description of ZGEEV.
-      IF ( INFO1 > 0 ) THEN
-         ! ZGEEV failed to compute the eigenvalues and
-         ! eigenvectors of the Rayleigh quotient.
-         INFO = 3
-         RETURN
-      END IF
-!
-      ! <6> Compute the eigenvectors (if requested) and,
-      ! the residuals (if requested).
-      !
-      IF ( WNTVEC .OR. WNTEX ) THEN
-      IF ( WNTRES ) THEN
-          IF ( WNTREF ) THEN
-            ! Here, if the refinement is requested, we have
-            ! A*U(:,1:K) already computed and stored in Z.
-            ! For the residuals, need Y = A * U(:,1;K) * W.
-            CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
-                       LDW, ZZERO, Y, LDY )               ! BLAS CALL
-            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC
-            ! This frees Z; Y contains A * U(:,1:K) * W.
-          ELSE
-            ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
-            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
-                       W, LDW, ZZERO, S, LDS )
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-                       LDS, ZZERO, Z, LDZ )
-            ! Save a copy of Z into Y and free Z for holding
-            ! the Ritz vectors.
-            CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY )
-            IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
-          END IF
-      ELSE IF ( WNTEX ) THEN
-          ! Compute S = V_k * Sigma_k^(-1) * W, where
-            ! V_k * Sigma_k^(-1) is stored in Z
-            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
-                       W, LDW, ZZERO, S, LDS )
-            ! Then, compute Z = Y * S =
-            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
-            ! = A * U(:,1:K) * W(1:K,1:K)
-            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-                       LDS, ZZERO, B, LDB )
-            ! The above call replaces the following two calls
-            ! that were used in the developing-testing phase.
-            ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
-            !           LDS, ZZERO, Z, LDZ)
-            ! Save a copy of Z into B and free Z for holding
-            ! the Ritz vectors.
-            ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
-      END IF
-!
-      ! Compute the Ritz vectors
-      IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
-                   ZZERO, Z, LDZ )                          ! BLAS CALL
-      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
-!
-      IF ( WNTRES ) THEN
-         DO i = 1, K
-            CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
-            ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
-            RES(i) = DZNRM2( M, Y(1,i), 1 )                       ! BLAS CALL
-         END DO
-      END IF
-      END IF
-!
-      IF ( WHTSVD == 4 ) THEN
-          RWORK(N+1) = XSCL1
-          RWORK(N+2) = XSCL2
-      END IF
-!
-!     Successful exit.
-      IF ( .NOT. BADXY ) THEN
-         INFO = 0
-      ELSE
-         ! A warning on possible data inconsistency.
-         ! This should be a rare event.
-         INFO = 4
-      END IF
-!............................................................
-      RETURN
-!     ......
-      END SUBROUTINE ZGEDMD
-
+!> \brief \b ZGEDMD computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+!                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+!                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+!                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+!                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+!......
+!      USE                   iso_fortran_env
+!      IMPLICIT NONE
+!      INTEGER, PARAMETER :: WP = real64
+!
+!......
+!      Scalar arguments
+!      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+!      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+!                                 NRNK, LDZ, LDB, LDW,  LDS, &
+!                                 LIWORK, LRWORK, LZWORK
+!      INTEGER,       INTENT(OUT)  :: K, INFO
+!      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!      Array arguments
+!      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+!      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+!                                         W(LDW,*), S(LDS,*)
+!      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+!      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+!      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+!      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+!      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    ZGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, ZGEDMD computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, ZGEDMD returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office
+!>    \endverbatim
+!......................................................................
+!>    \par Distribution Statement A:
+!     ==============================
+!>    \verbatim
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!............................................................
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product X(:,1:K)*W, where X
+!>           contains a POD basis (leading left singular vectors
+!>           of the data matrix X) and W contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of K, X, W, Z.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will be
+!>           computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    \verbatim
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: ZGESVD (the QR SVD algorithm)
+!>    2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M>= 0
+!>    The state space dimension (the row dimension of X, Y).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshot pairs
+!>    (the number of columns of X and Y).
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    X (input/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry, X contains the data snapshot matrix X. It is
+!>    assumed that the column norms of X are in the range of
+!>    the normalized floating point numbers.
+!>    < On exit, the leading K columns of X contain a POD basis,
+!>    i.e. the leading K left singular vectors of the input
+!>    data matrix X, U(:,1:K). All N columns of X contain all
+!>    left singular vectors of the input matrix X.
+!>    See the descriptions of K, Z and W.
+!.....
+!>    LDX (input) INTEGER, LDX >= M
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry, Y contains the data snapshot matrix Y
+!>    < On exit,
+!>    If JOBR == 'R', the leading K columns of Y  contain
+!>    the residual vectors for the computed Ritz pairs.
+!>    See the description of RES.
+!>    If JOBR == 'N', Y contains the original input data,
+!>                    scaled according to the value of JOBS.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= M
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the descriptions of TOL and  K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the POD basis for the data snapshot
+!>    matrix X and the number of the computed Ritz pairs.
+!>    The value of K is determined according to the rule set
+!>    by the parameters NRNK and TOL.
+!>    See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] EIGS
+!>    \verbatim
+!>    EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!>    The leading K (K<=N) entries of EIGS contain
+!>    the computed eigenvalues (Ritz values).
+!>    See the descriptions of K, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!>    If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!>    the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!>    is an eigenvector corresponding to EIGS(i). The columns
+!>    of W(1:k,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient.
+!>    See the descriptions of EIGS, X and W.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) N-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs,
+!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!>    See the description of EIGS and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) COMPLEX(KIND=WP)  M-by-N array.
+!>    IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:M,1:K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    If JOBF =='N', then B is not referenced.
+!>    See the descriptions of X, W, K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= M
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] W
+!>    \verbatim
+!>    W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!>    On exit, W(1:K,1:K) contains the K computed
+!>    eigenvectors of the matrix Rayleigh quotient.
+!>    The Ritz vectors (returned in Z) are the
+!>    product of X (containing a POD basis for the input
+!>    matrix X) and W. See the descriptions of K, S, X and Z.
+!>    W is also used as a workspace to temporarily store the
+!>    right singular vectors of X.
+!>    \endverbatim
+!.....
+!>    \param[in] LDW
+!>    \verbatim
+!>    LDW (input) INTEGER, LDW >= N
+!>    The leading dimension of the array W.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by ZGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] ZWORK
+!>    \verbatim
+!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!>    ZWORK is used as complex workspace in the complex SVD, as
+!>    specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing
+!>    the eigenvalues of a Rayleigh quotient.
+!>    If the call to ZGEDMD is only workspace query, then
+!>    ZWORK(1) contains the minimal complex workspace length and
+!>    ZWORK(2) is the optimal complex workspace length.
+!>    Hence, the length of work is at least 2.
+!>    See the description of LZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LZWORK
+!>    \verbatim
+!>    LZWORK (input) INTEGER
+!>    The minimal length of the workspace vector ZWORK.
+!>    LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV),
+!>    where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal
+!>    LZWORK_SVD is calculated as follows
+!>    If WHTSVD == 1 :: ZGESVD ::
+!>       LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!>    If WHTSVD == 2 :: ZGESDD ::
+!>       LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!>    If WHTSVD == 3 :: ZGESVDQ ::
+!>       LZWORK_SVD = obtainable by a query
+!>    If WHTSVD == 4 :: ZGEJSV ::
+!>       LZWORK_SVD = obtainable by a query
+!>    If on entry LZWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths and returns them in
+!>    LZWORK(1) and LZWORK(2), respectively.
+!>    \endverbatim
+!.....
+!>    \param[out] RWORK
+!>    \verbatim
+!>    RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!>    On exit, RWORK(1:N) contains the singular values of
+!>    X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!>    If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!>    scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!>    and Y to avoid overflow in the SVD of X.
+!>    This may be of interest if the scaling option is off
+!>    and as many as possible smallest eigenvalues are
+!>    desired to the highest feasible accuracy.
+!>    If the call to ZGEDMD is only workspace query, then
+!>    RWORK(1) contains the minimal workspace length.
+!>    See the description of LRWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LRWORK
+!>    \verbatim
+!>    LRWORK (input) INTEGER
+!>    The minimal length of the workspace vector RWORK.
+!>    LRWORK is calculated as follows:
+!>    LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where
+!>    LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
+!>    for the SVD subroutine determined by the input parameter
+!>    WHTSVD.
+!>    If WHTSVD == 1 :: ZGESVD ::
+!>       LRWORK_SVD = 5*MIN(M,N)
+!>    If WHTSVD == 2 :: ZGESDD ::
+!>       LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!>       2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+!>    If WHTSVD == 3 :: ZGESVDQ ::
+!>       LRWORK_SVD = obtainable by a query
+!>    If WHTSVD == 4 :: ZGEJSV ::
+!>       LRWORK_SVD = obtainable by a query
+!>    If on entry LRWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    real workspace length and returns it in RWORK(1).
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for  ZWORK, RWORK and
+!>    IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP),    PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP),    PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,    &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j,  IMINWR,  INFO1, INFO2,  &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                        LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                        MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                        OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY,  &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2
+      EXTERNAL      ZLANGE, DLAMCH, DZNRM2, IZAMAX
+      INTEGER                               IZAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZAXPY,  ZGEMM,  ZDSCAL
+      EXTERNAL      ZGEEV,  ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, &
+                    ZLACPY, ZLASCL, ZLASSQ, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,&
+                     W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD,INT( ZWORK(1) ))
+                ! Possible bug in ZGESDD optimal workspace size.
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK  = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the ZGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 )
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using ZLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DZNRM2( M, X(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )      ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+             ! All columns of X are zero. Return error code -8.
+             ! (the 8th input variable had an illegal value)
+             K = 0
+             INFO = -8
+             CALL XERBLA('ZGEDMD',-INFO)
+             RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL ZDSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using ZLASSQ.
+          DO i = 1, N
+            !RWORK(i) = DZNRM2( M, Y(1,i), 1 )
+            SSUM  = ONE
+            SCALE = ZERO
+            CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )             ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation. If the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case ZGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('ZGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that ZGESVD, ZGESVDQ and ZGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside ZGEDMD).
+          CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                         ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, &
+            W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of ZGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! ZGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1;K) * W.
+            CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                       LDW, ZZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+            !           LDS, ZZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the Ritz vectors
+      IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                   ZZERO, Z, LDZ )                          ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         DO i = 1, K
+            CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+            ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+            RES(i) = DZNRM2( M, Y(1,i), 1 )                       ! BLAS CALL
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE ZGEDMD
diff --git a/SRC/zgedmdq.f90 b/SRC/zgedmdq.f90
index 213caf8550..606c5666e7 100644
--- a/SRC/zgedmdq.f90
+++ b/SRC/zgedmdq.f90
@@ -1,852 +1,851 @@
-!> \brief \b ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
-!
-!  =========== DOCUMENTATION ===========
-!
-!  Definition:
-!  ===========
-!
-!     SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-!                         LDY,   NRNK,  TOL,   K,  EIGS,         &
-!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-!                         S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
-!                         IWORK, LIWORK, INFO )
-!.....
-!     USE                   iso_fortran_env
-!     IMPLICIT NONE
-!     INTEGER, PARAMETER :: WP = real64
-!.....      
-!     Scalar arguments       
-!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-!                               JOBT, JOBF
-!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-!                               LDY, NRNK, LDZ, LDB, LDV,  &
-!                               LDS, LZWORK,  LWORK, LIWORK
-!     INTEGER,   INTENT(OUT) :: INFO,   K      
-!     REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!     Array arguments      
-!     COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
-!                                        Z(LDZ,*), B(LDB,*), &
-!                                        V(LDV,*), S(LDS,*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: RES(*)
-!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-!     INTEGER,       INTENT(OUT)   :: IWORK(*)
-!............................................................
-!>    \par Purpose:
-!     =============
-!>    \verbatim
-!>    ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
-!>    a pair of data snapshot matrices, using a QR factorization
-!>    based compression of the data. For the input matrices
-!>    X and Y such that Y = A*X with an unaccessible matrix
-!>    A, ZGEDMDQ computes a certain number of Ritz pairs of A using
-!>    the standard Rayleigh-Ritz extraction from a subspace of
-!>    range(X) that is determined using the leading left singular 
-!>    vectors of X. Optionally, ZGEDMDQ returns the residuals 
-!>    of the computed Ritz pairs, the information needed for
-!>    a refinement of the Ritz vectors, or the eigenvectors of
-!>    the Exact DMD.
-!>    For further details see the references listed
-!>    below. For more details of the implementation see [3].      
-!>    \endverbatim
-!............................................................
-!>    \par References:
-!     ================
-!>    \verbatim
-!>    [1] P. Schmid: Dynamic mode decomposition of numerical
-!>        and experimental data,
-!>        Journal of Fluid Mechanics 656, 5-28, 2010.
-!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
-!>        decompositions: analysis and enhancements,
-!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
-!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
-!>        Mode Decomposition I. Technical report. AIMDyn Inc.
-!>        and LAPACK Working Note 298.      
-!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
-!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
-!>        Theory and Applications, Journal of Computational
-!>        Dynamics 1(2), 391 -421, 2014.
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Developed and coded by Zlatko Drmac, Faculty of Science,
-!>    University of Zagreb;  drmac@math.hr
-!>    In cooperation with
-!>    AIMdyn Inc., Santa Barbara, CA.
-!>    and supported by
-!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
-!>    for Nonstationary Processes from Near-Term, Limited
-!>    Observational Data" Contract No: W31P4Q-21-C-0007
-!>    - DARPA PAI project "Physics-Informed Machine Learning
-!>    Methodologies" Contract No: HR0011-18-9-0033
-!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
-!>    Framework for Space-Time Analysis of Process Dynamics"
-!>    Contract No: HR0011-16-C-0116
-!>    Any opinions, findings and conclusions or recommendations 
-!>    expressed in this material are those of the author and 
-!>    do not necessarily reflect the views of the DARPA SBIR 
-!>    Program Office.      
-!>    \endverbatim
-!......................................................................
-!>    \par Developed and supported by:
-!     ================================
-!>    \verbatim
-!>    Distribution Statement A: 
-!>    Approved for Public Release, Distribution Unlimited.
-!>    Cleared by DARPA on September 29, 2022
-!>    \endverbatim
-!============================================================      
-!     Arguments
-!     =========
-!
-!>    \param[in] JOBS
-!>    \verbatim
-!>    JOBS (input) CHARACTER*1
-!>    Determines whether the initial data snapshots are scaled
-!>    by a diagonal matrix. The data snapshots are the columns
-!>    of F. The leading N-1 columns of F are denoted X and the
-!>    trailing N-1 columns are denoted Y. 
-!>    'S' :: The data snapshots matrices X and Y are multiplied
-!>           with a diagonal matrix D so that X*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)
-!>    'C' :: The snapshots are scaled as with the 'S' option.
-!>           If it is found that an i-th column of X is zero
-!>           vector and the corresponding i-th column of Y is
-!>           non-zero, then the i-th column of Y is set to
-!>           zero and a warning flag is raised.
-!>    'Y' :: The data snapshots matrices X and Y are multiplied
-!>           by a diagonal matrix D so that Y*D has unit
-!>           nonzero columns (in the Euclidean 2-norm)    
-!>    'N' :: No data scaling.   
-!>    \endverbatim
-!.....
-!>    \param[in] JOBZ
-!>    \verbatim
-!>    JOBZ (input) CHARACTER*1
-!>    Determines whether the eigenvectors (Koopman modes) will
-!>    be computed.
-!>    'V' :: The eigenvectors (Koopman modes) will be computed
-!>           and returned in the matrix Z.
-!>           See the description of Z.
-!>    'F' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Z*V, where Z
-!>           is orthonormal and V contains the eigenvectors
-!>           of the corresponding Rayleigh quotient.
-!>           See the descriptions of F, V, Z.
-!>    'Q' :: The eigenvectors (Koopman modes) will be returned
-!>           in factored form as the product Q*Z, where Z
-!>           contains the eigenvectors of the compression of the
-!>           underlying discretized operator onto the span of
-!>           the data snapshots. See the descriptions of F, V, Z.
-!>           Q is from the initial QR factorization.  
-!>    'N' :: The eigenvectors are not computed.  
-!>    \endverbatim
-!.....
-!>    \param[in] JOBR
-!>    \verbatim    
-!>    JOBR (input) CHARACTER*1 
-!>    Determines whether to compute the residuals.
-!>    'R' :: The residuals for the computed eigenpairs will
-!>           be computed and stored in the array RES.
-!>           See the description of RES.
-!>           For this option to be legal, JOBZ must be 'V'.
-!>    'N' :: The residuals are not computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBQ
-!>    \verbatim
-!>    JOBQ (input) CHARACTER*1 
-!>    Specifies whether to explicitly compute and return the
-!>    unitary matrix from the QR factorization.
-!>    'Q' :: The matrix Q of the QR factorization of the data
-!>           snapshot matrix is computed and stored in the
-!>           array F. See the description of F.       
-!>    'N' :: The matrix Q is not explicitly computed.
-!>    \endverbatim
-!.....
-!>    \param[in] JOBT
-!>    \verbatim
-!>    JOBT (input) CHARACTER*1 
-!>    Specifies whether to return the upper triangular factor
-!>    from the QR factorization.
-!>    'R' :: The matrix R of the QR factorization of the data 
-!>           snapshot matrix F is returned in the array Y.
-!>           See the description of Y and Further details.       
-!>    'N' :: The matrix R is not returned. 
-!>    \endverbatim
-!.....
-!>    \param[in] JOBF
-!>    \verbatim
-!>    JOBF (input) CHARACTER*1
-!>    Specifies whether to store information needed for post-
-!>    processing (e.g. computing refined Ritz vectors)
-!>    'R' :: The matrix needed for the refinement of the Ritz
-!>           vectors is computed and stored in the array B.
-!>           See the description of B.
-!>    'E' :: The unscaled eigenvectors of the Exact DMD are 
-!>           computed and returned in the array B. See the
-!>           description of B.
-!>    'N' :: No eigenvector refinement data is computed.   
-!>    To be useful on exit, this option needs JOBQ='Q'.    
-!>    \endverbatim
-!.....
-!>    \param[in] WHTSVD
-!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
-!>    Allows for a selection of the SVD algorithm from the
-!>    LAPACK library.
-!>    1 :: ZGESVD (the QR SVD algorithm)
-!>    2 :: ZGESDD (the Divide and Conquer algorithm; if enough
-!>         workspace available, this is the fastest option)
-!>    3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
-!>         are the most accurate options)
-!>    4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
-!>         are the most accurate options)
-!>    For the four methods above, a significant difference in
-!>    the accuracy of small singular values is possible if
-!>    the snapshots vary in norm so that X is severely
-!>    ill-conditioned. If small (smaller than EPS*||X||)
-!>    singular values are of interest and JOBS=='N',  then
-!>    the options (3, 4) give the most accurate results, where
-!>    the option 4 is slightly better and with stronger 
-!>    theoretical background.
-!>    If JOBS=='S', i.e. the columns of X will be normalized,
-!>    then all methods give nearly equally accurate results.
-!>    \endverbatim
-!.....
-!>    \param[in] M
-!>    \verbatim
-!>    M (input) INTEGER, M >= 0 
-!>    The state space dimension (the number of rows of F).
-!>    \endverbatim
-!.....
-!>    \param[in] N
-!>    \verbatim     
-!>    N (input) INTEGER, 0 <= N <= M
-!>    The number of data snapshots from a single trajectory,
-!>    taken at equidistant discrete times. This is the 
-!>    number of columns of F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] F
-!>    \verbatim
-!>    F (input/output) COMPLEX(KIND=WP) M-by-N array
-!>    > On entry,
-!>    the columns of F are the sequence of data snapshots 
-!>    from a single trajectory, taken at equidistant discrete
-!>    times. It is assumed that the column norms of F are 
-!>    in the range of the normalized floating point numbers. 
-!>    < On exit,
-!>    If JOBQ == 'Q', the array F contains the orthogonal 
-!>    matrix/factor of the QR factorization of the initial 
-!>    data snapshots matrix F. See the description of JOBQ. 
-!>    If JOBQ == 'N', the entries in F strictly below the main
-!>    diagonal contain, column-wise, the information on the 
-!>    Householder vectors, as returned by ZGEQRF. The 
-!>    remaining information to restore the orthogonal matrix
-!>    of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
-!>    See the description of ZWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LDF
-!>    \verbatim
-!>    LDF (input) INTEGER, LDF >= M 
-!>    The leading dimension of the array F.
-!>    \endverbatim
-!.....
-!>    \param[in,out] X
-!>    \verbatim
-!>    X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
-!>    X is used as workspace to hold representations of the
-!>    leading N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, the leading K columns of X contain the leading
-!>    K left singular vectors of the above described content
-!>    of X. To lift them to the space of the left singular
-!>    vectors U(:,1:K) of the input data, pre-multiply with the 
-!>    Q factor from the initial QR factorization. 
-!>    See the descriptions of F, K, V  and Z.
-!>    \endverbatim
-!.....
-!>    \param[in] LDX
-!>    \verbatim      
-!>    LDX (input) INTEGER, LDX >= N  
-!>    The leading dimension of the array X. 
-!>    \endverbatim
-!.....
-!>    \param[in,out] Y
-!>    \verbatim      
-!>    Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
-!>    Y is used as workspace to hold representations of the
-!>    trailing N-1 snapshots in the orthonormal basis computed
-!>    in the QR factorization of F.
-!>    On exit, 
-!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
-!>    triangular factor from the QR factorization of the data
-!>    snapshot matrix F.
-!>    \endverbatim
-!.....
-!>    \param[in] LDY
-!>    \verbatim      
-!>    LDY (input) INTEGER , LDY >= N
-!>    The leading dimension of the array Y.   
-!>    \endverbatim
-!.....
-!>    \param[in] NRNK
-!>    \verbatim
-!>    NRNK (input) INTEGER
-!>    Determines the mode how to compute the numerical rank,
-!>    i.e. how to truncate small singular values of the input
-!>    matrix X. On input, if
-!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(1)
-!>                 This option is recommended.  
-!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
-!>                 if sigma(i) <= TOL*sigma(i-1)
-!>                 This option is included for R&D purposes.
-!>                 It requires highly accurate SVD, which
-!>                 may not be feasible.      
-!>    The numerical rank can be enforced by using positive 
-!>    value of NRNK as follows: 
-!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
-!>    will be used. If the number of the computed nonzero
-!>    singular values is less than NRNK, then only those
-!>    nonzero values will be used and the actually used
-!>    dimension is less than NRNK. The actual number of
-!>    the nonzero singular values is returned in the variable
-!>    K. See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] TOL
-!>    \verbatim
-!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
-!>    The tolerance for truncating small singular values.
-!>    See the description of NRNK.  
-!>    \endverbatim
-!.....
-!>    \param[out] K
-!>    \verbatim
-!>    K (output) INTEGER,  0 <= K <= N 
-!>    The dimension of the SVD/POD basis for the leading N-1
-!>    data snapshots (columns of F) and the number of the 
-!>    computed Ritz pairs. The value of K is determined
-!>    according to the rule set by the parameters NRNK and 
-!>    TOL. See the descriptions of NRNK and TOL. 
-!>    \endverbatim
-!.....
-!>    \param[out] EIGS
-!>    \verbatim
-!>    EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
-!>    The leading K (K<=N-1) entries of EIGS contain
-!>    the computed eigenvalues (Ritz values).
-!>    See the descriptions of K, and Z.
-!>    \endverbatim
-!.....
-!>    \param[out] Z
-!>    \verbatim
-!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
-!>    If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
-!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
-!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
-!>    Z*V, where Z contains orthonormal matrix (the product of
-!>    Q from the initial QR factorization and the SVD/POD_basis
-!>    returned by ZGEDMD in X) and the second factor (the 
-!>    eigenvectors of the Rayleigh quotient) is in the array V, 
-!>    as returned by ZGEDMD. That is,  X(:,1:K)*V(:,i)
-!>    is an eigenvector corresponding to EIGS(i). The columns 
-!>    of V(1:K,1:K) are the computed eigenvectors of the 
-!>    K-by-K Rayleigh quotient.  
-!>    See the descriptions of EIGS, X and V.      
-!>    \endverbatim
-!.....
-!>    \param[in] LDZ
-!>    \verbatim
-!>    LDZ (input) INTEGER , LDZ >= M
-!>    The leading dimension of the array Z.
-!>    \endverbatim
-!.....
-!>    \param[out] RES
-!>    \verbatim
-!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
-!>    RES(1:K) contains the residuals for the K computed 
-!>    Ritz pairs, 
-!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
-!>    See the description of EIGS and Z.      
-!>    \endverbatim
-!.....
-!>    \param[out] B
-!>    \verbatim
-!>    B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
-!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
-!>    be used for computing the refined vectors; see further 
-!>    details in the provided references. 
-!>    If JOBF == 'E', B(1:N,1;K) contains 
-!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
-!>    Exact DMD, up to scaling by the inverse eigenvalues.   
-!>    In both cases, the content of B can be lifted to the 
-!>    original dimension of the input data by pre-multiplying
-!>    with the Q factor from the initial QR factorization.   
-!>    Here A denotes a compression of the underlying operator.      
-!>    See the descriptions of F and X.
-!>    If JOBF =='N', then B is not referenced.
-!>    \endverbatim
-!.....
-!>    \param[in] LDB
-!>    \verbatim
-!>    LDB (input) INTEGER, LDB >= MIN(M,N)
-!>    The leading dimension of the array B.
-!>    \endverbatim
-!.....
-!>    \param[out] V
-!>    \verbatim
-!>    V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
-!>    On exit, V(1:K,1:K) V contains the K eigenvectors of
-!>    the Rayleigh quotient. The Ritz vectors
-!>    (returned in Z) are the product of Q from the initial QR
-!>    factorization (see the description of F) X (see the 
-!>    description of X) and V.
-!>    \endverbatim
-!.....
-!>    \param[in] LDV
-!>    \verbatim
-!>    LDV (input) INTEGER, LDV >= N-1
-!>    The leading dimension of the array V.
-!>    \endverbatim
-!.....
-!>    \param[out] S
-!>    \verbatim
-!>    S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
-!>    The array S(1:K,1:K) is used for the matrix Rayleigh
-!>    quotient. This content is overwritten during
-!>    the eigenvalue decomposition by ZGEEV.
-!>    See the description of K.
-!>    \endverbatim
-!.....
-!>    \param[in] LDS
-!>    \verbatim
-!>    LDS (input) INTEGER, LDS >= N-1        
-!>    The leading dimension of the array S.
-!>    \endverbatim
-!.....
-!>    \param[out] LZWORK
-!>    \verbatim
-!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
-!>    On exit, 
-!>    ZWORK(1:MIN(M,N)) contains the scalar factors of the 
-!>    elementary reflectors as returned by ZGEQRF of the 
-!>    M-by-N input matrix F.   
-!>    If the call to ZGEDMDQ is only workspace query, then
-!>    ZWORK(1) contains the minimal complex workspace length and
-!>    ZWORK(2) is the optimal complex workspace length. 
-!>    Hence, the length of work is at least 2.
-!>    See the description of LZWORK.      
-!>    \endverbatim
-!.....
-!>    \param[in] LZWORK
-!>    \verbatim
-!>    LZWORK (input) INTEGER
-!>    The minimal length of the  workspace vector ZWORK.
-!>    LZWORK is calculated as follows:
-!>    Let MLWQR  = N (minimal workspace for ZGEQRF[M,N])
-!>        MLWDMD = minimal workspace for ZGEDMD (see the
-!>                 description of LWORK in ZGEDMD)
-!>        MLWMQR = N (minimal workspace for 
-!>                   ZUNMQR['L','N',M,N,N])
-!>        MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
-!>        MINMN  = MIN(M,N)      
-!>    Then
-!>    LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
-!>    is further updated as follows:
-!>       if   JOBZ == 'V' or JOBZ == 'F' THEN 
-!>            LZWORK = MAX(LZWORK, MINMN+MLWMQR)
-!>       if   JOBQ == 'Q' THEN
-!>            LZWORK = MAX(ZLWORK, MINMN+MLWGQR)      
-!>    \endverbatim
-!.....
-!>    \param[out] WORK
-!>    \verbatim
-!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
-!>    On exit,
-!>    WORK(1:N-1) contains the singular values of 
-!>    the input submatrix F(1:M,1:N-1).
-!>    If the call to ZGEDMDQ is only workspace query, then
-!>    WORK(1) contains the minimal workspace length and
-!>    WORK(2) is the optimal workspace length. hence, the
-!>    length of work is at least 2.
-!>    See the description of LWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LWORK
-!>    \verbatim
-!>    LWORK (input) INTEGER
-!>    The minimal length of the  workspace vector WORK.
-!>    LWORK is the same as in ZGEDMD, because in ZGEDMDQ
-!>    only ZGEDMD requires real workspace for snapshots
-!>    of dimensions MIN(M,N)-by-(N-1). 
-!>    If on entry LWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace length for WORK.          
-!>    \endverbatim
-!.....
-!>    \param[out] IWORK
-!>    \verbatim
-!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
-!>    Workspace that is required only if WHTSVD equals
-!>    2 , 3 or 4. (See the description of WHTSVD).
-!>    If on entry LWORK =-1 or LIWORK=-1, then the
-!>    minimal length of IWORK is computed and returned in
-!>    IWORK(1). See the description of LIWORK.
-!>    \endverbatim
-!.....
-!>    \param[in] LIWORK
-!>    \verbatim
-!>    LIWORK (input) INTEGER
-!>    The minimal length of the workspace vector IWORK.
-!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
-!>    Let M1=MIN(M,N), N1=N-1. Then
-!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
-!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
-!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
-!>    If on entry LIWORK = -1, then a workspace query is
-!>    assumed and the procedure only computes the minimal
-!>    and the optimal workspace lengths for both WORK and
-!>    IWORK. See the descriptions of WORK and IWORK.
-!>    \endverbatim
-!.....
-!>    \param[out] INFO
-!>    \verbatim
-!>    INFO (output) INTEGER
-!>    -i < 0 :: On entry, the i-th argument had an
-!>              illegal value
-!>       = 0 :: Successful return.
-!>       = 1 :: Void input. Quick exit (M=0 or N=0).
-!>       = 2 :: The SVD computation of X did not converge.
-!>              Suggestion: Check the input data and/or
-!>              repeat with different WHTSVD.
-!>       = 3 :: The computation of the eigenvalues did not
-!>              converge.
-!>       = 4 :: If data scaling was requested on input and
-!>              the procedure found inconsistency in the data
-!>              such that for some column index i,
-!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
-!>              to zero if JOBS=='C'. The computation proceeds
-!>              with original or modified data and warning
-!>              flag is set with INFO=4.  
-!>    \endverbatim
-!
-!  Authors:
-!  ========
-!
-!> \author Zlatko Drmac
-!
-!> \ingroup gedmd
-!
-!.............................................................
-!.............................................................
-SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
-                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
-                    LDY,   NRNK,  TOL,   K,  EIGS,         &
-                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
-                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
-                    IWORK, LIWORK, INFO )
-!
-!  -- LAPACK driver routine                                           --
-!
-!  -- LAPACK is a software package provided by University of          --
-!  -- Tennessee, University of California Berkeley, University of     --
-!  -- Colorado Denver and NAG Ltd..                                   --
-!
-!.....
-      USE                   iso_fortran_env
-      IMPLICIT NONE
-      INTEGER, PARAMETER :: WP = real64
-!
-!     Scalar arguments       
-!     ~~~~~~~~~~~~~~~~
-      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
-                                JOBT, JOBF
-      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
-                                LDY, NRNK, LDZ, LDB, LDV,  &
-                                LDS, LZWORK,  LWORK, LIWORK
-      INTEGER,   INTENT(OUT) :: INFO,   K      
-      REAL(KIND=WP), INTENT(IN)    ::   TOL     
-!
-!     Array arguments      
-!     ~~~~~~~~~~~~~~~
-      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
-                                         Z(LDZ,*), B(LDB,*), &
-                                         V(LDV,*), S(LDS,*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
-      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
-      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
-      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
-      INTEGER,       INTENT(OUT)   :: IWORK(*)
-!
-!     Parameters
-!     ~~~~~~~~~~      
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
-!      
-!     Local scalars      
-!     ~~~~~~~~~~~~~
-      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
-                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
-                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
-                           OLWORK, OLWQR
-      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
-                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
-                           WNTVCQ, WNTREF, WNTEX
-      CHARACTER(LEN=1)  :: JOBVL
-!      
-!     External functions (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~
-      LOGICAL       LSAME
-      EXTERNAL      LSAME 
-!
-!     External subroutines (BLAS and LAPACK)
-!     ~~~~~~~~~~~~~~~~~~~~
-      EXTERNAL      ZGEDMD, ZGEQRF, ZLACPY, ZLASET, ZUNGQR, & 
-                    ZUNMQR, XERBLA
-!
-!     Intrinsic functions
-!     ~~~~~~~~~~~~~~~~~~~
-      INTRINSIC      MAX, MIN, INT         
-!..........................................................  
-!
-!     Test the input arguments    
-      WNTRES = LSAME(JOBR,'R')
-      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
-      SCCOLY = LSAME(JOBS,'Y')
-      WNTVEC = LSAME(JOBZ,'V')
-      WNTVCF = LSAME(JOBZ,'F')
-      WNTVCQ = LSAME(JOBZ,'Q') 
-      WNTREF = LSAME(JOBF,'R') 
-      WNTEX  = LSAME(JOBF,'E')
-      WANTQ  = LSAME(JOBQ,'Q')
-      WNTTRF = LSAME(JOBT,'R')     
-      MINMN  = MIN(M,N)
-      INFO = 0 
-      LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) )
-!       
-      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
-                                  LSAME(JOBS,'N')) )  THEN 
-          INFO = -1
-      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
-                              .OR. LSAME(JOBZ,'N')) ) THEN
-          INFO = -2
-      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
-          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
-          INFO = -3
-      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
-           INFO = -4                 
-      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
-          INFO = -5
-       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
-                LSAME(JOBF,'N') ) )                     THEN
-          INFO = -6    
-      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
-                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
-          INFO = -7
-      ELSE IF ( M < 0 ) THEN
-          INFO = -8
-      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
-          INFO = -9
-      ELSE IF ( LDF < M ) THEN
-          INFO = -11
-      ELSE IF ( LDX < MINMN ) THEN
-          INFO = -13
-      ELSE IF ( LDY < MINMN ) THEN
-          INFO = -15
-      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
-                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
-          INFO = -16
-      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
-          INFO = -17
-      ELSE IF ( LDZ < M ) THEN
-          INFO = -21
-      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
-          INFO = -24
-      ELSE IF ( LDV < N-1 ) THEN
-          INFO = -26
-      ELSE IF ( LDS < N-1 ) THEN
-          INFO = -28
-      END IF
-!      
-      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
-          JOBVL = 'V'
-      ELSE
-          JOBVL = 'N'
-      END IF     
-      IF ( INFO == 0 ) THEN  
-          ! Compute the minimal and the optimal workspace
-          ! requirements. Simulate running the code and 
-          ! determine minimal and optimal sizes of the 
-          ! workspace at any moment of the run.         
-         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
-             ! All output except K is void. INFO=1 signals
-             ! the void input. In case of a workspace query,
-             ! the minimal workspace lengths are returned.
-            IF ( LQUERY ) THEN  
-               IWORK(1) = 1
-               ZWORK(1) = 2
-               ZWORK(2) = 2
-               WORK(1)  = 2
-               WORK(2)  = 2
-            ELSE                
-               K = 0
-            END IF             
-            INFO = 1  
-            RETURN
-         END IF   
-         
-         MLRWRK = 2
-         MLWORK = 2
-         OLWORK = 2 
-         IMINWR = 1
-         MLWQR  = MAX(1,N)  ! Minimal workspace length for ZGEQRF.
-         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
-         
-         IF ( LQUERY ) THEN 
-             CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
-                          INFO1 )
-             OLWQR  = INT(ZWORK(1))
-             OLWORK = MAX(OLWORK,MINMN + OLWQR)         
-         END IF
-         CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
-                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
-                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
-                      S, LDS, ZWORK, -1, WORK, -1, IWORK,&
-                      -1, INFO1 )
-         MLWDMD = INT(ZWORK(1))
-         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
-         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
-         IMINWR = MAX(IMINWR, IWORK(1))
-         IF ( LQUERY ) THEN 
-             OLWDMD = INT(ZWORK(2))
-             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
-         END IF
-         IF ( WNTVEC .OR. WNTVCF ) THEN
-            MLWMQR = MAX(1,N) 
-            MLWORK = MAX(MLWORK,MINMN+MLWMQR)
-            IF ( LQUERY ) THEN
-               CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
-                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
-               OLWMQR = INT(ZWORK(1))
-               OLWORK = MAX(OLWORK,MINMN+OLWMQR)
-            END IF
-         END IF  
-         IF ( WANTQ ) THEN
-            MLWGQR = MAX(1,N)
-            MLWORK = MAX(MLWORK,MINMN+MLWGQR)
-            IF ( LQUERY ) THEN 
-                CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
-                             ZWORK, -1, INFO1 )               
-                OLWGQR = INT(ZWORK(1))
-                OLWORK = MAX(OLWORK,MINMN+OLWGQR)
-            END IF            
-         END IF         
-         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
-         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
-         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
-      END IF  
-      IF( INFO /= 0 ) THEN
-         CALL XERBLA( 'ZGEDMDQ', -INFO )
-         RETURN
-      ELSE IF ( LQUERY ) THEN
-!     Return minimal and optimal workspace sizes
-          IWORK(1) = IMINWR
-          ZWORK(1) = MLWORK
-          ZWORK(2) = OLWORK
-          WORK(1)  = MLRWRK
-          WORK(2)  = MLRWRK
-          RETURN
-      END IF   
-!.....	  
-!     Initial QR factorization that is used to represent the
-!     snapshots as elements of lower dimensional subspace.
-!     For large scale computation with M >> N, at this place 
-!     one can use an out of core QRF.
-!   
-      CALL ZGEQRF( M, N, F, LDF, ZWORK,                & 
-                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-!      
-!     Define X and Y as the snapshots representations in the
-!     orthogonal basis computed in the QR factorization.
-!     X corresponds to the leading N-1 and Y to the trailing
-!     N-1 snapshots.
-      CALL ZLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
-      CALL ZLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
-      CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
-      IF ( M >= 3 ) THEN
-          CALL ZLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
-                       Y(3,1), LDY )  
-      END IF
-!
-!     Compute the DMD of the projected snapshot pairs (X,Y)   
-      CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
-                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
-                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
-                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & 
-                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
-      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
-          ! Return with error code. See ZGEDMD for details.
-          INFO = INFO1
-          RETURN
-      ELSE
-          INFO = INFO1
-      END IF    
-!      
-!     The Ritz vectors (Koopman modes) can be explicitly 
-!     formed or returned in factored form.
-      IF ( WNTVEC ) THEN
-        ! Compute the eigenvectors explicitly.  
-        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, &
-                                     ZZERO, Z(MINMN+1,1), LDZ )
-        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
-             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-      ELSE IF ( WNTVCF ) THEN   
-        !   Return the Ritz vectors (eigenvectors) in factored
-        !   form Z*V, where Z contains orthonormal matrix (the
-        !   product of Q from the initial QR factorization and 
-        !   the SVD/POD_basis returned by ZGEDMD in X) and the 
-        !   second factor (the eigenvectors of the Rayleigh 
-        !   quotient) is in the array V, as returned by ZGEDMD.
-        CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ )
-        IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, & 
-                                 Z(N+1,1), LDZ )
-        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
-                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
-      END IF
-!     
-!     Some optional output variables:
-!
-!     The upper triangular factor R in the initial QR 
-!     factorization is optionally returned in the array Y.
-!     This is useful if this call to ZGEDMDQ is to be 
-!     followed by a streaming DMD that is implemented in a 
-!     QR compressed form.
-      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
-         CALL ZLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
-         CALL ZLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
-      END IF    
-!
-!     The orthonormal/unitary factor Q in the initial QR 
-!     factorization is optionally returned in the array F. 
-!     Same as with the triangular factor above, this is 
-!     useful in a streaming DMD.
-      IF ( WANTQ ) THEN                   ! Q overwrites F 
-         CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
-                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
-      END IF
-!      
-      RETURN
-!      
-      END SUBROUTINE ZGEDMDQ
-    
+!> \brief \b ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for a pair of data snapshot matrices.
+!
+!  =========== DOCUMENTATION ===========
+!
+!  Definition:
+!  ===========
+!
+!     SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+!                         WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+!                         LDY,   NRNK,  TOL,   K,  EIGS,         &
+!                         Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+!                         S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+!                         IWORK, LIWORK, INFO )
+!.....
+!     USE                   iso_fortran_env
+!     IMPLICIT NONE
+!     INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+!     CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+!                               JOBT, JOBF
+!     INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+!                               LDY, NRNK, LDZ, LDB, LDV,  &
+!                               LDS, LZWORK,  LWORK, LIWORK
+!     INTEGER,   INTENT(OUT) :: INFO,   K
+!     REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+!     COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+!                                        Z(LDZ,*), B(LDB,*), &
+!                                        V(LDV,*), S(LDS,*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+!     COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+!     REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+!     INTEGER,       INTENT(OUT)   :: IWORK(*)
+!............................................................
+!>    \par Purpose:
+!     =============
+!>    \verbatim
+!>    ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!>    a pair of data snapshot matrices, using a QR factorization
+!>    based compression of the data. For the input matrices
+!>    X and Y such that Y = A*X with an unaccessible matrix
+!>    A, ZGEDMDQ computes a certain number of Ritz pairs of A using
+!>    the standard Rayleigh-Ritz extraction from a subspace of
+!>    range(X) that is determined using the leading left singular
+!>    vectors of X. Optionally, ZGEDMDQ returns the residuals
+!>    of the computed Ritz pairs, the information needed for
+!>    a refinement of the Ritz vectors, or the eigenvectors of
+!>    the Exact DMD.
+!>    For further details see the references listed
+!>    below. For more details of the implementation see [3].
+!>    \endverbatim
+!............................................................
+!>    \par References:
+!     ================
+!>    \verbatim
+!>    [1] P. Schmid: Dynamic mode decomposition of numerical
+!>        and experimental data,
+!>        Journal of Fluid Mechanics 656, 5-28, 2010.
+!>    [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!>        decompositions: analysis and enhancements,
+!>        SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!>    [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!>        Mode Decomposition I. Technical report. AIMDyn Inc.
+!>        and LAPACK Working Note 298.
+!>    [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!>        Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!>        Theory and Applications, Journal of Computational
+!>        Dynamics 1(2), 391 -421, 2014.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Developed and coded by Zlatko Drmac, Faculty of Science,
+!>    University of Zagreb;  drmac@math.hr
+!>    In cooperation with
+!>    AIMdyn Inc., Santa Barbara, CA.
+!>    and supported by
+!>    - DARPA SBIR project "Koopman Operator-Based Forecasting
+!>    for Nonstationary Processes from Near-Term, Limited
+!>    Observational Data" Contract No: W31P4Q-21-C-0007
+!>    - DARPA PAI project "Physics-Informed Machine Learning
+!>    Methodologies" Contract No: HR0011-18-9-0033
+!>    - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!>    Framework for Space-Time Analysis of Process Dynamics"
+!>    Contract No: HR0011-16-C-0116
+!>    Any opinions, findings and conclusions or recommendations
+!>    expressed in this material are those of the author and
+!>    do not necessarily reflect the views of the DARPA SBIR
+!>    Program Office.
+!>    \endverbatim
+!......................................................................
+!>    \par Developed and supported by:
+!     ================================
+!>    \verbatim
+!>    Distribution Statement A:
+!>    Approved for Public Release, Distribution Unlimited.
+!>    Cleared by DARPA on September 29, 2022
+!>    \endverbatim
+!============================================================
+!     Arguments
+!     =========
+!
+!>    \param[in] JOBS
+!>    \verbatim
+!>    JOBS (input) CHARACTER*1
+!>    Determines whether the initial data snapshots are scaled
+!>    by a diagonal matrix. The data snapshots are the columns
+!>    of F. The leading N-1 columns of F are denoted X and the
+!>    trailing N-1 columns are denoted Y.
+!>    'S' :: The data snapshots matrices X and Y are multiplied
+!>           with a diagonal matrix D so that X*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'C' :: The snapshots are scaled as with the 'S' option.
+!>           If it is found that an i-th column of X is zero
+!>           vector and the corresponding i-th column of Y is
+!>           non-zero, then the i-th column of Y is set to
+!>           zero and a warning flag is raised.
+!>    'Y' :: The data snapshots matrices X and Y are multiplied
+!>           by a diagonal matrix D so that Y*D has unit
+!>           nonzero columns (in the Euclidean 2-norm)
+!>    'N' :: No data scaling.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBZ
+!>    \verbatim
+!>    JOBZ (input) CHARACTER*1
+!>    Determines whether the eigenvectors (Koopman modes) will
+!>    be computed.
+!>    'V' :: The eigenvectors (Koopman modes) will be computed
+!>           and returned in the matrix Z.
+!>           See the description of Z.
+!>    'F' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Z*V, where Z
+!>           is orthonormal and V contains the eigenvectors
+!>           of the corresponding Rayleigh quotient.
+!>           See the descriptions of F, V, Z.
+!>    'Q' :: The eigenvectors (Koopman modes) will be returned
+!>           in factored form as the product Q*Z, where Z
+!>           contains the eigenvectors of the compression of the
+!>           underlying discretized operator onto the span of
+!>           the data snapshots. See the descriptions of F, V, Z.
+!>           Q is from the initial QR factorization.
+!>    'N' :: The eigenvectors are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBR
+!>    \verbatim
+!>    JOBR (input) CHARACTER*1
+!>    Determines whether to compute the residuals.
+!>    'R' :: The residuals for the computed eigenpairs will
+!>           be computed and stored in the array RES.
+!>           See the description of RES.
+!>           For this option to be legal, JOBZ must be 'V'.
+!>    'N' :: The residuals are not computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBQ
+!>    \verbatim
+!>    JOBQ (input) CHARACTER*1
+!>    Specifies whether to explicitly compute and return the
+!>    unitary matrix from the QR factorization.
+!>    'Q' :: The matrix Q of the QR factorization of the data
+!>           snapshot matrix is computed and stored in the
+!>           array F. See the description of F.
+!>    'N' :: The matrix Q is not explicitly computed.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBT
+!>    \verbatim
+!>    JOBT (input) CHARACTER*1
+!>    Specifies whether to return the upper triangular factor
+!>    from the QR factorization.
+!>    'R' :: The matrix R of the QR factorization of the data
+!>           snapshot matrix F is returned in the array Y.
+!>           See the description of Y and Further details.
+!>    'N' :: The matrix R is not returned.
+!>    \endverbatim
+!.....
+!>    \param[in] JOBF
+!>    \verbatim
+!>    JOBF (input) CHARACTER*1
+!>    Specifies whether to store information needed for post-
+!>    processing (e.g. computing refined Ritz vectors)
+!>    'R' :: The matrix needed for the refinement of the Ritz
+!>           vectors is computed and stored in the array B.
+!>           See the description of B.
+!>    'E' :: The unscaled eigenvectors of the Exact DMD are
+!>           computed and returned in the array B. See the
+!>           description of B.
+!>    'N' :: No eigenvector refinement data is computed.
+!>    To be useful on exit, this option needs JOBQ='Q'.
+!>    \endverbatim
+!.....
+!>    \param[in] WHTSVD
+!>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
+!>    Allows for a selection of the SVD algorithm from the
+!>    LAPACK library.
+!>    1 :: ZGESVD (the QR SVD algorithm)
+!>    2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!>         workspace available, this is the fastest option)
+!>    3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!>         are the most accurate options)
+!>    4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!>         are the most accurate options)
+!>    For the four methods above, a significant difference in
+!>    the accuracy of small singular values is possible if
+!>    the snapshots vary in norm so that X is severely
+!>    ill-conditioned. If small (smaller than EPS*||X||)
+!>    singular values are of interest and JOBS=='N',  then
+!>    the options (3, 4) give the most accurate results, where
+!>    the option 4 is slightly better and with stronger
+!>    theoretical background.
+!>    If JOBS=='S', i.e. the columns of X will be normalized,
+!>    then all methods give nearly equally accurate results.
+!>    \endverbatim
+!.....
+!>    \param[in] M
+!>    \verbatim
+!>    M (input) INTEGER, M >= 0
+!>    The state space dimension (the number of rows of F).
+!>    \endverbatim
+!.....
+!>    \param[in] N
+!>    \verbatim
+!>    N (input) INTEGER, 0 <= N <= M
+!>    The number of data snapshots from a single trajectory,
+!>    taken at equidistant discrete times. This is the
+!>    number of columns of F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] F
+!>    \verbatim
+!>    F (input/output) COMPLEX(KIND=WP) M-by-N array
+!>    > On entry,
+!>    the columns of F are the sequence of data snapshots
+!>    from a single trajectory, taken at equidistant discrete
+!>    times. It is assumed that the column norms of F are
+!>    in the range of the normalized floating point numbers.
+!>    < On exit,
+!>    If JOBQ == 'Q', the array F contains the orthogonal
+!>    matrix/factor of the QR factorization of the initial
+!>    data snapshots matrix F. See the description of JOBQ.
+!>    If JOBQ == 'N', the entries in F strictly below the main
+!>    diagonal contain, column-wise, the information on the
+!>    Householder vectors, as returned by ZGEQRF. The
+!>    remaining information to restore the orthogonal matrix
+!>    of the initial QR factorization is stored in ZWORK(1:MIN(M,N)).
+!>    See the description of ZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LDF
+!>    \verbatim
+!>    LDF (input) INTEGER, LDF >= M
+!>    The leading dimension of the array F.
+!>    \endverbatim
+!.....
+!>    \param[in,out] X
+!>    \verbatim
+!>    X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!>    X is used as workspace to hold representations of the
+!>    leading N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit, the leading K columns of X contain the leading
+!>    K left singular vectors of the above described content
+!>    of X. To lift them to the space of the left singular
+!>    vectors U(:,1:K) of the input data, pre-multiply with the
+!>    Q factor from the initial QR factorization.
+!>    See the descriptions of F, K, V  and Z.
+!>    \endverbatim
+!.....
+!>    \param[in] LDX
+!>    \verbatim
+!>    LDX (input) INTEGER, LDX >= N
+!>    The leading dimension of the array X.
+!>    \endverbatim
+!.....
+!>    \param[in,out] Y
+!>    \verbatim
+!>    Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!>    Y is used as workspace to hold representations of the
+!>    trailing N-1 snapshots in the orthonormal basis computed
+!>    in the QR factorization of F.
+!>    On exit,
+!>    If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!>    triangular factor from the QR factorization of the data
+!>    snapshot matrix F.
+!>    \endverbatim
+!.....
+!>    \param[in] LDY
+!>    \verbatim
+!>    LDY (input) INTEGER , LDY >= N
+!>    The leading dimension of the array Y.
+!>    \endverbatim
+!.....
+!>    \param[in] NRNK
+!>    \verbatim
+!>    NRNK (input) INTEGER
+!>    Determines the mode how to compute the numerical rank,
+!>    i.e. how to truncate small singular values of the input
+!>    matrix X. On input, if
+!>    NRNK = -1 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(1)
+!>                 This option is recommended.
+!>    NRNK = -2 :: i-th singular value sigma(i) is truncated
+!>                 if sigma(i) <= TOL*sigma(i-1)
+!>                 This option is included for R&D purposes.
+!>                 It requires highly accurate SVD, which
+!>                 may not be feasible.
+!>    The numerical rank can be enforced by using positive
+!>    value of NRNK as follows:
+!>    0 < NRNK <= N-1 :: at most NRNK largest singular values
+!>    will be used. If the number of the computed nonzero
+!>    singular values is less than NRNK, then only those
+!>    nonzero values will be used and the actually used
+!>    dimension is less than NRNK. The actual number of
+!>    the nonzero singular values is returned in the variable
+!>    K. See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] TOL
+!>    \verbatim
+!>    TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!>    The tolerance for truncating small singular values.
+!>    See the description of NRNK.
+!>    \endverbatim
+!.....
+!>    \param[out] K
+!>    \verbatim
+!>    K (output) INTEGER,  0 <= K <= N
+!>    The dimension of the SVD/POD basis for the leading N-1
+!>    data snapshots (columns of F) and the number of the
+!>    computed Ritz pairs. The value of K is determined
+!>    according to the rule set by the parameters NRNK and
+!>    TOL. See the descriptions of NRNK and TOL.
+!>    \endverbatim
+!.....
+!>    \param[out] EIGS
+!>    \verbatim
+!>    EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!>    The leading K (K<=N-1) entries of EIGS contain
+!>    the computed eigenvalues (Ritz values).
+!>    See the descriptions of K, and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] Z
+!>    \verbatim
+!>    Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!>    If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!>    is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!>    If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!>    Z*V, where Z contains orthonormal matrix (the product of
+!>    Q from the initial QR factorization and the SVD/POD_basis
+!>    returned by ZGEDMD in X) and the second factor (the
+!>    eigenvectors of the Rayleigh quotient) is in the array V,
+!>    as returned by ZGEDMD. That is,  X(:,1:K)*V(:,i)
+!>    is an eigenvector corresponding to EIGS(i). The columns
+!>    of V(1:K,1:K) are the computed eigenvectors of the
+!>    K-by-K Rayleigh quotient.
+!>    See the descriptions of EIGS, X and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDZ
+!>    \verbatim
+!>    LDZ (input) INTEGER , LDZ >= M
+!>    The leading dimension of the array Z.
+!>    \endverbatim
+!.....
+!>    \param[out] RES
+!>    \verbatim
+!>    RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!>    RES(1:K) contains the residuals for the K computed
+!>    Ritz pairs,
+!>    RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!>    See the description of EIGS and Z.
+!>    \endverbatim
+!.....
+!>    \param[out] B
+!>    \verbatim
+!>    B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!>    IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!>    be used for computing the refined vectors; see further
+!>    details in the provided references.
+!>    If JOBF == 'E', B(1:N,1;K) contains
+!>    A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!>    Exact DMD, up to scaling by the inverse eigenvalues.
+!>    In both cases, the content of B can be lifted to the
+!>    original dimension of the input data by pre-multiplying
+!>    with the Q factor from the initial QR factorization.
+!>    Here A denotes a compression of the underlying operator.
+!>    See the descriptions of F and X.
+!>    If JOBF =='N', then B is not referenced.
+!>    \endverbatim
+!.....
+!>    \param[in] LDB
+!>    \verbatim
+!>    LDB (input) INTEGER, LDB >= MIN(M,N)
+!>    The leading dimension of the array B.
+!>    \endverbatim
+!.....
+!>    \param[out] V
+!>    \verbatim
+!>    V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!>    On exit, V(1:K,1:K) V contains the K eigenvectors of
+!>    the Rayleigh quotient. The Ritz vectors
+!>    (returned in Z) are the product of Q from the initial QR
+!>    factorization (see the description of F) X (see the
+!>    description of X) and V.
+!>    \endverbatim
+!.....
+!>    \param[in] LDV
+!>    \verbatim
+!>    LDV (input) INTEGER, LDV >= N-1
+!>    The leading dimension of the array V.
+!>    \endverbatim
+!.....
+!>    \param[out] S
+!>    \verbatim
+!>    S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!>    The array S(1:K,1:K) is used for the matrix Rayleigh
+!>    quotient. This content is overwritten during
+!>    the eigenvalue decomposition by ZGEEV.
+!>    See the description of K.
+!>    \endverbatim
+!.....
+!>    \param[in] LDS
+!>    \verbatim
+!>    LDS (input) INTEGER, LDS >= N-1
+!>    The leading dimension of the array S.
+!>    \endverbatim
+!.....
+!>    \param[out] LZWORK
+!>    \verbatim
+!>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    ZWORK(1:MIN(M,N)) contains the scalar factors of the
+!>    elementary reflectors as returned by ZGEQRF of the
+!>    M-by-N input matrix F.
+!>    If the call to ZGEDMDQ is only workspace query, then
+!>    ZWORK(1) contains the minimal complex workspace length and
+!>    ZWORK(2) is the optimal complex workspace length.
+!>    Hence, the length of work is at least 2.
+!>    See the description of LZWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LZWORK
+!>    \verbatim
+!>    LZWORK (input) INTEGER
+!>    The minimal length of the  workspace vector ZWORK.
+!>    LZWORK is calculated as follows:
+!>    Let MLWQR  = N (minimal workspace for ZGEQRF[M,N])
+!>        MLWDMD = minimal workspace for ZGEDMD (see the
+!>                 description of LWORK in ZGEDMD)
+!>        MLWMQR = N (minimal workspace for
+!>                   ZUNMQR['L','N',M,N,N])
+!>        MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!>        MINMN  = MIN(M,N)
+!>    Then
+!>    LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!>    is further updated as follows:
+!>       if   JOBZ == 'V' or JOBZ == 'F' THEN
+!>            LZWORK = MAX(LZWORK, MINMN+MLWMQR)
+!>       if   JOBQ == 'Q' THEN
+!>            LZWORK = MAX(ZLWORK, MINMN+MLWGQR)
+!>    \endverbatim
+!.....
+!>    \param[out] WORK
+!>    \verbatim
+!>    WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!>    On exit,
+!>    WORK(1:N-1) contains the singular values of
+!>    the input submatrix F(1:M,1:N-1).
+!>    If the call to ZGEDMDQ is only workspace query, then
+!>    WORK(1) contains the minimal workspace length and
+!>    WORK(2) is the optimal workspace length. hence, the
+!>    length of work is at least 2.
+!>    See the description of LWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LWORK
+!>    \verbatim
+!>    LWORK (input) INTEGER
+!>    The minimal length of the  workspace vector WORK.
+!>    LWORK is the same as in ZGEDMD, because in ZGEDMDQ
+!>    only ZGEDMD requires real workspace for snapshots
+!>    of dimensions MIN(M,N)-by-(N-1).
+!>    If on entry LWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace length for WORK.
+!>    \endverbatim
+!.....
+!>    \param[out] IWORK
+!>    \verbatim
+!>    IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!>    Workspace that is required only if WHTSVD equals
+!>    2 , 3 or 4. (See the description of WHTSVD).
+!>    If on entry LWORK =-1 or LIWORK=-1, then the
+!>    minimal length of IWORK is computed and returned in
+!>    IWORK(1). See the description of LIWORK.
+!>    \endverbatim
+!.....
+!>    \param[in] LIWORK
+!>    \verbatim
+!>    LIWORK (input) INTEGER
+!>    The minimal length of the workspace vector IWORK.
+!>    If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!>    Let M1=MIN(M,N), N1=N-1. Then
+!>    If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!>    If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!>    If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!>    If on entry LIWORK = -1, then a workspace query is
+!>    assumed and the procedure only computes the minimal
+!>    and the optimal workspace lengths for both WORK and
+!>    IWORK. See the descriptions of WORK and IWORK.
+!>    \endverbatim
+!.....
+!>    \param[out] INFO
+!>    \verbatim
+!>    INFO (output) INTEGER
+!>    -i < 0 :: On entry, the i-th argument had an
+!>              illegal value
+!>       = 0 :: Successful return.
+!>       = 1 :: Void input. Quick exit (M=0 or N=0).
+!>       = 2 :: The SVD computation of X did not converge.
+!>              Suggestion: Check the input data and/or
+!>              repeat with different WHTSVD.
+!>       = 3 :: The computation of the eigenvalues did not
+!>              converge.
+!>       = 4 :: If data scaling was requested on input and
+!>              the procedure found inconsistency in the data
+!>              such that for some column index i,
+!>              X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!>              to zero if JOBS=='C'. The computation proceeds
+!>              with original or modified data and warning
+!>              flag is set with INFO=4.
+!>    \endverbatim
+!
+!  Authors:
+!  ========
+!
+!> \author Zlatko Drmac
+!
+!> \ingroup gedmd
+!
+!.............................................................
+!.............................................................
+SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+!
+!  -- LAPACK driver routine                                           --
+!
+!  -- LAPACK is a software package provided by University of          --
+!  -- Tennessee, University of California Berkeley, University of     --
+!  -- Colorado Denver and NAG Ltd..                                   --
+!
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!
+!     Scalar arguments
+!     ~~~~~~~~~~~~~~~~
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!
+!     Array arguments
+!     ~~~~~~~~~~~~~~~
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  &
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZGEDMD, ZGEQRF, ZLACPY, ZLASET, ZUNGQR, &
+                    ZUNMQR, XERBLA
+!
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+!..........................................................
+!
+!     Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+           INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+               ZWORK(1) = 2
+               ZWORK(2) = 2
+               WORK(1)  = 2
+               WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for ZGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR)
+
+         IF ( LQUERY ) THEN
+             CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)
+         END IF
+         CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    &
+                      S, LDS, ZWORK, -1, WORK, -1, IWORK,&
+                      -1, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK,MINMN+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,MINMN+OLWGQR)
+            END IF
+         END IF
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >> N, at this place
+!     one can use an out of core QRF.
+!
+      CALL ZGEQRF( M, N, F, LDF, ZWORK,                &
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL ZLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL ZLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL ZLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, &
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See ZGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and
+        !   the SVD/POD_basis returned by ZGEDMD in X) and the
+        !   second factor (the eigenvectors of the Rayleigh
+        !   quotient) is in the array V, as returned by ZGEDMD.
+        CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, &
+                                 Z(N+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to ZGEDMDQ is to be
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL ZLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL ZLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF
+!
+!     The orthonormal/unitary factor Q in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F
+         CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE ZGEDMDQ
diff --git a/TESTING/EIG/cchkdmd.f90 b/TESTING/EIG/cchkdmd.f90
index a9c181da9b..aa90046ff7 100644
--- a/TESTING/EIG/cchkdmd.f90
+++ b/TESTING/EIG/cchkdmd.f90
@@ -1,721 +1,721 @@
-!   This is a test program for checking the implementations of
-!   the implementations of the following subroutines
-!
-!   CGEDMD,  for computation of the
-!            Dynamic Mode Decomposition (DMD)
-!   CGEDMDQ, for computation of a
-!            QR factorization based compressed DMD
-!
-!   Developed and supported by:
-!   ===========================
-!   Developed and coded by Zlatko Drmac, Faculty of Science,
-!   University of Zagreb;  drmac@math.hr
-!   In cooperation with
-!   AIMdyn Inc., Santa Barbara, CA.
-!   ========================================================
-!   How to run the code (compiler, link info)
-!   ========================================================
-!   Compile as FORTRAN 90 (or later) and link with BLAS and
-!   LAPACK libraries.
-!   NOTE: The code is developed and tested on top of the
-!   Intel MKL library (versions 2022.0.3 and 2022.2.0),
-!   using the Intel Fortran compiler.
-!
-!   For developers of the C++ implementation
-!   ========================================================
-!   See the LAPACK++ and Template Numerical Toolkit (TNT)
-!
-!   Note on a development of the GPU HP implementation
-!   ========================================================
-!   Work in progress. See CUDA, MAGMA, SLATE.
-!   NOTE: The four SVD subroutines used in this code are
-!   included as a part of R&D and for the completeness.
-!   This was also an opportunity to test those SVD codes.
-!   If the scaling option is used all four are essentially
-!   equally good. For implementations on HP platforms,
-!   one can use whichever SVD is available.
-!............................................................
-
-!............................................................
-!............................................................
-!
-      PROGRAM DMD_TEST
-
-      use iso_fortran_env
-      IMPLICIT NONE
-      integer, parameter :: WP = real32
-!............................................................
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-
-      COMPLEX(KIND=WP), PARAMETER ::  CONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP )
-!............................................................
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
-                     RES1, RESEX, SINGVX, SINGVQX, WORK
-      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
-      REAL(KIND=WP) :: WDUMMY(2)
-      INTEGER       :: IDUMMY(4), ISEED(4)
-      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
-                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
-                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_XW, &
-                       TMP_EX
-!............................................................
-      COMPLEX(KIND=WP) :: CMAX
-      INTEGER :: LCWORK
-      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  A, AC,  &
-                                 AU, F, F0, F1, S, W,  &
-                                 X, X0, Y, Y0, Y1, Z, Z1
-      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  CDA, CDR, &
-                                       CDL, CEIGS, CEIGSA, CWORK
-      COMPLEX(KIND=WP) ::  CDUMMY(22), CDUM2X2(2,2)
-!............................................................
-      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
-                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK
-      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
-                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
-                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
-                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD
-      INTEGER :: iNRNK, iWHTSVD,  K_traj, LWMINOPT
-      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
-                   SCALE, RESIDS, WANTQ, WANTR
-      LOGICAL :: TEST_QRDMD
-
-!..... external subroutines (BLAS and LAPACK)
-      EXTERNAL CAXPY, CGEEV, CGEMM, CGEMV, CLASCL
-!.....external subroutines DMD package
-!     subroutines under test
-      EXTERNAL CGEDMD, CGEDMDQ
-!..... external functions (BLAS and LAPACK)
-      EXTERNAL         SCNRM2, SLAMCH
-      REAL(KIND=WP) :: SCNRM2, SLAMCH
-      EXTERNAL         CLANGE
-      REAL(KIND=WP) :: CLANGE
-      EXTERNAL ICAMAX
-      INTEGER  ICAMAX
-      EXTERNAL LSAME
-      LOGICAL  LSAME
-
-      INTRINSIC ABS, INT, MIN, MAX, SIGN
-!............................................................
-
-
-      WRITE(*,*) 'COMPLEX CODE TESTING'
-
-      ! The test is always in pairs : ( CGEDMD and CGEDMDQ)
-      ! because the test includes comparing the results (in pairs).
-!.....................................................................................
-      ! This code by default performs tests on CGEDMDQ
-      ! Since the QR factorizations based algorithm is designed for
-      ! single trajectory data, only single trajectory tests will
-      ! be performed with xGEDMDQ.
-
-      WANTQ = 'Q'
-      WANTR = 'R'
-!.................................................................................
-
-      EPS = SLAMCH( 'P' )  ! machine precision WP
-
-      ! Global counters of failures of some particular tests
-      NFAIL      = 0
-      NFAIL_REZ  = 0
-      NFAIL_REZQ = 0
-      NFAIL_Z_XV = 0
-      NFAIL_F_QR = 0
-      NFAIL_AU   = 0
-      NFAIL_SVDIFF = 0
-      NFAIL_TOTAL  = 0
-      NFAILQ_TOTAL = 0
-
-      DO LLOOP = 1, 4
-
-      WRITE(*,*) 'L Loop Index = ', LLOOP
-
-      ! Set the dimensions of the problem ...
-      READ(*,*) M
-      WRITE(*,*) 'M = ', M
-      ! ... and the number of snapshots.
-      READ(*,*) N
-      WRITE(*,*) 'N = ', N
-
-      ! Test the dimensions
-      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
-          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
-          STOP
-      END IF
-!.............
-      ! The seed inside the LLOOP so that each pass can be reproduced easily.
-      ISEED(1) = 4
-      ISEED(2) = 3
-      ISEED(3) = 2
-      ISEED(4) = 1
-
-      LDA  = M
-      LDF  = M
-      LDX  = M
-      LDY  = M
-      LDW  = N
-      LDZ  = M
-      LDAU = M
-      LDS  = N
-
-      TMP_XW  = ZERO
-      TMP_AU   = ZERO
-      TMP_REZ  = ZERO
-      TMP_REZQ = ZERO
-      SVDIFF   = ZERO
-      TMP_EX   = ZERO
-
-      ALLOCATE( A(LDA,M) )
-      ALLOCATE( AC(LDA,M) )
-      ALLOCATE( F(LDF,N+1) )
-      ALLOCATE( F0(LDF,N+1) )
-      ALLOCATE( F1(LDF,N+1) )
-      ALLOCATE( X(LDX,N) )
-      ALLOCATE( X0(LDX,N) )
-      ALLOCATE( Y(LDY,N+1) )
-      ALLOCATE( Y0(LDY,N+1) )
-      ALLOCATE( Y1(LDY,N+1) )
-      ALLOCATE( AU(LDAU,N) )
-      ALLOCATE( W(LDW,N) )
-      ALLOCATE( S(LDS,N) )
-      ALLOCATE( Z(LDZ,N) )
-      ALLOCATE( Z1(LDZ,N) )
-      ALLOCATE( RES(N) )
-      ALLOCATE( RES1(N) )
-      ALLOCATE( RESEX(N) )
-      ALLOCATE( CEIGS(N) )
-      ALLOCATE( SINGVX(N) )
-      ALLOCATE( SINGVQX(N) )
-
-      TOL  = 10*M*EPS
-      TOL2 = 10*M*N*EPS
-
-!.............
-
-      DO K_traj = 1, 2
-      !  Number of intial conditions in the simulation/trajectories (1 or 2)
-
-      COND   = 1.0D4
-      CMAX   = (1.0D1,1.0D1)
-      RSIGN  = 'F'
-      GRADE  = 'N'
-      MODEL  = 6
-      CONDL  = 1.0D1
-      MODER  = 6
-      CONDR  = 1.0D1
-      PIVTNG = 'N'
-      ! Loop over all parameter MODE values for CLATMR (+-1,..,+-6)
-
-      DO MODE = 1, 6
-
-      ALLOCATE( IWORK(2*M) )
-      ALLOCATE( CDA(M) )
-      ALLOCATE( CDL(M) )
-      ALLOCATE( CDR(M) )
-
-      CALL CLATMR( M, M, 'N', ISEED, 'N', CDA, MODE, COND, &
-                   CMAX, RSIGN, GRADE, CDL, MODEL,  CONDL, &
-                   CDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
-                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
-      DEALLOCATE( CDR )
-      DEALLOCATE( CDL )
-      DEALLOCATE( CDA )
-      DEALLOCATE( IWORK )
-
-      LCWORK = MAX(1,2*M)
-      ALLOCATE( CEIGSA(M) )
-      ALLOCATE( CWORK(LCWORK) )
-      ALLOCATE( WORK(2*M) )
-      AC(1:M,1:M) = A(1:M,1:M)
-      CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, &
-                  CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL
-      DEALLOCATE(WORK)
-      DEALLOCATE(CWORK)
-
-      TMP = ABS(CEIGSA(ICAMAX(M, CEIGSA, 1))) ! The spectral radius of A
-      ! Scale the matrix A to have unit spectral radius.
-      CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, &
-                   A, LDA, INFO )
-      CALL CLASCL( 'G',0, 0, TMP, ONE, M, 1, &
-                   CEIGSA, M, INFO )
-      ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY )
-
-      IF ( K_traj == 2 ) THEN
-          ! generate data as two trajectories
-          ! with two inital conditions
-          CALL CLARNV(2, ISEED, M, F(1,1) )
-          DO i = 1, N/2
-             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
-                  CZERO, F(1,i+1), 1 )
-          END DO
-          X0(1:M,1:N/2) = F(1:M,1:N/2)
-          Y0(1:M,1:N/2) = F(1:M,2:N/2+1)
-
-          CALL CLARNV(2, ISEED, M, F(1,1) )
-          DO i = 1, N-N/2
-             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
-                  CZERO, F(1,i+1), 1 )
-          END DO
-          X0(1:M,N/2+1:N) = F(1:M,1:N-N/2)
-          Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1)
-      ELSE
-          CALL CLARNV(2, ISEED, M, F(1,1) )
-          DO i = 1, N
-             CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1,  &
-                  CZERO, F(1,i+1), 1 )
-          END DO
-          F0(1:M,1:N+1) = F(1:M,1:N+1)
-          X0(1:M,1:N) = F0(1:M,1:N)
-          Y0(1:M,1:N) = F0(1:M,2:N+1)
-      END IF
-
-      DEALLOCATE( CEIGSA )
-!........................................................................
-
-      DO iJOBZ = 1, 4
-
-          SELECT CASE ( iJOBZ )
-          CASE(1)
-              JOBZ   = 'V'
-              RESIDS = 'R'
-          CASE(2)
-              JOBZ   = 'V'
-              RESIDS = 'N'
-          CASE(3)
-              JOBZ   = 'F'
-              RESIDS = 'N'
-          CASE(4)
-              JOBZ   = 'N'
-              RESIDS = 'N'
-          END SELECT
-
-      DO iJOBREF = 1, 3
-
-          SELECT CASE ( iJOBREF )
-          CASE(1)
-              JOBREF = 'R'
-          CASE(2)
-              JOBREF = 'E'
-          CASE(3)
-              JOBREF = 'N'
-          END SELECT
-
-      DO iSCALE = 1, 4
-
-          SELECT CASE ( iSCALE )
-          CASE(1)
-              SCALE = 'S'
-          CASE(2)
-              SCALE = 'C'
-          CASE(3)
-              SCALE = 'Y'
-          CASE(4)
-              SCALE = 'N'
-          END SELECT
-
-      DO iNRNK = -1, -2, -1
-          NRNK   = iNRNK
-
-      DO iWHTSVD = 1,  3
-         ! Check all four options to compute the POD basis
-         ! via the SVD.
-         WHTSVD   = iWHTSVD
-
-      DO LWMINOPT = 1, 2
-         ! Workspace query for the minimal (1) and for the optimal
-         ! (2) workspace lengths determined by workspace query.
-
-      ! CGEDMD is always tested and its results are also used for
-      ! comparisons with CGEDMDQ.
-
-      X(1:M,1:N) = X0(1:M,1:N)
-      Y(1:M,1:N) = Y0(1:M,1:N)
-
-      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
-                M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
-                K, CEIGS, Z, LDZ,  RES,  &
-                AU, LDAU, W,  LDW,   S, LDS,        &
-                CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO )
-
-      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
-                       .OR. ( INFO < 0 ) ) THEN
-        WRITE(*,*) 'Call to CGEDMD workspace query failed. &
-                   &Check the calling sequence and the code.'
-        WRITE(*,*) 'The error code is ', INFO
-        WRITE(*,*) 'The input parameters were ',      &
-        SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
-        M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
-        STOP
-      ELSE
-        !WRITE(*,*) '... done. Workspace length computed.'
-      END IF
-
-      LCWORK = INT(CDUMMY(LWMINOPT))
-      ALLOCATE(CWORK(LCWORK))
-      LIWORK = IDUMMY(1)
-      ALLOCATE(IWORK(LIWORK))
-      LWORK = INT(WDUMMY(1))
-      ALLOCATE(WORK(LWORK))
-
-      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
-                   M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
-                   K, CEIGS, Z, LDZ,  RES,  &
-                   AU, LDAU, W,  LDW,   S, LDS,        &
-                   CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO )
-      IF ( INFO /= 0 ) THEN
-           WRITE(*,*) 'Call to CGEDMD failed. &
-           &Check the calling sequence and the code.'
-           WRITE(*,*) 'The error code is ', INFO
-           WRITE(*,*) 'The input parameters were ',&
-           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-           M, N, LDX, LDY, NRNK, TOL
-           STOP
-      END IF
-      SINGVX(1:N) = WORK(1:N)
-
-      !...... CGEDMD check point
-      IF ( LSAME(JOBZ,'V')  ) THEN
-          ! Check that Z = X*W, on return from CGEDMD
-          ! This checks that the returned eigenvectors in Z are
-          ! the product of the SVD'POD basis returned in X
-          ! and the eigenvectors of the Rayleigh quotient
-          ! returned in W
-          CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, &
-                      CZERO, Z1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-             CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1)
-             TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) )
-          END DO
-          TMP_XW = MAX(TMP_XW, TMP )
-          IF ( TMP_XW <= TOL ) THEN
-              !WRITE(*,*) ' :) .... OK .........CGEDMD PASSED.'
-          ELSE
-              NFAIL_Z_XV = NFAIL_Z_XV + 1
-              WRITE(*,*) ':( .................CGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-      END IF
-      !...... CGEDMD check point
-
-      IF ( LSAME(JOBREF,'R') ) THEN
-           ! The matrix A*U is returned for computing refined Ritz vectors.
-           ! Check that A*U is computed correctly using the formula
-           ! A*U = Y * V * inv(SIGMA). This depends on the
-           ! accuracy in the computed singular values and vectors of X.
-           ! See the paper for an error analysis.
-           ! Note that the left singular vectors of the input matrix X
-           ! are returned in the array X.
-           CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, &
-                      CZERO, Z1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-             CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1)
-             TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * &
-                     SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_AU = MAX( TMP_AU, TMP )
-          IF ( TMP <= TOL2 ) THEN
-              !WRITE(*,*) ':) .... OK .........CGEDMD PASSED.'
-          ELSE
-              NFAIL_AU = NFAIL_AU + 1
-              WRITE(*,*) ':( .................CGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL2
-          END IF
-      ELSEIF ( LSAME(JOBREF,'E') ) THEN
-          ! The unscaled vectors of the Exact DMD are computed.
-          ! This option is included for the sake of completeness,
-          ! for users who prefer the Exact DMD vectors. The
-          ! returned vectors are in the real form, in the same way
-          ! as the Ritz vectors. Here we just save the vectors
-          ! and test them separately using a Matlab script.
-          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY )
-
-          DO i=1, K
-             CALL CAXPY( M, -CEIGS(i), AU(1,i), 1, Y1(1,i), 1 )
-             RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1)
-          END DO
-      END IF
-      !...... CGEDMD check point
-
-      IF ( LSAME(RESIDS, 'R') ) THEN
-          ! Compare the residuals returned by CGEDMD with the
-          ! explicitly computed residuals using the matrix A.
-          ! Compute explicitly Y1 = A*Z
-          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
-          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-          ! of the invariant subspaces that correspond to complex conjugate
-          ! pairs of eigencalues. (See the description of Z in CGEDMD,)
-
-          DO i=1, K
-                ! have a real eigenvalue with real eigenvector
-                CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
-                RES1(i) = SCNRM2( M, Y1(1,i), 1)
-          END DO
-          TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                    SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_REZ = MAX( TMP_REZ, TMP )
-          IF ( TMP <= TOL2 ) THEN
-              !WRITE(*,*) ':) .... OK ..........CGEDMD PASSED.'
-          ELSE
-              NFAIL_REZ = NFAIL_REZ + 1
-              WRITE(*,*) ':( ..................CGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-
-
-         IF ( LSAME(JOBREF,'E') ) THEN
-            TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
-          END DO
-          TMP_EX = MAX(TMP_EX,TMP)
-         END IF
-
-      END IF
-
-      DEALLOCATE(CWORK)
-      DEALLOCATE(WORK)
-      DEALLOCATE(IWORK)
-
-!.......................................................................................................
-
-      IF ( K_traj == 1 ) THEN
-
-          F(1:M,1:N+1) = F0(1:M,1:N+1)
-          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
-                    WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
-                    NRNK,  TOL, K, CEIGS, Z, LDZ, RES,  AU,  &
-                    LDAU, W, LDW, S, LDS, CDUMMY, -1,   &
-                    WDUMMY,  -1, IDUMMY, -1, INFO )
-
-          LCWORK = INT(CDUMMY(LWMINOPT))
-          ALLOCATE(CWORK(LCWORK))
-          LIWORK = IDUMMY(1)
-          ALLOCATE(IWORK(LIWORK))
-          LWORK = INT(WDUMMY(1))
-          ALLOCATE(WORK(LWORK))
-
-          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
-                        WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
-                        NRNK,  TOL, KQ, CEIGS, Z, LDZ, RES,  AU,  &
-                        LDAU, W, LDW, S, LDS, CWORK, LCWORK,   &
-                        WORK,  LWORK, IWORK, LIWORK, INFO )
-          IF ( INFO /= 0 ) THEN
-                 WRITE(*,*) 'Call to CGEDMDQ failed. &
-                 &Check the calling sequence and the code.'
-                 WRITE(*,*) 'The error code is ', INFO
-                 WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-                 STOP
-          END IF
-          SINGVQX(1:N) =WORK(1:N)
-
-          !..... ZGEDMDQ check point
-
-          TMP = ZERO
-          DO i = 1, MIN(K, KQ)
-             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
-                                   SINGVX(1) )
-          END DO
-          SVDIFF = MAX( SVDIFF, TMP )
-          IF ( TMP > TOL2 ) THEN
-               WRITE(*,*) 'FAILED! Something was wrong with the run.'
-             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
-          END IF
-          !..... CGEDMDQ check point
-
-          !..... CGEDMDQ check point
-          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
-             ! Check that the QR factors are computed and returned
-             ! as requested. The residual ||F-Q*R||_F / ||F||_F
-             ! is compared to M*N*EPS.
-             F1(1:M,1:N+1) = F0(1:M,1:N+1)
-             CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, &
-                         LDF, Y, LDY, CONE, F1, LDF )
-             TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / &
-                   CLANGE( 'F', M, N+1, F0,  LDF, WORK )
-             IF ( TMP_FQR <= TOL2 ) THEN
-                !WRITE(*,*) ':) CGEDMDQ ........ PASSED.'
-             ELSE
-                WRITE(*,*) ':( CGEDMDQ ........ FAILED.'
-                NFAIL_F_QR = NFAIL_F_QR + 1
-             END IF
-          END IF
-          !..... ZGEDMDQ checkpoint
-                 !..... ZGEDMDQ checkpoint
-          IF ( LSAME(RESIDS, 'R') ) THEN
-              ! Compare the residuals returned by ZGEDMDQ with the
-              ! explicitly computed residuals using the matrix A.
-              ! Compute explicitly Y1 = A*Z
-              CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
-              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-              ! of the invariant subspaces that correspond to complex conjugate
-              ! pairs of eigencalues. (See the description of Z in ZGEDMDQ)
-              DO i = 1, KQ
-                    ! have a real eigenvalue with real eigenvector
-                    CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
-                    ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
-                    RES1(i) = SCNRM2( M, Y1(1,i), 1)
-              END DO
-              TMP = ZERO
-              DO i = 1, KQ
-              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                  SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
-              END DO
-              TMP_REZQ = MAX( TMP_REZQ, TMP )
-              IF ( TMP <= TOL2 ) THEN
-                  !WRITE(*,*) '.... OK ........ CGEDMDQ PASSED.'
-              ELSE
-                  NFAIL_REZQ = NFAIL_REZQ + 1
-                  WRITE(*,*) '................ CGEDMDQ FAILED!', &
-                      'Check the code for implementation errors.'
-              END IF
-          END IF
-
-          DEALLOCATE(CWORK)
-          DEALLOCATE(WORK)
-          DEALLOCATE(IWORK)
-
-      END IF
-
-      END DO   ! LWMINOPT
-      !write(*,*) 'LWMINOPT loop completed'
-      END DO   ! iWHTSVD
-      !write(*,*) 'WHTSVD loop completed'
-      END DO   ! iNRNK  -2:-1
-      !write(*,*) 'NRNK loop completed'
-      END DO   ! iSCALE  1:4
-      !write(*,*) 'SCALE loop completed'
-      END DO
-      !write(*,*) 'JOBREF loop completed'
-      END DO   ! iJOBZ
-      !write(*,*) 'JOBZ loop completed'
-
-      END DO ! MODE -6:6
-      !write(*,*) 'MODE loop completed'
-      END DO ! 1 or 2 trajectories
-      !write(*,*) 'trajectories  loop completed'
-
-      DEALLOCATE( A )
-      DEALLOCATE( AC )
-      DEALLOCATE( Z )
-      DEALLOCATE( F )
-      DEALLOCATE( F0 )
-      DEALLOCATE( F1 )
-      DEALLOCATE( X )
-      DEALLOCATE( X0 )
-      DEALLOCATE( Y )
-      DEALLOCATE( Y0 )
-      DEALLOCATE( Y1 )
-      DEALLOCATE( AU )
-      DEALLOCATE( W )
-      DEALLOCATE( S )
-      DEALLOCATE( Z1 )
-      DEALLOCATE( RES )
-      DEALLOCATE( RES1 )
-      DEALLOCATE( RESEX )
-      DEALLOCATE( CEIGS )
-      DEALLOCATE( SINGVX )
-      DEALLOCATE( SINGVQX )
-
-      END DO ! LLOOP
-
-      WRITE(*,*)
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for CGEDMD :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-      IF ( NFAIL_Z_XV == 0 ) THEN
-          WRITE(*,*) '>>>> Z - U*V test PASSED.'
-      ELSE
-          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
-          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV
-      END IF
-
-      IF ( NFAIL_AU == 0 ) THEN
-          WRITE(*,*) '>>>> A*U test PASSED. '
-      ELSE
-          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
-          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
-      END IF
-
-
-      IF ( NFAIL_REZ == 0 ) THEN
-         WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
-        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
-      END IF
-      IF ( NFAIL_TOTAL == 0 ) THEN
-        WRITE(*,*) '>>>> CGEDMD :: ALL TESTS PASSED.'
-      ELSE
-        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
-        WRITE(*,*) '>>>>>>>>>>>>>> CGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      WRITE(*,*)
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for CGEDMDQ :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-
-      IF ( NFAIL_SVDIFF == 0 ) THEN
-        WRITE(*,*) '>>>> CGEDMD and CGEDMDQ computed singular &
-           &values test PASSED.'
-      ELSE
-        WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in &
-            &the singular values unacceptable ', &
-            NFAIL_SVDIFF, ' times. Test FAILED.'
-        WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
-      END IF
-      IF ( NFAIL_F_QR == 0 ) THEN
-        WRITE(*,*) '>>>> F - Q*R test PASSED.'
-      ELSE
-        WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
-        WRITE(*,*) 'The largest relative residual was ', TMP_FQR
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
-      END IF
-
-      IF ( NFAIL_REZQ == 0 ) THEN
-        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
-        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
-      END IF
-
-      IF ( NFAILQ_TOTAL == 0 ) THEN
-        WRITE(*,*) '>>>>>>> CGEDMDQ :: ALL TESTS PASSED.'
-      ELSE
-        WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
-        WRITE(*,*) '>>>>>>> CGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      WRITE(*,*)
-      WRITE(*,*) 'Test completed.'
-      STOP
-      END
+!   This is a test program for checking the implementations of
+!   the implementations of the following subroutines
+!
+!   CGEDMD,  for computation of the
+!            Dynamic Mode Decomposition (DMD)
+!   CGEDMDQ, for computation of a
+!            QR factorization based compressed DMD
+!
+!   Developed and supported by:
+!   ===========================
+!   Developed and coded by Zlatko Drmac, Faculty of Science,
+!   University of Zagreb;  drmac@math.hr
+!   In cooperation with
+!   AIMdyn Inc., Santa Barbara, CA.
+!   ========================================================
+!   How to run the code (compiler, link info)
+!   ========================================================
+!   Compile as FORTRAN 90 (or later) and link with BLAS and
+!   LAPACK libraries.
+!   NOTE: The code is developed and tested on top of the
+!   Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!   using the Intel Fortran compiler.
+!
+!   For developers of the C++ implementation
+!   ========================================================
+!   See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!   Note on a development of the GPU HP implementation
+!   ========================================================
+!   Work in progress. See CUDA, MAGMA, SLATE.
+!   NOTE: The four SVD subroutines used in this code are
+!   included as a part of R&D and for the completeness.
+!   This was also an opportunity to test those SVD codes.
+!   If the scaling option is used all four are essentially
+!   equally good. For implementations on HP platforms,
+!   one can use whichever SVD is available.
+!............................................................
+
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+
+      use iso_fortran_env
+      IMPLICIT NONE
+      integer, parameter :: WP = real32
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+      COMPLEX(KIND=WP), PARAMETER ::  CONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP )
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
+                     RES1, RESEX, SINGVX, SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: WDUMMY(2)
+      INTEGER       :: IDUMMY(4), ISEED(4)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_XW, &
+                       TMP_EX
+!............................................................
+      COMPLEX(KIND=WP) :: CMAX
+      INTEGER :: LCWORK
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  A, AC,  &
+                                 AU, F, F0, F1, S, W,  &
+                                 X, X0, Y, Y0, Y1, Z, Z1
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  CDA, CDR, &
+                                       CDL, CEIGS, CEIGSA, CWORK
+      COMPLEX(KIND=WP) ::  CDUMMY(22), CDUM2X2(2,2)
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
+                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD
+      INTEGER :: iNRNK, iWHTSVD,  K_traj, LWMINOPT
+      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                   SCALE, RESIDS, WANTQ, WANTR
+      LOGICAL :: TEST_QRDMD
+
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL CAXPY, CGEEV, CGEMM, CGEMV, CLASCL
+!.....external subroutines DMD package
+!     subroutines under test
+      EXTERNAL CGEDMD, CGEDMDQ
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         SCNRM2, SLAMCH
+      REAL(KIND=WP) :: SCNRM2, SLAMCH
+      EXTERNAL         CLANGE
+      REAL(KIND=WP) :: CLANGE
+      EXTERNAL ICAMAX
+      INTEGER  ICAMAX
+      EXTERNAL LSAME
+      LOGICAL  LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX, SIGN
+!............................................................
+
+
+      WRITE(*,*) 'COMPLEX CODE TESTING'
+
+      ! The test is always in pairs : ( CGEDMD and CGEDMDQ)
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      ! This code by default performs tests on CGEDMDQ
+      ! Since the QR factorizations based algorithm is designed for
+      ! single trajectory data, only single trajectory tests will
+      ! be performed with xGEDMDQ.
+
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = SLAMCH( 'P' )  ! machine precision WP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      READ(*,*) M
+      WRITE(*,*) 'M = ', M
+      ! ... and the number of snapshots.
+      READ(*,*) N
+      WRITE(*,*) 'N = ', N
+
+      ! Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = M
+      LDY  = M
+      LDW  = N
+      LDZ  = M
+      LDAU = M
+      LDS  = N
+
+      TMP_XW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F0(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(LDY,N+1) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( S(LDS,N) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N) )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( CEIGS(N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+
+      TOL  = 10*M*EPS
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_traj = 1, 2
+      !  Number of intial conditions in the simulation/trajectories (1 or 2)
+
+      COND   = 1.0D4
+      CMAX   = (1.0D1,1.0D1)
+      RSIGN  = 'F'
+      GRADE  = 'N'
+      MODEL  = 6
+      CONDL  = 1.0D1
+      MODER  = 6
+      CONDR  = 1.0D1
+      PIVTNG = 'N'
+      ! Loop over all parameter MODE values for CLATMR (+-1,..,+-6)
+
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE( CDA(M) )
+      ALLOCATE( CDL(M) )
+      ALLOCATE( CDR(M) )
+
+      CALL CLATMR( M, M, 'N', ISEED, 'N', CDA, MODE, COND, &
+                   CMAX, RSIGN, GRADE, CDL, MODEL,  CONDL, &
+                   CDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE( CDR )
+      DEALLOCATE( CDL )
+      DEALLOCATE( CDA )
+      DEALLOCATE( IWORK )
+
+      LCWORK = MAX(1,2*M)
+      ALLOCATE( CEIGSA(M) )
+      ALLOCATE( CWORK(LCWORK) )
+      ALLOCATE( WORK(2*M) )
+      AC(1:M,1:M) = A(1:M,1:M)
+      CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, &
+                  CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      DEALLOCATE(CWORK)
+
+      TMP = ABS(CEIGSA(ICAMAX(M, CEIGSA, 1))) ! The spectral radius of A
+      ! Scale the matrix A to have unit spectral radius.
+      CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, &
+                   A, LDA, INFO )
+      CALL CLASCL( 'G',0, 0, TMP, ONE, M, 1, &
+                   CEIGSA, M, INFO )
+      ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY )
+
+      IF ( K_traj == 2 ) THEN
+          ! generate data as two trajectories
+          ! with two inital conditions
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N/2
+             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          X0(1:M,1:N/2) = F(1:M,1:N/2)
+          Y0(1:M,1:N/2) = F(1:M,2:N/2+1)
+
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N-N/2
+             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          X0(1:M,N/2+1:N) = F(1:M,1:N-N/2)
+          Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1)
+      ELSE
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N
+             CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          F0(1:M,1:N+1) = F(1:M,1:N+1)
+          X0(1:M,1:N) = F0(1:M,1:N)
+          Y0(1:M,1:N) = F0(1:M,2:N+1)
+      END IF
+
+      DEALLOCATE( CEIGSA )
+!........................................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V'
+              RESIDS = 'R'
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F'
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R'
+          CASE(2)
+              JOBREF = 'E'
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S'
+          CASE(2)
+              SCALE = 'C'
+          CASE(3)
+              SCALE = 'Y'
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1,  3
+         ! Check all four options to compute the POD basis
+         ! via the SVD.
+         WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+         ! Workspace query for the minimal (1) and for the optimal
+         ! (2) workspace lengths determined by workspace query.
+
+      ! CGEDMD is always tested and its results are also used for
+      ! comparisons with CGEDMDQ.
+
+      X(1:M,1:N) = X0(1:M,1:N)
+      Y(1:M,1:N) = Y0(1:M,1:N)
+
+      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
+                K, CEIGS, Z, LDZ,  RES,  &
+                AU, LDAU, W,  LDW,   S, LDS,        &
+                CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO )
+
+      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
+                       .OR. ( INFO < 0 ) ) THEN
+        WRITE(*,*) 'Call to CGEDMD workspace query failed. &
+                   &Check the calling sequence and the code.'
+        WRITE(*,*) 'The error code is ', INFO
+        WRITE(*,*) 'The input parameters were ',      &
+        SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
+        M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
+        STOP
+      ELSE
+        !WRITE(*,*) '... done. Workspace length computed.'
+      END IF
+
+      LCWORK = INT(CDUMMY(LWMINOPT))
+      ALLOCATE(CWORK(LCWORK))
+      LIWORK = IDUMMY(1)
+      ALLOCATE(IWORK(LIWORK))
+      LWORK = INT(WDUMMY(1))
+      ALLOCATE(WORK(LWORK))
+
+      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                   M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
+                   K, CEIGS, Z, LDZ,  RES,  &
+                   AU, LDAU, W,  LDW,   S, LDS,        &
+                   CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO )
+      IF ( INFO /= 0 ) THEN
+           WRITE(*,*) 'Call to CGEDMD failed. &
+           &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',&
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+           M, N, LDX, LDY, NRNK, TOL
+           STOP
+      END IF
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... CGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from CGEDMD
+          ! This checks that the returned eigenvectors in Z are
+          ! the product of the SVD'POD basis returned in X
+          ! and the eigenvectors of the Rayleigh quotient
+          ! returned in W
+          CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, &
+                      CZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          TMP_XW = MAX(TMP_XW, TMP )
+          IF ( TMP_XW <= TOL ) THEN
+              !WRITE(*,*) ' :) .... OK .........CGEDMD PASSED.'
+          ELSE
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      END IF
+      !...... CGEDMD check point
+
+      IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, &
+                      CZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1)
+             TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * &
+                     SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK .........CGEDMD PASSED.'
+          ELSE
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL2
+          END IF
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+          ! The unscaled vectors of the Exact DMD are computed.
+          ! This option is included for the sake of completeness,
+          ! for users who prefer the Exact DMD vectors. The
+          ! returned vectors are in the real form, in the same way
+          ! as the Ritz vectors. Here we just save the vectors
+          ! and test them separately using a Matlab script.
+          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY )
+
+          DO i=1, K
+             CALL CAXPY( M, -CEIGS(i), AU(1,i), 1, Y1(1,i), 1 )
+             RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1)
+          END DO
+      END IF
+      !...... CGEDMD check point
+
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by CGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigencalues. (See the description of Z in CGEDMD,)
+
+          DO i=1, K
+                ! have a real eigenvalue with real eigenvector
+                CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
+                RES1(i) = SCNRM2( M, Y1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                    SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK ..........CGEDMD PASSED.'
+          ELSE
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+
+         IF ( LSAME(JOBREF,'E') ) THEN
+            TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+          END DO
+          TMP_EX = MAX(TMP_EX,TMP)
+         END IF
+
+      END IF
+
+      DEALLOCATE(CWORK)
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+
+!.......................................................................................................
+
+      IF ( K_traj == 1 ) THEN
+
+          F(1:M,1:N+1) = F0(1:M,1:N+1)
+          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
+                    NRNK,  TOL, K, CEIGS, Z, LDZ, RES,  AU,  &
+                    LDAU, W, LDW, S, LDS, CDUMMY, -1,   &
+                    WDUMMY,  -1, IDUMMY, -1, INFO )
+
+          LCWORK = INT(CDUMMY(LWMINOPT))
+          ALLOCATE(CWORK(LCWORK))
+          LIWORK = IDUMMY(1)
+          ALLOCATE(IWORK(LIWORK))
+          LWORK = INT(WDUMMY(1))
+          ALLOCATE(WORK(LWORK))
+
+          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                        WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
+                        NRNK,  TOL, KQ, CEIGS, Z, LDZ, RES,  AU,  &
+                        LDAU, W, LDW, S, LDS, CWORK, LCWORK,   &
+                        WORK,  LWORK, IWORK, LIWORK, INFO )
+          IF ( INFO /= 0 ) THEN
+                 WRITE(*,*) 'Call to CGEDMDQ failed. &
+                 &Check the calling sequence and the code.'
+                 WRITE(*,*) 'The error code is ', INFO
+                 WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+                 STOP
+          END IF
+          SINGVQX(1:N) =WORK(1:N)
+
+          !..... ZGEDMDQ check point
+
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > TOL2 ) THEN
+               WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+          END IF
+          !..... CGEDMDQ check point
+
+          !..... CGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+             ! Check that the QR factors are computed and returned
+             ! as requested. The residual ||F-Q*R||_F / ||F||_F
+             ! is compared to M*N*EPS.
+             F1(1:M,1:N+1) = F0(1:M,1:N+1)
+             CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, &
+                         LDF, Y, LDY, CONE, F1, LDF )
+             TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / &
+                   CLANGE( 'F', M, N+1, F0,  LDF, WORK )
+             IF ( TMP_FQR <= TOL2 ) THEN
+                !WRITE(*,*) ':) CGEDMDQ ........ PASSED.'
+             ELSE
+                WRITE(*,*) ':( CGEDMDQ ........ FAILED.'
+                NFAIL_F_QR = NFAIL_F_QR + 1
+             END IF
+          END IF
+          !..... ZGEDMDQ checkpoint
+                 !..... ZGEDMDQ checkpoint
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by ZGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+              ! of the invariant subspaces that correspond to complex conjugate
+              ! pairs of eigencalues. (See the description of Z in ZGEDMDQ)
+              DO i = 1, KQ
+                    ! have a real eigenvalue with real eigenvector
+                    CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
+                    ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
+                    RES1(i) = SCNRM2( M, Y1(1,i), 1)
+              END DO
+              TMP = ZERO
+              DO i = 1, KQ
+              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                  SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP <= TOL2 ) THEN
+                  !WRITE(*,*) '.... OK ........ CGEDMDQ PASSED.'
+              ELSE
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+                  WRITE(*,*) '................ CGEDMDQ FAILED!', &
+                      'Check the code for implementation errors.'
+              END IF
+          END IF
+
+          DEALLOCATE(CWORK)
+          DEALLOCATE(WORK)
+          DEALLOCATE(IWORK)
+
+      END IF
+
+      END DO   ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO   ! iWHTSVD
+      !write(*,*) 'WHTSVD loop completed'
+      END DO   ! iNRNK  -2:-1
+      !write(*,*) 'NRNK loop completed'
+      END DO   ! iSCALE  1:4
+      !write(*,*) 'SCALE loop completed'
+      END DO
+      !write(*,*) 'JOBREF loop completed'
+      END DO   ! iJOBZ
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE( A )
+      DEALLOCATE( AC )
+      DEALLOCATE( Z )
+      DEALLOCATE( F )
+      DEALLOCATE( F0 )
+      DEALLOCATE( F1 )
+      DEALLOCATE( X )
+      DEALLOCATE( X0 )
+      DEALLOCATE( Y )
+      DEALLOCATE( Y0 )
+      DEALLOCATE( Y1 )
+      DEALLOCATE( AU )
+      DEALLOCATE( W )
+      DEALLOCATE( S )
+      DEALLOCATE( Z1 )
+      DEALLOCATE( RES )
+      DEALLOCATE( RES1 )
+      DEALLOCATE( RESEX )
+      DEALLOCATE( CEIGS )
+      DEALLOCATE( SINGVX )
+      DEALLOCATE( SINGVQX )
+
+      END DO ! LLOOP
+
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for CGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV
+      END IF
+
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+
+      IF ( NFAIL_REZ == 0 ) THEN
+         WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+      IF ( NFAIL_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>> CGEDMD :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>>>>>>>>> CGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for CGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+        WRITE(*,*) '>>>> CGEDMD and CGEDMDQ computed singular &
+           &values test PASSED.'
+      ELSE
+        WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in &
+            &the singular values unacceptable ', &
+            NFAIL_SVDIFF, ' times. Test FAILED.'
+        WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+      IF ( NFAIL_F_QR == 0 ) THEN
+        WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+        WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+        WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>>>>> CGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>> CGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/dchkdmd.f90 b/TESTING/EIG/dchkdmd.f90
index 4fbf7531b3..c64d01a412 100644
--- a/TESTING/EIG/dchkdmd.f90
+++ b/TESTING/EIG/dchkdmd.f90
@@ -1,813 +1,813 @@
-!     This is a test program for checking the implementations of
-!     the implementations of the following subroutines
-!
-!     DGEDMD  for computation of the
-!             Dynamic Mode Decomposition (DMD)
-!     DGEDMDQ for computation of a
-!             QR factorization based compressed DMD
-!
-!     Developed and supported by:
-!     ===========================
-!     Developed and coded by Zlatko Drmac, Faculty of Science,
-!     University of Zagreb;  drmac@math.hr
-!     In cooperation with
-!     AIMdyn Inc., Santa Barbara, CA.
-!     ========================================================
-!     How to run the code (compiler, link info)
-!     ========================================================
-!     Compile as FORTRAN 90 (or later) and link with BLAS and
-!     LAPACK libraries.
-!     NOTE: The code is developed and tested on top of the
-!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
-!     using the Intel Fortran compiler.
-!
-!     For developers of the C++ implementation
-!     ========================================================
-!     See the LAPACK++ and Template Numerical Toolkit (TNT)
-!
-!     Note on a development of the GPU HP implementation
-!     ========================================================
-!     Work in progress. See CUDA, MAGMA, SLATE.
-!     NOTE: The four SVD subroutines used in this code are
-!     included as a part of R&D and for the completeness.
-!     This was also an opportunity to test those SVD codes.
-!     If the scaling option is used all four are essentially
-!     equally good. For implementations on HP platforms,
-!     one can use whichever SVD is available.
-!...  .........................................................
-!     NOTE:
-!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
-!     (optionally used in xGEDMD) may cause access violation
-!     error for x = S, D, C, Z, but only if called with the
-!     work space query. (At least in our Windows 10 MSVS 2019.)
-!     The problem can be mitigated by downloading the source
-!     code of xGESVDQ from the LAPACK repository and use it
-!     localy instead of the one in the MKL. This seems to
-!     indicate that the problem is indeed in the MKL.
-!     This problem did not appear whith Intel MKL 2022.2.0.
-!
-!     NOTE:
-!     xGESDD seems to have a problem with workspace. In some
-!     cases the length of the optimal workspace is returned
-!     smaller than the minimal workspace, as specified in the
-!     code. As a precaution, all optimal workspaces are
-!     set as MAX(minimal, optimal).
-!     Latest implementations of complex xGESDD have different
-!     length of the real worksapce. We use max value over
-!     two versions.
-!............................................................
-!............................................................
-!
-      PROGRAM DMD_TEST
-      use iso_fortran_env, only: real64
-      IMPLICIT NONE
-      integer, parameter :: WP = real64
-
-!............................................................
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!............................................................
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
-                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
-                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
-                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
-                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
-                     SINGVQX, WORK
-      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
-      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
-      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
-      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
-                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
-                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
-                       TMP_EX, XNORM, YNORM
-!............................................................
-      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
-                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
-      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF,  &
-                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
-                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
-                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
-      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
-      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
-                       SCALE, RESIDS, WANTQ, WANTR
-
-      LOGICAL  TEST_QRDMD
-!..... external subroutines (BLAS and LAPACK)
-      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
-      EXTERNAL DLARNV, DLATMR
-!.....external subroutines DMD package, part 1
-!     subroutines under test
-      EXTERNAL DGEDMD, DGEDMDQ
-
-!..... external functions (BLAS and LAPACK)
-      EXTERNAL         DLAMCH, DLANGE, DNRM2
-      REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2
-      EXTERNAL         LSAME
-      LOGICAL          LSAME
-
-      INTRINSIC ABS, INT, MIN, MAX
-!............................................................
-
-      ! The test is always in pairs : ( DGEDMD and DGEDMDQ )
-      ! because the test includes comparing the results (in pairs).
-!.....................................................................................
-      TEST_QRDMD = .TRUE. ! This code by default performs tests on DGEDMDQ
-                          ! Since the QR factorizations based algorithm is designed for
-                          ! single trajectory data, only single trajectory tests will
-                          ! be performed with xGEDMDQ.
-      WANTQ = 'Q'
-      WANTR = 'R'
-!.................................................................................
-
-      EPS = DLAMCH( 'P' )  ! machine precision DP
-
-      ! Global counters of failures of some particular tests
-      NFAIL      = 0
-      NFAIL_REZ  = 0
-      NFAIL_REZQ = 0
-      NFAIL_Z_XV = 0
-      NFAIL_F_QR = 0
-      NFAIL_AU   = 0
-      KDIFF      = 0
-      NFAIL_SVDIFF = 0
-      NFAIL_TOTAL  = 0
-      NFAILQ_TOTAL = 0
-
-
-      DO LLOOP = 1, 4
-
-      WRITE(*,*) 'L Loop Index = ', LLOOP
-
-      ! Set the dimensions of the problem ...
-      WRITE(*,*) 'M = '
-      READ(*,*) M
-      WRITE(*,*) M
-      ! ... and the number of snapshots.
-      WRITE(*,*) 'N = '
-      READ(*,*) N
-      WRITE(*,*) N
-
-      ! ... Test the dimensions
-      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
-          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
-          STOP
-      END IF
-!.............
-      ! The seed inside the LLOOP so that each pass can be reproduced easily.
-
-      ISEED(1) = 4
-      ISEED(2) = 3
-      ISEED(3) = 2
-      ISEED(4) = 1
-
-      LDA  = M
-      LDF  = M
-      LDX  = MAX(M,N+1)
-      LDY  = MAX(M,N+1)
-      LDW  = N
-      LDZ  = M
-      LDAU = MAX(M,N+1)
-      LDS  = N
-
-      TMP_ZXW  = ZERO
-      TMP_AU   = ZERO
-      TMP_REZ  = ZERO
-      TMP_REZQ = ZERO
-      SVDIFF   = ZERO
-      TMP_EX   = ZERO
-
-      !
-      ! Test the subroutines on real data snapshots. All
-      ! computation is done in real arithmetic, even when
-      ! Koopman eigenvalues and modes are real.
-      !
-      ! Allocate memory space
-      ALLOCATE( A(LDA,M) )
-      ALLOCATE( AC(LDA,M) )
-      ALLOCATE( DA(M) )
-      ALLOCATE( DL(M) )
-      ALLOCATE( F(LDF,N+1) )
-      ALLOCATE( F1(LDF,N+1) )
-      ALLOCATE( F2(LDF,N+1) )
-      ALLOCATE( X(LDX,N) )
-      ALLOCATE( X0(LDX,N) )
-      ALLOCATE( SINGVX(N) )
-      ALLOCATE( SINGVQX(N) )
-      ALLOCATE( Y(LDY,N+1) )
-      ALLOCATE( Y0(LDY,N+1) )
-      ALLOCATE( Y1(M,N+1) )
-      ALLOCATE( Z(LDZ,N) )
-      ALLOCATE( Z1(LDZ,N) )
-      ALLOCATE( RES(N)  )
-      ALLOCATE( RES1(N) )
-      ALLOCATE( RESEX(N) )
-      ALLOCATE( REIG(N) )
-      ALLOCATE( IEIG(N) )
-      ALLOCATE( REIGQ(N) )
-      ALLOCATE( IEIGQ(N) )
-      ALLOCATE( REIGA(M) )
-      ALLOCATE( IEIGA(M) )
-      ALLOCATE( VA(LDA,M) )
-      ALLOCATE( LAMBDA(N,2) )
-      ALLOCATE( LAMBDAQ(N,2) )
-      ALLOCATE( EIGA(M,2) )
-      ALLOCATE( W(LDW,N) )
-      ALLOCATE( AU(LDAU,N) )
-      ALLOCATE( S(N,N) )
-
-      TOL  = M*EPS
-      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
-      ! The factor 10 is somewhat arbitrary.
-      TOL2 = 10*M*N*EPS
-
-!.............
-
-      DO K_TRAJ = 1, 2
-      !  Number of intial conditions in the simulation/trajectories (1 or 2)
-
-      COND = 1.0D8
-      DMAX = 1.0D2
-      RSIGN = 'F'
-      GRADE = 'N'
-      MODEL = 6
-      CONDL = 1.0D2
-      MODER = 6
-      CONDR = 1.0D2
-      PIVTNG = 'N'
-
-      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
-      DO MODE = 1, 6
-
-      ALLOCATE( IWORK(2*M) )
-      ALLOCATE(DR(N))
-      CALL DLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
-                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
-                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
-                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
-      DEALLOCATE(IWORK)
-      DEALLOCATE(DR)
-
-      LWORK = 4*M+1
-      ALLOCATE(WORK(LWORK))
-      AC  = A
-      CALL DGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
-                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
-      DEALLOCATE(WORK)
-      TMP = ZERO
-      DO i = 1, M
-         EIGA(i,1) = REIGA(i)
-         EIGA(i,2) = IEIGA(i)
-         TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
-      END DO
-
-      ! Scale A to have the desirable spectral radius.
-      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
-      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
-
-      ! Compute the norm of A
-      ANORM = DLANGE( 'F', N, N, A, M, WDUMMY )
-
-      IF ( K_TRAJ == 2 ) THEN
-          ! generate data with two inital conditions
-      CALL DLARNV(2, ISEED, M, F1(1,1) )
-      F1(1:M,1) = 1.0E-10*F1(1:M,1)
-      DO i = 1, N/2
-         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
-              F1(1,i+1), 1 )
-      END DO
-      X0(1:M,1:N/2) = F1(1:M,1:N/2)
-      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
-
-      CALL DLARNV(2, ISEED, M, F1(1,1) )
-      DO i = 1, N-N/2
-         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
-              F1(1,i+1), 1 )
-      END DO
-      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
-      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
-      ELSE
-      CALL DLARNV(2, ISEED, M, F(1,1) )
-      DO i = 1, N
-         CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
-              F(1,i+1), 1 )
-      END DO
-      X0(1:M,1:N) = F(1:M,1:N)
-      Y0(1:M,1:N) = F(1:M,2:N+1)
-      END IF
-
-      XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY )
-      YNORM = DLANGE( 'F', M, N, Y0, LDX, WDUMMY )
-!............................................................
-
-      DO iJOBZ = 1, 4
-
-          SELECT CASE ( iJOBZ )
-          CASE(1)
-              JOBZ   = 'V' ! Ritz vectors will be computed
-              RESIDS = 'R' ! Residuals will be computed
-          CASE(2)
-              JOBZ   = 'V'
-              RESIDS = 'N'
-          CASE(3)
-              JOBZ   = 'F' ! Ritz vectors in factored form
-              RESIDS = 'N'
-          CASE(4)
-              JOBZ   = 'N'
-              RESIDS = 'N'
-          END SELECT
-
-      DO iJOBREF = 1, 3
-
-          SELECT CASE ( iJOBREF )
-          CASE(1)
-              JOBREF = 'R' ! Data for refined Ritz vectors
-          CASE(2)
-              JOBREF = 'E' ! Exact DMD vectors
-          CASE(3)
-              JOBREF = 'N'
-          END SELECT
-
-      DO iSCALE = 1, 4
-
-          SELECT CASE ( iSCALE )
-          CASE(1)
-              SCALE = 'S' ! X data normalized
-          CASE(2)
-              SCALE = 'C' ! X normalized, consist. check
-          CASE(3)
-              SCALE = 'Y' ! Y data normalized
-          CASE(4)
-              SCALE = 'N'
-          END SELECT
-
-      DO iNRNK = -1, -2, -1
-          ! Two truncation strategies. The "-2" case for R&D
-          ! purposes only - it uses possibly low accuracy small
-          ! singular values, in which case the formulas used in
-          ! the DMD are highly sensitive.
-          NRNK   = iNRNK
-
-      DO iWHTSVD = 1, 4
-          ! Check all four options to compute the POD basis
-          ! via the SVD.
-          WHTSVD   = iWHTSVD
-
-      DO LWMINOPT = 1, 2
-          ! Workspace query for the minimal (1) and for the optimal
-          ! (2) workspace lengths determined by workspace query.
-
-      X(1:M,1:N) = X0(1:M,1:N)
-      Y(1:M,1:N) = Y0(1:M,1:N)
-
-      ! DGEDMD: Workspace query and workspace allocation
-      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
-           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
-           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
-           IDUMMY, -1, INFO )
-
-      LIWORK = IDUMMY(1)
-      ALLOCATE( IWORK(LIWORK) )
-      LWORK = INT(WDUMMY(LWMINOPT))
-      ALLOCATE( WORK(LWORK) )
-
-      ! DGEDMD test: CALL DGEDMD
-      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
-           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
-           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
-           IWORK, LIWORK, INFO )
-
-      SINGVX(1:N) = WORK(1:N)
-
-      !...... DGEDMD check point
-      IF ( LSAME(JOBZ,'V')  ) THEN
-          ! Check that Z = X*W, on return from DGEDMD
-          ! This checks that the returned aigenvectors in Z are
-          ! the product of the SVD'POD basis returned in X
-          ! and the eigenvectors of the rayleigh quotient
-          ! returned in W
-          CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
-                      ZERO, Z1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-             CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
-             TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) )
-          END DO
-          TMP_ZXW = MAX(TMP_ZXW, TMP )
-
-          IF ( TMP_ZXW > 10*M*EPS ) THEN
-              NFAIL_Z_XV = NFAIL_Z_XV + 1
-              WRITE(*,*) ':( .................DGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-
-      END IF
-
-      !...... DGEDMD check point
-      IF ( LSAME(JOBREF,'R') ) THEN
-          ! The matrix A*U is returned for computing refined Ritz vectors.
-          ! Check that A*U is computed correctly using the formula
-          ! A*U = Y * V * inv(SIGMA). This depends on the
-          ! accuracy in the computed singular values and vectors of X.
-          ! See the paper for an error analysis.
-          ! Note that the left singular vectors of the input matrix X
-          ! are returned in the array X.
-          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
-                     ZERO, Z1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-              CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
-              TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * &
-                       SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_AU = MAX( TMP_AU, TMP )
-
-          IF ( TMP > TOL2 ) THEN
-              NFAIL_AU = NFAIL_AU + 1
-              WRITE(*,*) ':( .................DGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-
-      ELSEIF ( LSAME(JOBREF,'E') ) THEN
-      ! The unscaled vectors of the Exact DMD are computed.
-      ! This option is included for the sake of completeness,
-      ! for users who prefer the Exact DMD vectors. The
-      ! returned vectors are in the real form, in the same way
-      ! as the Ritz vectors. Here we just save the vectors
-      ! and test them separately using a Matlab script.
-
-       CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
-       i=1
-       DO WHILE ( i <= K )
-           IF ( IEIG(i) == ZERO ) THEN
-           ! have a real eigenvalue with real eigenvector
-           CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
-           RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1)
-           i = i + 1
-           ELSE
-           ! Have a complex conjugate pair
-           ! REIG(i) +- sqrt(-1)*IMEIG(i).
-           ! Since all computation is done in real
-           ! arithmetic, the formula for the residual
-           ! is recast for real representation of the
-           ! complex conjugate eigenpair. See the
-           ! description of RES.
-           AB(1,1) =  REIG(i)
-           AB(2,1) = -IEIG(i)
-           AB(1,2) =  IEIG(i)
-           AB(2,2) =  REIG(i)
-           CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
-                       M, AB, 2, ONE, Y1(1,i), M )
-           RESEX(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
-                        WORK )/ DLANGE( 'F', M, 2, AU(1,i), M, &
-                        WORK )
-           RESEX(i+1) = RESEX(i)
-           i = i + 2
-           END IF
-       END DO
-
-      END IF
-
-      !...... DGEDMD check point
-      IF ( LSAME(RESIDS, 'R') ) THEN
-          ! Compare the residuals returned by DGEDMD with the
-          ! explicitly computed residuals using the matrix A.
-          ! Compute explicitly Y1 = A*Z
-          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
-          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-          ! of the invariant subspaces that correspond to complex conjugate
-          ! pairs of eigencalues. (See the description of Z in DGEDMD,)
-          i = 1
-          DO WHILE ( i <= K )
-              IF ( IEIG(i) == ZERO ) THEN
-                  ! have a real eigenvalue with real eigenvector
-                  CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
-                  RES1(i) = DNRM2( M, Y1(1,i), 1)
-                  i = i + 1
-              ELSE
-                  ! Have a complex conjugate pair
-                  ! REIG(i) +- sqrt(-1)*IMEIG(i).
-                  ! Since all computation is done in real
-                  ! arithmetic, the formula for the residual
-                  ! is recast for real representation of the
-                  ! complex conjugate eigenpair. See the
-                  ! description of RES.
-                  AB(1,1) =  REIG(i)
-                  AB(2,1) = -IEIG(i)
-                  AB(1,2) =  IEIG(i)
-                  AB(2,2) =  REIG(i)
-                  CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                              M, AB, 2, ONE, Y1(1,i), M )
-                  RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
-                                     WORK )
-                  RES1(i+1) = RES1(i)
-                  i = i + 2
-              END IF
-          END DO
-          TMP = ZERO
-          DO i = 1, K
-              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                        SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_REZ = MAX( TMP_REZ, TMP )
-
-          IF ( TMP > TOL2 ) THEN
-              NFAIL_REZ = NFAIL_REZ + 1
-              WRITE(*,*) ':( ..................DGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-
-          IF ( LSAME(JOBREF,'E') ) THEN
-              TMP = ZERO
-              DO i = 1, K
-                  TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
-              END DO
-              TMP_EX = MAX(TMP_EX,TMP)
-          END IF
-
-      END IF
-
-      !..... store the results for inspection
-      DO i = 1, K
-          LAMBDA(i,1) = REIG(i)
-          LAMBDA(i,2) = IEIG(i)
-      END DO
-
-      DEALLOCATE(IWORK)
-      DEALLOCATE(WORK)
-
-      !======================================================================
-      !     Now test the DGEDMDQ
-      !======================================================================
-      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
-          RJOBDATA(2) = 1
-          F1 = F
-
-          ! DGEDMDQ test: Workspace query and workspace allocation
-          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
-               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
-               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
-               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
-               -1, IDUMMY, -1, INFO )
-          LIWORK = IDUMMY(1)
-          ALLOCATE( IWORK(LIWORK) )
-          LWORK = INT(WDUMMY(LWMINOPT))
-          ALLOCATE(WORK(LWORK))
-          ! DGEDMDQ test: CALL DGEDMDQ
-          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
-               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
-               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
-               RES, AU, LDAU, W, LDW, S, LDS,              &
-               WORK, LWORK, IWORK, LIWORK, INFO )
-
-          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
-
-          !..... DGEDMDQ check point
-          IF ( KQ /= K ) THEN
-              KDIFF = KDIFF+1
-          END IF
-
-          TMP = ZERO
-          DO i = 1, MIN(K, KQ)
-              TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
-                                    SINGVX(1) )
-          END DO
-          SVDIFF = MAX( SVDIFF, TMP )
-          IF ( TMP > M*N*EPS ) THEN
-              WRITE(*,*) 'FAILED! Something was wrong with the run.'
-              NFAIL_SVDIFF = NFAIL_SVDIFF + 1
-              DO j =1, 3
-                  write(*,*) j, SINGVX(j), SINGVQX(j)
-                  read(*,*)
-              END DO
-          END IF
-
-          !..... DGEDMDQ check point
-          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
-              ! Check that the QR factors are computed and returned
-              ! as requested. The residual ||F-Q*R||_F / ||F||_F
-              ! is compared to M*N*EPS.
-              F2 = F
-              CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
-                          LDF, Y, LDY, ONE, F2, LDF )
-              TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
-                    DLANGE( 'F', M, N+1, F,  LDF, WORK )
-              IF ( TMP_FQR > TOL2 ) THEN
-                  WRITE(*,*) 'FAILED! Something was wrong with the run.'
-                  NFAIL_F_QR = NFAIL_F_QR + 1
-              END IF
-          END IF
-
-          !..... DGEDMDQ check point
-          IF ( LSAME(RESIDS, 'R') ) THEN
-              ! Compare the residuals returned by DGEDMDQ with the
-              ! explicitly computed residuals using the matrix A.
-              ! Compute explicitly Y1 = A*Z
-              CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M )
-              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-              ! of the invariant subspaces that correspond to complex conjugate
-              ! pairs of eigencalues. (See the description of Z in DGEDMDQ)
-              i = 1
-              DO WHILE ( i <= KQ )
-                  IF ( IEIGQ(i) == ZERO ) THEN
-                      ! have a real eigenvalue with real eigenvector
-                      CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
-                      ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
-                      RES1(i) = DNRM2( M, Y1(1,i), 1)
-                      i = i + 1
-                  ELSE
-                     ! Have a complex conjugate pair
-                     ! REIG(i) +- sqrt(-1)*IMEIG(i).
-                     ! Since all computation is done in real
-                     ! arithmetic, the formula for the residual
-                     ! is recast for real representation of the
-                     ! complex conjugate eigenpair. See the
-                     ! description of RES.
-                     AB(1,1) =  REIGQ(i)
-                     AB(2,1) = -IEIGQ(i)
-                     AB(1,2) =  IEIGQ(i)
-                     AB(2,2) =  REIGQ(i)
-                     CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                                 M, AB, 2, ONE, Y1(1,i), M )             ! BLAS CALL
-                     ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
-                     RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
-                                        WORK )                           ! LAPACK CALL
-                     RES1(i+1) = RES1(i)
-                     i = i + 2
-                  END IF
-              END DO
-              TMP = ZERO
-              DO i = 1, KQ
-                  TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                      SINGVQX(K)/(ANORM*SINGVQX(1)) )
-              END DO
-              TMP_REZQ = MAX( TMP_REZQ, TMP )
-              IF ( TMP > TOL2 ) THEN
-                  NFAIL_REZQ = NFAIL_REZQ + 1
-                  WRITE(*,*) '................ DGEDMDQ FAILED!', &
-                      'Check the code for implementation errors.'
-                  STOP
-              END IF
-
-          END IF
-
-          DO i = 1, KQ
-              LAMBDAQ(i,1) = REIGQ(i)
-              LAMBDAQ(i,2) = IEIGQ(i)
-          END DO
-
-          DEALLOCATE(WORK)
-          DEALLOCATE(IWORK)
-      END IF ! TEST_QRDMD
-!======================================================================
-
-      END DO ! LWMINOPT
-      !write(*,*) 'LWMINOPT loop completed'
-      END DO ! WHTSVD LOOP
-      !write(*,*) 'WHTSVD loop completed'
-      END DO ! NRNK LOOP
-      !write(*,*) 'NRNK loop completed'
-      END DO ! SCALE LOOP
-      !write(*,*) 'SCALE loop completed'
-      END DO ! JOBF LOOP
-      !write(*,*) 'JOBREF loop completed'
-      END DO ! JOBZ LOOP
-      !write(*,*) 'JOBZ loop completed'
-
-      END DO ! MODE -6:6
-      !write(*,*) 'MODE loop completed'
-      END DO ! 1 or 2 trajectories
-      !write(*,*) 'trajectories  loop completed'
-
-      DEALLOCATE(A)
-      DEALLOCATE(AC)
-      DEALLOCATE(DA)
-      DEALLOCATE(DL)
-      DEALLOCATE(F)
-      DEALLOCATE(F1)
-      DEALLOCATE(F2)
-      DEALLOCATE(X)
-      DEALLOCATE(X0)
-      DEALLOCATE(SINGVX)
-      DEALLOCATE(SINGVQX)
-      DEALLOCATE(Y)
-      DEALLOCATE(Y0)
-      DEALLOCATE(Y1)
-      DEALLOCATE(Z)
-      DEALLOCATE(Z1)
-      DEALLOCATE(RES)
-      DEALLOCATE(RES1)
-      DEALLOCATE(RESEX)
-      DEALLOCATE(REIG)
-      DEALLOCATE(IEIG)
-      DEALLOCATE(REIGQ)
-      DEALLOCATE(IEIGQ)
-      DEALLOCATE(REIGA)
-      DEALLOCATE(IEIGA)
-      DEALLOCATE(VA)
-      DEALLOCATE(LAMBDA)
-      DEALLOCATE(LAMBDAQ)
-      DEALLOCATE(EIGA)
-      DEALLOCATE(W)
-      DEALLOCATE(AU)
-      DEALLOCATE(S)
-
-!............................................................
-      !     Generate random M-by-M matrix A. Use DLATMR from
-      END DO ! LLOOP
-
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for DGEDMD :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-      IF ( NFAIL_Z_XV == 0 ) THEN
-          WRITE(*,*) '>>>> Z - U*V test PASSED.'
-      ELSE
-          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
-          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
-      END IF
-      IF ( NFAIL_AU == 0 ) THEN
-          WRITE(*,*) '>>>> A*U test PASSED. '
-      ELSE
-          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
-          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
-      END IF
-
-      IF ( NFAIL_REZ == 0 ) THEN
-          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
-          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
-      END IF
-
-      IF ( NFAIL_TOTAL == 0 ) THEN
-          WRITE(*,*) '>>>> DGEDMD :: ALL TESTS PASSED.'
-      ELSE
-          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
-          WRITE(*,*) '>>>>>>>>>>>>>> DGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      IF ( TEST_QRDMD ) THEN
-      WRITE(*,*)
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for DGEDMDQ :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-
-      IF ( NFAIL_SVDIFF == 0 ) THEN
-          WRITE(*,*) '>>>> DGEDMD and DGEDMDQ computed singular &
-              &values test PASSED.'
-      ELSE
-          WRITE(*,*) 'DGEDMD and DGEDMDQ discrepancies in &
-              &the singular values unacceptable ', &
-              NFAIL_SVDIFF, ' times. Test FAILED.'
-          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
-      END IF
-
-      IF ( NFAIL_F_QR == 0 ) THEN
-          WRITE(*,*) '>>>> F - Q*R test PASSED.'
-      ELSE
-          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
-          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
-      END IF
-
-      IF ( NFAIL_REZQ == 0 ) THEN
-          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
-          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
-      END IF
-
-      IF ( NFAILQ_TOTAL == 0 ) THEN
-          WRITE(*,*) '>>>>>>> DGEDMDQ :: ALL TESTS PASSED.'
-      ELSE
-         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
-         WRITE(*,*) '>>>>>>> DGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      END IF
-
-      WRITE(*,*)
-      WRITE(*,*) 'Test completed.'
-      STOP
-      END
+!     This is a test program for checking the implementations of
+!     the implementations of the following subroutines
+!
+!     DGEDMD  for computation of the
+!             Dynamic Mode Decomposition (DMD)
+!     DGEDMDQ for computation of a
+!             QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!...  .........................................................
+!     NOTE:
+!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
+!     (optionally used in xGEDMD) may cause access violation
+!     error for x = S, D, C, Z, but only if called with the
+!     work space query. (At least in our Windows 10 MSVS 2019.)
+!     The problem can be mitigated by downloading the source
+!     code of xGESVDQ from the LAPACK repository and use it
+!     localy instead of the one in the MKL. This seems to
+!     indicate that the problem is indeed in the MKL.
+!     This problem did not appear whith Intel MKL 2022.2.0.
+!
+!     NOTE:
+!     xGESDD seems to have a problem with workspace. In some
+!     cases the length of the optimal workspace is returned
+!     smaller than the minimal workspace, as specified in the
+!     code. As a precaution, all optimal workspaces are
+!     set as MAX(minimal, optimal).
+!     Latest implementations of complex xGESDD have different
+!     length of the real worksapce. We use max value over
+!     two versions.
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real64
+      IMPLICIT NONE
+      integer, parameter :: WP = real64
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
+                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
+                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
+                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
+                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
+                     SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
+      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX, XNORM, YNORM
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF,  &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
+                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
+      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
+      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+
+      LOGICAL  TEST_QRDMD
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
+      EXTERNAL DLARNV, DLATMR
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL DGEDMD, DGEDMDQ
+
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         DLAMCH, DLANGE, DNRM2
+      REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2
+      EXTERNAL         LSAME
+      LOGICAL          LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX
+!............................................................
+
+      ! The test is always in pairs : ( DGEDMD and DGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on DGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = DLAMCH( 'P' )  ! machine precision DP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      KDIFF      = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = MAX(M,N+1)
+      LDY  = MAX(M,N+1)
+      LDW  = N
+      LDZ  = M
+      LDAU = MAX(M,N+1)
+      LDS  = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      !
+      ! Test the subroutines on real data snapshots. All
+      ! computation is done in real arithmetic, even when
+      ! Koopman eigenvalues and modes are real.
+      !
+      ! Allocate memory space
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( DA(M) )
+      ALLOCATE( DL(M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( F2(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(M,N+1) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N)  )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( REIG(N) )
+      ALLOCATE( IEIG(N) )
+      ALLOCATE( REIGQ(N) )
+      ALLOCATE( IEIGQ(N) )
+      ALLOCATE( REIGA(M) )
+      ALLOCATE( IEIGA(M) )
+      ALLOCATE( VA(LDA,M) )
+      ALLOCATE( LAMBDA(N,2) )
+      ALLOCATE( LAMBDAQ(N,2) )
+      ALLOCATE( EIGA(M,2) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( S(N,N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of intial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D8
+      DMAX = 1.0D2
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D2
+      MODER = 6
+      CONDR = 1.0D2
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE(DR(N))
+      CALL DLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
+                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
+                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE(IWORK)
+      DEALLOCATE(DR)
+
+      LWORK = 4*M+1
+      ALLOCATE(WORK(LWORK))
+      AC  = A
+      CALL DGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
+                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      TMP = ZERO
+      DO i = 1, M
+         EIGA(i,1) = REIGA(i)
+         EIGA(i,2) = IEIGA(i)
+         TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
+      END DO
+
+      ! Scale A to have the desirable spectral radius.
+      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
+      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
+
+      ! Compute the norm of A
+      ANORM = DLANGE( 'F', N, N, A, M, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data with two inital conditions
+      CALL DLARNV(2, ISEED, M, F1(1,1) )
+      F1(1:M,1) = 1.0E-10*F1(1:M,1)
+      DO i = 1, N/2
+         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N/2) = F1(1:M,1:N/2)
+      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
+
+      CALL DLARNV(2, ISEED, M, F1(1,1) )
+      DO i = 1, N-N/2
+         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
+      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
+      ELSE
+      CALL DLARNV(2, ISEED, M, F(1,1) )
+      DO i = 1, N
+         CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
+              F(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N) = F(1:M,1:N)
+      Y0(1:M,1:N) = F(1:M,2:N+1)
+      END IF
+
+      XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY )
+      YNORM = DLANGE( 'F', M, N, Y0, LDX, WDUMMY )
+!............................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V' ! Ritz vectors will be computed
+              RESIDS = 'R' ! Residuals will be computed
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F' ! Ritz vectors in factored form
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R' ! Data for refined Ritz vectors
+          CASE(2)
+              JOBREF = 'E' ! Exact DMD vectors
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S' ! X data normalized
+          CASE(2)
+              SCALE = 'C' ! X normalized, consist. check
+          CASE(3)
+              SCALE = 'Y' ! Y data normalized
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          ! Two truncation strategies. The "-2" case for R&D
+          ! purposes only - it uses possibly low accuracy small
+          ! singular values, in which case the formulas used in
+          ! the DMD are highly sensitive.
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1, 4
+          ! Check all four options to compute the POD basis
+          ! via the SVD.
+          WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+          ! Workspace query for the minimal (1) and for the optimal
+          ! (2) workspace lengths determined by workspace query.
+
+      X(1:M,1:N) = X0(1:M,1:N)
+      Y(1:M,1:N) = Y0(1:M,1:N)
+
+      ! DGEDMD: Workspace query and workspace allocation
+      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
+           IDUMMY, -1, INFO )
+
+      LIWORK = IDUMMY(1)
+      ALLOCATE( IWORK(LIWORK) )
+      LWORK = INT(WDUMMY(LWMINOPT))
+      ALLOCATE( WORK(LWORK) )
+
+      ! DGEDMD test: CALL DGEDMD
+      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
+           IWORK, LIWORK, INFO )
+
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... DGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from DGEDMD
+          ! This checks that the returned aigenvectors in Z are
+          ! the product of the SVD'POD basis returned in X
+          ! and the eigenvectors of the rayleigh quotient
+          ! returned in W
+          CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                      ZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+
+          IF ( TMP_ZXW > 10*M*EPS ) THEN
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+      END IF
+
+      !...... DGEDMD check point
+      IF ( LSAME(JOBREF,'R') ) THEN
+          ! The matrix A*U is returned for computing refined Ritz vectors.
+          ! Check that A*U is computed correctly using the formula
+          ! A*U = Y * V * inv(SIGMA). This depends on the
+          ! accuracy in the computed singular values and vectors of X.
+          ! See the paper for an error analysis.
+          ! Note that the left singular vectors of the input matrix X
+          ! are returned in the array X.
+          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
+                     ZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+              CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
+              TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * &
+                       SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+      ! The unscaled vectors of the Exact DMD are computed.
+      ! This option is included for the sake of completeness,
+      ! for users who prefer the Exact DMD vectors. The
+      ! returned vectors are in the real form, in the same way
+      ! as the Ritz vectors. Here we just save the vectors
+      ! and test them separately using a Matlab script.
+
+       CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
+       i=1
+       DO WHILE ( i <= K )
+           IF ( IEIG(i) == ZERO ) THEN
+           ! have a real eigenvalue with real eigenvector
+           CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
+           RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1)
+           i = i + 1
+           ELSE
+           ! Have a complex conjugate pair
+           ! REIG(i) +- sqrt(-1)*IMEIG(i).
+           ! Since all computation is done in real
+           ! arithmetic, the formula for the residual
+           ! is recast for real representation of the
+           ! complex conjugate eigenpair. See the
+           ! description of RES.
+           AB(1,1) =  REIG(i)
+           AB(2,1) = -IEIG(i)
+           AB(1,2) =  IEIG(i)
+           AB(2,2) =  REIG(i)
+           CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
+                       M, AB, 2, ONE, Y1(1,i), M )
+           RESEX(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                        WORK )/ DLANGE( 'F', M, 2, AU(1,i), M, &
+                        WORK )
+           RESEX(i+1) = RESEX(i)
+           i = i + 2
+           END IF
+       END DO
+
+      END IF
+
+      !...... DGEDMD check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by DGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigencalues. (See the description of Z in DGEDMD,)
+          i = 1
+          DO WHILE ( i <= K )
+              IF ( IEIG(i) == ZERO ) THEN
+                  ! have a real eigenvalue with real eigenvector
+                  CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
+                  RES1(i) = DNRM2( M, Y1(1,i), 1)
+                  i = i + 1
+              ELSE
+                  ! Have a complex conjugate pair
+                  ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                  ! Since all computation is done in real
+                  ! arithmetic, the formula for the residual
+                  ! is recast for real representation of the
+                  ! complex conjugate eigenpair. See the
+                  ! description of RES.
+                  AB(1,1) =  REIG(i)
+                  AB(2,1) = -IEIG(i)
+                  AB(1,2) =  IEIG(i)
+                  AB(2,2) =  REIG(i)
+                  CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                              M, AB, 2, ONE, Y1(1,i), M )
+                  RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                                     WORK )
+                  RES1(i+1) = RES1(i)
+                  i = i + 2
+              END IF
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                        SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+          IF ( LSAME(JOBREF,'E') ) THEN
+              TMP = ZERO
+              DO i = 1, K
+                  TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+              END DO
+              TMP_EX = MAX(TMP_EX,TMP)
+          END IF
+
+      END IF
+
+      !..... store the results for inspection
+      DO i = 1, K
+          LAMBDA(i,1) = REIG(i)
+          LAMBDA(i,2) = IEIG(i)
+      END DO
+
+      DEALLOCATE(IWORK)
+      DEALLOCATE(WORK)
+
+      !======================================================================
+      !     Now test the DGEDMDQ
+      !======================================================================
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+          RJOBDATA(2) = 1
+          F1 = F
+
+          ! DGEDMDQ test: Workspace query and workspace allocation
+          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
+               -1, IDUMMY, -1, INFO )
+          LIWORK = IDUMMY(1)
+          ALLOCATE( IWORK(LIWORK) )
+          LWORK = INT(WDUMMY(LWMINOPT))
+          ALLOCATE(WORK(LWORK))
+          ! DGEDMDQ test: CALL DGEDMDQ
+          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS,              &
+               WORK, LWORK, IWORK, LIWORK, INFO )
+
+          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
+
+          !..... DGEDMDQ check point
+          IF ( KQ /= K ) THEN
+              KDIFF = KDIFF+1
+          END IF
+
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+              TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                    SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+              WRITE(*,*) 'FAILED! Something was wrong with the run.'
+              NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+              DO j =1, 3
+                  write(*,*) j, SINGVX(j), SINGVQX(j)
+                  read(*,*)
+              END DO
+          END IF
+
+          !..... DGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+              ! Check that the QR factors are computed and returned
+              ! as requested. The residual ||F-Q*R||_F / ||F||_F
+              ! is compared to M*N*EPS.
+              F2 = F
+              CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
+                          LDF, Y, LDY, ONE, F2, LDF )
+              TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
+                    DLANGE( 'F', M, N+1, F,  LDF, WORK )
+              IF ( TMP_FQR > TOL2 ) THEN
+                  WRITE(*,*) 'FAILED! Something was wrong with the run.'
+                  NFAIL_F_QR = NFAIL_F_QR + 1
+              END IF
+          END IF
+
+          !..... DGEDMDQ check point
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by DGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+              ! of the invariant subspaces that correspond to complex conjugate
+              ! pairs of eigencalues. (See the description of Z in DGEDMDQ)
+              i = 1
+              DO WHILE ( i <= KQ )
+                  IF ( IEIGQ(i) == ZERO ) THEN
+                      ! have a real eigenvalue with real eigenvector
+                      CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
+                      ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
+                      RES1(i) = DNRM2( M, Y1(1,i), 1)
+                      i = i + 1
+                  ELSE
+                     ! Have a complex conjugate pair
+                     ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                     ! Since all computation is done in real
+                     ! arithmetic, the formula for the residual
+                     ! is recast for real representation of the
+                     ! complex conjugate eigenpair. See the
+                     ! description of RES.
+                     AB(1,1) =  REIGQ(i)
+                     AB(2,1) = -IEIGQ(i)
+                     AB(1,2) =  IEIGQ(i)
+                     AB(2,2) =  REIGQ(i)
+                     CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                                 M, AB, 2, ONE, Y1(1,i), M )             ! BLAS CALL
+                     ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+                     RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                                        WORK )                           ! LAPACK CALL
+                     RES1(i+1) = RES1(i)
+                     i = i + 2
+                  END IF
+              END DO
+              TMP = ZERO
+              DO i = 1, KQ
+                  TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                      SINGVQX(K)/(ANORM*SINGVQX(1)) )
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP > TOL2 ) THEN
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+                  WRITE(*,*) '................ DGEDMDQ FAILED!', &
+                      'Check the code for implementation errors.'
+                  STOP
+              END IF
+
+          END IF
+
+          DO i = 1, KQ
+              LAMBDAQ(i,1) = REIGQ(i)
+              LAMBDAQ(i,2) = IEIGQ(i)
+          END DO
+
+          DEALLOCATE(WORK)
+          DEALLOCATE(IWORK)
+      END IF ! TEST_QRDMD
+!======================================================================
+
+      END DO ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO ! WHTSVD LOOP
+      !write(*,*) 'WHTSVD loop completed'
+      END DO ! NRNK LOOP
+      !write(*,*) 'NRNK loop completed'
+      END DO ! SCALE LOOP
+      !write(*,*) 'SCALE loop completed'
+      END DO ! JOBF LOOP
+      !write(*,*) 'JOBREF loop completed'
+      END DO ! JOBZ LOOP
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE(A)
+      DEALLOCATE(AC)
+      DEALLOCATE(DA)
+      DEALLOCATE(DL)
+      DEALLOCATE(F)
+      DEALLOCATE(F1)
+      DEALLOCATE(F2)
+      DEALLOCATE(X)
+      DEALLOCATE(X0)
+      DEALLOCATE(SINGVX)
+      DEALLOCATE(SINGVQX)
+      DEALLOCATE(Y)
+      DEALLOCATE(Y0)
+      DEALLOCATE(Y1)
+      DEALLOCATE(Z)
+      DEALLOCATE(Z1)
+      DEALLOCATE(RES)
+      DEALLOCATE(RES1)
+      DEALLOCATE(RESEX)
+      DEALLOCATE(REIG)
+      DEALLOCATE(IEIG)
+      DEALLOCATE(REIGQ)
+      DEALLOCATE(IEIGQ)
+      DEALLOCATE(REIGA)
+      DEALLOCATE(IEIGA)
+      DEALLOCATE(VA)
+      DEALLOCATE(LAMBDA)
+      DEALLOCATE(LAMBDAQ)
+      DEALLOCATE(EIGA)
+      DEALLOCATE(W)
+      DEALLOCATE(AU)
+      DEALLOCATE(S)
+
+!............................................................
+      !     Generate random M-by-M matrix A. Use DLATMR from
+      END DO ! LLOOP
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for DGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>> DGEDMD :: ALL TESTS PASSED.'
+      ELSE
+          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+          WRITE(*,*) '>>>>>>>>>>>>>> DGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for DGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> DGEDMD and DGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+          WRITE(*,*) 'DGEDMD and DGEDMDQ discrepancies in &
+              &the singular values unacceptable ', &
+              NFAIL_SVDIFF, ' times. Test FAILED.'
+          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> DGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> DGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/schkdmd.f90 b/TESTING/EIG/schkdmd.f90
index 77e3e46c05..855d981fda 100644
--- a/TESTING/EIG/schkdmd.f90
+++ b/TESTING/EIG/schkdmd.f90
@@ -1,792 +1,792 @@
-!     This is a test program for checking the implementations of
-!     the implementations of the following subroutines
-!
-!     SGEDMD  for computation of the
-!             Dynamic Mode Decomposition (DMD)
-!     SGEDMDQ for computation of a
-!             QR factorization based compressed DMD
-!
-!     Developed and supported by:
-!     ===========================
-!     Developed and coded by Zlatko Drmac, Faculty of Science,
-!     University of Zagreb;  drmac@math.hr
-!     In cooperation with
-!     AIMdyn Inc., Santa Barbara, CA.
-!     ========================================================
-!     How to run the code (compiler, link info)
-!     ========================================================
-!     Compile as FORTRAN 90 (or later) and link with BLAS and
-!     LAPACK libraries.
-!     NOTE: The code is developed and tested on top of the
-!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
-!     using the Intel Fortran compiler.
-!
-!     For developers of the C++ implementation
-!     ========================================================
-!     See the LAPACK++ and Template Numerical Toolkit (TNT)
-!
-!     Note on a development of the GPU HP implementation
-!     ========================================================
-!     Work in progress. See CUDA, MAGMA, SLATE.
-!     NOTE: The four SVD subroutines used in this code are
-!     included as a part of R&D and for the completeness.
-!     This was also an opportunity to test those SVD codes.
-!     If the scaling option is used all four are essentially
-!     equally good. For implementations on HP platforms,
-!     one can use whichever SVD is available.
-!...  .........................................................
-!     NOTE:
-!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
-!     (optionally used in xGEDMD) may cause access violation
-!     error for x = S, D, C, Z, but only if called with the
-!     work space query. (At least in our Windows 10 MSVS 2019.)
-!     The problem can be mitigated by downloading the source
-!     code of xGESVDQ from the LAPACK repository and use it
-!     localy instead of the one in the MKL. This seems to
-!     indicate that the problem is indeed in the MKL.
-!     This problem did not appear whith Intel MKL 2022.2.0.
-!
-!     NOTE:
-!     xGESDD seems to have a problem with workspace. In some
-!     cases the length of the optimal workspace is returned
-!     smaller than the minimal workspace, as specified in the
-!     code. As a precaution, all optimal workspaces are
-!     set as MAX(minimal, optimal).
-!     Latest implementations of complex xGESDD have different
-!     length of the real worksapce. We use max value over
-!     two versions.
-!............................................................
-!............................................................
-!
-      PROGRAM DMD_TEST
-      use iso_fortran_env, only: real32
-      IMPLICIT NONE
-      integer, parameter :: WP = real32
-
-!............................................................
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-!............................................................
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
-                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
-                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
-                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
-                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
-                     SINGVQX, WORK
-      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
-      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
-      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
-      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
-                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
-                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
-                       TMP_EX, XNORM, YNORM
-!............................................................
-      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
-                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
-      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF,     &
-                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
-                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
-                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
-      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
-      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
-                       SCALE, RESIDS, WANTQ, WANTR
-
-      LOGICAL          TEST_QRDMD
-!..... external subroutines (BLAS and LAPACK)
-      EXTERNAL SAXPY,  SGEEV, SGEMM, SGEMV, SLACPY, SLASCL
-      EXTERNAL SLARNV, SLATMR
-!.....external subroutines DMD package, part 1
-!     subroutines under test
-      EXTERNAL SGEDMD, SGEDMDQ
-
-!..... external functions (BLAS and LAPACK)
-      EXTERNAL         SLAMCH, SLANGE, SNRM2
-      REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2
-      EXTERNAL         LSAME
-      LOGICAL          LSAME
-
-      INTRINSIC ABS, INT, MIN, MAX
-!............................................................
-
-      ! The test is always in pairs : ( SGEDMD and SGEDMDQ )
-      ! because the test includes comparing the results (in pairs).
-!.....................................................................................
-      TEST_QRDMD = .TRUE. ! This code by default performs tests on SGEDMDQ
-                          ! Since the QR factorizations based algorithm is designed for
-                          ! single trajectory data, only single trajectory tests will
-                          ! be performed with xGEDMDQ.
-      WANTQ = 'Q'
-      WANTR = 'R'
-!.................................................................................
-
-      EPS = SLAMCH( 'P' )  ! machine precision SP
-
-      ! Global counters of failures of some particular tests
-      NFAIL      = 0
-      NFAIL_REZ  = 0
-      NFAIL_REZQ = 0
-      NFAIL_Z_XV = 0
-      NFAIL_F_QR = 0
-      NFAIL_AU   = 0
-      KDIFF      = 0
-      NFAIL_SVDIFF = 0
-      NFAIL_TOTAL  = 0
-      NFAILQ_TOTAL = 0
-
-
-      DO LLOOP = 1, 4
-
-      WRITE(*,*) 'L Loop Index = ', LLOOP
-
-      ! Set the dimensions of the problem ...
-      WRITE(*,*) 'M = '
-      READ(*,*) M
-      WRITE(*,*) M
-      ! ... and the number of snapshots.
-      WRITE(*,*) 'N = '
-      READ(*,*) N
-      WRITE(*,*) N
-
-      ! ... Test the dimensions
-      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
-          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
-          STOP
-      END IF
-!.............
-      ! The seed inside the LLOOP so that each pass can be reproduced easily.
-
-      ISEED(1) = 4
-      ISEED(2) = 3
-      ISEED(3) = 2
-      ISEED(4) = 1
-
-      LDA = M
-      LDF = M
-      LDX = MAX(M,N+1)
-      LDY = MAX(M,N+1)
-      LDW = N
-      LDZ = M
-      LDAU = MAX(M,N+1)
-      LDS = N
-
-      TMP_ZXW  = ZERO
-      TMP_AU   = ZERO
-      TMP_REZ  = ZERO
-      TMP_REZQ = ZERO
-      SVDIFF   = ZERO
-      TMP_EX   = ZERO
-
-      !
-      ! Test the subroutines on real data snapshots. All
-      ! computation is done in real arithmetic, even when
-      ! Koopman eigenvalues and modes are real.
-      !
-      ! Allocate memory space
-      ALLOCATE( A(LDA,M) )
-      ALLOCATE( AC(LDA,M) )
-      ALLOCATE( DA(M) )
-      ALLOCATE( DL(M) )
-      ALLOCATE( F(LDF,N+1) )
-      ALLOCATE( F1(LDF,N+1) )
-      ALLOCATE( F2(LDF,N+1) )
-      ALLOCATE( X(LDX,N) )
-      ALLOCATE( X0(LDX,N) )
-      ALLOCATE( SINGVX(N) )
-      ALLOCATE( SINGVQX(N) )
-      ALLOCATE( Y(LDY,N+1) )
-      ALLOCATE( Y0(LDY,N+1) )
-      ALLOCATE( Y1(M,N+1) )
-      ALLOCATE( Z(LDZ,N) )
-      ALLOCATE( Z1(LDZ,N) )
-      ALLOCATE( RES(N)  )
-      ALLOCATE( RES1(N) )
-      ALLOCATE( RESEX(N) )
-      ALLOCATE( REIG(N) )
-      ALLOCATE( IEIG(N) )
-      ALLOCATE( REIGQ(N) )
-      ALLOCATE( IEIGQ(N) )
-      ALLOCATE( REIGA(M) )
-      ALLOCATE( IEIGA(M) )
-      ALLOCATE( VA(LDA,M) )
-      ALLOCATE( LAMBDA(N,2) )
-      ALLOCATE( LAMBDAQ(N,2) )
-      ALLOCATE( EIGA(M,2) )
-      ALLOCATE( W(LDW,N) )
-      ALLOCATE( AU(LDAU,N) )
-      ALLOCATE( S(N,N) )
-
-      TOL  = M*EPS
-      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
-      ! The factor 10 is somewhat arbitrary.
-      TOL2 = 10*M*N*EPS
-
-!.............
-
-      DO K_TRAJ = 1, 2
-      !  Number of intial conditions in the simulation/trajectories (1 or 2)
-
-      COND = 1.0D8
-      DMAX = 1.0D2
-      RSIGN = 'F'
-      GRADE = 'N'
-      MODEL = 6
-      CONDL = 1.0D2
-      MODER = 6
-      CONDR = 1.0D2
-      PIVTNG = 'N'
-
-      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
-      DO MODE = 1, 6
-
-      ALLOCATE( IWORK(2*M) )
-      ALLOCATE(DR(N))
-      CALL SLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
-                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
-                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
-                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
-      DEALLOCATE(IWORK)
-      DEALLOCATE(DR)
-
-      LWORK = 4*M+1
-      ALLOCATE(WORK(LWORK))
-      AC  = A
-      CALL SGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
-                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
-      DEALLOCATE(WORK)
-      TMP = ZERO
-      DO i = 1, M
-          EIGA(i,1) = REIGA(i)
-          EIGA(i,2) = IEIGA(i)
-          TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
-      END DO
-
-      ! Scale A to have the desirable spectral radius.
-      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
-      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
-
-      ! Compute the norm of A
-      ANORM = SLANGE( 'F', N, N, A, M, WDUMMY )
-
-      IF ( K_TRAJ == 2 ) THEN
-          ! generate data with two inital conditions
-      CALL SLARNV(2, ISEED, M, F1(1,1) )
-      F1(1:M,1) = 1.0E-10*F1(1:M,1)
-      DO i = 1, N/2
-         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
-              F1(1,i+1), 1 )
-      END DO
-      X0(1:M,1:N/2) = F1(1:M,1:N/2)
-      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
-
-      CALL SLARNV(2, ISEED, M, F1(1,1) )
-      DO i = 1, N-N/2
-         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
-              F1(1,i+1), 1 )
-      END DO
-      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
-      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
-      ELSE
-          ! single trajectory
-      CALL SLARNV(2, ISEED, M, F(1,1) )
-      DO i = 1, N
-         CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
-              F(1,i+1), 1 )
-      END DO
-      X0(1:M,1:N) = F(1:M,1:N)
-      Y0(1:M,1:N) = F(1:M,2:N+1)
-      END IF
-
-      XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY )
-      YNORM = SLANGE( 'F', M, N, Y0, LDX, WDUMMY )
-!............................................................
-
-      DO iJOBZ = 1, 4
-
-          SELECT CASE ( iJOBZ )
-          CASE(1)
-              JOBZ   = 'V' ! Ritz vectors will be computed
-              RESIDS = 'R' ! Residuals will be computed
-          CASE(2)
-              JOBZ   = 'V'
-              RESIDS = 'N'
-          CASE(3)
-              JOBZ   = 'F' ! Ritz vectors in factored form
-              RESIDS = 'N'
-          CASE(4)
-              JOBZ   = 'N'
-              RESIDS = 'N'
-          END SELECT
-
-      DO iJOBREF = 1, 3
-
-          SELECT CASE ( iJOBREF )
-          CASE(1)
-              JOBREF = 'R' ! Data for refined Ritz vectors
-          CASE(2)
-              JOBREF = 'E' ! Exact DMD vectors
-          CASE(3)
-              JOBREF = 'N'
-          END SELECT
-
-      DO iSCALE = 1, 4
-
-          SELECT CASE ( iSCALE )
-          CASE(1)
-              SCALE = 'S' ! X data normalized
-          CASE(2)
-              SCALE = 'C' ! X normalized, consist. check
-          CASE(3)
-              SCALE = 'Y' ! Y data normalized
-          CASE(4)
-              SCALE = 'N'
-          END SELECT
-
-      DO iNRNK = -1, -2, -1
-          ! Two truncation strategies. The "-2" case for R&D
-          ! purposes only - it uses possibly low accuracy small
-          ! singular values, in which case the formulas used in
-          ! the DMD are highly sensitive.
-          NRNK   = iNRNK
-
-      DO iWHTSVD = 1, 4
-          ! Check all four options to compute the POD basis
-          ! via the SVD.
-          WHTSVD   = iWHTSVD
-
-      DO LWMINOPT = 1, 2
-          ! Workspace query for the minimal (1) and for the optimal
-          ! (2) workspace lengths determined by workspace query.
-
-       X(1:M,1:N) = X0(1:M,1:N)
-       Y(1:M,1:N) = Y0(1:M,1:N)
-
-       ! SGEDMD: Workspace query and workspace allocation
-       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
-            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
-            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
-            IDUMMY, -1, INFO )
-
-       LIWORK = IDUMMY(1)
-       ALLOCATE( IWORK(LIWORK) )
-       LWORK = INT(WDUMMY(LWMINOPT))
-       ALLOCATE( WORK(LWORK) )
-
-       ! SGEDMD test: CALL SGEDMD
-       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
-            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
-            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
-            IWORK, LIWORK, INFO )
-
-       SINGVX(1:N) = WORK(1:N)
-
-       !...... SGEDMD check point
-       IF ( LSAME(JOBZ,'V')  ) THEN
-          ! Check that Z = X*W, on return from SGEDMD
-          ! This checks that the returned aigenvectors in Z are
-          ! the product of the SVD'POD basis returned in X
-          ! and the eigenvectors of the rayleigh quotient
-          ! returned in W
-          CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
-                      ZERO, Z1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-             CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
-             TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) )
-          END DO
-          TMP_ZXW = MAX(TMP_ZXW, TMP )
-
-          IF ( TMP_ZXW > 10*M*EPS ) THEN
-              NFAIL_Z_XV = NFAIL_Z_XV + 1
-          END IF
-
-       END IF
-
-       !...... SGEDMD check point
-       IF ( LSAME(JOBREF,'R') ) THEN
-           ! The matrix A*U is returned for computing refined Ritz vectors.
-           ! Check that A*U is computed correctly using the formula
-           ! A*U = Y * V * inv(SIGMA). This depends on the
-           ! accuracy in the computed singular values and vectors of X.
-           ! See the paper for an error analysis.
-           ! Note that the left singular vectors of the input matrix X
-           ! are returned in the array X.
-           CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
-                      ZERO, Z1, LDZ )
-           TMP = ZERO
-           DO i = 1, K
-              CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
-              TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * &
-                       SINGVX(K)/(ANORM*SINGVX(1)) )
-           END DO
-           TMP_AU = MAX( TMP_AU, TMP )
-
-           IF ( TMP > TOL2 ) THEN
-               NFAIL_AU = NFAIL_AU + 1
-           END IF
-
-       ELSEIF ( LSAME(JOBREF,'E') ) THEN
-       ! The unscaled vectors of the Exact DMD are computed.
-       ! This option is included for the sake of completeness,
-       ! for users who prefer the Exact DMD vectors. The
-       ! returned vectors are in the real form, in the same way
-       ! as the Ritz vectors. Here we just save the vectors
-       ! and test them separately using a Matlab script.
-
-       CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
-       i=1
-       DO WHILE ( i <= K )
-       IF ( IEIG(i) == ZERO ) THEN
-        ! have a real eigenvalue with real eigenvector
-        CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
-        RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1)
-        i = i + 1
-       ELSE
-       ! Have a complex conjugate pair
-       ! REIG(i) +- sqrt(-1)*IMEIG(i).
-       ! Since all computation is done in real
-       ! arithmetic, the formula for the residual
-       ! is recast for real representation of the
-       ! complex conjugate eigenpair. See the
-       ! description of RES.
-       AB(1,1) =  REIG(i)
-       AB(2,1) = -IEIG(i)
-       AB(1,2) =  IEIG(i)
-       AB(2,2) =  REIG(i)
-       CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
-                   M, AB, 2, ONE, Y1(1,i), M )
-       RESEX(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
-                    WORK )/ SLANGE( 'F', M, 2, AU(1,i), M, &
-                    WORK )
-       RESEX(i+1) = RESEX(i)
-       i = i + 2
-       END IF
-       END DO
-
-       END IF
-
-      !...... SGEDMD check point
-      IF ( LSAME(RESIDS, 'R') ) THEN
-          ! Compare the residuals returned by SGEDMD with the
-          ! explicitly computed residuals using the matrix A.
-          ! Compute explicitly Y1 = A*Z
-          CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
-          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-          ! of the invariant subspaces that correspond to complex conjugate
-          ! pairs of eigencalues. (See the description of Z in SGEDMD,)
-          i = 1
-          DO WHILE ( i <= K )
-            IF ( IEIG(i) == ZERO ) THEN
-                ! have a real eigenvalue with real eigenvector
-                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
-                RES1(i) = SNRM2( M, Y1(1,i), 1)
-                i = i + 1
-            ELSE
-               ! Have a complex conjugate pair
-               ! REIG(i) +- sqrt(-1)*IMEIG(i).
-               ! Since all computation is done in real
-               ! arithmetic, the formula for the residual
-               ! is recast for real representation of the
-               ! complex conjugate eigenpair. See the
-               ! description of RES.
-               AB(1,1) =  REIG(i)
-               AB(2,1) = -IEIG(i)
-               AB(1,2) =  IEIG(i)
-               AB(2,2) =  REIG(i)
-               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                           M, AB, 2, ONE, Y1(1,i), M )
-               RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
-                                  WORK )
-               RES1(i+1) = RES1(i)
-               i = i + 2
-            END IF
-          END DO
-          TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                    SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_REZ = MAX( TMP_REZ, TMP )
-
-          IF ( TMP > TOL2 ) THEN
-              NFAIL_REZ = NFAIL_REZ + 1
-          END IF
-
-         IF ( LSAME(JOBREF,'E') ) THEN
-            TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
-          END DO
-          TMP_EX = MAX(TMP_EX,TMP)
-         END IF
-
-      END IF
-
-      ! ... store the results for inspection
-      DO i = 1, K
-          LAMBDA(i,1) = REIG(i)
-          LAMBDA(i,2) = IEIG(i)
-      END DO
-
-      DEALLOCATE(IWORK)
-      DEALLOCATE(WORK)
-
-      !======================================================================
-      !     Now test the SGEDMDQ, if requested.
-      !======================================================================
-      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
-          RJOBDATA(2) = 1
-          F1 = F
-
-          ! SGEDMDQ test: Workspace query and workspace allocation
-          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
-               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
-               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
-               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
-               -1, IDUMMY, -1, INFO )
-          LIWORK = IDUMMY(1)
-          ALLOCATE( IWORK(LIWORK) )
-          LWORK = INT(WDUMMY(LWMINOPT))
-          ALLOCATE(WORK(LWORK))
-
-          ! SGEDMDQ test: CALL SGEDMDQ
-          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
-               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
-               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
-               RES, AU, LDAU, W, LDW, S, LDS,              &
-               WORK, LWORK, IWORK, LIWORK, INFO )
-
-          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
-
-          !..... SGEDMDQ check point
-          IF ( KQ /= K ) THEN
-             KDIFF = KDIFF+1
-          END IF
-
-          TMP = ZERO
-          DO i = 1, MIN(K, KQ)
-             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
-                                   SINGVX(1) )
-          END DO
-          SVDIFF = MAX( SVDIFF, TMP )
-          IF ( TMP > M*N*EPS ) THEN
-             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
-          END IF
-
-          !..... SGEDMDQ check point
-          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
-             ! Check that the QR factors are computed and returned
-             ! as requested. The residual ||F-Q*R||_F / ||F||_F
-             ! is compared to M*N*EPS.
-             F2 = F
-             CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
-                         LDF, Y, LDY, ONE, F2, LDF )
-             TMP_FQR = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
-                   SLANGE( 'F', M, N+1, F,  LDF, WORK )
-             IF ( TMP_FQR > TOL2 ) THEN
-                 NFAIL_F_QR = NFAIL_F_QR + 1
-             END IF
-          END IF
-
-          !..... SGEDMDQ checkpoint
-          IF ( LSAME(RESIDS, 'R') ) THEN
-              ! Compare the residuals returned by SGEDMDQ with the
-              ! explicitly computed residuals using the matrix A.
-              ! Compute explicitly Y1 = A*Z
-              CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M )
-              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-              ! of the invariant subspaces that correspond to complex conjugate
-              ! pairs of eigencalues. (See the description of Z in SGEDMDQ)
-              i = 1
-              DO WHILE ( i <= KQ )
-                IF ( IEIGQ(i) == ZERO ) THEN
-                    ! have a real eigenvalue with real eigenvector
-                    CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
-                    ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
-                    RES1(i) = SNRM2( M, Y1(1,i), 1)
-                    i = i + 1
-                ELSE
-                   ! Have a complex conjugate pair
-                   ! REIG(i) +- sqrt(-1)*IMEIG(i).
-                   ! Since all computation is done in real
-                   ! arithmetic, the formula for the residual
-                   ! is recast for real representation of the
-                   ! complex conjugate eigenpair. See the
-                   ! description of RES.
-                   AB(1,1) =  REIGQ(i)
-                   AB(2,1) = -IEIGQ(i)
-                   AB(1,2) =  IEIGQ(i)
-                   AB(2,2) =  REIGQ(i)
-                   CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
-                               M, AB, 2, ONE, Y1(1,i), M )             ! BLAS CALL
-                   ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
-                   RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
-                                      WORK )                           ! LAPACK CALL
-                   RES1(i+1) = RES1(i)
-                   i = i + 2
-                END IF
-              END DO
-              TMP = ZERO
-              DO i = 1, KQ
-              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                  SINGVQX(K)/(ANORM*SINGVQX(1)) )
-              END DO
-              TMP_REZQ = MAX( TMP_REZQ, TMP )
-              IF ( TMP > TOL2 ) THEN
-                  NFAIL_REZQ = NFAIL_REZQ + 1
-              END IF
-
-          END IF
-
-          DO i = 1, KQ
-              LAMBDAQ(i,1) = REIGQ(i)
-              LAMBDAQ(i,2) = IEIGQ(i)
-          END DO
-
-      DEALLOCATE(WORK)
-      DEALLOCATE(IWORK)
-      END IF            ! TEST_QRDMD
-!======================================================================
-
-      END DO ! LWMINOPT
-      !write(*,*) 'LWMINOPT loop completed'
-      END DO ! WHTSVD LOOP
-      !write(*,*) 'WHTSVD loop completed'
-      END DO ! NRNK LOOP
-      !write(*,*) 'NRNK loop completed'
-      END DO ! SCALE LOOP
-      !write(*,*) 'SCALE loop completed'
-      END DO ! JOBF LOOP
-      !write(*,*) 'JOBREF loop completed'
-      END DO ! JOBZ LOOP
-      !write(*,*) 'JOBZ loop completed'
-
-      END DO ! MODE -6:6
-      !write(*,*) 'MODE loop completed'
-      END DO ! 1 or 2 trajectories
-      !write(*,*) 'trajectories  loop completed'
-
-      DEALLOCATE(A)
-      DEALLOCATE(AC)
-      DEALLOCATE(DA)
-      DEALLOCATE(DL)
-      DEALLOCATE(F)
-      DEALLOCATE(F1)
-      DEALLOCATE(F2)
-      DEALLOCATE(X)
-      DEALLOCATE(X0)
-      DEALLOCATE(SINGVX)
-      DEALLOCATE(SINGVQX)
-      DEALLOCATE(Y)
-      DEALLOCATE(Y0)
-      DEALLOCATE(Y1)
-      DEALLOCATE(Z)
-      DEALLOCATE(Z1)
-      DEALLOCATE(RES)
-      DEALLOCATE(RES1)
-      DEALLOCATE(RESEX)
-      DEALLOCATE(REIG)
-      DEALLOCATE(IEIG)
-      DEALLOCATE(REIGQ)
-      DEALLOCATE(IEIGQ)
-      DEALLOCATE(REIGA)
-      DEALLOCATE(IEIGA)
-      DEALLOCATE(VA)
-      DEALLOCATE(LAMBDA)
-      DEALLOCATE(LAMBDAQ)
-      DEALLOCATE(EIGA)
-      DEALLOCATE(W)
-      DEALLOCATE(AU)
-      DEALLOCATE(S)
-
-!............................................................
-      !     Generate random M-by-M matrix A. Use DLATMR from
-      END DO ! LLOOP
-
-
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for SGEDMD :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-      IF ( NFAIL_Z_XV == 0 ) THEN
-          WRITE(*,*) '>>>> Z - U*V test PASSED.'
-      ELSE
-          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
-          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
-      END IF
-      IF ( NFAIL_AU == 0 ) THEN
-          WRITE(*,*) '>>>> A*U test PASSED. '
-      ELSE
-          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
-          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
-      END IF
-
-      IF ( NFAIL_REZ == 0 ) THEN
-          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
-          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
-      END IF
-
-      IF ( NFAIL_TOTAL == 0 ) THEN
-          WRITE(*,*) '>>>> SGEDMD :: ALL TESTS PASSED.'
-      ELSE
-          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
-          WRITE(*,*) '>>>>>>>>>>>>>> SGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      IF ( TEST_QRDMD ) THEN
-      WRITE(*,*)
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for SGEDMDQ :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-
-      IF ( NFAIL_SVDIFF == 0 ) THEN
-          WRITE(*,*) '>>>> SGEDMD and SGEDMDQ computed singular &
-              &values test PASSED.'
-      ELSE
-          WRITE(*,*) 'SGEDMD and SGEDMDQ discrepancies in &
-              &the singular values unacceptable ', &
-              NFAIL_SVDIFF, ' times. Test FAILED.'
-          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
-      END IF
-
-      IF ( NFAIL_F_QR == 0 ) THEN
-          WRITE(*,*) '>>>> F - Q*R test PASSED.'
-      ELSE
-          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
-          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
-      END IF
-
-      IF ( NFAIL_REZQ == 0 ) THEN
-          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
-          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
-      END IF
-
-      IF ( NFAILQ_TOTAL == 0 ) THEN
-          WRITE(*,*) '>>>>>>> SGEDMDQ :: ALL TESTS PASSED.'
-      ELSE
-         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
-         WRITE(*,*) '>>>>>>> SGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      END IF
-
-      WRITE(*,*)
-      WRITE(*,*) 'Test completed.'
-      STOP
-      END
+!     This is a test program for checking the implementations of
+!     the implementations of the following subroutines
+!
+!     SGEDMD  for computation of the
+!             Dynamic Mode Decomposition (DMD)
+!     SGEDMDQ for computation of a
+!             QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!...  .........................................................
+!     NOTE:
+!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
+!     (optionally used in xGEDMD) may cause access violation
+!     error for x = S, D, C, Z, but only if called with the
+!     work space query. (At least in our Windows 10 MSVS 2019.)
+!     The problem can be mitigated by downloading the source
+!     code of xGESVDQ from the LAPACK repository and use it
+!     localy instead of the one in the MKL. This seems to
+!     indicate that the problem is indeed in the MKL.
+!     This problem did not appear whith Intel MKL 2022.2.0.
+!
+!     NOTE:
+!     xGESDD seems to have a problem with workspace. In some
+!     cases the length of the optimal workspace is returned
+!     smaller than the minimal workspace, as specified in the
+!     code. As a precaution, all optimal workspaces are
+!     set as MAX(minimal, optimal).
+!     Latest implementations of complex xGESDD have different
+!     length of the real worksapce. We use max value over
+!     two versions.
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real32
+      IMPLICIT NONE
+      integer, parameter :: WP = real32
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
+                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
+                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
+                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
+                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
+                     SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
+      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX, XNORM, YNORM
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
+                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
+      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
+      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+
+      LOGICAL          TEST_QRDMD
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL SAXPY,  SGEEV, SGEMM, SGEMV, SLACPY, SLASCL
+      EXTERNAL SLARNV, SLATMR
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL SGEDMD, SGEDMDQ
+
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         SLAMCH, SLANGE, SNRM2
+      REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2
+      EXTERNAL         LSAME
+      LOGICAL          LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX
+!............................................................
+
+      ! The test is always in pairs : ( SGEDMD and SGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on SGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = SLAMCH( 'P' )  ! machine precision SP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      KDIFF      = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA = M
+      LDF = M
+      LDX = MAX(M,N+1)
+      LDY = MAX(M,N+1)
+      LDW = N
+      LDZ = M
+      LDAU = MAX(M,N+1)
+      LDS = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      !
+      ! Test the subroutines on real data snapshots. All
+      ! computation is done in real arithmetic, even when
+      ! Koopman eigenvalues and modes are real.
+      !
+      ! Allocate memory space
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( DA(M) )
+      ALLOCATE( DL(M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( F2(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(M,N+1) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N)  )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( REIG(N) )
+      ALLOCATE( IEIG(N) )
+      ALLOCATE( REIGQ(N) )
+      ALLOCATE( IEIGQ(N) )
+      ALLOCATE( REIGA(M) )
+      ALLOCATE( IEIGA(M) )
+      ALLOCATE( VA(LDA,M) )
+      ALLOCATE( LAMBDA(N,2) )
+      ALLOCATE( LAMBDAQ(N,2) )
+      ALLOCATE( EIGA(M,2) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( S(N,N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of intial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D8
+      DMAX = 1.0D2
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D2
+      MODER = 6
+      CONDR = 1.0D2
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE(DR(N))
+      CALL SLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
+                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
+                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE(IWORK)
+      DEALLOCATE(DR)
+
+      LWORK = 4*M+1
+      ALLOCATE(WORK(LWORK))
+      AC  = A
+      CALL SGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
+                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      TMP = ZERO
+      DO i = 1, M
+          EIGA(i,1) = REIGA(i)
+          EIGA(i,2) = IEIGA(i)
+          TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
+      END DO
+
+      ! Scale A to have the desirable spectral radius.
+      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
+      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
+
+      ! Compute the norm of A
+      ANORM = SLANGE( 'F', N, N, A, M, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data with two inital conditions
+      CALL SLARNV(2, ISEED, M, F1(1,1) )
+      F1(1:M,1) = 1.0E-10*F1(1:M,1)
+      DO i = 1, N/2
+         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N/2) = F1(1:M,1:N/2)
+      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
+
+      CALL SLARNV(2, ISEED, M, F1(1,1) )
+      DO i = 1, N-N/2
+         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
+      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
+      ELSE
+          ! single trajectory
+      CALL SLARNV(2, ISEED, M, F(1,1) )
+      DO i = 1, N
+         CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
+              F(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N) = F(1:M,1:N)
+      Y0(1:M,1:N) = F(1:M,2:N+1)
+      END IF
+
+      XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY )
+      YNORM = SLANGE( 'F', M, N, Y0, LDX, WDUMMY )
+!............................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V' ! Ritz vectors will be computed
+              RESIDS = 'R' ! Residuals will be computed
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F' ! Ritz vectors in factored form
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R' ! Data for refined Ritz vectors
+          CASE(2)
+              JOBREF = 'E' ! Exact DMD vectors
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S' ! X data normalized
+          CASE(2)
+              SCALE = 'C' ! X normalized, consist. check
+          CASE(3)
+              SCALE = 'Y' ! Y data normalized
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          ! Two truncation strategies. The "-2" case for R&D
+          ! purposes only - it uses possibly low accuracy small
+          ! singular values, in which case the formulas used in
+          ! the DMD are highly sensitive.
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1, 4
+          ! Check all four options to compute the POD basis
+          ! via the SVD.
+          WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+          ! Workspace query for the minimal (1) and for the optimal
+          ! (2) workspace lengths determined by workspace query.
+
+       X(1:M,1:N) = X0(1:M,1:N)
+       Y(1:M,1:N) = Y0(1:M,1:N)
+
+       ! SGEDMD: Workspace query and workspace allocation
+       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
+            IDUMMY, -1, INFO )
+
+       LIWORK = IDUMMY(1)
+       ALLOCATE( IWORK(LIWORK) )
+       LWORK = INT(WDUMMY(LWMINOPT))
+       ALLOCATE( WORK(LWORK) )
+
+       ! SGEDMD test: CALL SGEDMD
+       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
+            IWORK, LIWORK, INFO )
+
+       SINGVX(1:N) = WORK(1:N)
+
+       !...... SGEDMD check point
+       IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from SGEDMD
+          ! This checks that the returned aigenvectors in Z are
+          ! the product of the SVD'POD basis returned in X
+          ! and the eigenvectors of the rayleigh quotient
+          ! returned in W
+          CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                      ZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+
+          IF ( TMP_ZXW > 10*M*EPS ) THEN
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+          END IF
+
+       END IF
+
+       !...... SGEDMD check point
+       IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
+                      ZERO, Z1, LDZ )
+           TMP = ZERO
+           DO i = 1, K
+              CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
+              TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * &
+                       SINGVX(K)/(ANORM*SINGVX(1)) )
+           END DO
+           TMP_AU = MAX( TMP_AU, TMP )
+
+           IF ( TMP > TOL2 ) THEN
+               NFAIL_AU = NFAIL_AU + 1
+           END IF
+
+       ELSEIF ( LSAME(JOBREF,'E') ) THEN
+       ! The unscaled vectors of the Exact DMD are computed.
+       ! This option is included for the sake of completeness,
+       ! for users who prefer the Exact DMD vectors. The
+       ! returned vectors are in the real form, in the same way
+       ! as the Ritz vectors. Here we just save the vectors
+       ! and test them separately using a Matlab script.
+
+       CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
+       i=1
+       DO WHILE ( i <= K )
+       IF ( IEIG(i) == ZERO ) THEN
+        ! have a real eigenvalue with real eigenvector
+        CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
+        RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1)
+        i = i + 1
+       ELSE
+       ! Have a complex conjugate pair
+       ! REIG(i) +- sqrt(-1)*IMEIG(i).
+       ! Since all computation is done in real
+       ! arithmetic, the formula for the residual
+       ! is recast for real representation of the
+       ! complex conjugate eigenpair. See the
+       ! description of RES.
+       AB(1,1) =  REIG(i)
+       AB(2,1) = -IEIG(i)
+       AB(1,2) =  IEIG(i)
+       AB(2,2) =  REIG(i)
+       CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
+                   M, AB, 2, ONE, Y1(1,i), M )
+       RESEX(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
+                    WORK )/ SLANGE( 'F', M, 2, AU(1,i), M, &
+                    WORK )
+       RESEX(i+1) = RESEX(i)
+       i = i + 2
+       END IF
+       END DO
+
+       END IF
+
+      !...... SGEDMD check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by SGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigencalues. (See the description of Z in SGEDMD,)
+          i = 1
+          DO WHILE ( i <= K )
+            IF ( IEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
+                RES1(i) = SNRM2( M, Y1(1,i), 1)
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IEIG(i)
+               AB(1,2) =  IEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           M, AB, 2, ONE, Y1(1,i), M )
+               RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
+                                  WORK )
+               RES1(i+1) = RES1(i)
+               i = i + 2
+            END IF
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                    SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_REZ = NFAIL_REZ + 1
+          END IF
+
+         IF ( LSAME(JOBREF,'E') ) THEN
+            TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+          END DO
+          TMP_EX = MAX(TMP_EX,TMP)
+         END IF
+
+      END IF
+
+      ! ... store the results for inspection
+      DO i = 1, K
+          LAMBDA(i,1) = REIG(i)
+          LAMBDA(i,2) = IEIG(i)
+      END DO
+
+      DEALLOCATE(IWORK)
+      DEALLOCATE(WORK)
+
+      !======================================================================
+      !     Now test the SGEDMDQ, if requested.
+      !======================================================================
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+          RJOBDATA(2) = 1
+          F1 = F
+
+          ! SGEDMDQ test: Workspace query and workspace allocation
+          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
+               -1, IDUMMY, -1, INFO )
+          LIWORK = IDUMMY(1)
+          ALLOCATE( IWORK(LIWORK) )
+          LWORK = INT(WDUMMY(LWMINOPT))
+          ALLOCATE(WORK(LWORK))
+
+          ! SGEDMDQ test: CALL SGEDMDQ
+          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS,              &
+               WORK, LWORK, IWORK, LIWORK, INFO )
+
+          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
+
+          !..... SGEDMDQ check point
+          IF ( KQ /= K ) THEN
+             KDIFF = KDIFF+1
+          END IF
+
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+          END IF
+
+          !..... SGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+             ! Check that the QR factors are computed and returned
+             ! as requested. The residual ||F-Q*R||_F / ||F||_F
+             ! is compared to M*N*EPS.
+             F2 = F
+             CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
+                         LDF, Y, LDY, ONE, F2, LDF )
+             TMP_FQR = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
+                   SLANGE( 'F', M, N+1, F,  LDF, WORK )
+             IF ( TMP_FQR > TOL2 ) THEN
+                 NFAIL_F_QR = NFAIL_F_QR + 1
+             END IF
+          END IF
+
+          !..... SGEDMDQ checkpoint
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by SGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+              ! of the invariant subspaces that correspond to complex conjugate
+              ! pairs of eigencalues. (See the description of Z in SGEDMDQ)
+              i = 1
+              DO WHILE ( i <= KQ )
+                IF ( IEIGQ(i) == ZERO ) THEN
+                    ! have a real eigenvalue with real eigenvector
+                    CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
+                    ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
+                    RES1(i) = SNRM2( M, Y1(1,i), 1)
+                    i = i + 1
+                ELSE
+                   ! Have a complex conjugate pair
+                   ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                   ! Since all computation is done in real
+                   ! arithmetic, the formula for the residual
+                   ! is recast for real representation of the
+                   ! complex conjugate eigenpair. See the
+                   ! description of RES.
+                   AB(1,1) =  REIGQ(i)
+                   AB(2,1) = -IEIGQ(i)
+                   AB(1,2) =  IEIGQ(i)
+                   AB(2,2) =  REIGQ(i)
+                   CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                               M, AB, 2, ONE, Y1(1,i), M )             ! BLAS CALL
+                   ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+                   RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
+                                      WORK )                           ! LAPACK CALL
+                   RES1(i+1) = RES1(i)
+                   i = i + 2
+                END IF
+              END DO
+              TMP = ZERO
+              DO i = 1, KQ
+              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                  SINGVQX(K)/(ANORM*SINGVQX(1)) )
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP > TOL2 ) THEN
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+              END IF
+
+          END IF
+
+          DO i = 1, KQ
+              LAMBDAQ(i,1) = REIGQ(i)
+              LAMBDAQ(i,2) = IEIGQ(i)
+          END DO
+
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+      END IF            ! TEST_QRDMD
+!======================================================================
+
+      END DO ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO ! WHTSVD LOOP
+      !write(*,*) 'WHTSVD loop completed'
+      END DO ! NRNK LOOP
+      !write(*,*) 'NRNK loop completed'
+      END DO ! SCALE LOOP
+      !write(*,*) 'SCALE loop completed'
+      END DO ! JOBF LOOP
+      !write(*,*) 'JOBREF loop completed'
+      END DO ! JOBZ LOOP
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE(A)
+      DEALLOCATE(AC)
+      DEALLOCATE(DA)
+      DEALLOCATE(DL)
+      DEALLOCATE(F)
+      DEALLOCATE(F1)
+      DEALLOCATE(F2)
+      DEALLOCATE(X)
+      DEALLOCATE(X0)
+      DEALLOCATE(SINGVX)
+      DEALLOCATE(SINGVQX)
+      DEALLOCATE(Y)
+      DEALLOCATE(Y0)
+      DEALLOCATE(Y1)
+      DEALLOCATE(Z)
+      DEALLOCATE(Z1)
+      DEALLOCATE(RES)
+      DEALLOCATE(RES1)
+      DEALLOCATE(RESEX)
+      DEALLOCATE(REIG)
+      DEALLOCATE(IEIG)
+      DEALLOCATE(REIGQ)
+      DEALLOCATE(IEIGQ)
+      DEALLOCATE(REIGA)
+      DEALLOCATE(IEIGA)
+      DEALLOCATE(VA)
+      DEALLOCATE(LAMBDA)
+      DEALLOCATE(LAMBDAQ)
+      DEALLOCATE(EIGA)
+      DEALLOCATE(W)
+      DEALLOCATE(AU)
+      DEALLOCATE(S)
+
+!............................................................
+      !     Generate random M-by-M matrix A. Use DLATMR from
+      END DO ! LLOOP
+
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for SGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>> SGEDMD :: ALL TESTS PASSED.'
+      ELSE
+          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+          WRITE(*,*) '>>>>>>>>>>>>>> SGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for SGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> SGEDMD and SGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+          WRITE(*,*) 'SGEDMD and SGEDMDQ discrepancies in &
+              &the singular values unacceptable ', &
+              NFAIL_SVDIFF, ' times. Test FAILED.'
+          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> SGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> SGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/zchkdmd.f90 b/TESTING/EIG/zchkdmd.f90
index 873d956c40..d22c32efd7 100644
--- a/TESTING/EIG/zchkdmd.f90
+++ b/TESTING/EIG/zchkdmd.f90
@@ -1,745 +1,745 @@
-!     This is a test program for checking the implementations of
-!     the implementations of the following subroutines
-!
-!     ZGEDMD,  for computation of the
-!              Dynamic Mode Decomposition (DMD)
-!     ZGEDMDQ, for computation of a
-!              QR factorization based compressed DMD
-!
-!     Developed and supported by:
-!     ===========================
-!     Developed and coded by Zlatko Drmac, Faculty of Science,
-!     University of Zagreb;  drmac@math.hr
-!     In cooperation with
-!     AIMdyn Inc., Santa Barbara, CA.
-!     ========================================================
-!     How to run the code (compiler, link info)
-!     ========================================================
-!     Compile as FORTRAN 90 (or later) and link with BLAS and
-!     LAPACK libraries.
-!     NOTE: The code is developed and tested on top of the
-!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
-!     using the Intel Fortran compiler.
-!
-!     For developers of the C++ implementation
-!     ========================================================
-!     See the LAPACK++ and Template Numerical Toolkit (TNT)
-!
-!     Note on a development of the GPU HP implementation
-!     ========================================================
-!     Work in progress. See CUDA, MAGMA, SLATE.
-!     NOTE: The four SVD subroutines used in this code are
-!     included as a part of R&D and for the completeness.
-!     This was also an opportunity to test those SVD codes.
-!     If the scaling option is used all four are essentially
-!     equally good. For implementations on HP platforms,
-!     one can use whichever SVD is available.
-!............................................................
-
-!............................................................
-!............................................................
-!
-      PROGRAM DMD_TEST
-      use iso_fortran_env, only: real64
-      IMPLICIT NONE
-      integer, parameter :: WP = real64
-
-!............................................................
-      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
-      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
-
-      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
-      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
-!............................................................
-      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
-                     RES1, RESEX, SINGVX, SINGVQX, WORK
-      INTEGER      , ALLOCATABLE, DIMENSION(:)   :: IWORK
-      REAL(KIND=WP) :: WDUMMY(2)
-      INTEGER       :: IDUMMY(4), ISEED(4)
-      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
-                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
-                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
-                       TMP_EX
-
-!............................................................
-      COMPLEX(KIND=WP) :: ZMAX
-      INTEGER :: LZWORK
-      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  ZA, ZAC,  &
-                                 ZAU, ZF, ZF0, ZF1, ZS, ZW,       &
-                                 ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1
-      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  ZDA, ZDR, &
-                                       ZDL, ZEIGS, ZEIGSA, ZWORK
-      COMPLEX(KIND=WP) ::  ZDUMMY(22), ZDUM2X2(2,2)
-!............................................................
-      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
-                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp
-      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
-                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
-                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
-                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD,     &
-                 WHTSVDsp
-      INTEGER :: iNRNK, iWHTSVD,  K_TRAJ, LWMINOPT
-      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
-                       SCALE, RESIDS, WANTQ, WANTR
-      LOGICAL :: TEST_QRDMD
-
-!.....external subroutines (BLAS and LAPACK)
-      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
-      EXTERNAL ZGEEV,  ZGEMV, ZLASCL
-      EXTERNAL ZLARNV, ZLATMR
-      EXTERNAL ZAXPY,  ZGEMM
-!.....external subroutines DMD package, part 1
-!     subroutines under test
-      EXTERNAL ZGEDMD, ZGEDMDQ
-!.....external functions (BLAS and LAPACK)
-      EXTERNAL         DLAMCH,  DZNRM2
-      REAL(KIND=WP) :: DLAMCH,  DZNRM2
-      REAL(KIND=WP) ::          ZLANGE
-      EXTERNAL IZAMAX
-      INTEGER  IZAMAX
-      EXTERNAL LSAME
-      LOGICAL  LSAME
-
-      INTRINSIC ABS, INT, MIN, MAX, SIGN
-!............................................................
-
-      ! The test is always in pairs : ( ZGEDMD and ZGEDMDQ )
-      ! because the test includes comparing the results (in pairs).
-!.....................................................................................
-      TEST_QRDMD = .TRUE. ! This code by default performs tests on ZGEDMDQ
-                          ! Since the QR factorizations based algorithm is designed for
-                          ! single trajectory data, only single trajectory tests will
-                          ! be performed with xGEDMDQ.
-      WANTQ = 'Q'
-      WANTR = 'R'
-!.................................................................................
-
-      EPS = DLAMCH( 'P' )  ! machine precision DP
-
-      ! Global counters of failures of some particular tests
-      NFAIL      = 0
-      NFAIL_REZ  = 0
-      NFAIL_REZQ = 0
-      NFAIL_Z_XV = 0
-      NFAIL_F_QR = 0
-      NFAIL_AU   = 0
-      NFAIL_SVDIFF = 0
-      NFAIL_TOTAL  = 0
-      NFAILQ_TOTAL = 0
-
-      DO LLOOP = 1, 4
-
-      WRITE(*,*) 'L Loop Index = ', LLOOP
-
-      ! Set the dimensions of the problem ...
-      WRITE(*,*) 'M = '
-      READ(*,*) M
-      WRITE(*,*) M
-      ! ... and the number of snapshots.
-      WRITE(*,*) 'N = '
-      READ(*,*) N
-      WRITE(*,*) N
-
-      ! ... Test the dimensions
-      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
-          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
-          STOP
-      END IF
-!.............
-      ! The seed inside the LLOOP so that each pass can be reproduced easily.
-      ISEED(1) = 4
-      ISEED(2) = 3
-      ISEED(3) = 2
-      ISEED(4) = 1
-
-      LDA  = M
-      LDF  = M
-      LDX  = M
-      LDY  = M
-      LDW  = N
-      LDZ  = M
-      LDAU = M
-      LDS  = N
-
-      TMP_ZXW  = ZERO
-      TMP_AU   = ZERO
-      TMP_REZ  = ZERO
-      TMP_REZQ = ZERO
-      SVDIFF   = ZERO
-      TMP_EX   = ZERO
-
-      ALLOCATE( ZA(LDA,M) )
-      ALLOCATE( ZAC(LDA,M) )
-      ALLOCATE( ZF(LDF,N+1) )
-      ALLOCATE( ZF0(LDF,N+1) )
-      ALLOCATE( ZF1(LDF,N+1) )
-      ALLOCATE( ZX(LDX,N) )
-      ALLOCATE( ZX0(LDX,N) )
-      ALLOCATE( ZY(LDY,N+1) )
-      ALLOCATE( ZY0(LDY,N+1) )
-      ALLOCATE( ZY1(LDY,N+1) )
-      ALLOCATE( ZAU(LDAU,N) )
-      ALLOCATE( ZW(LDW,N) )
-      ALLOCATE( ZS(LDS,N) )
-      ALLOCATE( ZZ(LDZ,N) )
-      ALLOCATE( ZZ1(LDZ,N) )
-      ALLOCATE( RES(N) )
-      ALLOCATE( RES1(N) )
-      ALLOCATE( RESEX(N) )
-      ALLOCATE( ZEIGS(N) )
-      ALLOCATE( SINGVX(N) )
-      ALLOCATE( SINGVQX(N) )
-
-      TOL  = M*EPS
-      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
-      ! The factor 10 is somewhat arbitrary.
-      TOL2 = 10*M*N*EPS
-
-!.............
-
-      DO K_TRAJ = 1, 2
-      !  Number of intial conditions in the simulation/trajectories (1 or 2)
-
-      COND = 1.0D4
-      ZMAX = (1.0D1,1.0D1)
-      RSIGN = 'F'
-      GRADE = 'N'
-      MODEL = 6
-      CONDL = 1.0D1
-      MODER = 6
-      CONDR = 1.0D1
-      PIVTNG = 'N'
-
-      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
-      DO MODE = 1, 6
-
-      ALLOCATE( IWORK(2*M) )
-      ALLOCATE( ZDA(M) )
-      ALLOCATE( ZDL(M) )
-      ALLOCATE( ZDR(M) )
-
-      CALL ZLATMR( M, M, 'N', ISEED, 'N', ZDA, MODE, COND, &
-                   ZMAX, RSIGN, GRADE, ZDL, MODEL,  CONDL, &
-                   ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
-                   ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO )
-      DEALLOCATE( ZDR )
-      DEALLOCATE( ZDL )
-      DEALLOCATE( ZDA )
-      DEALLOCATE( IWORK )
-
-      LZWORK = MAX(1,2*M)
-      ALLOCATE( ZEIGSA(M) )
-      ALLOCATE( ZWORK(LZWORK) )
-      ALLOCATE( WORK(2*M) )
-      ZAC(1:M,1:M) = ZA(1:M,1:M)
-      CALL ZGEEV( 'N','N', M, ZAC, LDA, ZEIGSA, ZDUM2X2, 2, &
-                  ZDUM2X2, 2, ZWORK, LZWORK, WORK, INFO ) ! LAPACK CALL
-      DEALLOCATE(WORK)
-      DEALLOCATE(ZWORK)
-
-      TMP = ABS(ZEIGSA(IZAMAX(M, ZEIGSA, 1))) ! The spectral radius of ZA
-      ! Scale the matrix ZA to have unit spectral radius.
-      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, &
-                   ZA, LDA, INFO )
-      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, 1, &
-                   ZEIGSA, M, INFO )
-      ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY )
-
-      IF ( K_TRAJ == 2 ) THEN
-          ! generate data as two trajectories
-          ! with two inital conditions
-          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
-          DO i = 1, N/2
-             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
-                  ZZERO, ZF(1,i+1), 1 )
-          END DO
-          ZX0(1:M,1:N/2) = ZF(1:M,1:N/2)
-          ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1)
-
-          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
-          DO i = 1, N-N/2
-             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
-                  ZZERO, ZF(1,i+1), 1 )
-          END DO
-          ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2)
-          ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1)
-      ELSE
-          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
-          DO i = 1, N
-             CALL ZGEMV( 'N', M, M, ZONE, ZA, M, ZF(1,i), 1,  &
-                  ZZERO, ZF(1,i+1), 1 )
-          END DO
-          ZF0(1:M,1:N+1) = ZF(1:M,1:N+1)
-          ZX0(1:M,1:N) = ZF0(1:M,1:N)
-          ZY0(1:M,1:N) = ZF0(1:M,2:N+1)
-      END IF
-
-      DEALLOCATE( ZEIGSA )
-!........................................................................
-
-      DO iJOBZ = 1, 4
-
-          SELECT CASE ( iJOBZ )
-          CASE(1)
-              JOBZ   = 'V'
-              RESIDS = 'R'
-          CASE(2)
-              JOBZ   = 'V'
-              RESIDS = 'N'
-          CASE(3)
-              JOBZ   = 'F'
-              RESIDS = 'N'
-          CASE(4)
-              JOBZ   = 'N'
-              RESIDS = 'N'
-          END SELECT
-
-      DO iJOBREF = 1, 3
-
-          SELECT CASE ( iJOBREF )
-          CASE(1)
-              JOBREF = 'R'
-          CASE(2)
-              JOBREF = 'E'
-          CASE(3)
-              JOBREF = 'N'
-          END SELECT
-
-      DO iSCALE = 1, 4
-
-          SELECT CASE ( iSCALE )
-          CASE(1)
-              SCALE = 'S'
-          CASE(2)
-              SCALE = 'C'
-          CASE(3)
-              SCALE = 'Y'
-          CASE(4)
-              SCALE = 'N'
-          END SELECT
-
-      DO iNRNK = -1, -2, -1
-         NRNK   = iNRNK
-         NRNKsp = iNRNK
-
-      DO iWHTSVD = 1,  3
-         ! Check all four options to compute the POD basis
-         ! via the SVD.
-         WHTSVD   = iWHTSVD
-         WHTSVDsp = iWHTSVD
-
-      DO LWMINOPT = 1, 2
-         ! Workspace query for the minimal (1) and for the optimal
-         ! (2) workspace lengths determined by workspace query.
-
-      ! ZGEDMD is always tested and its results are also used for
-      ! comparisons with ZGEDMDQ.
-
-      ZX(1:M,1:N) = ZX0(1:M,1:N)
-      ZY(1:M,1:N) = ZY0(1:M,1:N)
-
-      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,   &
-                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,    &
-                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,    &
-                   ZW,  LDW, ZS, LDS,  ZDUMMY, -1,        &
-                   WDUMMY, -1, IDUMMY, -1, INFO )
-      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
-                          .OR. ( INFO < 0 ) ) THEN
-           WRITE(*,*) 'Call to ZGEDMD workspace query failed. &
-                      &Check the calling sequence and the code.'
-           WRITE(*,*) 'The error code is ', INFO
-           WRITE(*,*) 'The input parameters were ',      &
-           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
-           M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
-           STOP
-      END IF
-
-      LZWORK = INT(ZDUMMY(LWMINOPT))
-      LWORK  = INT(WDUMMY(1))
-      LIWORK = IDUMMY(1)
-
-      ALLOCATE(ZWORK(LZWORK))
-      ALLOCATE( WORK(LWORK))
-      ALLOCATE(IWORK(LIWORK))
-
-      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
-                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,   &
-                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,   &
-                   ZW,  LDW,  ZS, LDS, ZWORK,  LZWORK,   &
-                   WORK, LWORK, IWORK, LIWORK, INFO )
-
-      IF ( INFO /= 0 ) THEN
-           WRITE(*,*) 'Call to ZGEDMD failed. &
-           &Check the calling sequence and the code.'
-           WRITE(*,*) 'The error code is ', INFO
-           WRITE(*,*) 'The input parameters were ',&
-           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-           M, N, LDX, LDY, NRNK, TOL
-           STOP
-      END IF
-
-      SINGVX(1:N) = WORK(1:N)
-
-      !...... ZGEDMD check point
-      IF ( LSAME(JOBZ,'V')  ) THEN
-          ! Check that Z = X*W, on return from ZGEDMD
-          ! This checks that the returned eigenvectors in Z are
-          ! the product of the SVD'POD basis returned in X
-          ! and the eigenvectors of the rayleigh quotient
-          ! returned in W
-          CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, &
-                      ZZERO, ZZ1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-             CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1)
-             TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) )
-          END DO
-          TMP_ZXW = MAX(TMP_ZXW, TMP )
-          IF ( TMP_ZXW <= 10*M*EPS ) THEN
-              !WRITE(*,*) ' :) .... OK .........ZGEDMD PASSED.'
-          ELSE
-              NFAIL_Z_XV = NFAIL_Z_XV + 1
-              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-      END IF
-
-
-      !...... ZGEDMD check point
-      IF ( LSAME(JOBREF,'R') ) THEN
-           ! The matrix A*U is returned for computing refined Ritz vectors.
-           ! Check that A*U is computed correctly using the formula
-           ! A*U = Y * V * inv(SIGMA). This depends on the
-           ! accuracy in the computed singular values and vectors of X.
-           ! See the paper for an error analysis.
-           ! Note that the left singular vectors of the input matrix X
-           ! are returned in the array X.
-           CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, &
-                      ZZERO, ZZ1, LDZ )
-          TMP = ZERO
-          DO i = 1, K
-            CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1)
-            TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * &
-                     SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_AU = MAX( TMP_AU, TMP )
-          IF ( TMP <= TOL2 ) THEN
-              !WRITE(*,*) ':) .... OK .........ZGEDMD PASSED.'
-          ELSE
-              NFAIL_AU = NFAIL_AU + 1
-              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-      ELSEIF ( LSAME(JOBREF,'E') ) THEN
-       ! The unscaled vectors of the Exact DMD are computed.
-       ! This option is included for the sake of completeness,
-       ! for users who prefer the Exact DMD vectors. The
-       ! returned vectors are in the real form, in the same way
-       ! as the Ritz vectors. Here we just save the vectors
-       ! and test them separately using a Matlab script.
-
-
-       CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY )
-
-               DO i=1, K
-                  ! have a real eigenvalue with real eigenvector
-                CALL ZAXPY( M, -ZEIGS(i), ZAU(1,i), 1, ZY1(1,i), 1 )
-                RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1)
-               END DO
-      END IF
-      !...... ZGEDMD check point
-
-      IF ( LSAME(RESIDS, 'R') ) THEN
-          ! Compare the residuals returned by ZGEDMD with the
-          ! explicitly computed residuals using the matrix A.
-          ! Compute explicitly Y1 = A*Z
-          CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
-          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-          ! of the invariant subspaces that correspond to complex conjugate
-          ! pairs of eigencalues. (See the description of Z in ZGEDMD,)
-
-          DO i=1, K
-                ! have a real eigenvalue with real eigenvector
-                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
-                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
-          END DO
-          TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-                    SINGVX(K)/(ANORM*SINGVX(1)) )
-          END DO
-          TMP_REZ = MAX( TMP_REZ, TMP )
-          IF ( TMP <= TOL2 ) THEN
-              !WRITE(*,*) ':) .... OK ..........ZGEDMD PASSED.'
-          ELSE
-              NFAIL_REZ = NFAIL_REZ + 1
-              WRITE(*,*) ':( ..................ZGEDMD FAILED!', &
-                  'Check the code for implementation errors.'
-              WRITE(*,*) 'The input parameters were ',&
-                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
-                 M, N, LDX, LDY, NRNK, TOL
-          END IF
-
-
-         IF ( LSAME(JOBREF,'E') ) THEN
-            TMP = ZERO
-          DO i = 1, K
-          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
-          END DO
-          TMP_EX = MAX(TMP_EX,TMP)
-         END IF
-
-      END IF
-
-      DEALLOCATE(ZWORK)
-      DEALLOCATE(WORK)
-      DEALLOCATE(IWORK)
-
-      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
-
-      ZF(1:M,1:N+1) = ZF0(1:M,1:N+1)
-
-      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
-                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
-                    NRNK,  TOL, K, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
-                    LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1,   &
-                    WDUMMY,  -1, IDUMMY, -1, INFO )
-
-      LZWORK = INT(ZDUMMY(LWMINOPT))
-      ALLOCATE( ZWORK(LZWORK) )
-      LIWORK = IDUMMY(1)
-      ALLOCATE(IWORK(LIWORK))
-      LWORK = INT(WDUMMY(1))
-      ALLOCATE(WORK(LWORK))
-
-      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
-                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
-                    NRNK,  TOL, KQ, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
-                    LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK,   &
-                    WORK,  LWORK, IWORK, LIWORK, INFO )
-
-      IF ( INFO /= 0 ) THEN
-             WRITE(*,*) 'Call to ZGEDMDQ failed. &
-             &Check the calling sequence and the code.'
-             WRITE(*,*) 'The error code is ', INFO
-             WRITE(*,*) 'The input parameters were ',&
-             SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
-             M, N, LDX, LDY, NRNK, TOL
-             STOP
-      END IF
-      SINGVQX(1:N) = WORK(1:N)
-
-      !..... ZGEDMDQ check point
-
-          IF ( 1 == 0 ) THEN
-              ! Comparison of ZGEDMD and ZGEDMDQ singular values disabled
-          TMP = ZERO
-          DO i = 1, MIN(K, KQ)
-             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
-                                   SINGVX(1) )
-          END DO
-          SVDIFF = MAX( SVDIFF, TMP )
-          IF ( TMP > M*N*EPS ) THEN
-             WRITE(*,*) 'FAILED! Something was wrong with the run.'
-             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
-             DO j =1, 3
-                 write(*,*) j, SINGVX(j), SINGVQX(j)
-                 read(*,*)
-             END DO
-
-          END IF
-          END IF
-
-      !..... ZGEDMDQ check point
-      IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
-         ! Check that the QR factors are computed and returned
-         ! as requested. The residual ||F-Q*R||_F / ||F||_F
-         ! is compared to M*N*EPS.
-         ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1)
-         CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, &
-                     LDF, ZY, LDY, ZONE, ZF1, LDF )
-         TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / &
-               ZLANGE( 'F', M, N+1, ZF0,  LDF, WORK )
-         IF ( TMP_FQR > TOL2 ) THEN
-              WRITE(*,*) 'FAILED! Something was wrong with the run.'
-             NFAIL_F_QR = NFAIL_F_QR + 1
-         ELSE
-             !WRITE(*,*) '........ PASSED.'
-         END IF
-      END IF
-
-      !..... ZGEDMDQ check point
-      IF ( LSAME(RESIDS, 'R') ) THEN
-          ! Compare the residuals returned by ZGEDMDQ with the
-          ! explicitly computed residuals using the matrix A.
-          ! Compute explicitly Y1 = A*Z
-          CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
-          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
-          ! of the invariant subspaces that correspond to complex conjugate
-          ! pairs of eigencalues. (See the description of Z in ZGEDMDQ)
-
-          DO i=1, KQ
-                ! have a real eigenvalue with real eigenvector
-                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
-                ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
-                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
-          END DO
-          TMP = ZERO
-          DO i = 1, KQ
-          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
-              SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
-          END DO
-          TMP_REZQ = MAX( TMP_REZQ, TMP )
-          IF ( TMP <= TOL2 ) THEN
-              !WRITE(*,*) '.... OK ........ ZGEDMDQ PASSED.'
-          ELSE
-              NFAIL_REZQ = NFAIL_REZQ + 1
-              WRITE(*,*) '................ ZGEDMDQ FAILED!', &
-                  'Check the code for implementation errors.'
-              STOP
-          END IF
-
-      END IF
-
-      DEALLOCATE( ZWORK )
-      DEALLOCATE( WORK  )
-      DEALLOCATE( IWORK )
-
-      END IF ! ZGEDMDQ
-
-!.......................................................................................................
-
-      END DO   ! LWMINOPT
-      !write(*,*) 'LWMINOPT loop completed'
-      END DO   ! iWHTSVD
-      !write(*,*) 'WHTSVD loop completed'
-      END DO   ! iNRNK  -2:-1
-      !write(*,*) 'NRNK loop completed'
-      END DO   ! iSCALE  1:4
-      !write(*,*) 'SCALE loop completed'
-      END DO
-      !write(*,*) 'JOBREF loop completed'
-      END DO   ! iJOBZ
-      !write(*,*) 'JOBZ loop completed'
-
-      END DO ! MODE -6:6
-      !write(*,*) 'MODE loop completed'
-      END DO ! 1 or 2 trajectories
-      !write(*,*) 'trajectories  loop completed'
-
-      DEALLOCATE( ZA )
-      DEALLOCATE( ZAC )
-      DEALLOCATE( ZZ )
-      DEALLOCATE( ZF )
-      DEALLOCATE( ZF0 )
-      DEALLOCATE( ZF1 )
-      DEALLOCATE( ZX )
-      DEALLOCATE( ZX0 )
-      DEALLOCATE( ZY )
-      DEALLOCATE( ZY0 )
-      DEALLOCATE( ZY1 )
-      DEALLOCATE( ZAU )
-      DEALLOCATE( ZW )
-      DEALLOCATE( ZS )
-      DEALLOCATE( ZZ1 )
-      DEALLOCATE( RES )
-      DEALLOCATE( RES1 )
-      DEALLOCATE( RESEX )
-      DEALLOCATE( ZEIGS )
-      DEALLOCATE( SINGVX )
-      DEALLOCATE( SINGVQX )
-
-      END DO ! LLOOP
-
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for ZGEDMD :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-      IF ( NFAIL_Z_XV == 0 ) THEN
-         WRITE(*,*) '>>>> Z - U*V test PASSED.'
-      ELSE
-         WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
-         WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
-         NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
-      END IF
-      IF ( NFAIL_AU == 0 ) THEN
-        WRITE(*,*) '>>>> A*U test PASSED. '
-      ELSE
-        WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
-        WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
-      END IF
-
-      IF ( NFAIL_REZ == 0 ) THEN
-        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
-        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
-        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
-      END IF
-
-      IF ( NFAIL_TOTAL == 0 ) THEN
-        WRITE(*,*) '>>>> ZGEDMD :: ALL TESTS PASSED.'
-      ELSE
-        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
-        WRITE(*,*) '>>>>>>>>>>>>>> ZGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      IF ( TEST_QRDMD ) THEN
-      WRITE(*,*)
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*) ' Test summary for ZGEDMDQ :'
-      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
-      WRITE(*,*)
-
-      IF ( NFAIL_SVDIFF == 0 ) THEN
-          WRITE(*,*) '>>>> ZGEDMD and ZGEDMDQ computed singular &
-              &values test PASSED.'
-      ELSE
-         WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in &
-             &the singular values unacceptable ', &
-             NFAIL_SVDIFF, ' times. Test FAILED.'
-         WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
-         WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-         NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
-      END IF
-
-      IF ( NFAIL_F_QR == 0 ) THEN
-          WRITE(*,*) '>>>> F - Q*R test PASSED.'
-      ELSE
-          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
-          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
-      END IF
-
-      IF ( NFAIL_REZQ == 0 ) THEN
-          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
-      ELSE
-          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
-          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
-          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
-          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
-      END IF
-
-      IF ( NFAILQ_TOTAL == 0 ) THEN
-          WRITE(*,*) '>>>>>>> ZGEDMDQ :: ALL TESTS PASSED.'
-      ELSE
-         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
-         WRITE(*,*) '>>>>>>> ZGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
-      END IF
-
-      END IF
-
-      WRITE(*,*)
-      WRITE(*,*) 'Test completed.'
-      STOP
-      END
+!     This is a test program for checking the implementations of
+!     the implementations of the following subroutines
+!
+!     ZGEDMD,  for computation of the
+!              Dynamic Mode Decomposition (DMD)
+!     ZGEDMDQ, for computation of a
+!              QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!............................................................
+
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real64
+      IMPLICIT NONE
+      integer, parameter :: WP = real64
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
+                     RES1, RESEX, SINGVX, SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   :: IWORK
+      REAL(KIND=WP) :: WDUMMY(2)
+      INTEGER       :: IDUMMY(4), ISEED(4)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX
+
+!............................................................
+      COMPLEX(KIND=WP) :: ZMAX
+      INTEGER :: LZWORK
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  ZA, ZAC,  &
+                                 ZAU, ZF, ZF0, ZF1, ZS, ZW,       &
+                                 ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  ZDA, ZDR, &
+                                       ZDL, ZEIGS, ZEIGSA, ZWORK
+      COMPLEX(KIND=WP) ::  ZDUMMY(22), ZDUM2X2(2,2)
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
+                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD,     &
+                 WHTSVDsp
+      INTEGER :: iNRNK, iWHTSVD,  K_TRAJ, LWMINOPT
+      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+      LOGICAL :: TEST_QRDMD
+
+!.....external subroutines (BLAS and LAPACK)
+      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
+      EXTERNAL ZGEEV,  ZGEMV, ZLASCL
+      EXTERNAL ZLARNV, ZLATMR
+      EXTERNAL ZAXPY,  ZGEMM
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL ZGEDMD, ZGEDMDQ
+!.....external functions (BLAS and LAPACK)
+      EXTERNAL         DLAMCH,  DZNRM2
+      REAL(KIND=WP) :: DLAMCH,  DZNRM2
+      REAL(KIND=WP) ::          ZLANGE
+      EXTERNAL IZAMAX
+      INTEGER  IZAMAX
+      EXTERNAL LSAME
+      LOGICAL  LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX, SIGN
+!............................................................
+
+      ! The test is always in pairs : ( ZGEDMD and ZGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on ZGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = DLAMCH( 'P' )  ! machine precision DP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = M
+      LDY  = M
+      LDW  = N
+      LDZ  = M
+      LDAU = M
+      LDS  = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      ALLOCATE( ZA(LDA,M) )
+      ALLOCATE( ZAC(LDA,M) )
+      ALLOCATE( ZF(LDF,N+1) )
+      ALLOCATE( ZF0(LDF,N+1) )
+      ALLOCATE( ZF1(LDF,N+1) )
+      ALLOCATE( ZX(LDX,N) )
+      ALLOCATE( ZX0(LDX,N) )
+      ALLOCATE( ZY(LDY,N+1) )
+      ALLOCATE( ZY0(LDY,N+1) )
+      ALLOCATE( ZY1(LDY,N+1) )
+      ALLOCATE( ZAU(LDAU,N) )
+      ALLOCATE( ZW(LDW,N) )
+      ALLOCATE( ZS(LDS,N) )
+      ALLOCATE( ZZ(LDZ,N) )
+      ALLOCATE( ZZ1(LDZ,N) )
+      ALLOCATE( RES(N) )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( ZEIGS(N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of intial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D4
+      ZMAX = (1.0D1,1.0D1)
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D1
+      MODER = 6
+      CONDR = 1.0D1
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE( ZDA(M) )
+      ALLOCATE( ZDL(M) )
+      ALLOCATE( ZDR(M) )
+
+      CALL ZLATMR( M, M, 'N', ISEED, 'N', ZDA, MODE, COND, &
+                   ZMAX, RSIGN, GRADE, ZDL, MODEL,  CONDL, &
+                   ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO )
+      DEALLOCATE( ZDR )
+      DEALLOCATE( ZDL )
+      DEALLOCATE( ZDA )
+      DEALLOCATE( IWORK )
+
+      LZWORK = MAX(1,2*M)
+      ALLOCATE( ZEIGSA(M) )
+      ALLOCATE( ZWORK(LZWORK) )
+      ALLOCATE( WORK(2*M) )
+      ZAC(1:M,1:M) = ZA(1:M,1:M)
+      CALL ZGEEV( 'N','N', M, ZAC, LDA, ZEIGSA, ZDUM2X2, 2, &
+                  ZDUM2X2, 2, ZWORK, LZWORK, WORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      DEALLOCATE(ZWORK)
+
+      TMP = ABS(ZEIGSA(IZAMAX(M, ZEIGSA, 1))) ! The spectral radius of ZA
+      ! Scale the matrix ZA to have unit spectral radius.
+      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, &
+                   ZA, LDA, INFO )
+      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, 1, &
+                   ZEIGSA, M, INFO )
+      ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data as two trajectories
+          ! with two inital conditions
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N/2
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZX0(1:M,1:N/2) = ZF(1:M,1:N/2)
+          ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1)
+
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N-N/2
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2)
+          ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1)
+      ELSE
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, M, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZF0(1:M,1:N+1) = ZF(1:M,1:N+1)
+          ZX0(1:M,1:N) = ZF0(1:M,1:N)
+          ZY0(1:M,1:N) = ZF0(1:M,2:N+1)
+      END IF
+
+      DEALLOCATE( ZEIGSA )
+!........................................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V'
+              RESIDS = 'R'
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F'
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R'
+          CASE(2)
+              JOBREF = 'E'
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S'
+          CASE(2)
+              SCALE = 'C'
+          CASE(3)
+              SCALE = 'Y'
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+         NRNK   = iNRNK
+         NRNKsp = iNRNK
+
+      DO iWHTSVD = 1,  3
+         ! Check all four options to compute the POD basis
+         ! via the SVD.
+         WHTSVD   = iWHTSVD
+         WHTSVDsp = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+         ! Workspace query for the minimal (1) and for the optimal
+         ! (2) workspace lengths determined by workspace query.
+
+      ! ZGEDMD is always tested and its results are also used for
+      ! comparisons with ZGEDMDQ.
+
+      ZX(1:M,1:N) = ZX0(1:M,1:N)
+      ZY(1:M,1:N) = ZY0(1:M,1:N)
+
+      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,   &
+                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,    &
+                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,    &
+                   ZW,  LDW, ZS, LDS,  ZDUMMY, -1,        &
+                   WDUMMY, -1, IDUMMY, -1, INFO )
+      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
+                          .OR. ( INFO < 0 ) ) THEN
+           WRITE(*,*) 'Call to ZGEDMD workspace query failed. &
+                      &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',      &
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
+           M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
+           STOP
+      END IF
+
+      LZWORK = INT(ZDUMMY(LWMINOPT))
+      LWORK  = INT(WDUMMY(1))
+      LIWORK = IDUMMY(1)
+
+      ALLOCATE(ZWORK(LZWORK))
+      ALLOCATE( WORK(LWORK))
+      ALLOCATE(IWORK(LIWORK))
+
+      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,   &
+                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,   &
+                   ZW,  LDW,  ZS, LDS, ZWORK,  LZWORK,   &
+                   WORK, LWORK, IWORK, LIWORK, INFO )
+
+      IF ( INFO /= 0 ) THEN
+           WRITE(*,*) 'Call to ZGEDMD failed. &
+           &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',&
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+           M, N, LDX, LDY, NRNK, TOL
+           STOP
+      END IF
+
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... ZGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from ZGEDMD
+          ! This checks that the returned eigenvectors in Z are
+          ! the product of the SVD'POD basis returned in X
+          ! and the eigenvectors of the rayleigh quotient
+          ! returned in W
+          CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, &
+                      ZZERO, ZZ1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1)
+             TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) )
+          END DO
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+          IF ( TMP_ZXW <= 10*M*EPS ) THEN
+              !WRITE(*,*) ' :) .... OK .........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      END IF
+
+
+      !...... ZGEDMD check point
+      IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, &
+                      ZZERO, ZZ1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+            CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1)
+            TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * &
+                     SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK .........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+       ! The unscaled vectors of the Exact DMD are computed.
+       ! This option is included for the sake of completeness,
+       ! for users who prefer the Exact DMD vectors. The
+       ! returned vectors are in the real form, in the same way
+       ! as the Ritz vectors. Here we just save the vectors
+       ! and test them separately using a Matlab script.
+
+
+       CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY )
+
+               DO i=1, K
+                  ! have a real eigenvalue with real eigenvector
+                CALL ZAXPY( M, -ZEIGS(i), ZAU(1,i), 1, ZY1(1,i), 1 )
+                RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1)
+               END DO
+      END IF
+      !...... ZGEDMD check point
+
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by ZGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigencalues. (See the description of Z in ZGEDMD,)
+
+          DO i=1, K
+                ! have a real eigenvalue with real eigenvector
+                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
+                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                    SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK ..........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+
+         IF ( LSAME(JOBREF,'E') ) THEN
+            TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+          END DO
+          TMP_EX = MAX(TMP_EX,TMP)
+         END IF
+
+      END IF
+
+      DEALLOCATE(ZWORK)
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+
+      ZF(1:M,1:N+1) = ZF0(1:M,1:N+1)
+
+      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
+                    NRNK,  TOL, K, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
+                    LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1,   &
+                    WDUMMY,  -1, IDUMMY, -1, INFO )
+
+      LZWORK = INT(ZDUMMY(LWMINOPT))
+      ALLOCATE( ZWORK(LZWORK) )
+      LIWORK = IDUMMY(1)
+      ALLOCATE(IWORK(LIWORK))
+      LWORK = INT(WDUMMY(1))
+      ALLOCATE(WORK(LWORK))
+
+      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
+                    NRNK,  TOL, KQ, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
+                    LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK,   &
+                    WORK,  LWORK, IWORK, LIWORK, INFO )
+
+      IF ( INFO /= 0 ) THEN
+             WRITE(*,*) 'Call to ZGEDMDQ failed. &
+             &Check the calling sequence and the code.'
+             WRITE(*,*) 'The error code is ', INFO
+             WRITE(*,*) 'The input parameters were ',&
+             SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
+             M, N, LDX, LDY, NRNK, TOL
+             STOP
+      END IF
+      SINGVQX(1:N) = WORK(1:N)
+
+      !..... ZGEDMDQ check point
+
+          IF ( 1 == 0 ) THEN
+              ! Comparison of ZGEDMD and ZGEDMDQ singular values disabled
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+             WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+             DO j =1, 3
+                 write(*,*) j, SINGVX(j), SINGVQX(j)
+                 read(*,*)
+             END DO
+
+          END IF
+          END IF
+
+      !..... ZGEDMDQ check point
+      IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+         ! Check that the QR factors are computed and returned
+         ! as requested. The residual ||F-Q*R||_F / ||F||_F
+         ! is compared to M*N*EPS.
+         ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1)
+         CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, &
+                     LDF, ZY, LDY, ZONE, ZF1, LDF )
+         TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / &
+               ZLANGE( 'F', M, N+1, ZF0,  LDF, WORK )
+         IF ( TMP_FQR > TOL2 ) THEN
+              WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_F_QR = NFAIL_F_QR + 1
+         ELSE
+             !WRITE(*,*) '........ PASSED.'
+         END IF
+      END IF
+
+      !..... ZGEDMDQ check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by ZGEDMDQ with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigencalues. (See the description of Z in ZGEDMDQ)
+
+          DO i=1, KQ
+                ! have a real eigenvalue with real eigenvector
+                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
+                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, KQ
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+              SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
+          END DO
+          TMP_REZQ = MAX( TMP_REZQ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) '.... OK ........ ZGEDMDQ PASSED.'
+          ELSE
+              NFAIL_REZQ = NFAIL_REZQ + 1
+              WRITE(*,*) '................ ZGEDMDQ FAILED!', &
+                  'Check the code for implementation errors.'
+              STOP
+          END IF
+
+      END IF
+
+      DEALLOCATE( ZWORK )
+      DEALLOCATE( WORK  )
+      DEALLOCATE( IWORK )
+
+      END IF ! ZGEDMDQ
+
+!.......................................................................................................
+
+      END DO   ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO   ! iWHTSVD
+      !write(*,*) 'WHTSVD loop completed'
+      END DO   ! iNRNK  -2:-1
+      !write(*,*) 'NRNK loop completed'
+      END DO   ! iSCALE  1:4
+      !write(*,*) 'SCALE loop completed'
+      END DO
+      !write(*,*) 'JOBREF loop completed'
+      END DO   ! iJOBZ
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE( ZA )
+      DEALLOCATE( ZAC )
+      DEALLOCATE( ZZ )
+      DEALLOCATE( ZF )
+      DEALLOCATE( ZF0 )
+      DEALLOCATE( ZF1 )
+      DEALLOCATE( ZX )
+      DEALLOCATE( ZX0 )
+      DEALLOCATE( ZY )
+      DEALLOCATE( ZY0 )
+      DEALLOCATE( ZY1 )
+      DEALLOCATE( ZAU )
+      DEALLOCATE( ZW )
+      DEALLOCATE( ZS )
+      DEALLOCATE( ZZ1 )
+      DEALLOCATE( RES )
+      DEALLOCATE( RES1 )
+      DEALLOCATE( RESEX )
+      DEALLOCATE( ZEIGS )
+      DEALLOCATE( SINGVX )
+      DEALLOCATE( SINGVQX )
+
+      END DO ! LLOOP
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for ZGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+         WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+         WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+         WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+         NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+        WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+        WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+        WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>> ZGEDMD :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>>>>>>>>> ZGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for ZGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> ZGEDMD and ZGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+         WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in &
+             &the singular values unacceptable ', &
+             NFAIL_SVDIFF, ' times. Test FAILED.'
+         WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+         WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+         NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> ZGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> ZGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END

From aa41ed6b5eddbc0f0003f92809a19edec8eda64c Mon Sep 17 00:00:00 2001
From: Kyle Guinn <elyk03@gmail.com>
Date: Tue, 16 Jan 2024 22:01:41 -0600
Subject: [PATCH 2/4] Fix ZWORK parameter name

SRC/cgedmdq.f90:548: warning: argument 'lzwork' from the argument list of cgedmdq has multiple @param documentation sections
SRC/zgedmdq.f90:546: warning: argument 'lzwork' from the argument list of zgedmdq has multiple @param documentation sections
---
 SRC/cgedmdq.f90 | 2 +-
 SRC/zgedmdq.f90 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/SRC/cgedmdq.f90 b/SRC/cgedmdq.f90
index b4eebee5dc..dd70a34a9e 100644
--- a/SRC/cgedmdq.f90
+++ b/SRC/cgedmdq.f90
@@ -433,7 +433,7 @@
 !>    The leading dimension of the array S.
 !>    \endverbatim
 !.....
-!>    \param[out] LZWORK
+!>    \param[out] ZWORK
 !>    \verbatim
 !>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
 !>    On exit,
diff --git a/SRC/zgedmdq.f90 b/SRC/zgedmdq.f90
index 606c5666e7..ddea5c8375 100644
--- a/SRC/zgedmdq.f90
+++ b/SRC/zgedmdq.f90
@@ -432,7 +432,7 @@
 !>    The leading dimension of the array S.
 !>    \endverbatim
 !.....
-!>    \param[out] LZWORK
+!>    \param[out] ZWORK
 !>    \verbatim
 !>    ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
 !>    On exit,

From 6597d4803b99be03cea40f38f376230efd7c029a Mon Sep 17 00:00:00 2001
From: Kyle Guinn <elyk03@gmail.com>
Date: Tue, 16 Jan 2024 22:16:55 -0600
Subject: [PATCH 3/4] Fix mismatched verbatim/endverbatim commands

SRC/zgedmd.f90:213: warning: reached end of comment while inside a \verbatim block; check for missing \endverbatim tag!
SRC/zgedmdq.f90:710: warning: unexpected command endverbatim
---
 SRC/zgedmd.f90  | 5 ++++-
 SRC/zgedmdq.f90 | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/SRC/zgedmd.f90 b/SRC/zgedmd.f90
index 385b82061f..a2af6e04b9 100644
--- a/SRC/zgedmd.f90
+++ b/SRC/zgedmd.f90
@@ -199,7 +199,7 @@
 !>    (the number of columns of X and Y).
 !>    \endverbatim
 !.....
-!>    \param[in] LDX
+!>    \param[in,out] X
 !>    \verbatim
 !>    X (input/output) COMPLEX(KIND=WP) M-by-N array
 !>    > On entry, X contains the data snapshot matrix X. It is
@@ -210,7 +210,10 @@
 !>    data matrix X, U(:,1:K). All N columns of X contain all
 !>    left singular vectors of the input matrix X.
 !>    See the descriptions of K, Z and W.
+!>    \endverbatim
 !.....
+!>    \param[in] LDX
+!>    \verbatim
 !>    LDX (input) INTEGER, LDX >= M
 !>    The leading dimension of the array X.
 !>    \endverbatim
diff --git a/SRC/zgedmdq.f90 b/SRC/zgedmdq.f90
index ddea5c8375..c16288d0fa 100644
--- a/SRC/zgedmdq.f90
+++ b/SRC/zgedmdq.f90
@@ -196,6 +196,7 @@
 !>    \endverbatim
 !.....
 !>    \param[in] WHTSVD
+!>    \verbatim
 !>    WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !>    Allows for a selection of the SVD algorithm from the
 !>    LAPACK library.

From dead4e4dfd63593c8720479c007fe01143e23478 Mon Sep 17 00:00:00 2001
From: Kyle Guinn <elyk03@gmail.com>
Date: Tue, 16 Jan 2024 22:26:15 -0600
Subject: [PATCH 4/4] Fix missing DONE parameter name

SRC/slaqp3rk.f:585: warning: unexpected command endverbatim
SRC/dlaqp3rk.f:585: warning: unexpected command endverbatim
SRC/claqp3rk.f:579: warning: unexpected command endverbatim
SRC/zlaqp3rk.f:579: warning: unexpected command endverbatim
---
 SRC/claqp3rk.f | 2 +-
 SRC/dlaqp3rk.f | 2 +-
 SRC/slaqp3rk.f | 2 +-
 SRC/zlaqp3rk.f | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/SRC/claqp3rk.f b/SRC/claqp3rk.f
index 8fe5a220ff..a381c53f88 100644
--- a/SRC/claqp3rk.f
+++ b/SRC/claqp3rk.f
@@ -217,7 +217,7 @@
 *>          The leading dimension of the array A. LDA >= max(1,M).
 *> \endverbatim
 *>
-*> \param[out]
+*> \param[out] DONE
 *> \verbatim
 *>          DONE is LOGICAL
 *>          TRUE: a) if the factorization completed before processing
diff --git a/SRC/dlaqp3rk.f b/SRC/dlaqp3rk.f
index 8139345ed7..73926ebd37 100644
--- a/SRC/dlaqp3rk.f
+++ b/SRC/dlaqp3rk.f
@@ -223,7 +223,7 @@
 *>          The leading dimension of the array A. LDA >= max(1,M).
 *> \endverbatim
 *>
-*> \param[out]
+*> \param[out] DONE
 *> \verbatim
 *>          DONE is LOGICAL
 *>          TRUE: a) if the factorization completed before processing
diff --git a/SRC/slaqp3rk.f b/SRC/slaqp3rk.f
index b2dc2b334c..08b8bfcbdd 100644
--- a/SRC/slaqp3rk.f
+++ b/SRC/slaqp3rk.f
@@ -223,7 +223,7 @@
 *>          The leading dimension of the array A. LDA >= max(1,M).
 *> \endverbatim
 *>
-*> \param[out]
+*> \param[out] DONE
 *> \verbatim
 *>          DONE is LOGICAL
 *>          TRUE: a) if the factorization completed before processing
diff --git a/SRC/zlaqp3rk.f b/SRC/zlaqp3rk.f
index 0dd8bf8e35..28bc517c3c 100644
--- a/SRC/zlaqp3rk.f
+++ b/SRC/zlaqp3rk.f
@@ -217,7 +217,7 @@
 *>          The leading dimension of the array A. LDA >= max(1,M).
 *> \endverbatim
 *>
-*> \param[out]
+*> \param[out] DONE
 *> \verbatim
 *>          DONE is LOGICAL
 *>          TRUE: a) if the factorization completed before processing