From ef93c62eb7742ada69c082c8e21af7b4b7a951d1 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sun, 29 Dec 2019 23:31:40 +0100
Subject: [PATCH] Update LAPACK to 3.9.0

---
 lapack-netlib/SRC/ilaenv.f               |   17 +-
 lapack-netlib/SRC/ilaenv2stage.f         |    4 +-
 lapack-netlib/SRC/iparam2stage.F         |   10 +-
 lapack-netlib/SRC/iparmq.f               |    6 +-
 lapack-netlib/SRC/meson.build            |   11 +
 lapack-netlib/SRC/sbdsvdx.f              |    2 +-
 lapack-netlib/SRC/scombssq.f             |   92 ++
 lapack-netlib/SRC/sgbrfsx.f              |   10 +-
 lapack-netlib/SRC/sgbsvxx.f              |   10 +-
 lapack-netlib/SRC/sgebak.f               |    8 +-
 lapack-netlib/SRC/sgeesx.f               |    4 +-
 lapack-netlib/SRC/sgejsv.f               |   54 +-
 lapack-netlib/SRC/sgelq.f                |   19 +-
 lapack-netlib/SRC/sgelq2.f               |   18 +-
 lapack-netlib/SRC/sgelqf.f               |   16 +-
 lapack-netlib/SRC/sgelqt.f               |    2 +
 lapack-netlib/SRC/sgelqt3.f              |    2 +
 lapack-netlib/SRC/sgemlq.f               |    3 +-
 lapack-netlib/SRC/sgemlqt.f              |    2 +
 lapack-netlib/SRC/sgemqr.f               |    3 +-
 lapack-netlib/SRC/sgeqr.f                |   20 +-
 lapack-netlib/SRC/sgeqr2.f               |   19 +-
 lapack-netlib/SRC/sgeqr2p.f              |   20 +-
 lapack-netlib/SRC/sgeqrf.f               |   17 +-
 lapack-netlib/SRC/sgeqrfp.f              |   20 +-
 lapack-netlib/SRC/sgerfsx.f              |   10 +-
 lapack-netlib/SRC/sgesc2.f               |    4 +-
 lapack-netlib/SRC/sgesdd.f               |    4 +-
 lapack-netlib/SRC/sgesvdq.f              | 1388 ++++++++++++++++++++++
 lapack-netlib/SRC/sgesvj.f               |   44 +-
 lapack-netlib/SRC/sgesvxx.f              |   10 +-
 lapack-netlib/SRC/sgetc2.f               |    2 +-
 lapack-netlib/SRC/sgetsls.f              |    4 +-
 lapack-netlib/SRC/sggesx.f               |    8 +-
 lapack-netlib/SRC/sgsvj0.f               |   20 +-
 lapack-netlib/SRC/sgsvj1.f               |   20 +-
 lapack-netlib/SRC/shseqr.f               |   46 +-
 lapack-netlib/SRC/sla_gbrcond.f          |    4 +-
 lapack-netlib/SRC/sla_gbrfsx_extended.f  |   12 +-
 lapack-netlib/SRC/sla_gercond.f          |    4 +-
 lapack-netlib/SRC/sla_gerfsx_extended.f  |   12 +-
 lapack-netlib/SRC/sla_porcond.f          |    4 +-
 lapack-netlib/SRC/sla_porfsx_extended.f  |    8 +-
 lapack-netlib/SRC/sla_syrcond.f          |    4 +-
 lapack-netlib/SRC/sla_syrfsx_extended.f  |    8 +-
 lapack-netlib/SRC/sla_syrpvgrw.f         |    2 +-
 lapack-netlib/SRC/sla_wwaddw.f           |    2 +-
 lapack-netlib/SRC/slaed4.f               |    2 +-
 lapack-netlib/SRC/slaed8.f               |    2 +-
 lapack-netlib/SRC/slagtf.f               |    6 +-
 lapack-netlib/SRC/slagts.f               |   20 +-
 lapack-netlib/SRC/slahqr.f               |   14 +-
 lapack-netlib/SRC/slaln2.f               |    2 +-
 lapack-netlib/SRC/slamswlq.f             |    1 +
 lapack-netlib/SRC/slamtsqr.f             |    1 +
 lapack-netlib/SRC/slangb.f               |   26 +-
 lapack-netlib/SRC/slange.f               |   22 +-
 lapack-netlib/SRC/slanhs.f               |   25 +-
 lapack-netlib/SRC/slansb.f               |   44 +-
 lapack-netlib/SRC/slansp.f               |   48 +-
 lapack-netlib/SRC/slansy.f               |   42 +-
 lapack-netlib/SRC/slantb.f               |   57 +-
 lapack-netlib/SRC/slantp.f               |   55 +-
 lapack-netlib/SRC/slantr.f               |   58 +-
 lapack-netlib/SRC/slanv2.f               |    8 +-
 lapack-netlib/SRC/slaorhr_col_getrfnp.f  |  248 ++++
 lapack-netlib/SRC/slaorhr_col_getrfnp2.f |  305 +++++
 67 files changed, 2649 insertions(+), 346 deletions(-)
 create mode 100644 lapack-netlib/SRC/meson.build
 create mode 100644 lapack-netlib/SRC/scombssq.f
 create mode 100644 lapack-netlib/SRC/sgesvdq.f
 create mode 100644 lapack-netlib/SRC/slaorhr_col_getrfnp.f
 create mode 100644 lapack-netlib/SRC/slaorhr_col_getrfnp2.f

diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f
index a438ada38..7f68c383d 100644
--- a/lapack-netlib/SRC/ilaenv.f
+++ b/lapack-netlib/SRC/ilaenv.f
@@ -132,7 +132,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date November 2017
+*> \date November 2019
 *
 *> \ingroup OTHERauxiliary
 *
@@ -162,10 +162,10 @@
 *  =====================================================================
       INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
 *
-*  -- LAPACK auxiliary routine (version 3.8.0) --
+*  -- LAPACK auxiliary routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     November 2017
+*     November 2019
 *
 *     .. Scalar Arguments ..
       CHARACTER*( * )    NAME, OPTS
@@ -271,7 +271,16 @@
 *
       NB = 1
 *
-      IF( C2.EQ.'GE' ) THEN
+      IF( SUBNAM(2:6).EQ.'LAORH' ) THEN
+*
+*        This is for *LAORHR_GETRFNP routine
+*
+         IF( SNAME ) THEN
+             NB = 32
+         ELSE
+             NB = 32
+         END IF
+      ELSE IF( C2.EQ.'GE' ) THEN
          IF( C3.EQ.'TRF' ) THEN
             IF( SNAME ) THEN
                NB = 64
diff --git a/lapack-netlib/SRC/ilaenv2stage.f b/lapack-netlib/SRC/ilaenv2stage.f
index 3c0d34a12..db30a1b4d 100644
--- a/lapack-netlib/SRC/ilaenv2stage.f
+++ b/lapack-netlib/SRC/ilaenv2stage.f
@@ -39,9 +39,9 @@
 *>
 *> ILAENV2STAGE returns an INTEGER
 *> if ILAENV2STAGE >= 0: ILAENV2STAGE returns the value of the parameter
-*                        specified by ISPEC
+*>                       specified by ISPEC
 *> if ILAENV2STAGE < 0:  if ILAENV2STAGE = -k, the k-th argument had an
-*                        illegal value.
+*>                       illegal value.
 *>
 *> This version provides a set of parameters which should give good,
 *> but not optimal, performance on many of the currently available
diff --git a/lapack-netlib/SRC/iparam2stage.F b/lapack-netlib/SRC/iparam2stage.F
index 836e20eed..1a37300a7 100644
--- a/lapack-netlib/SRC/iparam2stage.F
+++ b/lapack-netlib/SRC/iparam2stage.F
@@ -35,7 +35,7 @@
 *> \verbatim
 *>
 *>      This program sets problem and machine dependent parameters
-*>      useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST,
+*>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
 *>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
 *>      and related subroutines for eigenvalue problems. 
 *>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
@@ -53,7 +53,7 @@
 *>              return.
 *>
 *>              ISPEC=17: the optimal blocksize nb for the reduction to
-*                         BAND
+*>                        BAND
 *>
 *>              ISPEC=18: the optimal blocksize ib for the eigenvectors
 *>                        singular vectors update routine
@@ -90,14 +90,14 @@
 *> \param[in] NBI
 *> \verbatim
 *>          NBI is INTEGER which is the used in the reduciton, 
-*           (e.g., the size of the band), needed to compute workspace
-*           and LHOUS2.
+*>          (e.g., the size of the band), needed to compute workspace
+*>          and LHOUS2.
 *> \endverbatim
 *>
 *> \param[in] IBI
 *> \verbatim
 *>          IBI is INTEGER which represent the IB of the reduciton,
-*           needed to compute workspace and LHOUS2.
+*>          needed to compute workspace and LHOUS2.
 *> \endverbatim
 *>
 *> \param[in] NXI
diff --git a/lapack-netlib/SRC/iparmq.f b/lapack-netlib/SRC/iparmq.f
index a9212b3e0..bb711243d 100644
--- a/lapack-netlib/SRC/iparmq.f
+++ b/lapack-netlib/SRC/iparmq.f
@@ -60,7 +60,7 @@
 *>                        invest in an (expensive) multi-shift QR sweep.
 *>                        If the aggressive early deflation subroutine
 *>                        finds LD converged eigenvalues from an order
-*>                        NW deflation window and LD.GT.(NW*NIBBLE)/100,
+*>                        NW deflation window and LD > (NW*NIBBLE)/100,
 *>                        then the next QR sweep is skipped and early
 *>                        deflation is applied immediately to the
 *>                        remaining active diagonal block.  Setting
@@ -184,8 +184,8 @@
 *>                        This depends on ILO, IHI and NS, the
 *>                        number of simultaneous shifts returned
 *>                        by IPARMQ(ISPEC=15).  The default for
-*>                        (IHI-ILO+1).LE.500 is NS.  The default
-*>                        for (IHI-ILO+1).GT.500 is 3*NS/2.
+*>                        (IHI-ILO+1) <= 500 is NS.  The default
+*>                        for (IHI-ILO+1) > 500 is 3*NS/2.
 *>
 *>       IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14.
 *>
diff --git a/lapack-netlib/SRC/meson.build b/lapack-netlib/SRC/meson.build
new file mode 100644
index 000000000..bad682401
--- /dev/null
+++ b/lapack-netlib/SRC/meson.build
@@ -0,0 +1,11 @@
+ALLAUX = files('ilaenv.f', 'ilaenv2stage.f', 'ieeeck.f', 'lsamen.f', 'xerbla.f', 'xerbla_array.f', 'iparmq.f', 'iparam2stage.F', 'ilaprec.f', 'ilatrans.f', 'ilauplo.f', 'iladiag.f', 'chla_transtype.f', '../INSTALL/ilaver.f', '../INSTALL/lsame.f', '../INSTALL/slamch.f')
+
+
+SCLAUX = files('sbdsdc.f', 'sbdsqr.f', 'sdisna.f', 'slabad.f', 'slacpy.f', 'sladiv.f', 'slae2.f', 'slaebz.f', 'slaed0.f', 'slaed1.f', 'slaed2.f', 'slaed3.f', 'slaed4.f', 'slaed5.f', 'slaed6.f', 'slaed7.f', 'slaed8.f', 'slaed9.f', 'slaeda.f', 'slaev2.f', 'slagtf.f', 'slagts.f', 'slamrg.f', 'slanst.f', 'slapy2.f', 'slapy3.f', 'slarnv.f', 'slarra.f', 'slarrb.f', 'slarrc.f', 'slarrd.f', 'slarre.f', 'slarrf.f', 'slarrj.f', 'slarrk.f', 'slarrr.f', 'slaneg.f', 'slartg.f', 'slaruv.f', 'slas2.f', 'slascl.f', 'slasd0.f', 'slasd1.f', 'slasd2.f', 'slasd3.f', 'slasd4.f', 'slasd5.f', 'slasd6.f', 'slasd7.f', 'slasd8.f', 'slasda.f', 'slasdq.f', 'slasdt.f', 'slaset.f', 'slasq1.f', 'slasq2.f', 'slasq3.f', 'slasq4.f', 'slasq5.f', 'slasq6.f', 'slasr.f', 'slasrt.f', 'slassq.f', 'slasv2.f', 'spttrf.f', 'sstebz.f', 'sstedc.f', 'ssteqr.f', 'ssterf.f', 'slaisnan.f', 'sisnan.f', 'slartgp.f', 'slartgs.f', '../INSTALL/second_INT_CPU_TIME.f')
+
+DZLAUX = files('dbdsdc.f', 'dbdsqr.f', 'ddisna.f', 'dlabad.f', 'dlacpy.f', 'dladiv.f', 'dlae2.f', 'dlaebz.f', 'dlaed0.f', 'dlaed1.f', 'dlaed2.f', 'dlaed3.f', 'dlaed4.f', 'dlaed5.f', 'dlaed6.f', 'dlaed7.f', 'dlaed8.f', 'dlaed9.f', 'dlaeda.f', 'dlaev2.f', 'dlagtf.f', 'dlagts.f', 'dlamrg.f', 'dlanst.f', 'dlapy2.f', 'dlapy3.f', 'dlarnv.f', 'dlarra.f', 'dlarrb.f', 'dlarrc.f', 'dlarrd.f', 'dlarre.f', 'dlarrf.f', 'dlarrj.f', 'dlarrk.f', 'dlarrr.f', 'dlaneg.f', 'dlartg.f', 'dlaruv.f', 'dlas2.f', 'dlascl.f', 'dlasd0.f', 'dlasd1.f', 'dlasd2.f', 'dlasd3.f', 'dlasd4.f', 'dlasd5.f', 'dlasd6.f', 'dlasd7.f', 'dlasd8.f', 'dlasda.f', 'dlasdq.f', 'dlasdt.f', 'dlaset.f', 'dlasq1.f', 'dlasq2.f', 'dlasq3.f', 'dlasq4.f', 'dlasq5.f', 'dlasq6.f', 'dlasr.f', 'dlasrt.f', 'dlassq.f', 'dlasv2.f', 'dpttrf.f', 'dstebz.f', 'dstedc.f', 'dsteqr.f', 'dsterf.f', 'dlaisnan.f', 'disnan.f', 'dlartgp.f', 'dlartgs.f', '../INSTALL/dlamch.f', '../INSTALL/dsecnd_INT_CPU_TIME.f')
+
+
+SLASRC = files('sbdsvdx.f', 'spotrf2.f', 'sgetrf2.f', 'sgbbrd.f', 'sgbcon.f', 'sgbequ.f', 'sgbrfs.f', 'sgbsv.f', 'sgbsvx.f', 'sgbtf2.f', 'sgbtrf.f', 'sgbtrs.f', 'sgebak.f', 'sgebal.f', 'sgebd2.f', 'sgebrd.f', 'sgecon.f', 'sgeequ.f', 'sgees.f', 'sgeesx.f', 'sgeev.f', 'sgeevx.f', 'sgehd2.f', 'sgehrd.f', 'sgelq2.f', 'sgelqf.f', 'sgels.f', 'sgelsd.f', 'sgelss.f', 'sgelsy.f', 'sgeql2.f', 'sgeqlf.f', 'sgeqp3.f', 'sgeqr2.f', 'sgeqr2p.f', 'sgeqrf.f', 'sgeqrfp.f', 'sgerfs.f', 'sgerq2.f', 'sgerqf.f', 'sgesc2.f', 'sgesdd.f', 'sgesv.f', 'sgesvd.f', 'sgesvdx.f', 'sgesvx.f', 'sgetc2.f', 'sgetf2.f', 'sgetri.f', 'sggbak.f', 'sggbal.f', 'sgges.f', 'sgges3.f', 'sggesx.f', 'sggev.f', 'sggev3.f', 'sggevx.f', 'sggglm.f', 'sgghrd.f', 'sgghd3.f', 'sgglse.f', 'sggqrf.f', 'sggrqf.f', 'sggsvd3.f', 'sggsvp3.f', 'sgtcon.f', 'sgtrfs.f', 'sgtsv.f', 'sgtsvx.f', 'sgttrf.f', 'sgttrs.f', 'sgtts2.f', 'shgeqz.f', 'shsein.f', 'shseqr.f', 'slabrd.f', 'slacon.f', 'slacn2.f', 'slaein.f', 'slaexc.f', 'slag2.f', 'slags2.f', 'slagtm.f', 'slagv2.f', 'slahqr.f', 'slahr2.f', 'slaic1.f', 'slaln2.f', 'slals0.f', 'slalsa.f', 'slalsd.f', 'slangb.f', 'slange.f', 'slangt.f', 'slanhs.f', 'slansb.f', 'slansp.f', 'slansy.f', 'slantb.f', 'slantp.f', 'slantr.f', 'slanv2.f', 'slapll.f', 'slapmt.f', 'slaqgb.f', 'slaqge.f', 'slaqp2.f', 'slaqps.f', 'slaqsb.f', 'slaqsp.f', 'slaqsy.f', 'slaqr0.f', 'slaqr1.f', 'slaqr2.f', 'slaqr3.f', 'slaqr4.f', 'slaqr5.f', 'slaqtr.f', 'slar1v.f', 'slar2v.f', 'ilaslr.f', 'ilaslc.f', 'slarf.f', 'slarfb.f', 'slarfg.f', 'slarfgp.f', 'slarft.f', 'slarfx.f', 'slarfy.f', 'slargv.f', 'slarrv.f', 'slartv.f', 'slarz.f', 'slarzb.f', 'slarzt.f', 'slaswp.f', 'slasy2.f', 'slasyf.f', 'slasyf_rook.f', 'slasyf_rk.f', 'slatbs.f', 'slatdf.f', 'slatps.f', 'slatrd.f', 'slatrs.f', 'slatrz.f', 'slauu2.f', 'slauum.f', 'sopgtr.f', 'sopmtr.f', 'sorg2l.f', 'sorg2r.f', 'sorgbr.f', 'sorghr.f', 'sorgl2.f', 'sorglq.f', 'sorgql.f', 'sorgqr.f', 'sorgr2.f', 'sorgrq.f', 'sorgtr.f', 'sorm2l.f', 'sorm2r.f', 'sorm22.f', 'sormbr.f', 'sormhr.f', 'sorml2.f', 'sormlq.f', 'sormql.f', 'sormqr.f', 'sormr2.f', 'sormr3.f', 'sormrq.f', 'sormrz.f', 'sormtr.f', 'spbcon.f', 'spbequ.f', 'spbrfs.f', 'spbstf.f', 'spbsv.f', 'spbsvx.f', 'spbtf2.f', 'spbtrf.f', 'spbtrs.f', 'spocon.f', 'spoequ.f', 'sporfs.f', 'sposv.f', 'sposvx.f', 'spotf2.f', 'spotri.f', 'spstrf.f', 'spstf2.f', 'sppcon.f', 'sppequ.f', 'spprfs.f', 'sppsv.f', 'sppsvx.f', 'spptrf.f', 'spptri.f', 'spptrs.f', 'sptcon.f', 'spteqr.f', 'sptrfs.f', 'sptsv.f', 'sptsvx.f', 'spttrs.f', 'sptts2.f', 'srscl.f', 'ssbev.f', 'ssbevd.f', 'ssbevx.f', 'ssbgst.f', 'ssbgv.f', 'ssbgvd.f', 'ssbgvx.f', 'ssbtrd.f', 'sspcon.f', 'sspev.f', 'sspevd.f', 'sspevx.f', 'sspgst.f', 'sspgv.f', 'sspgvd.f', 'sspgvx.f', 'ssprfs.f', 'sspsv.f', 'sspsvx.f', 'ssptrd.f', 'ssptrf.f', 'ssptri.f', 'ssptrs.f', 'sstegr.f', 'sstein.f', 'sstev.f', 'sstevd.f', 'sstevr.f', 'sstevx.f', 'ssycon.f', 'ssyev.f', 'ssyevd.f', 'ssyevr.f', 'ssyevx.f', 'ssygs2.f', 'ssygst.f', 'ssygv.f', 'ssygvd.f', 'ssygvx.f', 'ssyrfs.f', 'ssysv.f', 'ssysvx.f', 'ssytd2.f', 'ssytf2.f', 'ssytrd.f', 'ssytrf.f', 'ssytri.f', 'ssytri2.f', 'ssytri2x.f', 'ssyswapr.f', 'ssytrs.f', 'ssytrs2.f', 'ssyconv.f', 'ssyconvf.f', 'ssyconvf_rook.f', 'ssytf2_rook.f', 'ssytrf_rook.f', 'ssytrs_rook.f', 'ssytri_rook.f', 'ssycon_rook.f', 'ssysv_rook.f', 'ssytf2_rk.f', 'ssytrf_rk.f', 'ssytrs_3.f', 'ssytri_3.f', 'ssytri_3x.f', 'ssycon_3.f', 'ssysv_rk.f', 'slasyf_aa.f', 'ssysv_aa.f', 'ssytrf_aa.f', 'ssytrs_aa.f', 'ssysv_aa_2stage.f', 'ssytrf_aa_2stage.f', 'ssytrs_aa_2stage.f', 'stbcon.f', 'stbrfs.f', 'stbtrs.f', 'stgevc.f', 'stgex2.f', 'stgexc.f', 'stgsen.f', 'stgsja.f', 'stgsna.f', 'stgsy2.f', 'stgsyl.f', 'stpcon.f', 'stprfs.f', 'stptri.f', 'stptrs.f', 'strcon.f', 'strevc.f', 'strevc3.f', 'strexc.f', 'strrfs.f', 'strsen.f', 'strsna.f', 'strsyl.f', 'strti2.f', 'strtri.f', 'strtrs.f', 'stzrzf.f', 'sstemr.f', 'slansf.f', 'spftrf.f', 'spftri.f', 'spftrs.f', 'ssfrk.f', 'stfsm.f', 'stftri.f', 'stfttp.f', 'stfttr.f', 'stpttf.f', 'stpttr.f', 'strttf.f', 'strttp.f', 'sgejsv.f', 'sgesvj.f', 'sgsvj0.f', 'sgsvj1.f', 'sgeequb.f', 'ssyequb.f', 'spoequb.f', 'sgbequb.f', 'sbbcsd.f', 'slapmr.f', 'sorbdb.f', 'sorbdb1.f', 'sorbdb2.f', 'sorbdb3.f', 'sorbdb4.f', 'sorbdb5.f', 'sorbdb6.f', 'sorcsd.f', 'sorcsd2by1.f', 'sgeqrt.f', 'sgeqrt2.f', 'sgeqrt3.f', 'sgemqrt.f', 'stpqrt.f', 'stpqrt2.f', 'stpmqrt.f', 'stprfb.f', 'sgelqt.f', 'sgelqt3.f', 'sgemlqt.f', 'sgetsls.f', 'sgeqr.f', 'slatsqr.f', 'slamtsqr.f', 'sgemqr.f', 'sgelq.f', 'slaswlq.f', 'slamswlq.f', 'sgemlq.f', 'stplqt.f', 'stplqt2.f', 'stpmlqt.f', 'ssytrd_2stage.f', 'ssytrd_sy2sb.f', 'ssytrd_sb2st.F', 'ssb2st_kernels.f', 'ssyevd_2stage.f', 'ssyev_2stage.f', 'ssyevx_2stage.f', 'ssyevr_2stage.f', 'ssbev_2stage.f', 'ssbevx_2stage.f', 'ssbevd_2stage.f', 'ssygv_2stage.f', 'sgesvdq.f', 'scombssq.f')
+
+DSLASRC = files('spotrs.f', 'sgetrs.f', 'spotrf.f', 'sgetrf.f')
diff --git a/lapack-netlib/SRC/sbdsvdx.f b/lapack-netlib/SRC/sbdsvdx.f
index a4b1887b2..c46674c47 100644
--- a/lapack-netlib/SRC/sbdsvdx.f
+++ b/lapack-netlib/SRC/sbdsvdx.f
@@ -165,7 +165,7 @@
 *>
 *> \param[out] Z
 *> \verbatim
-*>          Z is REAL array, dimension (2*N,K) )
+*>          Z is REAL array, dimension (2*N,K)
 *>          If JOBZ = 'V', then if INFO = 0 the first NS columns of Z
 *>          contain the singular vectors of the matrix B corresponding to
 *>          the selected singular values, with U in rows 1 to N and V
diff --git a/lapack-netlib/SRC/scombssq.f b/lapack-netlib/SRC/scombssq.f
new file mode 100644
index 000000000..76bc0e320
--- /dev/null
+++ b/lapack-netlib/SRC/scombssq.f
@@ -0,0 +1,92 @@
+*> \brief \b SCOMBSSQ adds two scaled sum of squares quantities
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SCOMBSSQ( V1, V2 )
+*
+*       .. Array Arguments ..
+*       REAL               V1( 2 ), V2( 2 )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SCOMBSSQ adds two scaled sum of squares quantities, V1 := V1 + V2.
+*> That is,
+*>
+*>    V1_scale**2 * V1_sumsq := V1_scale**2 * V1_sumsq
+*>                            + V2_scale**2 * V2_sumsq
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in,out] V1
+*> \verbatim
+*>          V1 is REAL array, dimension (2).
+*>          The first scaled sum.
+*>          V1(1) = V1_scale, V1(2) = V1_sumsq.
+*> \endverbatim
+*>
+*> \param[in] V2
+*> \verbatim
+*>          V2 is REAL array, dimension (2).
+*>          The second scaled sum.
+*>          V2(1) = V2_scale, V2(2) = V2_sumsq.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2018
+*
+*> \ingroup OTHERauxiliary
+*
+*  =====================================================================
+      SUBROUTINE SCOMBSSQ( V1, V2 )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2018
+*
+*     .. Array Arguments ..
+      REAL               V1( 2 ), V2( 2 )
+*     ..
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+      IF( V1( 1 ).GE.V2( 1 ) ) THEN
+         IF( V1( 1 ).NE.ZERO ) THEN
+            V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 )
+         END IF
+      ELSE
+         V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 )
+         V1( 1 ) = V2( 1 )
+      END IF
+      RETURN
+*
+*     End of SCOMBSSQ
+*
+      END
diff --git a/lapack-netlib/SRC/sgbrfsx.f b/lapack-netlib/SRC/sgbrfsx.f
index 032b78b80..78ae584e1 100644
--- a/lapack-netlib/SRC/sgbrfsx.f
+++ b/lapack-netlib/SRC/sgbrfsx.f
@@ -308,7 +308,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
@@ -344,14 +344,14 @@
 *> \param[in] NPARAMS
 *> \verbatim
 *>          NPARAMS is INTEGER
-*>     Specifies the number of parameters set in PARAMS.  If .LE. 0, the
+*>     Specifies the number of parameters set in PARAMS.  If <= 0, the
 *>     PARAMS array is never referenced and default values are used.
 *> \endverbatim
 *>
 *> \param[in,out] PARAMS
 *> \verbatim
 *>          PARAMS is REAL array, dimension NPARAMS
-*>     Specifies algorithm parameters.  If an entry is .LT. 0.0, then
+*>     Specifies algorithm parameters.  If an entry is < 0.0, then
 *>     that entry will be filled with default value used for that
 *>     parameter.  Only positions up to NPARAMS are accessed; defaults
 *>     are used for higher-numbered parameters.
@@ -359,9 +359,9 @@
 *>       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative
 *>            refinement or not.
 *>         Default: 1.0
-*>            = 0.0 : No refinement is performed, and no error bounds are
+*>            = 0.0:  No refinement is performed, and no error bounds are
 *>                    computed.
-*>            = 1.0 : Use the double-precision refinement algorithm,
+*>            = 1.0:  Use the double-precision refinement algorithm,
 *>                    possibly with doubled-single computations if the
 *>                    compilation environment does not support DOUBLE
 *>                    PRECISION.
diff --git a/lapack-netlib/SRC/sgbsvxx.f b/lapack-netlib/SRC/sgbsvxx.f
index b2132325e..3c3d737b3 100644
--- a/lapack-netlib/SRC/sgbsvxx.f
+++ b/lapack-netlib/SRC/sgbsvxx.f
@@ -431,7 +431,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
@@ -467,14 +467,14 @@
 *> \param[in] NPARAMS
 *> \verbatim
 *>          NPARAMS is INTEGER
-*>     Specifies the number of parameters set in PARAMS.  If .LE. 0, the
+*>     Specifies the number of parameters set in PARAMS.  If <= 0, the
 *>     PARAMS array is never referenced and default values are used.
 *> \endverbatim
 *>
 *> \param[in,out] PARAMS
 *> \verbatim
 *>          PARAMS is REAL array, dimension NPARAMS
-*>     Specifies algorithm parameters.  If an entry is .LT. 0.0, then
+*>     Specifies algorithm parameters.  If an entry is < 0.0, then
 *>     that entry will be filled with default value used for that
 *>     parameter.  Only positions up to NPARAMS are accessed; defaults
 *>     are used for higher-numbered parameters.
@@ -482,9 +482,9 @@
 *>       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative
 *>            refinement or not.
 *>         Default: 1.0
-*>            = 0.0 : No refinement is performed, and no error bounds are
+*>            = 0.0:  No refinement is performed, and no error bounds are
 *>                    computed.
-*>            = 1.0 : Use the double-precision refinement algorithm,
+*>            = 1.0:  Use the double-precision refinement algorithm,
 *>                    possibly with doubled-single computations if the
 *>                    compilation environment does not support DOUBLE
 *>                    PRECISION.
diff --git a/lapack-netlib/SRC/sgebak.f b/lapack-netlib/SRC/sgebak.f
index ec58bf335..5c64c8b97 100644
--- a/lapack-netlib/SRC/sgebak.f
+++ b/lapack-netlib/SRC/sgebak.f
@@ -47,10 +47,10 @@
 *> \verbatim
 *>          JOB is CHARACTER*1
 *>          Specifies the type of backward transformation required:
-*>          = 'N', do nothing, return immediately;
-*>          = 'P', do backward transformation for permutation only;
-*>          = 'S', do backward transformation for scaling only;
-*>          = 'B', do backward transformations for both permutation and
+*>          = 'N': do nothing, return immediately;
+*>          = 'P': do backward transformation for permutation only;
+*>          = 'S': do backward transformation for scaling only;
+*>          = 'B': do backward transformations for both permutation and
 *>                 scaling.
 *>          JOB must be the same as the argument JOB supplied to SGEBAL.
 *> \endverbatim
diff --git a/lapack-netlib/SRC/sgeesx.f b/lapack-netlib/SRC/sgeesx.f
index c90de9b81..5ffa3bc37 100644
--- a/lapack-netlib/SRC/sgeesx.f
+++ b/lapack-netlib/SRC/sgeesx.f
@@ -583,7 +583,9 @@
                      IF( N.GT.I+1 )
      $                  CALL SSWAP( N-I-1, A( I, I+2 ), LDA,
      $                              A( I+1, I+2 ), LDA )
-                     CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     IF( WANTVS ) THEN
+                       CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     END IF
                      A( I, I+1 ) = A( I+1, I )
                      A( I+1, I ) = ZERO
                   END IF
diff --git a/lapack-netlib/SRC/sgejsv.f b/lapack-netlib/SRC/sgejsv.f
index e4cbe8d0e..4ad316d99 100644
--- a/lapack-netlib/SRC/sgejsv.f
+++ b/lapack-netlib/SRC/sgejsv.f
@@ -82,7 +82,7 @@
 *>              desirable, then this option is advisable. The input matrix A
 *>              is preprocessed with QR factorization with FULL (row and
 *>              column) pivoting.
-*>       = 'G'  Computation as with 'F' with an additional estimate of the
+*>       = 'G': Computation as with 'F' with an additional estimate of the
 *>              condition number of B, where A=D*B. If A has heavily weighted
 *>              rows, then using this condition number gives too pessimistic
 *>              error bound.
@@ -133,7 +133,7 @@
 *>         specified range. If A .NE. 0 is scaled so that the largest singular
 *>         value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
 *>         the licence to kill columns of A whose norm in c*A is less than
-*>         SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
+*>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
 *>         where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
 *>       = 'N': Do not kill small columns of c*A. This option assumes that
 *>              BLAS and QR factorizations and triangular solvers are
@@ -230,7 +230,7 @@
 *>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
 *>                         the left singular vectors, including an ONB
 *>                         of the orthogonal complement of the Range(A).
-*>          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
+*>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
 *>                         then U is used as workspace if the procedure
 *>                         replaces A with A^t. In that case, [V] is computed
 *>                         in U as left singular vectors of A^t and then
@@ -252,7 +252,7 @@
 *>          V is REAL array, dimension ( LDV, N )
 *>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
 *>                         the right singular vectors;
-*>          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
+*>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
 *>                         then V is used as workspace if the pprocedure
 *>                         replaces A with A^t. In that case, [U] is computed
 *>                         in V as right singular vectors of A^t and then
@@ -278,7 +278,7 @@
 *>                    of A. (See the description of SVA().)
 *>          WORK(2) = See the description of WORK(1).
 *>          WORK(3) = SCONDA is an estimate for the condition number of
-*>                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
+*>                    column equilibrated A. (If JOBA = 'E' or 'G')
 *>                    SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1).
 *>                    It is computed using SPOCON. It holds
 *>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
@@ -297,7 +297,7 @@
 *>                    triangular factor in the first QR factorization.
 *>          WORK(5) = an estimate of the scaled condition number of the
 *>                    triangular factor in the second QR factorization.
-*>          The following two parameters are computed if JOBT .EQ. 'T'.
+*>          The following two parameters are computed if JOBT = 'T'.
 *>          They are provided for a developer/implementer who is familiar
 *>          with the details of the method.
 *>
@@ -313,8 +313,8 @@
 *>          Length of WORK to confirm proper allocation of work space.
 *>          LWORK depends on the job:
 *>
-*>          If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
-*>            -> .. no scaled condition estimate required (JOBE.EQ.'N'):
+*>          If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
+*>            -> .. no scaled condition estimate required (JOBE = 'N'):
 *>               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
 *>               ->> For optimal performance (blocked code) the optimal value
 *>               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
@@ -330,7 +330,7 @@
 *>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
 *>                                                     N+N*N+LWORK(DPOCON),7).
 *>
-*>          If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
+*>          If SIGMA and the right singular vectors are needed (JOBV = 'V'),
 *>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
 *>            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
 *>               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ,
@@ -341,19 +341,19 @@
 *>          If SIGMA and the left singular vectors are needed
 *>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
 *>            -> For optimal performance:
-*>               if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
-*>               if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
+*>               if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
+*>               if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
 *>               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
 *>               In general, the optimal length LWORK is computed as
 *>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
 *>                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
-*>               Here LWORK(DORMQR) equals N*NB (for JOBU.EQ.'U') or
-*>               M*NB (for JOBU.EQ.'F').
+*>               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or
+*>               M*NB (for JOBU = 'F').
 *>
-*>          If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and
-*>            -> if JOBV.EQ.'V'
+*>          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
+*>            -> if JOBV = 'V'
 *>               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
-*>            -> if JOBV.EQ.'J' the minimal requirement is
+*>            -> if JOBV = 'J' the minimal requirement is
 *>               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
 *>            -> For optimal performance, LWORK should be additionally
 *>               larger than N+M*NB, where NB is the optimal block size
@@ -369,7 +369,7 @@
 *>                     of JOBA and JOBR.
 *>          IWORK(2) = the number of the computed nonzero singular values
 *>          IWORK(3) = if nonzero, a warning message:
-*>                     If IWORK(3).EQ.1 then some of the column norms of A
+*>                     If IWORK(3) = 1 then some of the column norms of A
 *>                     were denormalized floats. The requested high accuracy
 *>                     is not warranted by the data.
 *> \endverbatim
@@ -377,10 +377,10 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>           < 0  : if INFO = -i, then the i-th argument had an illegal value.
-*>           = 0 :  successful exit;
-*>           > 0 :  SGEJSV  did not converge in the maximal allowed number
-*>                  of sweeps. The computed values may be inaccurate.
+*>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
+*>           = 0:  successful exit;
+*>           > 0:  SGEJSV  did not converge in the maximal allowed number
+*>                 of sweeps. The computed values may be inaccurate.
 *> \endverbatim
 *
 *  Authors:
@@ -953,7 +953,7 @@
       IF ( L2ABER ) THEN
 *        Standard absolute error bound suffices. All sigma_i with
 *        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
-*        agressive enforcement of lower numerical rank by introducing a
+*        aggressive enforcement of lower numerical rank by introducing a
 *        backward error of the order of N*EPSLN*||A||.
          TEMP1 = SQRT(FLOAT(N))*EPSLN
          DO 3001 p = 2, N
@@ -965,7 +965,7 @@
  3001    CONTINUE
  3002    CONTINUE
       ELSE IF ( L2RANK ) THEN
-*        .. similarly as above, only slightly more gentle (less agressive).
+*        .. similarly as above, only slightly more gentle (less aggressive).
 *        Sudden drop on the diagonal of R1 is used as the criterion for
 *        close-to-rank-deficient.
          TEMP1 = SQRT(SFMIN)
@@ -1294,7 +1294,7 @@
             CALL SPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1,
      $                   WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
             CONDR1 = ONE / SQRT(TEMP1)
-*           .. here need a second oppinion on the condition number
+*           .. here need a second opinion on the condition number
 *           .. then assume worst case scenario
 *           R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N)
 *           more conservative    <=> CONDR1 .LT. SQRT(FLOAT(N))
@@ -1335,7 +1335,7 @@
             ELSE
 *
 *              .. ill-conditioned case: second QRF with pivoting
-*              Note that windowed pivoting would be equaly good
+*              Note that windowed pivoting would be equally good
 *              numerically, and more run-time efficient. So, in
 *              an optimal implementation, the next call to SGEQP3
 *              should be replaced with eg. CALL SGEQPX (ACM TOMS #782)
@@ -1388,7 +1388,7 @@
 *
                IF ( CONDR2 .GE. COND_OK ) THEN
 *                 .. save the Householder vectors used for Q3
-*                 (this overwrittes the copy of R2, as it will not be
+*                 (this overwrites the copy of R2, as it will not be
 *                 needed in this branch, but it does not overwritte the
 *                 Huseholder vectors of Q2.).
                   CALL SLACPY( 'U', NR, NR, V, LDV, WORK(2*N+1), N )
@@ -1638,7 +1638,7 @@
 *
 *        This branch deploys a preconditioned Jacobi SVD with explicitly
 *        accumulated rotations. It is included as optional, mainly for
-*        experimental purposes. It does perfom well, and can also be used.
+*        experimental purposes. It does perform well, and can also be used.
 *        In this implementation, this branch will be automatically activated
 *        if the  condition number sigma_max(A) / sigma_min(A) is predicted
 *        to be greater than the overflow threshold. This is because the
diff --git a/lapack-netlib/SRC/sgelq.f b/lapack-netlib/SRC/sgelq.f
index 4fe4d191d..96c4097e8 100644
--- a/lapack-netlib/SRC/sgelq.f
+++ b/lapack-netlib/SRC/sgelq.f
@@ -1,3 +1,4 @@
+*> \brief \b SGELQ
 *
 *  Definition:
 *  ===========
@@ -17,7 +18,17 @@
 *  =============
 *>
 *> \verbatim
-*> SGELQ computes a LQ factorization of an M-by-N matrix A.
+*>
+*> SGELQ computes an LQ factorization of a real M-by-N matrix A:
+*>
+*>    A = ( L 0 ) *  Q
+*>
+*> where:
+*>
+*>    Q is a N-by-N orthogonal matrix;
+*>    L is an lower-triangular M-by-M matrix;
+*>    0 is a M-by-(N-M) zero matrix, if M < N.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -138,7 +149,7 @@
 *> \verbatim
 *>
 *> These details are particular for this LAPACK implementation. Users should not 
-*> take them for granted. These details may change in the future, and are unlikely not
+*> take them for granted. These details may change in the future, and are not likely
 *> true for another LAPACK implementation. These details are relevant if one wants
 *> to try to understand the code. They are not part of the interface.
 *>
@@ -159,10 +170,10 @@
       SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK,
      $                  INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N, TSIZE, LWORK
diff --git a/lapack-netlib/SRC/sgelq2.f b/lapack-netlib/SRC/sgelq2.f
index 5b1ad215a..df8128d53 100644
--- a/lapack-netlib/SRC/sgelq2.f
+++ b/lapack-netlib/SRC/sgelq2.f
@@ -33,8 +33,16 @@
 *>
 *> \verbatim
 *>
-*> SGELQ2 computes an LQ factorization of a real m by n matrix A:
-*> A = L * Q.
+*> SGELQ2 computes an LQ factorization of a real m-by-n matrix A:
+*>
+*>    A = ( L 0 ) *  Q
+*>
+*> where:
+*>
+*>    Q is a n-by-n orthogonal matrix;
+*>    L is an lower-triangular m-by-m matrix;
+*>    0 is a m-by-(n-m) zero matrix, if m < n.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -96,7 +104,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -121,10 +129,10 @@
 *  =====================================================================
       SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lapack-netlib/SRC/sgelqf.f b/lapack-netlib/SRC/sgelqf.f
index 99c03c0a3..90357e623 100644
--- a/lapack-netlib/SRC/sgelqf.f
+++ b/lapack-netlib/SRC/sgelqf.f
@@ -34,7 +34,15 @@
 *> \verbatim
 *>
 *> SGELQF computes an LQ factorization of a real M-by-N matrix A:
-*> A = L * Q.
+*>
+*>    A = ( L 0 ) *  Q
+*>
+*> where:
+*>
+*>    Q is a N-by-N orthogonal matrix;
+*>    L is an lower-triangular M-by-M matrix;
+*>    0 is a M-by-(N-M) zero matrix, if M < N.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -110,7 +118,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -135,10 +143,10 @@
 *  =====================================================================
       SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lapack-netlib/SRC/sgelqt.f b/lapack-netlib/SRC/sgelqt.f
index 9a93af332..64d46025c 100644
--- a/lapack-netlib/SRC/sgelqt.f
+++ b/lapack-netlib/SRC/sgelqt.f
@@ -1,3 +1,5 @@
+*> \brief \b SGELQT
+*
 *  Definition:
 *  ===========
 *
diff --git a/lapack-netlib/SRC/sgelqt3.f b/lapack-netlib/SRC/sgelqt3.f
index 292ae88a3..edf5d6d30 100644
--- a/lapack-netlib/SRC/sgelqt3.f
+++ b/lapack-netlib/SRC/sgelqt3.f
@@ -1,3 +1,5 @@
+*> \brief \b SGELQT3
+*
 *  Definition:
 *  ===========
 *
diff --git a/lapack-netlib/SRC/sgemlq.f b/lapack-netlib/SRC/sgemlq.f
index dedbe7752..5f2e02a8e 100644
--- a/lapack-netlib/SRC/sgemlq.f
+++ b/lapack-netlib/SRC/sgemlq.f
@@ -1,3 +1,4 @@
+*> \brief \b SGEMLQ
 *
 *  Definition:
 *  ===========
@@ -143,7 +144,7 @@
 *> \verbatim
 *>
 *> These details are particular for this LAPACK implementation. Users should not 
-*> take them for granted. These details may change in the future, and are unlikely not
+*> take them for granted. These details may change in the future, and are not likely
 *> true for another LAPACK implementation. These details are relevant if one wants
 *> to try to understand the code. They are not part of the interface.
 *>
diff --git a/lapack-netlib/SRC/sgemlqt.f b/lapack-netlib/SRC/sgemlqt.f
index a8f022bdc..37850fdf5 100644
--- a/lapack-netlib/SRC/sgemlqt.f
+++ b/lapack-netlib/SRC/sgemlqt.f
@@ -1,3 +1,5 @@
+*> \brief \b SGEMLQT
+*
 *  Definition:
 *  ===========
 *
diff --git a/lapack-netlib/SRC/sgemqr.f b/lapack-netlib/SRC/sgemqr.f
index 307fc8ca9..66c5117c9 100644
--- a/lapack-netlib/SRC/sgemqr.f
+++ b/lapack-netlib/SRC/sgemqr.f
@@ -1,3 +1,4 @@
+*> \brief \b SGEMQR
 *
 *  Definition:
 *  ===========
@@ -144,7 +145,7 @@
 *> \verbatim
 *>
 *> These details are particular for this LAPACK implementation. Users should not 
-*> take them for granted. These details may change in the future, and are unlikely not
+*> take them for granted. These details may change in the future, and are not likely
 *> true for another LAPACK implementation. These details are relevant if one wants
 *> to try to understand the code. They are not part of the interface.
 *>
diff --git a/lapack-netlib/SRC/sgeqr.f b/lapack-netlib/SRC/sgeqr.f
index f939abd9d..4a6bb9ea5 100644
--- a/lapack-netlib/SRC/sgeqr.f
+++ b/lapack-netlib/SRC/sgeqr.f
@@ -1,3 +1,4 @@
+*> \brief \b SGEQR
 *
 *  Definition:
 *  ===========
@@ -17,7 +18,18 @@
 *  =============
 *>
 *> \verbatim
-*> SGEQR computes a QR factorization of an M-by-N matrix A.
+*>
+*> SGEQR computes a QR factorization of a real M-by-N matrix A:
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a M-by-M orthogonal matrix;
+*>    R is an upper-triangular N-by-N matrix;
+*>    0 is a (M-N)-by-N zero matrix, if M > N.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -138,7 +150,7 @@
 *> \verbatim
 *>
 *> These details are particular for this LAPACK implementation. Users should not 
-*> take them for granted. These details may change in the future, and are unlikely not
+*> take them for granted. These details may change in the future, and are not likely
 *> true for another LAPACK implementation. These details are relevant if one wants
 *> to try to understand the code. They are not part of the interface.
 *>
@@ -160,10 +172,10 @@
       SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK,
      $                  INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N, TSIZE, LWORK
diff --git a/lapack-netlib/SRC/sgeqr2.f b/lapack-netlib/SRC/sgeqr2.f
index 3b990f825..0a1ff304f 100644
--- a/lapack-netlib/SRC/sgeqr2.f
+++ b/lapack-netlib/SRC/sgeqr2.f
@@ -33,8 +33,17 @@
 *>
 *> \verbatim
 *>
-*> SGEQR2 computes a QR factorization of a real m by n matrix A:
-*> A = Q * R.
+*> SGEQR2 computes a QR factorization of a real m-by-n matrix A:
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a m-by-m orthogonal matrix;
+*>    R is an upper-triangular n-by-n matrix;
+*>    0 is a (m-n)-by-n zero matrix, if m > n.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -96,7 +105,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -121,10 +130,10 @@
 *  =====================================================================
       SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lapack-netlib/SRC/sgeqr2p.f b/lapack-netlib/SRC/sgeqr2p.f
index f48af9d2d..08d124797 100644
--- a/lapack-netlib/SRC/sgeqr2p.f
+++ b/lapack-netlib/SRC/sgeqr2p.f
@@ -33,8 +33,18 @@
 *>
 *> \verbatim
 *>
-*> SGEQR2P computes a QR factorization of a real m by n matrix A:
-*> A = Q * R. The diagonal entries of R are nonnegative.
+*> SGEQR2P computes a QR factorization of a real m-by-n matrix A:
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a m-by-m orthogonal matrix;
+*>    R is an upper-triangular n-by-n matrix with nonnegative diagonal
+*>    entries;
+*>    0 is a (m-n)-by-n zero matrix, if m > n.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -97,7 +107,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -124,10 +134,10 @@
 *  =====================================================================
       SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, M, N
diff --git a/lapack-netlib/SRC/sgeqrf.f b/lapack-netlib/SRC/sgeqrf.f
index 0f79c2ca5..7df495e04 100644
--- a/lapack-netlib/SRC/sgeqrf.f
+++ b/lapack-netlib/SRC/sgeqrf.f
@@ -34,7 +34,16 @@
 *> \verbatim
 *>
 *> SGEQRF computes a QR factorization of a real M-by-N matrix A:
-*> A = Q * R.
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a M-by-M orthogonal matrix;
+*>    R is an upper-triangular N-by-N matrix;
+*>    0 is a (M-N)-by-N zero matrix, if M > N.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -111,7 +120,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -136,10 +145,10 @@
 *  =====================================================================
       SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lapack-netlib/SRC/sgeqrfp.f b/lapack-netlib/SRC/sgeqrfp.f
index 654c0a13a..7f6741570 100644
--- a/lapack-netlib/SRC/sgeqrfp.f
+++ b/lapack-netlib/SRC/sgeqrfp.f
@@ -33,8 +33,18 @@
 *>
 *> \verbatim
 *>
-*> SGEQRFP computes a QR factorization of a real M-by-N matrix A:
-*> A = Q * R. The diagonal entries of R are nonnegative.
+*> SGEQR2P computes a QR factorization of a real M-by-N matrix A:
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a M-by-M orthogonal matrix;
+*>    R is an upper-triangular N-by-N matrix with nonnegative diagonal
+*>    entries;
+*>    0 is a (M-N)-by-N zero matrix, if M > N.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -112,7 +122,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
-*> \date December 2016
+*> \date November 2019
 *
 *> \ingroup realGEcomputational
 *
@@ -139,10 +149,10 @@
 *  =====================================================================
       SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER            INFO, LDA, LWORK, M, N
diff --git a/lapack-netlib/SRC/sgerfsx.f b/lapack-netlib/SRC/sgerfsx.f
index 3f518899e..b1a1eb13d 100644
--- a/lapack-netlib/SRC/sgerfsx.f
+++ b/lapack-netlib/SRC/sgerfsx.f
@@ -283,7 +283,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
@@ -319,14 +319,14 @@
 *> \param[in] NPARAMS
 *> \verbatim
 *>          NPARAMS is INTEGER
-*>     Specifies the number of parameters set in PARAMS.  If .LE. 0, the
+*>     Specifies the number of parameters set in PARAMS.  If <= 0, the
 *>     PARAMS array is never referenced and default values are used.
 *> \endverbatim
 *>
 *> \param[in,out] PARAMS
 *> \verbatim
 *>          PARAMS is REAL array, dimension NPARAMS
-*>     Specifies algorithm parameters.  If an entry is .LT. 0.0, then
+*>     Specifies algorithm parameters.  If an entry is < 0.0, then
 *>     that entry will be filled with default value used for that
 *>     parameter.  Only positions up to NPARAMS are accessed; defaults
 *>     are used for higher-numbered parameters.
@@ -334,9 +334,9 @@
 *>       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative
 *>            refinement or not.
 *>         Default: 1.0
-*>            = 0.0 : No refinement is performed, and no error bounds are
+*>            = 0.0:  No refinement is performed, and no error bounds are
 *>                    computed.
-*>            = 1.0 : Use the double-precision refinement algorithm,
+*>            = 1.0:  Use the double-precision refinement algorithm,
 *>                    possibly with doubled-single computations if the
 *>                    compilation environment does not support DOUBLE
 *>                    PRECISION.
diff --git a/lapack-netlib/SRC/sgesc2.f b/lapack-netlib/SRC/sgesc2.f
index c78daa334..3a6f34584 100644
--- a/lapack-netlib/SRC/sgesc2.f
+++ b/lapack-netlib/SRC/sgesc2.f
@@ -90,7 +90,7 @@
 *> \verbatim
 *>          SCALE is REAL
 *>           On exit, SCALE contains the scale factor. SCALE is chosen
-*>           0 <= SCALE <= 1 to prevent owerflow in the solution.
+*>           0 <= SCALE <= 1 to prevent overflow in the solution.
 *> \endverbatim
 *
 *  Authors:
@@ -151,7 +151,7 @@
 *     ..
 *     .. Executable Statements ..
 *
-*      Set constant to control owerflow
+*      Set constant to control overflow
 *
       EPS = SLAMCH( 'P' )
       SMLNUM = SLAMCH( 'S' ) / EPS
diff --git a/lapack-netlib/SRC/sgesdd.f b/lapack-netlib/SRC/sgesdd.f
index 0ba2a78c7..689494dd1 100644
--- a/lapack-netlib/SRC/sgesdd.f
+++ b/lapack-netlib/SRC/sgesdd.f
@@ -322,7 +322,7 @@
 *
             IF( WNTQN ) THEN
 *              sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
-*              keep 7*N for backwards compatability.
+*              keep 7*N for backwards compatibility.
                BDSPAC = 7*N
             ELSE
                BDSPAC = 3*N*N + 4*N
@@ -448,7 +448,7 @@
 *
             IF( WNTQN ) THEN
 *              sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
-*              keep 7*N for backwards compatability.
+*              keep 7*N for backwards compatibility.
                BDSPAC = 7*M
             ELSE
                BDSPAC = 3*M*M + 4*M
diff --git a/lapack-netlib/SRC/sgesvdq.f b/lapack-netlib/SRC/sgesvdq.f
new file mode 100644
index 000000000..73e34862f
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvdq.f
@@ -0,0 +1,1388 @@
+*> \brief <b> SGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download SGESVDQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvdq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvdq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvdq.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*      SUBROUTINE SGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA,
+*                          S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK,
+*                          WORK, LWORK, RWORK, LRWORK, INFO )
+*
+*     .. Scalar Arguments ..
+*      IMPLICIT    NONE
+*      CHARACTER   JOBA, JOBP, JOBR, JOBU, JOBV
+*      INTEGER     M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK,
+*                  INFO
+*     ..
+*     .. Array Arguments ..
+*      REAL        A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * )
+*      REAL        S( * ), RWORK( * )
+*      INTEGER     IWORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SGESVDQ computes the singular value decomposition (SVD) of a real
+*> M-by-N matrix A, where M >= N. The SVD of A is written as
+*>                                    [++]   [xx]   [x0]   [xx]
+*>              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
+*>                                    [++]   [xx]
+*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
+*> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements
+*> of SIGMA are the singular values of A. The columns of U and V are the
+*> left and the right singular vectors of A, respectively.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBA
+*> \verbatim
+*>  JOBA is CHARACTER*1
+*>  Specifies the level of accuracy in the computed SVD
+*>  = 'A' The requested accuracy corresponds to having the backward
+*>        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F,
+*>        where EPS = SLAMCH('Epsilon'). This authorises CGESVDQ to
+*>        truncate the computed triangular factor in a rank revealing
+*>        QR factorization whenever the truncated part is below the
+*>        threshold of the order of EPS * ||A||_F. This is aggressive
+*>        truncation level.
+*>  = 'M' Similarly as with 'A', but the truncation is more gentle: it
+*>        is allowed only when there is a drop on the diagonal of the
+*>        triangular factor in the QR factorization. This is medium
+*>        truncation level.
+*>  = 'H' High accuracy requested. No numerical rank determination based
+*>        on the rank revealing QR factorization is attempted.
+*>  = 'E' Same as 'H', and in addition the condition number of column
+*>        scaled A is estimated and returned in  RWORK(1).
+*>        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1)
+*> \endverbatim
+*>
+*> \param[in] JOBP
+*> \verbatim
+*>  JOBP is CHARACTER*1
+*>  = 'P' The rows of A are ordered in decreasing order with respect to
+*>        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost
+*>        of extra data movement. Recommended for numerical robustness.
+*>  = 'N' No row pivoting.
+*> \endverbatim
+*>
+*> \param[in] JOBR
+*> \verbatim
+*>          JOBR is CHARACTER*1
+*>          = 'T' After the initial pivoted QR factorization, SGESVD is applied to
+*>          the transposed R**T of the computed triangular factor R. This involves
+*>          some extra data movement (matrix transpositions). Useful for
+*>          experiments, research and development.
+*>          = 'N' The triangular factor R is given as input to SGESVD. This may be
+*>          preferred as it involves less data movement.
+*> \endverbatim
+*>
+*> \param[in] JOBU
+*> \verbatim
+*>          JOBU is CHARACTER*1
+*>          = 'A' All M left singular vectors are computed and returned in the
+*>          matrix U. See the description of U.
+*>          = 'S' or 'U' N = min(M,N) left singular vectors are computed and returned
+*>          in the matrix U. See the description of U.
+*>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular
+*>          vectors are computed and returned in the matrix U.
+*>          = 'F' The N left singular vectors are returned in factored form as the
+*>          product of the Q factor from the initial QR factorization and the
+*>          N left singular vectors of (R**T , 0)**T. If row pivoting is used,
+*>          then the necessary information on the row pivoting is stored in
+*>          IWORK(N+1:N+M-1).
+*>          = 'N' The left singular vectors are not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBV
+*> \verbatim
+*>          JOBV is CHARACTER*1
+*>          = 'A', 'V' All N right singular vectors are computed and returned in
+*>          the matrix V.
+*>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular
+*>          vectors are computed and returned in the matrix V. This option is
+*>          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal.
+*>          = 'N' The right singular vectors are not computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the input matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the input matrix A.  M >= N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is REAL array of dimensions LDA x N
+*>          On entry, the input matrix A.
+*>          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains
+*>          the Householder vectors as stored by SGEQP3. If JOBU = 'F', these Householder
+*>          vectors together with WORK(1:N) can be used to restore the Q factors from
+*>          the initial pivoted QR factorization of A. See the description of U.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER.
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is REAL array of dimension N.
+*>          The singular values of A, ordered so that S(i) >= S(i+1).
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*>          U is REAL array, dimension
+*>          LDU x M if JOBU = 'A'; see the description of LDU. In this case,
+*>          on exit, U contains the M left singular vectors.
+*>          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this
+*>          case, U contains the leading N or the leading NUMRANK left singular vectors.
+*>          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U
+*>          contains N x N orthogonal matrix that can be used to form the left
+*>          singular vectors.
+*>          If JOBU = 'N', U is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER.
+*>          The leading dimension of the array U.
+*>          If JOBU = 'A', 'S', 'U', 'R',  LDU >= max(1,M).
+*>          If JOBU = 'F',                 LDU >= max(1,N).
+*>          Otherwise,                     LDU >= 1.
+*> \endverbatim
+*>
+*> \param[out] V
+*> \verbatim
+*>          V is REAL array, dimension
+*>          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' .
+*>          If JOBV = 'A', or 'V',  V contains the N-by-N orthogonal matrix  V**T;
+*>          If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right
+*>          singular vectors, stored rowwise, of the NUMRANK largest singular values).
+*>          If JOBV = 'N' and JOBA = 'E', V is used as a workspace.
+*>          If JOBV = 'N', and JOBA.NE.'E', V is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= max(1,N).
+*>          Otherwise,                               LDV >= 1.
+*> \endverbatim
+*>
+*> \param[out] NUMRANK
+*> \verbatim
+*>          NUMRANK is INTEGER
+*>          NUMRANK is the numerical rank first determined after the rank
+*>          revealing QR factorization, following the strategy specified by the
+*>          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK
+*>          leading singular values and vectors are then requested in the call
+*>          of SGESVD. The final value of NUMRANK might be further reduced if
+*>          some singular values are computed as zeros.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (max(1, LIWORK)).
+*>          On exit, IWORK(1:N) contains column pivoting permutation of the
+*>          rank revealing QR factorization.
+*>          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence
+*>          of row swaps used in row pivoting. These can be used to restore the
+*>          left singular vectors in the case JOBU = 'F'.
+*>
+*>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
+*>          LIWORK(1) returns the minimal LIWORK.
+*> \endverbatim
+*>
+*> \param[in] LIWORK
+*> \verbatim
+*>          LIWORK is INTEGER
+*>          The dimension of the array IWORK.
+*>          LIWORK >= N + M - 1,     if JOBP = 'P' and JOBA .NE. 'E';
+*>          LIWORK >= N              if JOBP = 'N' and JOBA .NE. 'E';
+*>          LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E';
+*>          LIWORK >= N + N          if JOBP = 'N' and JOBA = 'E'.
+*
+*>          If LIWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates and returns the optimal and minimal sizes
+*>          for the WORK, IWORK, and RWORK arrays, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is REAL array, dimension (max(2, LWORK)), used as a workspace.
+*>          On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters
+*>          needed to recover the Q factor from the QR factorization computed by
+*>          SGEQP3.
+*>
+*>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
+*>          WORK(1) returns the optimal LWORK, and
+*>          WORK(2) returns the minimal LWORK.
+*> \endverbatim
+*>
+*> \param[in,out] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. It is determined as follows:
+*>          Let  LWQP3 = 3*N+1,  LWCON = 3*N, and let
+*>          LWORQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U'
+*>                  { MAX( M, 1 ),  if JOBU = 'A'
+*>          LWSVD = MAX( 5*N, 1 )
+*>          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ),
+*>          LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 )
+*>          Then the minimal value of LWORK is:
+*>          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed;
+*>          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed,
+*>                                   and a scaled condition estimate requested;
+*>
+*>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left
+*>                                   singular vectors are requested;
+*>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left
+*>                                   singular vectors are requested, and also
+*>                                   a scaled condition estimate requested;
+*>
+*>          = N + MAX( LWQP3, LWSVD )        if the singular values and the right
+*>                                   singular vectors are requested;
+*>          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right
+*>                                   singular vectors are requested, and also
+*>                                   a scaled condition etimate requested;
+*>
+*>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R';
+*>                                   independent of JOBR;
+*>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested,
+*>                                   JOBV = 'R' and, also a scaled condition
+*>                                   estimate requested; independent of JOBR;
+*>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
+*>         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the
+*>                         full SVD is requested with JOBV = 'A' or 'V', and
+*>                         JOBR ='N'
+*>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
+*>         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) )
+*>                         if the full SVD is requested with JOBV = 'A' or 'V', and
+*>                         JOBR ='N', and also a scaled condition number estimate
+*>                         requested.
+*>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
+*>         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the
+*>                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T'
+*>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
+*>         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) )
+*>                         if the full SVD is requested with JOBV = 'A' or 'V', and
+*>                         JOBR ='T', and also a scaled condition number estimate
+*>                         requested.
+*>          Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ).
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates and returns the optimal and minimal sizes
+*>          for the WORK, IWORK, and RWORK arrays, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] RWORK
+*> \verbatim
+*>          RWORK is REAL array, dimension (max(1, LRWORK)).
+*>          On exit,
+*>          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition
+*>          number of column scaled A. If A = C * D where D is diagonal and C
+*>          has unit columns in the Euclidean norm, then, assuming full column rank,
+*>          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1).
+*>          Otherwise, RWORK(1) = -1.
+*>          2. RWORK(2) contains the number of singular values computed as
+*>          exact zeros in SGESVD applied to the upper triangular or trapeziodal
+*>          R (from the initial QR factorization). In case of early exit (no call to
+*>          SGESVD, such as in the case of zero matrix) RWORK(2) = -1.
+*>
+*>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
+*>          RWORK(1) returns the minimal LRWORK.
+*> \endverbatim
+*>
+*> \param[in] LRWORK
+*> \verbatim
+*>          LRWORK is INTEGER.
+*>          The dimension of the array RWORK.
+*>          If JOBP ='P', then LRWORK >= MAX(2, M).
+*>          Otherwise, LRWORK >= 2
+*
+*>          If LRWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates and returns the optimal and minimal sizes
+*>          for the WORK, IWORK, and RWORK arrays, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if SBDSQR did not converge, INFO specifies how many superdiagonals
+*>          of an intermediate bidiagonal form B (computed in SGESVD) did not
+*>          converge to zero.
+*> \endverbatim
+*
+*> \par Further Details:
+*  ========================
+*>
+*> \verbatim
+*>
+*>   1. The data movement (matrix transpose) is coded using simple nested
+*>   DO-loops because BLAS and LAPACK do not provide corresponding subroutines.
+*>   Those DO-loops are easily identified in this source code - by the CONTINUE
+*>   statements labeled with 11**. In an optimized version of this code, the
+*>   nested DO loops should be replaced with calls to an optimized subroutine.
+*>   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause
+*>   column norm overflow. This is the minial precaution and it is left to the
+*>   SVD routine (CGESVD) to do its own preemptive scaling if potential over-
+*>   or underflows are detected. To avoid repeated scanning of the array A,
+*>   an optimal implementation would do all necessary scaling before calling
+*>   CGESVD and the scaling in CGESVD can be switched off.
+*>   3. Other comments related to code optimization are given in comments in the
+*>   code, enlosed in [[double brackets]].
+*> \endverbatim
+*
+*> \par Bugs, examples and comments
+*  ===========================
+*
+*> \verbatim
+*>  Please report all bugs and send interesting examples and/or comments to
+*>  drmac@math.hr. Thank you.
+*> \endverbatim
+*
+*> \par References
+*  ===============
+*
+*> \verbatim
+*>  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for
+*>      Computing the SVD with High Accuracy. ACM Trans. Math. Softw.
+*>      44(1): 11:1-11:30 (2017)
+*>
+*>  SIGMA library, xGESVDQ section updated February 2016.
+*>  Developed and coded by Zlatko Drmac, Department of Mathematics
+*>  University of Zagreb, Croatia, drmac@math.hr
+*> \endverbatim
+*
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*> Developed and coded by Zlatko Drmac, Department of Mathematics
+*>  University of Zagreb, Croatia, drmac@math.hr
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2018
+*
+*> \ingroup realGEsing
+*
+*  =====================================================================
+      SUBROUTINE SGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA,
+     $                    S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK,
+     $                    WORK, LWORK, RWORK, LRWORK, INFO )
+*     .. Scalar Arguments ..
+      IMPLICIT    NONE
+      CHARACTER   JOBA, JOBP, JOBR, JOBU, JOBV
+      INTEGER     M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK,
+     $            INFO
+*     ..
+*     .. Array Arguments ..
+      REAL        A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * )
+      REAL        S( * ), RWORK( * )
+      INTEGER     IWORK( * )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL        ZERO,         ONE
+      PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER     IERR, IWOFF, NR, N1, OPTRATIO, p, q
+      INTEGER     LWCON, LWQP3, LWRK_SGELQF, LWRK_SGESVD, LWRK_SGESVD2,
+     $            LWRK_SGEQP3,  LWRK_SGEQRF, LWRK_SORMLQ, LWRK_SORMQR,
+     $            LWRK_SORMQR2, LWLQF, LWQRF, LWSVD, LWSVD2, LWORQ,
+     $            LWORQ2, LWUNLQ, MINWRK, MINWRK2, OPTWRK, OPTWRK2,
+     $            IMINWRK, RMINWRK
+      LOGICAL     ACCLA,  ACCLM, ACCLH, ASCALED, CONDA, DNTWU,  DNTWV,
+     $            LQUERY, LSVC0, LSVEC, ROWPRM,  RSVEC, RTRANS, WNTUA,
+     $            WNTUF,  WNTUR, WNTUS, WNTVA,   WNTVR
+      REAL        BIG, EPSLN, RTMP, SCONDA, SFMIN
+*     ..
+*     .. Local Arrays
+      REAL        RDUMMY(1)
+*     ..
+*     .. External Subroutines (BLAS, LAPACK)
+      EXTERNAL    SGELQF, SGEQP3, SGEQRF, SGESVD, SLACPY, SLAPMT,
+     $            SLASCL, SLASET, SLASWP, SSCAL,  SPOCON, SORMLQ,
+     $            SORMQR, XERBLA
+*     ..
+*     .. External Functions (BLAS, LAPACK)
+      LOGICAL    LSAME
+      INTEGER    ISAMAX
+      REAL        SLANGE, SNRM2, SLAMCH
+      EXTERNAL    SLANGE, LSAME, ISAMAX, SNRM2, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC   ABS, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      WNTUS  = LSAME( JOBU, 'S' ) .OR. LSAME( JOBU, 'U' )
+      WNTUR  = LSAME( JOBU, 'R' )
+      WNTUA  = LSAME( JOBU, 'A' )
+      WNTUF  = LSAME( JOBU, 'F' )
+      LSVC0  = WNTUS .OR. WNTUR .OR. WNTUA
+      LSVEC  = LSVC0 .OR. WNTUF
+      DNTWU  = LSAME( JOBU, 'N' )
+*
+      WNTVR  = LSAME( JOBV, 'R' )
+      WNTVA  = LSAME( JOBV, 'A' ) .OR. LSAME( JOBV, 'V' )
+      RSVEC  = WNTVR .OR. WNTVA
+      DNTWV  = LSAME( JOBV, 'N' )
+*
+      ACCLA  = LSAME( JOBA, 'A' )
+      ACCLM  = LSAME( JOBA, 'M' )
+      CONDA  = LSAME( JOBA, 'E' )
+      ACCLH  = LSAME( JOBA, 'H' ) .OR. CONDA
+*
+      ROWPRM = LSAME( JOBP, 'P' )
+      RTRANS = LSAME( JOBR, 'T' )
+*
+      IF ( ROWPRM ) THEN
+         IF ( CONDA ) THEN
+            IMINWRK = MAX( 1, N + M - 1 + N )
+         ELSE
+            IMINWRK = MAX( 1, N + M - 1 )
+         END IF
+         RMINWRK = MAX( 2, M )
+      ELSE
+         IF ( CONDA ) THEN
+            IMINWRK = MAX( 1, N + N )
+         ELSE
+            IMINWRK = MAX( 1, N )
+         END IF
+         RMINWRK = 2
+      END IF
+      LQUERY = (LIWORK .EQ. -1 .OR. LWORK .EQ. -1 .OR. LRWORK .EQ. -1)
+      INFO  = 0
+      IF ( .NOT. ( ACCLA .OR. ACCLM .OR. ACCLH ) ) THEN
+         INFO = -1
+      ELSE IF ( .NOT.( ROWPRM .OR. LSAME( JOBP, 'N' ) ) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT.( RTRANS .OR. LSAME( JOBR, 'N' ) ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT.( LSVEC .OR. DNTWU ) ) THEN
+         INFO = -4
+      ELSE IF ( WNTUR .AND. WNTVA ) THEN
+         INFO = -5
+      ELSE IF ( .NOT.( RSVEC .OR. DNTWV )) THEN
+         INFO = -5
+      ELSE IF ( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF ( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN
+         INFO = -7
+      ELSE IF ( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      ELSE IF ( LDU.LT.1 .OR. ( LSVC0 .AND. LDU.LT.M ) .OR.
+     $       ( WNTUF .AND. LDU.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF ( LDV.LT.1 .OR. ( RSVEC .AND. LDV.LT.N ) .OR.
+     $          ( CONDA .AND. LDV.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF ( LIWORK .LT. IMINWRK .AND. .NOT. LQUERY ) THEN
+         INFO = -17
+      END IF
+*
+*
+      IF ( INFO .EQ. 0 ) THEN
+*        .. compute the minimal and the optimal workspace lengths
+*        [[The expressions for computing the minimal and the optimal
+*        values of LWORK are written with a lot of redundancy and
+*        can be simplified. However, this detailed form is easier for
+*        maintenance and modifications of the code.]]
+*
+*        .. minimal workspace length for SGEQP3 of an M x N matrix
+         LWQP3 = 3 * N + 1
+*        .. minimal workspace length for SORMQR to build left singular vectors
+         IF ( WNTUS .OR. WNTUR ) THEN
+             LWORQ  = MAX( N  , 1 )
+         ELSE IF ( WNTUA ) THEN
+             LWORQ = MAX( M , 1 )
+         END IF
+*        .. minimal workspace length for SPOCON of an N x N matrix
+         LWCON = 3 * N
+*        .. SGESVD of an N x N matrix
+         LWSVD = MAX( 5 * N, 1 )
+         IF ( LQUERY ) THEN
+             CALL SGEQP3( M, N, A, LDA, IWORK, RDUMMY, RDUMMY, -1,
+     $           IERR )
+             LWRK_SGEQP3 = INT( RDUMMY(1) )
+             IF ( WNTUS .OR. WNTUR ) THEN
+                 CALL SORMQR( 'L', 'N', M, N, N, A, LDA, RDUMMY, U,
+     $                LDU, RDUMMY, -1, IERR )
+                 LWRK_SORMQR = INT( RDUMMY(1) )
+             ELSE IF ( WNTUA ) THEN
+                 CALL SORMQR( 'L', 'N', M, M, N, A, LDA, RDUMMY, U,
+     $                LDU, RDUMMY, -1, IERR )
+                 LWRK_SORMQR = INT( RDUMMY(1) )
+             ELSE
+                 LWRK_SORMQR = 0
+             END IF
+         END IF
+         MINWRK = 2
+         OPTWRK = 2
+         IF ( .NOT. (LSVEC .OR. RSVEC )) THEN
+*            .. minimal and optimal sizes of the workspace if
+*            only the singular values are requested
+             IF ( CONDA ) THEN
+                MINWRK = MAX( N+LWQP3, LWCON, LWSVD )
+             ELSE
+                MINWRK = MAX( N+LWQP3, LWSVD )
+             END IF
+             IF ( LQUERY ) THEN
+                 CALL SGESVD( 'N', 'N', N, N, A, LDA, S, U, LDU,
+     $                V, LDV, RDUMMY, -1, IERR )
+                 LWRK_SGESVD = INT( RDUMMY(1) )
+                 IF ( CONDA ) THEN
+                    OPTWRK = MAX( N+LWRK_SGEQP3, N+LWCON, LWRK_SGESVD )
+                 ELSE
+                    OPTWRK = MAX( N+LWRK_SGEQP3, LWRK_SGESVD )
+                 END IF
+             END IF
+         ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN
+*            .. minimal and optimal sizes of the workspace if the
+*            singular values and the left singular vectors are requested
+             IF ( CONDA ) THEN
+                 MINWRK = N + MAX( LWQP3, LWCON, LWSVD, LWORQ )
+             ELSE
+                 MINWRK = N + MAX( LWQP3, LWSVD, LWORQ )
+             END IF
+             IF ( LQUERY ) THEN
+                IF ( RTRANS ) THEN
+                   CALL SGESVD( 'N', 'O', N, N, A, LDA, S, U, LDU,
+     $                  V, LDV, RDUMMY, -1, IERR )
+                ELSE
+                   CALL SGESVD( 'O', 'N', N, N, A, LDA, S, U, LDU,
+     $                  V, LDV, RDUMMY, -1, IERR )
+                END IF
+                LWRK_SGESVD = INT( RDUMMY(1) )
+                IF ( CONDA ) THEN
+                    OPTWRK = N + MAX( LWRK_SGEQP3, LWCON, LWRK_SGESVD,
+     $                               LWRK_SORMQR )
+                ELSE
+                    OPTWRK = N + MAX( LWRK_SGEQP3, LWRK_SGESVD,
+     $                               LWRK_SORMQR )
+                END IF
+             END IF
+         ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN
+*            .. minimal and optimal sizes of the workspace if the
+*            singular values and the right singular vectors are requested
+             IF ( CONDA ) THEN
+                 MINWRK = N + MAX( LWQP3, LWCON, LWSVD )
+             ELSE
+                 MINWRK = N + MAX( LWQP3, LWSVD )
+             END IF
+             IF ( LQUERY ) THEN
+                 IF ( RTRANS ) THEN
+                     CALL SGESVD( 'O', 'N', N, N, A, LDA, S, U, LDU,
+     $                    V, LDV, RDUMMY, -1, IERR )
+                 ELSE
+                     CALL SGESVD( 'N', 'O', N, N, A, LDA, S, U, LDU,
+     $                    V, LDV, RDUMMY, -1, IERR )
+                 END IF
+                 LWRK_SGESVD = INT( RDUMMY(1) )
+                 IF ( CONDA ) THEN
+                     OPTWRK = N + MAX( LWRK_SGEQP3, LWCON, LWRK_SGESVD )
+                 ELSE
+                     OPTWRK = N + MAX( LWRK_SGEQP3, LWRK_SGESVD )
+                 END IF
+             END IF
+         ELSE
+*            .. minimal and optimal sizes of the workspace if the
+*            full SVD is requested
+             IF ( RTRANS ) THEN
+                 MINWRK = MAX( LWQP3, LWSVD, LWORQ )
+                 IF ( CONDA ) MINWRK = MAX( MINWRK, LWCON )
+                 MINWRK = MINWRK + N
+                 IF ( WNTVA ) THEN
+*                   .. minimal workspace length for N x N/2 SGEQRF
+                    LWQRF  = MAX( N/2, 1 )
+*                   .. minimal workspace lengt for N/2 x N/2 SGESVD
+                    LWSVD2 = MAX( 5 * (N/2), 1 )
+                    LWORQ2 = MAX( N, 1 )
+                    MINWRK2 = MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2,
+     $                        N/2+LWORQ2, LWORQ )
+                    IF ( CONDA ) MINWRK2 = MAX( MINWRK2, LWCON )
+                    MINWRK2 = N + MINWRK2
+                    MINWRK = MAX( MINWRK, MINWRK2 )
+                 END IF
+             ELSE
+                 MINWRK = MAX( LWQP3, LWSVD, LWORQ )
+                 IF ( CONDA ) MINWRK = MAX( MINWRK, LWCON )
+                 MINWRK = MINWRK + N
+                 IF ( WNTVA ) THEN
+*                   .. minimal workspace length for N/2 x N SGELQF
+                    LWLQF  = MAX( N/2, 1 )
+                    LWSVD2 = MAX( 5 * (N/2), 1 )
+                    LWUNLQ = MAX( N , 1 )
+                    MINWRK2 = MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2,
+     $                        N/2+LWUNLQ, LWORQ )
+                    IF ( CONDA ) MINWRK2 = MAX( MINWRK2, LWCON )
+                    MINWRK2 = N + MINWRK2
+                    MINWRK = MAX( MINWRK, MINWRK2 )
+                 END IF
+             END IF
+             IF ( LQUERY ) THEN
+                IF ( RTRANS ) THEN
+                   CALL SGESVD( 'O', 'A', N, N, A, LDA, S, U, LDU,
+     $                  V, LDV, RDUMMY, -1, IERR )
+                   LWRK_SGESVD = INT( RDUMMY(1) )
+                   OPTWRK = MAX(LWRK_SGEQP3,LWRK_SGESVD,LWRK_SORMQR)
+                   IF ( CONDA ) OPTWRK = MAX( OPTWRK, LWCON )
+                   OPTWRK = N + OPTWRK
+                   IF ( WNTVA ) THEN
+                       CALL SGEQRF(N,N/2,U,LDU,RDUMMY,RDUMMY,-1,IERR)
+                       LWRK_SGEQRF = INT( RDUMMY(1) )
+                       CALL SGESVD( 'S', 'O', N/2,N/2, V,LDV, S, U,LDU,
+     $                      V, LDV, RDUMMY, -1, IERR )
+                       LWRK_SGESVD2 = INT( RDUMMY(1) )
+                       CALL SORMQR( 'R', 'C', N, N, N/2, U, LDU, RDUMMY,
+     $                      V, LDV, RDUMMY, -1, IERR )
+                       LWRK_SORMQR2 = INT( RDUMMY(1) )
+                       OPTWRK2 = MAX( LWRK_SGEQP3, N/2+LWRK_SGEQRF,
+     $                           N/2+LWRK_SGESVD2, N/2+LWRK_SORMQR2 )
+                       IF ( CONDA ) OPTWRK2 = MAX( OPTWRK2, LWCON )
+                       OPTWRK2 = N + OPTWRK2
+                       OPTWRK = MAX( OPTWRK, OPTWRK2 )
+                   END IF
+                ELSE
+                   CALL SGESVD( 'S', 'O', N, N, A, LDA, S, U, LDU,
+     $                  V, LDV, RDUMMY, -1, IERR )
+                   LWRK_SGESVD = INT( RDUMMY(1) )
+                   OPTWRK = MAX(LWRK_SGEQP3,LWRK_SGESVD,LWRK_SORMQR)
+                   IF ( CONDA ) OPTWRK = MAX( OPTWRK, LWCON )
+                   OPTWRK = N + OPTWRK
+                   IF ( WNTVA ) THEN
+                      CALL SGELQF(N/2,N,U,LDU,RDUMMY,RDUMMY,-1,IERR)
+                      LWRK_SGELQF = INT( RDUMMY(1) )
+                      CALL SGESVD( 'S','O', N/2,N/2, V, LDV, S, U, LDU,
+     $                     V, LDV, RDUMMY, -1, IERR )
+                      LWRK_SGESVD2 = INT( RDUMMY(1) )
+                      CALL SORMLQ( 'R', 'N', N, N, N/2, U, LDU, RDUMMY,
+     $                     V, LDV, RDUMMY,-1,IERR )
+                      LWRK_SORMLQ = INT( RDUMMY(1) )
+                      OPTWRK2 = MAX( LWRK_SGEQP3, N/2+LWRK_SGELQF,
+     $                           N/2+LWRK_SGESVD2, N/2+LWRK_SORMLQ )
+                       IF ( CONDA ) OPTWRK2 = MAX( OPTWRK2, LWCON )
+                       OPTWRK2 = N + OPTWRK2
+                       OPTWRK = MAX( OPTWRK, OPTWRK2 )
+                   END IF
+                END IF
+             END IF
+         END IF
+*
+         MINWRK = MAX( 2, MINWRK )
+         OPTWRK = MAX( 2, OPTWRK )
+         IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = -19
+*
+      END IF
+*
+      IF (INFO .EQ. 0 .AND. LRWORK .LT. RMINWRK .AND. .NOT. LQUERY) THEN
+         INFO = -21
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGESVDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+*
+*     Return optimal workspace
+*
+          IWORK(1) = IMINWRK
+          WORK(1) = OPTWRK
+          WORK(2) = MINWRK
+          RWORK(1) = RMINWRK
+          RETURN
+      END IF
+*
+*     Quick return if the matrix is void.
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) THEN
+*     .. all output is void.
+         RETURN
+      END IF
+*
+      BIG = SLAMCH('O')
+      ASCALED = .FALSE.
+      IWOFF = 1
+      IF ( ROWPRM ) THEN
+            IWOFF = M
+*           .. reordering the rows in decreasing sequence in the
+*           ell-infinity norm - this enhances numerical robustness in
+*           the case of differently scaled rows.
+            DO 1904 p = 1, M
+*               RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) )
+*               [[SLANGE will return NaN if an entry of the p-th row is Nan]]
+                RWORK(p) = SLANGE( 'M', 1, N, A(p,1), LDA, RDUMMY )
+*               .. check for NaN's and Inf's
+                IF ( ( RWORK(p) .NE. RWORK(p) ) .OR.
+     $               ( (RWORK(p)*ZERO) .NE. ZERO ) ) THEN
+                    INFO = -8
+                    CALL XERBLA( 'SGESVDQ', -INFO )
+                    RETURN
+                END IF
+ 1904       CONTINUE
+            DO 1952 p = 1, M - 1
+            q = ISAMAX( M-p+1, RWORK(p), 1 ) + p - 1
+            IWORK(N+p) = q
+            IF ( p .NE. q ) THEN
+               RTMP     = RWORK(p)
+               RWORK(p) = RWORK(q)
+               RWORK(q) = RTMP
+            END IF
+ 1952       CONTINUE
+*
+            IF ( RWORK(1) .EQ. ZERO ) THEN
+*              Quick return: A is the M x N zero matrix.
+               NUMRANK = 0
+               CALL SLASET( 'G', N, 1, ZERO, ZERO, S, N )
+               IF ( WNTUS ) CALL SLASET('G', M, N, ZERO, ONE, U, LDU)
+               IF ( WNTUA ) CALL SLASET('G', M, M, ZERO, ONE, U, LDU)
+               IF ( WNTVA ) CALL SLASET('G', N, N, ZERO, ONE, V, LDV)
+               IF ( WNTUF ) THEN
+                   CALL SLASET( 'G', N, 1, ZERO, ZERO, WORK, N )
+                   CALL SLASET( 'G', M, N, ZERO,  ONE, U, LDU )
+               END IF
+               DO 5001 p = 1, N
+                   IWORK(p) = p
+ 5001          CONTINUE
+               IF ( ROWPRM ) THEN
+                   DO 5002 p = N + 1, N + M - 1
+                       IWORK(p) = p - N
+ 5002              CONTINUE
+               END IF
+               IF ( CONDA ) RWORK(1) = -1
+               RWORK(2) = -1
+               RETURN
+            END IF
+*
+            IF ( RWORK(1) .GT. BIG / SQRT(REAL(M)) ) THEN
+*               .. to prevent overflow in the QR factorization, scale the
+*               matrix by 1/sqrt(M) if too large entry detected
+                CALL SLASCL('G',0,0,SQRT(REAL(M)),ONE, M,N, A,LDA, IERR)
+                ASCALED = .TRUE.
+            END IF
+            CALL SLASWP( N, A, LDA, 1, M-1, IWORK(N+1), 1 )
+      END IF
+*
+*    .. At this stage, preemptive scaling is done only to avoid column
+*    norms overflows during the QR factorization. The SVD procedure should
+*    have its own scaling to save the singular values from overflows and
+*    underflows. That depends on the SVD procedure.
+*
+      IF ( .NOT.ROWPRM ) THEN
+          RTMP = SLANGE( 'M', M, N, A, LDA, RDUMMY )
+          IF ( ( RTMP .NE. RTMP ) .OR.
+     $         ( (RTMP*ZERO) .NE. ZERO ) ) THEN
+               INFO = -8
+               CALL XERBLA( 'SGESVDQ', -INFO )
+               RETURN
+          END IF
+          IF ( RTMP .GT. BIG / SQRT(REAL(M)) ) THEN
+*             .. to prevent overflow in the QR factorization, scale the
+*             matrix by 1/sqrt(M) if too large entry detected
+              CALL SLASCL('G',0,0, SQRT(REAL(M)),ONE, M,N, A,LDA, IERR)
+              ASCALED = .TRUE.
+          END IF
+      END IF
+*
+*     .. QR factorization with column pivoting
+*
+*     A * P = Q * [ R ]
+*                 [ 0 ]
+*
+      DO 1963 p = 1, N
+*        .. all columns are free columns
+         IWORK(p) = 0
+ 1963 CONTINUE
+      CALL SGEQP3( M, N, A, LDA, IWORK, WORK, WORK(N+1), LWORK-N,
+     $      IERR )
+*
+*    If the user requested accuracy level allows truncation in the
+*    computed upper triangular factor, the matrix R is examined and,
+*    if possible, replaced with its leading upper trapezoidal part.
+*
+      EPSLN = SLAMCH('E')
+      SFMIN = SLAMCH('S')
+*     SMALL = SFMIN / EPSLN
+      NR = N
+*
+      IF ( ACCLA ) THEN
+*
+*        Standard absolute error bound suffices. All sigma_i with
+*        sigma_i < N*EPS*||A||_F are flushed to zero. This is an
+*        aggressive enforcement of lower numerical rank by introducing a
+*        backward error of the order of N*EPS*||A||_F.
+         NR = 1
+         RTMP = SQRT(REAL(N))*EPSLN
+         DO 3001 p = 2, N
+            IF ( ABS(A(p,p)) .LT. (RTMP*ABS(A(1,1))) ) GO TO 3002
+               NR = NR + 1
+ 3001    CONTINUE
+ 3002    CONTINUE
+*
+      ELSEIF ( ACCLM ) THEN
+*        .. similarly as above, only slightly more gentle (less aggressive).
+*        Sudden drop on the diagonal of R is used as the criterion for being
+*        close-to-rank-deficient. The threshold is set to EPSLN=SLAMCH('E').
+*        [[This can be made more flexible by replacing this hard-coded value
+*        with a user specified threshold.]] Also, the values that underflow
+*        will be truncated.
+         NR = 1
+         DO 3401 p = 2, N
+            IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR.
+     $           ( ABS(A(p,p)) .LT. SFMIN ) ) GO TO 3402
+            NR = NR + 1
+ 3401    CONTINUE
+ 3402    CONTINUE
+*
+      ELSE
+*        .. RRQR not authorized to determine numerical rank except in the
+*        obvious case of zero pivots.
+*        .. inspect R for exact zeros on the diagonal;
+*        R(i,i)=0 => R(i:N,i:N)=0.
+         NR = 1
+         DO 3501 p = 2, N
+            IF ( ABS(A(p,p)) .EQ. ZERO ) GO TO 3502
+            NR = NR + 1
+ 3501    CONTINUE
+ 3502    CONTINUE
+*
+         IF ( CONDA ) THEN
+*           Estimate the scaled condition number of A. Use the fact that it is
+*           the same as the scaled condition number of R.
+*              .. V is used as workspace
+               CALL SLACPY( 'U', N, N, A, LDA, V, LDV )
+*              Only the leading NR x NR submatrix of the triangular factor
+*              is considered. Only if NR=N will this give a reliable error
+*              bound. However, even for NR < N, this can be used on an
+*              expert level and obtain useful information in the sense of
+*              perturbation theory.
+               DO 3053 p = 1, NR
+                  RTMP = SNRM2( p, V(1,p), 1 )
+                  CALL SSCAL( p, ONE/RTMP, V(1,p), 1 )
+ 3053          CONTINUE
+               IF ( .NOT. ( LSVEC .OR. RSVEC ) ) THEN
+                   CALL SPOCON( 'U', NR, V, LDV, ONE, RTMP,
+     $                  WORK, IWORK(N+IWOFF), IERR )
+               ELSE
+                   CALL SPOCON( 'U', NR, V, LDV, ONE, RTMP,
+     $                  WORK(N+1), IWORK(N+IWOFF), IERR )
+               END IF
+               SCONDA = ONE / SQRT(RTMP)
+*           For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1),
+*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
+*           See the reference [1] for more details.
+         END IF
+*
+      ENDIF
+*
+      IF ( WNTUR ) THEN
+          N1 = NR
+      ELSE IF ( WNTUS .OR. WNTUF) THEN
+          N1 = N
+      ELSE IF ( WNTUA ) THEN
+          N1 = M
+      END IF
+*
+      IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
+*.......................................................................
+*        .. only the singular values are requested
+*.......................................................................
+         IF ( RTRANS ) THEN
+*
+*         .. compute the singular values of R**T = [A](1:NR,1:N)**T
+*           .. set the lower triangle of [A] to [A](1:NR,1:N)**T and
+*           the upper triangle of [A] to zero.
+            DO 1146 p = 1, MIN( N, NR )
+               DO 1147 q = p + 1, N
+                  A(q,p) = A(p,q)
+                  IF ( q .LE. NR ) A(p,q) = ZERO
+ 1147          CONTINUE
+ 1146       CONTINUE
+*
+            CALL SGESVD( 'N', 'N', N, NR, A, LDA, S, U, LDU,
+     $           V, LDV, WORK, LWORK, INFO )
+*
+         ELSE
+*
+*           .. compute the singular values of R = [A](1:NR,1:N)
+*
+            IF ( NR .GT. 1 )
+     $          CALL SLASET( 'L', NR-1,NR-1, ZERO,ZERO, A(2,1), LDA )
+            CALL SGESVD( 'N', 'N', NR, N, A, LDA, S, U, LDU,
+     $           V, LDV, WORK, LWORK, INFO )
+*
+         END IF
+*
+      ELSE IF ( LSVEC .AND. ( .NOT. RSVEC) ) THEN
+*.......................................................................
+*       .. the singular values and the left singular vectors requested
+*.......................................................................""""""""
+         IF ( RTRANS ) THEN
+*            .. apply SGESVD to R**T
+*            .. copy R**T into [U] and overwrite [U] with the right singular
+*            vectors of R
+            DO 1192 p = 1, NR
+               DO 1193 q = p, N
+                  U(q,p) = A(p,q)
+ 1193          CONTINUE
+ 1192       CONTINUE
+            IF ( NR .GT. 1 )
+     $          CALL SLASET( 'U', NR-1,NR-1, ZERO,ZERO, U(1,2), LDU )
+*           .. the left singular vectors not computed, the NR right singular
+*           vectors overwrite [U](1:NR,1:NR) as transposed. These
+*           will be pre-multiplied by Q to build the left singular vectors of A.
+               CALL SGESVD( 'N', 'O', N, NR, U, LDU, S, U, LDU,
+     $              U, LDU, WORK(N+1), LWORK-N, INFO )
+*
+               DO 1119 p = 1, NR
+                   DO 1120 q = p + 1, NR
+                      RTMP   = U(q,p)
+                      U(q,p) = U(p,q)
+                      U(p,q) = RTMP
+ 1120              CONTINUE
+ 1119          CONTINUE
+*
+         ELSE
+*            .. apply SGESVD to R
+*            .. copy R into [U] and overwrite [U] with the left singular vectors
+             CALL SLACPY( 'U', NR, N, A, LDA, U, LDU )
+             IF ( NR .GT. 1 )
+     $         CALL SLASET( 'L', NR-1, NR-1, ZERO, ZERO, U(2,1), LDU )
+*            .. the right singular vectors not computed, the NR left singular
+*            vectors overwrite [U](1:NR,1:NR)
+                CALL SGESVD( 'O', 'N', NR, N, U, LDU, S, U, LDU,
+     $               V, LDV, WORK(N+1), LWORK-N, INFO )
+*               .. now [U](1:NR,1:NR) contains the NR left singular vectors of
+*               R. These will be pre-multiplied by Q to build the left singular
+*               vectors of A.
+         END IF
+*
+*           .. assemble the left singular vector matrix U of dimensions
+*              (M x NR) or (M x N) or (M x M).
+         IF ( ( NR .LT. M ) .AND. ( .NOT.WNTUF ) ) THEN
+             CALL SLASET('A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU)
+             IF ( NR .LT. N1 ) THEN
+                CALL SLASET( 'A',NR,N1-NR,ZERO,ZERO,U(1,NR+1), LDU )
+                CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,
+     $               U(NR+1,NR+1), LDU )
+             END IF
+         END IF
+*
+*           The Q matrix from the first QRF is built into the left singular
+*           vectors matrix U.
+*
+         IF ( .NOT.WNTUF )
+     $       CALL SORMQR( 'L', 'N', M, N1, N, A, LDA, WORK, U,
+     $            LDU, WORK(N+1), LWORK-N, IERR )
+         IF ( ROWPRM .AND. .NOT.WNTUF )
+     $          CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(N+1), -1 )
+*
+      ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN
+*.......................................................................
+*       .. the singular values and the right singular vectors requested
+*.......................................................................
+          IF ( RTRANS ) THEN
+*            .. apply SGESVD to R**T
+*            .. copy R**T into V and overwrite V with the left singular vectors
+            DO 1165 p = 1, NR
+               DO 1166 q = p, N
+                  V(q,p) = (A(p,q))
+ 1166          CONTINUE
+ 1165       CONTINUE
+            IF ( NR .GT. 1 )
+     $          CALL SLASET( 'U', NR-1,NR-1, ZERO,ZERO, V(1,2), LDV )
+*           .. the left singular vectors of R**T overwrite V, the right singular
+*           vectors not computed
+            IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN
+               CALL SGESVD( 'O', 'N', N, NR, V, LDV, S, U, LDU,
+     $              U, LDU, WORK(N+1), LWORK-N, INFO )
+*
+               DO 1121 p = 1, NR
+                   DO 1122 q = p + 1, NR
+                      RTMP   = V(q,p)
+                      V(q,p) = V(p,q)
+                      V(p,q) = RTMP
+ 1122              CONTINUE
+ 1121          CONTINUE
+*
+               IF ( NR .LT. N ) THEN
+                   DO 1103 p = 1, NR
+                      DO 1104 q = NR + 1, N
+                          V(p,q) = V(q,p)
+ 1104                 CONTINUE
+ 1103              CONTINUE
+               END IF
+               CALL SLAPMT( .FALSE., NR, N, V, LDV, IWORK )
+            ELSE
+*               .. need all N right singular vectors and NR < N
+*               [!] This is simple implementation that augments [V](1:N,1:NR)
+*               by padding a zero block. In the case NR << N, a more efficient
+*               way is to first use the QR factorization. For more details
+*               how to implement this, see the " FULL SVD " branch.
+                CALL SLASET('G', N, N-NR, ZERO, ZERO, V(1,NR+1), LDV)
+                CALL SGESVD( 'O', 'N', N, N, V, LDV, S, U, LDU,
+     $               U, LDU, WORK(N+1), LWORK-N, INFO )
+*
+                DO 1123 p = 1, N
+                   DO 1124 q = p + 1, N
+                      RTMP   = V(q,p)
+                      V(q,p) = V(p,q)
+                      V(p,q) = RTMP
+ 1124              CONTINUE
+ 1123           CONTINUE
+                CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+            END IF
+*
+          ELSE
+*            .. aply SGESVD to R
+*            .. copy R into V and overwrite V with the right singular vectors
+             CALL SLACPY( 'U', NR, N, A, LDA, V, LDV )
+             IF ( NR .GT. 1 )
+     $         CALL SLASET( 'L', NR-1, NR-1, ZERO, ZERO, V(2,1), LDV )
+*            .. the right singular vectors overwrite V, the NR left singular
+*            vectors stored in U(1:NR,1:NR)
+             IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN
+                CALL SGESVD( 'N', 'O', NR, N, V, LDV, S, U, LDU,
+     $               V, LDV, WORK(N+1), LWORK-N, INFO )
+                CALL SLAPMT( .FALSE., NR, N, V, LDV, IWORK )
+*               .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
+             ELSE
+*               .. need all N right singular vectors and NR < N
+*               [!] This is simple implementation that augments [V](1:NR,1:N)
+*               by padding a zero block. In the case NR << N, a more efficient
+*               way is to first use the LQ factorization. For more details
+*               how to implement this, see the " FULL SVD " branch.
+                 CALL SLASET('G', N-NR, N, ZERO,ZERO, V(NR+1,1), LDV)
+                 CALL SGESVD( 'N', 'O', N, N, V, LDV, S, U, LDU,
+     $                V, LDV, WORK(N+1), LWORK-N, INFO )
+                 CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+             END IF
+*            .. now [V] contains the transposed matrix of the right singular
+*            vectors of A.
+          END IF
+*
+      ELSE
+*.......................................................................
+*       .. FULL SVD requested
+*.......................................................................
+         IF ( RTRANS ) THEN
+*
+*            .. apply SGESVD to R**T [[this option is left for R&D&T]]
+*
+            IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN
+*            .. copy R**T into [V] and overwrite [V] with the left singular
+*            vectors of R**T
+            DO 1168 p = 1, NR
+               DO 1169 q = p, N
+                  V(q,p) = A(p,q)
+ 1169          CONTINUE
+ 1168       CONTINUE
+            IF ( NR .GT. 1 )
+     $          CALL SLASET( 'U', NR-1,NR-1, ZERO,ZERO, V(1,2), LDV )
+*
+*           .. the left singular vectors of R**T overwrite [V], the NR right
+*           singular vectors of R**T stored in [U](1:NR,1:NR) as transposed
+               CALL SGESVD( 'O', 'A', N, NR, V, LDV, S, V, LDV,
+     $              U, LDU, WORK(N+1), LWORK-N, INFO )
+*              .. assemble V
+               DO 1115 p = 1, NR
+                  DO 1116 q = p + 1, NR
+                     RTMP   = V(q,p)
+                     V(q,p) = V(p,q)
+                     V(p,q) = RTMP
+ 1116             CONTINUE
+ 1115          CONTINUE
+               IF ( NR .LT. N ) THEN
+                   DO 1101 p = 1, NR
+                      DO 1102 q = NR+1, N
+                         V(p,q) = V(q,p)
+ 1102                 CONTINUE
+ 1101              CONTINUE
+               END IF
+               CALL SLAPMT( .FALSE., NR, N, V, LDV, IWORK )
+*
+                DO 1117 p = 1, NR
+                   DO 1118 q = p + 1, NR
+                      RTMP   = U(q,p)
+                      U(q,p) = U(p,q)
+                      U(p,q) = RTMP
+ 1118              CONTINUE
+ 1117           CONTINUE
+*
+                IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                  CALL SLASET('A', M-NR,NR, ZERO,ZERO, U(NR+1,1), LDU)
+                  IF ( NR .LT. N1 ) THEN
+                     CALL SLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU)
+                     CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,
+     $                    U(NR+1,NR+1), LDU )
+                  END IF
+               END IF
+*
+            ELSE
+*               .. need all N right singular vectors and NR < N
+*            .. copy R**T into [V] and overwrite [V] with the left singular
+*            vectors of R**T
+*               [[The optimal ratio N/NR for using QRF instead of padding
+*                 with zeros. Here hard coded to 2; it must be at least
+*                 two due to work space constraints.]]
+*               OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0)
+*               OPTRATIO = MAX( OPTRATIO, 2 )
+                OPTRATIO = 2
+                IF ( OPTRATIO*NR .GT. N ) THEN
+                   DO 1198 p = 1, NR
+                      DO 1199 q = p, N
+                         V(q,p) = A(p,q)
+ 1199                 CONTINUE
+ 1198              CONTINUE
+                   IF ( NR .GT. 1 )
+     $             CALL SLASET('U',NR-1,NR-1, ZERO,ZERO, V(1,2),LDV)
+*
+                   CALL SLASET('A',N,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
+                   CALL SGESVD( 'O', 'A', N, N, V, LDV, S, V, LDV,
+     $                  U, LDU, WORK(N+1), LWORK-N, INFO )
+*
+                   DO 1113 p = 1, N
+                      DO 1114 q = p + 1, N
+                         RTMP   = V(q,p)
+                         V(q,p) = V(p,q)
+                         V(p,q) = RTMP
+ 1114                 CONTINUE
+ 1113              CONTINUE
+                   CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+*              .. assemble the left singular vector matrix U of dimensions
+*              (M x N1), i.e. (M x N) or (M x M).
+*
+                   DO 1111 p = 1, N
+                      DO 1112 q = p + 1, N
+                         RTMP   = U(q,p)
+                         U(q,p) = U(p,q)
+                         U(p,q) = RTMP
+ 1112                 CONTINUE
+ 1111              CONTINUE
+*
+                   IF ( ( N .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                      CALL SLASET('A',M-N,N,ZERO,ZERO,U(N+1,1),LDU)
+                      IF ( N .LT. N1 ) THEN
+                        CALL SLASET('A',N,N1-N,ZERO,ZERO,U(1,N+1),LDU)
+                        CALL SLASET('A',M-N,N1-N,ZERO,ONE,
+     $                       U(N+1,N+1), LDU )
+                      END IF
+                   END IF
+                ELSE
+*                  .. copy R**T into [U] and overwrite [U] with the right
+*                  singular vectors of R
+                   DO 1196 p = 1, NR
+                      DO 1197 q = p, N
+                         U(q,NR+p) = A(p,q)
+ 1197                 CONTINUE
+ 1196              CONTINUE
+                   IF ( NR .GT. 1 )
+     $             CALL SLASET('U',NR-1,NR-1,ZERO,ZERO,U(1,NR+2),LDU)
+                   CALL SGEQRF( N, NR, U(1,NR+1), LDU, WORK(N+1),
+     $                  WORK(N+NR+1), LWORK-N-NR, IERR )
+                   DO 1143 p = 1, NR
+                       DO 1144 q = 1, N
+                           V(q,p) = U(p,NR+q)
+ 1144                  CONTINUE
+ 1143              CONTINUE
+                  CALL SLASET('U',NR-1,NR-1,ZERO,ZERO,V(1,2),LDV)
+                  CALL SGESVD( 'S', 'O', NR, NR, V, LDV, S, U, LDU,
+     $                 V,LDV, WORK(N+NR+1),LWORK-N-NR, INFO )
+                  CALL SLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV)
+                  CALL SLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
+                  CALL SLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV)
+                  CALL SORMQR('R','C', N, N, NR, U(1,NR+1), LDU,
+     $                 WORK(N+1),V,LDV,WORK(N+NR+1),LWORK-N-NR,IERR)
+                  CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+*                 .. assemble the left singular vector matrix U of dimensions
+*                 (M x NR) or (M x N) or (M x M).
+                  IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                     CALL SLASET('A',M-NR,NR,ZERO,ZERO,U(NR+1,1),LDU)
+                     IF ( NR .LT. N1 ) THEN
+                     CALL SLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU)
+                     CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,
+     $                    U(NR+1,NR+1),LDU)
+                     END IF
+                  END IF
+                END IF
+            END IF
+*
+         ELSE
+*
+*            .. apply SGESVD to R [[this is the recommended option]]
+*
+             IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN
+*                .. copy R into [V] and overwrite V with the right singular vectors
+                 CALL SLACPY( 'U', NR, N, A, LDA, V, LDV )
+                IF ( NR .GT. 1 )
+     $          CALL SLASET( 'L', NR-1,NR-1, ZERO,ZERO, V(2,1), LDV )
+*               .. the right singular vectors of R overwrite [V], the NR left
+*               singular vectors of R stored in [U](1:NR,1:NR)
+                CALL SGESVD( 'S', 'O', NR, N, V, LDV, S, U, LDU,
+     $               V, LDV, WORK(N+1), LWORK-N, INFO )
+                CALL SLAPMT( .FALSE., NR, N, V, LDV, IWORK )
+*               .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
+*               .. assemble the left singular vector matrix U of dimensions
+*              (M x NR) or (M x N) or (M x M).
+               IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                  CALL SLASET('A', M-NR,NR, ZERO,ZERO, U(NR+1,1), LDU)
+                  IF ( NR .LT. N1 ) THEN
+                     CALL SLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU)
+                     CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,
+     $                    U(NR+1,NR+1), LDU )
+                  END IF
+               END IF
+*
+             ELSE
+*              .. need all N right singular vectors and NR < N
+*              .. the requested number of the left singular vectors
+*               is then N1 (N or M)
+*               [[The optimal ratio N/NR for using LQ instead of padding
+*                 with zeros. Here hard coded to 2; it must be at least
+*                 two due to work space constraints.]]
+*               OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0)
+*               OPTRATIO = MAX( OPTRATIO, 2 )
+               OPTRATIO = 2
+               IF ( OPTRATIO * NR .GT. N ) THEN
+                  CALL SLACPY( 'U', NR, N, A, LDA, V, LDV )
+                  IF ( NR .GT. 1 )
+     $            CALL SLASET('L', NR-1,NR-1, ZERO,ZERO, V(2,1),LDV)
+*              .. the right singular vectors of R overwrite [V], the NR left
+*                 singular vectors of R stored in [U](1:NR,1:NR)
+                  CALL SLASET('A', N-NR,N, ZERO,ZERO, V(NR+1,1),LDV)
+                  CALL SGESVD( 'S', 'O', N, N, V, LDV, S, U, LDU,
+     $                 V, LDV, WORK(N+1), LWORK-N, INFO )
+                  CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+*                 .. now [V] contains the transposed matrix of the right
+*                 singular vectors of A. The leading N left singular vectors
+*                 are in [U](1:N,1:N)
+*                 .. assemble the left singular vector matrix U of dimensions
+*                 (M x N1), i.e. (M x N) or (M x M).
+                  IF ( ( N .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                      CALL SLASET('A',M-N,N,ZERO,ZERO,U(N+1,1),LDU)
+                      IF ( N .LT. N1 ) THEN
+                        CALL SLASET('A',N,N1-N,ZERO,ZERO,U(1,N+1),LDU)
+                        CALL SLASET( 'A',M-N,N1-N,ZERO,ONE,
+     $                       U(N+1,N+1), LDU )
+                      END IF
+                  END IF
+               ELSE
+                  CALL SLACPY( 'U', NR, N, A, LDA, U(NR+1,1), LDU )
+                  IF ( NR .GT. 1 )
+     $            CALL SLASET('L',NR-1,NR-1,ZERO,ZERO,U(NR+2,1),LDU)
+                  CALL SGELQF( NR, N, U(NR+1,1), LDU, WORK(N+1),
+     $                 WORK(N+NR+1), LWORK-N-NR, IERR )
+                  CALL SLACPY('L',NR,NR,U(NR+1,1),LDU,V,LDV)
+                  IF ( NR .GT. 1 )
+     $            CALL SLASET('U',NR-1,NR-1,ZERO,ZERO,V(1,2),LDV)
+                  CALL SGESVD( 'S', 'O', NR, NR, V, LDV, S, U, LDU,
+     $                 V, LDV, WORK(N+NR+1), LWORK-N-NR, INFO )
+                  CALL SLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV)
+                  CALL SLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
+                  CALL SLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV)
+                  CALL SORMLQ('R','N',N,N,NR,U(NR+1,1),LDU,WORK(N+1),
+     $                 V, LDV, WORK(N+NR+1),LWORK-N-NR,IERR)
+                  CALL SLAPMT( .FALSE., N, N, V, LDV, IWORK )
+*               .. assemble the left singular vector matrix U of dimensions
+*              (M x NR) or (M x N) or (M x M).
+                  IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN
+                     CALL SLASET('A',M-NR,NR,ZERO,ZERO,U(NR+1,1),LDU)
+                     IF ( NR .LT. N1 ) THEN
+                     CALL SLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU)
+                     CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,
+     $                    U(NR+1,NR+1), LDU )
+                     END IF
+                  END IF
+               END IF
+             END IF
+*        .. end of the "R**T or R" branch
+         END IF
+*
+*           The Q matrix from the first QRF is built into the left singular
+*           vectors matrix U.
+*
+         IF ( .NOT. WNTUF )
+     $       CALL SORMQR( 'L', 'N', M, N1, N, A, LDA, WORK, U,
+     $            LDU, WORK(N+1), LWORK-N, IERR )
+         IF ( ROWPRM .AND. .NOT.WNTUF )
+     $          CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(N+1), -1 )
+*
+*     ... end of the "full SVD" branch
+      END IF
+*
+*     Check whether some singular values are returned as zeros, e.g.
+*     due to underflow, and update the numerical rank.
+      p = NR
+      DO 4001 q = p, 1, -1
+          IF ( S(q) .GT. ZERO ) GO TO 4002
+          NR = NR - 1
+ 4001 CONTINUE
+ 4002 CONTINUE
+*
+*     .. if numerical rank deficiency is detected, the truncated
+*     singular values are set to zero.
+      IF ( NR .LT. N ) CALL SLASET( 'G', N-NR,1, ZERO,ZERO, S(NR+1), N )
+*     .. undo scaling; this may cause overflow in the largest singular
+*     values.
+      IF ( ASCALED )
+     $   CALL SLASCL( 'G',0,0, ONE,SQRT(REAL(M)), NR,1, S, N, IERR )
+      IF ( CONDA ) RWORK(1) = SCONDA
+      RWORK(2) = p - NR
+*     .. p-NR is the number of singular values that are computed as
+*     exact zeros in SGESVD() applied to the (possibly truncated)
+*     full row rank triangular (trapezoidal) factor of A.
+      NUMRANK = NR
+*
+      RETURN
+*
+*     End of SGESVDQ
+*
+      END
diff --git a/lapack-netlib/SRC/sgesvj.f b/lapack-netlib/SRC/sgesvj.f
index 7a7901135..fee5aba4a 100644
--- a/lapack-netlib/SRC/sgesvj.f
+++ b/lapack-netlib/SRC/sgesvj.f
@@ -90,13 +90,13 @@
 *>          JOBV is CHARACTER*1
 *>          Specifies whether to compute the right singular vectors, that
 *>          is, the matrix V:
-*>          = 'V' : the matrix V is computed and returned in the array V
-*>          = 'A' : the Jacobi rotations are applied to the MV-by-N
+*>          = 'V':  the matrix V is computed and returned in the array V
+*>          = 'A':  the Jacobi rotations are applied to the MV-by-N
 *>                  array V. In other words, the right singular vector
 *>                  matrix V is not computed explicitly; instead it is
 *>                  applied to an MV-by-N matrix initially stored in the
 *>                  first MV rows of V.
-*>          = 'N' : the matrix V is not computed and the array V is not
+*>          = 'N':  the matrix V is not computed and the array V is not
 *>                  referenced
 *> \endverbatim
 *>
@@ -118,8 +118,8 @@
 *>          A is REAL array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix A.
 *>          On exit,
-*>          If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C':
-*>                 If INFO .EQ. 0 :
+*>          If JOBU = 'U' .OR. JOBU = 'C':
+*>                 If INFO = 0:
 *>                 RANKA orthonormal columns of U are returned in the
 *>                 leading RANKA columns of the array A. Here RANKA <= N
 *>                 is the number of computed singular values of A that are
@@ -129,9 +129,9 @@
 *>                 in the array WORK as RANKA=NINT(WORK(2)). Also see the
 *>                 descriptions of SVA and WORK. The computed columns of U
 *>                 are mutually numerically orthogonal up to approximately
-*>                 TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'),
+*>                 TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'),
 *>                 see the description of JOBU.
-*>                 If INFO .GT. 0,
+*>                 If INFO > 0,
 *>                 the procedure SGESVJ did not converge in the given number
 *>                 of iterations (sweeps). In that case, the computed
 *>                 columns of U may not be orthogonal up to TOL. The output
@@ -139,8 +139,8 @@
 *>                 values in SVA(1:N)) and V is still a decomposition of the
 *>                 input matrix A in the sense that the residual
 *>                 ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small.
-*>          If JOBU .EQ. 'N':
-*>                 If INFO .EQ. 0 :
+*>          If JOBU = 'N':
+*>                 If INFO = 0:
 *>                 Note that the left singular vectors are 'for free' in the
 *>                 one-sided Jacobi SVD algorithm. However, if only the
 *>                 singular values are needed, the level of numerical
@@ -149,7 +149,7 @@
 *>                 numerically orthogonal up to approximately M*EPS. Thus,
 *>                 on exit, A contains the columns of U scaled with the
 *>                 corresponding singular values.
-*>                 If INFO .GT. 0 :
+*>                 If INFO > 0:
 *>                 the procedure SGESVJ did not converge in the given number
 *>                 of iterations (sweeps).
 *> \endverbatim
@@ -164,9 +164,9 @@
 *> \verbatim
 *>          SVA is REAL array, dimension (N)
 *>          On exit,
-*>          If INFO .EQ. 0 :
+*>          If INFO = 0 :
 *>          depending on the value SCALE = WORK(1), we have:
-*>                 If SCALE .EQ. ONE:
+*>                 If SCALE = ONE:
 *>                 SVA(1:N) contains the computed singular values of A.
 *>                 During the computation SVA contains the Euclidean column
 *>                 norms of the iterated matrices in the array A.
@@ -175,7 +175,7 @@
 *>                 factored representation is due to the fact that some of the
 *>                 singular values of A might underflow or overflow.
 *>
-*>          If INFO .GT. 0 :
+*>          If INFO > 0 :
 *>          the procedure SGESVJ did not converge in the given number of
 *>          iterations (sweeps) and SCALE*SVA(1:N) may not be accurate.
 *> \endverbatim
@@ -183,7 +183,7 @@
 *> \param[in] MV
 *> \verbatim
 *>          MV is INTEGER
-*>          If JOBV .EQ. 'A', then the product of Jacobi rotations in SGESVJ
+*>          If JOBV = 'A', then the product of Jacobi rotations in SGESVJ
 *>          is applied to the first MV rows of V. See the description of JOBV.
 *> \endverbatim
 *>
@@ -201,16 +201,16 @@
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
-*>          The leading dimension of the array V, LDV .GE. 1.
-*>          If JOBV .EQ. 'V', then LDV .GE. max(1,N).
-*>          If JOBV .EQ. 'A', then LDV .GE. max(1,MV) .
+*>          The leading dimension of the array V, LDV >= 1.
+*>          If JOBV = 'V', then LDV >= max(1,N).
+*>          If JOBV = 'A', then LDV >= max(1,MV) .
 *> \endverbatim
 *>
 *> \param[in,out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (LWORK)
 *>          On entry,
-*>          If JOBU .EQ. 'C' :
+*>          If JOBU = 'C' :
 *>          WORK(1) = CTOL, where CTOL defines the threshold for convergence.
 *>                    The process stops if all columns of A are mutually
 *>                    orthogonal up to CTOL*EPS, EPS=SLAMCH('E').
@@ -230,7 +230,7 @@
 *>          WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep.
 *>                    This is useful information in cases when SGESVJ did
 *>                    not converge, as it can be used to estimate whether
-*>                    the output is stil useful and for post festum analysis.
+*>                    the output is still useful and for post festum analysis.
 *>          WORK(6) = the largest absolute value over all sines of the
 *>                    Jacobi rotation angles in the last sweep. It can be
 *>                    useful for a post festum analysis.
@@ -245,9 +245,9 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0 : successful exit.
-*>          < 0 : if INFO = -i, then the i-th argument had an illegal value
-*>          > 0 : SGESVJ did not converge in the maximal allowed number (30)
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, then the i-th argument had an illegal value
+*>          > 0:  SGESVJ did not converge in the maximal allowed number (30)
 *>                of sweeps. The output may still be useful. See the
 *>                description of WORK.
 *> \endverbatim
diff --git a/lapack-netlib/SRC/sgesvxx.f b/lapack-netlib/SRC/sgesvxx.f
index 281f198d5..7cb29d5ab 100644
--- a/lapack-netlib/SRC/sgesvxx.f
+++ b/lapack-netlib/SRC/sgesvxx.f
@@ -411,7 +411,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
@@ -447,14 +447,14 @@
 *> \param[in] NPARAMS
 *> \verbatim
 *>          NPARAMS is INTEGER
-*>     Specifies the number of parameters set in PARAMS.  If .LE. 0, the
+*>     Specifies the number of parameters set in PARAMS.  If <= 0, the
 *>     PARAMS array is never referenced and default values are used.
 *> \endverbatim
 *>
 *> \param[in,out] PARAMS
 *> \verbatim
 *>          PARAMS is REAL array, dimension NPARAMS
-*>     Specifies algorithm parameters.  If an entry is .LT. 0.0, then
+*>     Specifies algorithm parameters.  If an entry is < 0.0, then
 *>     that entry will be filled with default value used for that
 *>     parameter.  Only positions up to NPARAMS are accessed; defaults
 *>     are used for higher-numbered parameters.
@@ -462,9 +462,9 @@
 *>       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative
 *>            refinement or not.
 *>         Default: 1.0
-*>            = 0.0 : No refinement is performed, and no error bounds are
+*>            = 0.0:  No refinement is performed, and no error bounds are
 *>                    computed.
-*>            = 1.0 : Use the double-precision refinement algorithm,
+*>            = 1.0:  Use the double-precision refinement algorithm,
 *>                    possibly with doubled-single computations if the
 *>                    compilation environment does not support DOUBLE
 *>                    PRECISION.
diff --git a/lapack-netlib/SRC/sgetc2.f b/lapack-netlib/SRC/sgetc2.f
index b0301b953..6bf0a93c6 100644
--- a/lapack-netlib/SRC/sgetc2.f
+++ b/lapack-netlib/SRC/sgetc2.f
@@ -85,7 +85,7 @@
 *> \verbatim
 *>          INFO is INTEGER
 *>           = 0: successful exit
-*>           > 0: if INFO = k, U(k, k) is likely to produce owerflow if
+*>           > 0: if INFO = k, U(k, k) is likely to produce overflow if
 *>                we try to solve for x in Ax = b. So U is perturbed to
 *>                avoid the overflow.
 *> \endverbatim
diff --git a/lapack-netlib/SRC/sgetsls.f b/lapack-netlib/SRC/sgetsls.f
index 35af66c19..53d2f9431 100644
--- a/lapack-netlib/SRC/sgetsls.f
+++ b/lapack-netlib/SRC/sgetsls.f
@@ -1,3 +1,5 @@
+*> \brief \b SGETSLS
+*
 *  Definition:
 *  ===========
 *
@@ -154,7 +156,7 @@
 *
 *> \date June 2017
 *
-*> \ingroup doubleGEsolve
+*> \ingroup realGEsolve
 *
 *  =====================================================================
       SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB,
diff --git a/lapack-netlib/SRC/sggesx.f b/lapack-netlib/SRC/sggesx.f
index 3c6273dcf..25691d164 100644
--- a/lapack-netlib/SRC/sggesx.f
+++ b/lapack-netlib/SRC/sggesx.f
@@ -131,10 +131,10 @@
 *> \verbatim
 *>          SENSE is CHARACTER*1
 *>          Determines which reciprocal condition numbers are computed.
-*>          = 'N' : None are computed;
-*>          = 'E' : Computed for average of selected eigenvalues only;
-*>          = 'V' : Computed for selected deflating subspaces only;
-*>          = 'B' : Computed for both.
+*>          = 'N':  None are computed;
+*>          = 'E':  Computed for average of selected eigenvalues only;
+*>          = 'V':  Computed for selected deflating subspaces only;
+*>          = 'B':  Computed for both.
 *>          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.
 *> \endverbatim
 *>
diff --git a/lapack-netlib/SRC/sgsvj0.f b/lapack-netlib/SRC/sgsvj0.f
index e580efc30..d9177d818 100644
--- a/lapack-netlib/SRC/sgsvj0.f
+++ b/lapack-netlib/SRC/sgsvj0.f
@@ -117,7 +117,7 @@
 *> \param[in] MV
 *> \verbatim
 *>          MV is INTEGER
-*>          If JOBV .EQ. 'A', then MV rows of V are post-multipled by a
+*>          If JOBV = 'A', then MV rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
 *>          If JOBV = 'N',   then MV is not referenced.
 *> \endverbatim
@@ -125,9 +125,9 @@
 *> \param[in,out] V
 *> \verbatim
 *>          V is REAL array, dimension (LDV,N)
-*>          If JOBV .EQ. 'V' then N rows of V are post-multipled by a
+*>          If JOBV = 'V' then N rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
-*>          If JOBV .EQ. 'A' then MV rows of V are post-multipled by a
+*>          If JOBV = 'A' then MV rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
 *>          If JOBV = 'N',   then V is not referenced.
 *> \endverbatim
@@ -136,8 +136,8 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V,  LDV >= 1.
-*>          If JOBV = 'V', LDV .GE. N.
-*>          If JOBV = 'A', LDV .GE. MV.
+*>          If JOBV = 'V', LDV >= N.
+*>          If JOBV = 'A', LDV >= MV.
 *> \endverbatim
 *>
 *> \param[in] EPS
@@ -157,7 +157,7 @@
 *>          TOL is REAL
 *>          TOL is the threshold for Jacobi rotations. For a pair
 *>          A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
-*>          applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL.
+*>          applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL.
 *> \endverbatim
 *>
 *> \param[in] NSWEEP
@@ -175,14 +175,14 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
-*>          LWORK is the dimension of WORK. LWORK .GE. M.
+*>          LWORK is the dimension of WORK. LWORK >= M.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0 : successful exit.
-*>          < 0 : if INFO = -i, then the i-th argument had an illegal value
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, then the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
@@ -1045,7 +1045,7 @@
 
  1993 CONTINUE
 *     end i=1:NSWEEP loop
-* #:) Reaching this point means that the procedure has comleted the given
+* #:) Reaching this point means that the procedure has completed the given
 *     number of iterations.
       INFO = NSWEEP - 1
       GO TO 1995
diff --git a/lapack-netlib/SRC/sgsvj1.f b/lapack-netlib/SRC/sgsvj1.f
index 49b81cf4f..ea4ba2e0e 100644
--- a/lapack-netlib/SRC/sgsvj1.f
+++ b/lapack-netlib/SRC/sgsvj1.f
@@ -61,7 +61,7 @@
 *> In terms of the columns of A, the first N1 columns are rotated 'against'
 *> the remaining N-N1 columns, trying to increase the angle between the
 *> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is
-*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter.
+*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parameter.
 *> The number of sweeps is given in NSWEEP and the orthogonality threshold
 *> is given in TOL.
 *> \endverbatim
@@ -147,7 +147,7 @@
 *> \param[in] MV
 *> \verbatim
 *>          MV is INTEGER
-*>          If JOBV .EQ. 'A', then MV rows of V are post-multipled by a
+*>          If JOBV = 'A', then MV rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
 *>          If JOBV = 'N',   then MV is not referenced.
 *> \endverbatim
@@ -155,9 +155,9 @@
 *> \param[in,out] V
 *> \verbatim
 *>          V is REAL array, dimension (LDV,N)
-*>          If JOBV .EQ. 'V' then N rows of V are post-multipled by a
+*>          If JOBV = 'V' then N rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
-*>          If JOBV .EQ. 'A' then MV rows of V are post-multipled by a
+*>          If JOBV = 'A' then MV rows of V are post-multipled by a
 *>                           sequence of Jacobi rotations.
 *>          If JOBV = 'N',   then V is not referenced.
 *> \endverbatim
@@ -166,8 +166,8 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V,  LDV >= 1.
-*>          If JOBV = 'V', LDV .GE. N.
-*>          If JOBV = 'A', LDV .GE. MV.
+*>          If JOBV = 'V', LDV >= N.
+*>          If JOBV = 'A', LDV >= MV.
 *> \endverbatim
 *>
 *> \param[in] EPS
@@ -187,7 +187,7 @@
 *>          TOL is REAL
 *>          TOL is the threshold for Jacobi rotations. For a pair
 *>          A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
-*>          applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL.
+*>          applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL.
 *> \endverbatim
 *>
 *> \param[in] NSWEEP
@@ -205,14 +205,14 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
-*>          LWORK is the dimension of WORK. LWORK .GE. M.
+*>          LWORK is the dimension of WORK. LWORK >= M.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0 : successful exit.
-*>          < 0 : if INFO = -i, then the i-th argument had an illegal value
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, then the i-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
diff --git a/lapack-netlib/SRC/shseqr.f b/lapack-netlib/SRC/shseqr.f
index 5654a4682..b5707f2c3 100644
--- a/lapack-netlib/SRC/shseqr.f
+++ b/lapack-netlib/SRC/shseqr.f
@@ -70,7 +70,7 @@
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
-*>           The order of the matrix H.  N .GE. 0.
+*>           The order of the matrix H.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] ILO
@@ -87,7 +87,7 @@
 *>           set by a previous call to SGEBAL, and then passed to ZGEHRD
 *>           when the matrix output by SGEBAL is reduced to Hessenberg
 *>           form. Otherwise ILO and IHI should be set to 1 and N
-*>           respectively.  If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
+*>           respectively.  If N > 0, then 1 <= ILO <= IHI <= N.
 *>           If N = 0, then ILO = 1 and IHI = 0.
 *> \endverbatim
 *>
@@ -100,20 +100,20 @@
 *>           (the Schur form); 2-by-2 diagonal blocks (corresponding to
 *>           complex conjugate pairs of eigenvalues) are returned in
 *>           standard form, with H(i,i) = H(i+1,i+1) and
-*>           H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the
+*>           H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and JOB = 'E', the
 *>           contents of H are unspecified on exit.  (The output value of
-*>           H when INFO.GT.0 is given under the description of INFO
+*>           H when INFO > 0 is given under the description of INFO
 *>           below.)
 *>
 *>           Unlike earlier versions of SHSEQR, this subroutine may
-*>           explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1
+*>           explicitly H(i,j) = 0 for i > j and j = 1, 2, ... ILO-1
 *>           or j = IHI+1, IHI+2, ... N.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
-*>           The leading dimension of the array H. LDH .GE. max(1,N).
+*>           The leading dimension of the array H. LDH >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] WR
@@ -128,8 +128,8 @@
 *>           The real and imaginary parts, respectively, of the computed
 *>           eigenvalues. If two eigenvalues are computed as a complex
 *>           conjugate pair, they are stored in consecutive elements of
-*>           WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and
-*>           WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in
+*>           WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and
+*>           WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in
 *>           the same order as on the diagonal of the Schur form returned
 *>           in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2
 *>           diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and
@@ -148,7 +148,7 @@
 *>           if INFO = 0, Z contains Q*Z.
 *>           Normally Q is the orthogonal matrix generated by SORGHR
 *>           after the call to SGEHRD which formed the Hessenberg matrix
-*>           H. (The output value of Z when INFO.GT.0 is given under
+*>           H. (The output value of Z when INFO > 0 is given under
 *>           the description of INFO below.)
 *> \endverbatim
 *>
@@ -156,7 +156,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>           The leading dimension of the array Z.  if COMPZ = 'I' or
-*>           COMPZ = 'V', then LDZ.GE.MAX(1,N).  Otherwize, LDZ.GE.1.
+*>           COMPZ = 'V', then LDZ >= MAX(1,N).  Otherwise, LDZ >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
@@ -169,7 +169,7 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
-*>           The dimension of the array WORK.  LWORK .GE. max(1,N)
+*>           The dimension of the array WORK.  LWORK >= max(1,N)
 *>           is sufficient and delivers very good and sometimes
 *>           optimal performance.  However, LWORK as large as 11*N
 *>           may be required for optimal performance.  A workspace
@@ -187,21 +187,21 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>             =  0:  successful exit
-*>           .LT. 0:  if INFO = -i, the i-th argument had an illegal
+*>             = 0:  successful exit
+*>             < 0:  if INFO = -i, the i-th argument had an illegal
 *>                    value
-*>           .GT. 0:  if INFO = i, SHSEQR failed to compute all of
+*>             > 0:  if INFO = i, SHSEQR failed to compute all of
 *>                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR
 *>                and WI contain those eigenvalues which have been
 *>                successfully computed.  (Failures are rare.)
 *>
-*>                If INFO .GT. 0 and JOB = 'E', then on exit, the
+*>                If INFO > 0 and JOB = 'E', then on exit, the
 *>                remaining unconverged eigenvalues are the eigen-
 *>                values of the upper Hessenberg matrix rows and
 *>                columns ILO through INFO of the final, output
 *>                value of H.
 *>
-*>                If INFO .GT. 0 and JOB   = 'S', then on exit
+*>                If INFO > 0 and JOB   = 'S', then on exit
 *>
 *>           (*)  (initial value of H)*U  = U*(final value of H)
 *>
@@ -209,19 +209,19 @@
 *>                value of H is upper Hessenberg and quasi-triangular
 *>                in rows and columns INFO+1 through IHI.
 *>
-*>                If INFO .GT. 0 and COMPZ = 'V', then on exit
+*>                If INFO > 0 and COMPZ = 'V', then on exit
 *>
 *>                  (final value of Z)  =  (initial value of Z)*U
 *>
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of JOB.)
 *>
-*>                If INFO .GT. 0 and COMPZ = 'I', then on exit
+*>                If INFO > 0 and COMPZ = 'I', then on exit
 *>                      (final value of Z)  = U
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of JOB.)
 *>
-*>                If INFO .GT. 0 and COMPZ = 'N', then Z is not
+*>                If INFO > 0 and COMPZ = 'N', then Z is not
 *>                accessed.
 *> \endverbatim
 *
@@ -261,8 +261,8 @@
 *>                      This depends on ILO, IHI and NS.  NS is the
 *>                      number of simultaneous shifts returned
 *>                      by ILAENV(ISPEC=15).  (See ISPEC=15 below.)
-*>                      The default for (IHI-ILO+1).LE.500 is NS.
-*>                      The default for (IHI-ILO+1).GT.500 is 3*NS/2.
+*>                      The default for (IHI-ILO+1) <= 500 is NS.
+*>                      The default for (IHI-ILO+1) >  500 is 3*NS/2.
 *>
 *>            ISPEC=14: Nibble crossover point. (See IPARMQ for
 *>                      details.)  Default: 14% of deflation window
@@ -341,8 +341,8 @@
       PARAMETER          ( NTINY = 11 )
 *
 *     ==== NL allocates some local workspace to help small matrices
-*     .    through a rare SLAHQR failure.  NL .GT. NTINY = 11 is
-*     .    required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom-
+*     .    through a rare SLAHQR failure.  NL > NTINY = 11 is
+*     .    required and NL <= NMIN = ILAENV(ISPEC=12,...) is recom-
 *     .    mended.  (The default value of NMIN is 75.)  Using NL = 49
 *     .    allows up to six simultaneous shifts and a 16-by-16
 *     .    deflation window.  ====
diff --git a/lapack-netlib/SRC/sla_gbrcond.f b/lapack-netlib/SRC/sla_gbrcond.f
index 36aa93dc9..7f2c4062e 100644
--- a/lapack-netlib/SRC/sla_gbrcond.f
+++ b/lapack-netlib/SRC/sla_gbrcond.f
@@ -140,13 +140,13 @@
 *>     i > 0:  The ith argument is invalid.
 *> \endverbatim
 *>
-*> \param[in] WORK
+*> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (5*N).
 *>     Workspace.
 *> \endverbatim
 *>
-*> \param[in] IWORK
+*> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (N).
 *>     Workspace.
diff --git a/lapack-netlib/SRC/sla_gbrfsx_extended.f b/lapack-netlib/SRC/sla_gbrfsx_extended.f
index a81feb45e..0fd1fd350 100644
--- a/lapack-netlib/SRC/sla_gbrfsx_extended.f
+++ b/lapack-netlib/SRC/sla_gbrfsx_extended.f
@@ -65,19 +65,19 @@
 *> \verbatim
 *>          PREC_TYPE is INTEGER
 *>     Specifies the intermediate precision to be used in refinement.
-*>     The value is defined by ILAPREC(P) where P is a CHARACTER and
-*>     P    = 'S':  Single
+*>     The value is defined by ILAPREC(P) where P is a CHARACTER and P
+*>          = 'S':  Single
 *>          = 'D':  Double
 *>          = 'I':  Indigenous
-*>          = 'X', 'E':  Extra
+*>          = 'X' or 'E':  Extra
 *> \endverbatim
 *>
 *> \param[in] TRANS_TYPE
 *> \verbatim
 *>          TRANS_TYPE is INTEGER
 *>     Specifies the transposition operation on A.
-*>     The value is defined by ILATRANS(T) where T is a CHARACTER and
-*>     T    = 'N':  No transpose
+*>     The value is defined by ILATRANS(T) where T is a CHARACTER and T
+*>          = 'N':  No transpose
 *>          = 'T':  Transpose
 *>          = 'C':  Conjugate transpose
 *> \endverbatim
@@ -269,7 +269,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
diff --git a/lapack-netlib/SRC/sla_gercond.f b/lapack-netlib/SRC/sla_gercond.f
index 349a1b5be..e54e0d7b4 100644
--- a/lapack-netlib/SRC/sla_gercond.f
+++ b/lapack-netlib/SRC/sla_gercond.f
@@ -122,13 +122,13 @@
 *>     i > 0:  The ith argument is invalid.
 *> \endverbatim
 *>
-*> \param[in] WORK
+*> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (3*N).
 *>     Workspace.
 *> \endverbatim
 *>
-*> \param[in] IWORK
+*> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (N).
 *>     Workspace.2
diff --git a/lapack-netlib/SRC/sla_gerfsx_extended.f b/lapack-netlib/SRC/sla_gerfsx_extended.f
index 1795ea975..84d1ae31b 100644
--- a/lapack-netlib/SRC/sla_gerfsx_extended.f
+++ b/lapack-netlib/SRC/sla_gerfsx_extended.f
@@ -65,19 +65,19 @@
 *> \verbatim
 *>          PREC_TYPE is INTEGER
 *>     Specifies the intermediate precision to be used in refinement.
-*>     The value is defined by ILAPREC(P) where P is a CHARACTER and
-*>     P    = 'S':  Single
+*>     The value is defined by ILAPREC(P) where P is a CHARACTER and P
+*>          = 'S':  Single
 *>          = 'D':  Double
 *>          = 'I':  Indigenous
-*>          = 'X', 'E':  Extra
+*>          = 'X' or 'E':  Extra
 *> \endverbatim
 *>
 *> \param[in] TRANS_TYPE
 *> \verbatim
 *>          TRANS_TYPE is INTEGER
 *>     Specifies the transposition operation on A.
-*>     The value is defined by ILATRANS(T) where T is a CHARACTER and
-*>     T    = 'N':  No transpose
+*>     The value is defined by ILATRANS(T) where T is a CHARACTER and T
+*>          = 'N':  No transpose
 *>          = 'T':  Transpose
 *>          = 'C':  Conjugate transpose
 *> \endverbatim
@@ -257,7 +257,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERRS_C is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERRS_C is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERRS_C(i,:) corresponds to the ith
diff --git a/lapack-netlib/SRC/sla_porcond.f b/lapack-netlib/SRC/sla_porcond.f
index 9dd7c587b..729581f46 100644
--- a/lapack-netlib/SRC/sla_porcond.f
+++ b/lapack-netlib/SRC/sla_porcond.f
@@ -112,13 +112,13 @@
 *>     i > 0:  The ith argument is invalid.
 *> \endverbatim
 *>
-*> \param[in] WORK
+*> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (3*N).
 *>     Workspace.
 *> \endverbatim
 *>
-*> \param[in] IWORK
+*> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (N).
 *>     Workspace.
diff --git a/lapack-netlib/SRC/sla_porfsx_extended.f b/lapack-netlib/SRC/sla_porfsx_extended.f
index 27baa20d1..abbfebb83 100644
--- a/lapack-netlib/SRC/sla_porfsx_extended.f
+++ b/lapack-netlib/SRC/sla_porfsx_extended.f
@@ -65,11 +65,11 @@
 *> \verbatim
 *>          PREC_TYPE is INTEGER
 *>     Specifies the intermediate precision to be used in refinement.
-*>     The value is defined by ILAPREC(P) where P is a CHARACTER and
-*>     P    = 'S':  Single
+*>     The value is defined by ILAPREC(P) where P is a CHARACTER and P
+*>          = 'S':  Single
 *>          = 'D':  Double
 *>          = 'I':  Indigenous
-*>          = 'X', 'E':  Extra
+*>          = 'X' or 'E':  Extra
 *> \endverbatim
 *>
 *> \param[in] UPLO
@@ -246,7 +246,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
diff --git a/lapack-netlib/SRC/sla_syrcond.f b/lapack-netlib/SRC/sla_syrcond.f
index c4b204cc6..0c9e2b361 100644
--- a/lapack-netlib/SRC/sla_syrcond.f
+++ b/lapack-netlib/SRC/sla_syrcond.f
@@ -118,13 +118,13 @@
 *>     i > 0:  The ith argument is invalid.
 *> \endverbatim
 *>
-*> \param[in] WORK
+*> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (3*N).
 *>     Workspace.
 *> \endverbatim
 *>
-*> \param[in] IWORK
+*> \param[out] IWORK
 *> \verbatim
 *>          IWORK is INTEGER array, dimension (N).
 *>     Workspace.
diff --git a/lapack-netlib/SRC/sla_syrfsx_extended.f b/lapack-netlib/SRC/sla_syrfsx_extended.f
index f7b909ac0..a83a9db98 100644
--- a/lapack-netlib/SRC/sla_syrfsx_extended.f
+++ b/lapack-netlib/SRC/sla_syrfsx_extended.f
@@ -67,11 +67,11 @@
 *> \verbatim
 *>          PREC_TYPE is INTEGER
 *>     Specifies the intermediate precision to be used in refinement.
-*>     The value is defined by ILAPREC(P) where P is a CHARACTER and
-*>     P    = 'S':  Single
+*>     The value is defined by ILAPREC(P) where P is a CHARACTER and P
+*>          = 'S':  Single
 *>          = 'D':  Double
 *>          = 'I':  Indigenous
-*>          = 'X', 'E':  Extra
+*>          = 'X' or 'E':  Extra
 *> \endverbatim
 *>
 *> \param[in] UPLO
@@ -255,7 +255,7 @@
 *>     information as described below. There currently are up to three
 *>     pieces of information returned for each right-hand side. If
 *>     componentwise accuracy is not requested (PARAMS(3) = 0.0), then
-*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most
+*>     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most
 *>     the first (:,N_ERR_BNDS) entries are returned.
 *>
 *>     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith
diff --git a/lapack-netlib/SRC/sla_syrpvgrw.f b/lapack-netlib/SRC/sla_syrpvgrw.f
index f5eb81b1f..a0a235ee3 100644
--- a/lapack-netlib/SRC/sla_syrpvgrw.f
+++ b/lapack-netlib/SRC/sla_syrpvgrw.f
@@ -101,7 +101,7 @@
 *>     as determined by SSYTRF.
 *> \endverbatim
 *>
-*> \param[in] WORK
+*> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (2*N)
 *> \endverbatim
diff --git a/lapack-netlib/SRC/sla_wwaddw.f b/lapack-netlib/SRC/sla_wwaddw.f
index 96a7d3542..e390c9fab 100644
--- a/lapack-netlib/SRC/sla_wwaddw.f
+++ b/lapack-netlib/SRC/sla_wwaddw.f
@@ -36,7 +36,7 @@
 *>    SLA_WWADDW adds a vector W into a doubled-single vector (X, Y).
 *>
 *>    This works for all extant IBM's hex and binary floating point
-*>    arithmetics, but not for decimal.
+*>    arithmetic, but not for decimal.
 *> \endverbatim
 *
 *  Arguments:
diff --git a/lapack-netlib/SRC/slaed4.f b/lapack-netlib/SRC/slaed4.f
index c65cba75a..64260843f 100644
--- a/lapack-netlib/SRC/slaed4.f
+++ b/lapack-netlib/SRC/slaed4.f
@@ -82,7 +82,7 @@
 *> \param[out] DELTA
 *> \verbatim
 *>          DELTA is REAL array, dimension (N)
-*>         If N .GT. 2, DELTA contains (D(j) - lambda_I) in its  j-th
+*>         If N > 2, DELTA contains (D(j) - lambda_I) in its  j-th
 *>         component.  If N = 1, then DELTA(1) = 1. If N = 2, see SLAED5
 *>         for detail. The vector DELTA contains the information necessary
 *>         to construct the eigenvectors by SLAED3 and SLAED9.
diff --git a/lapack-netlib/SRC/slaed8.f b/lapack-netlib/SRC/slaed8.f
index 5ec117cb5..2e3f6f51f 100644
--- a/lapack-netlib/SRC/slaed8.f
+++ b/lapack-netlib/SRC/slaed8.f
@@ -353,7 +353,7 @@
          Z( I ) = W( INDX( I ) )
    40 CONTINUE
 *
-*     Calculate the allowable deflation tolerence
+*     Calculate the allowable deflation tolerance
 *
       IMAX = ISAMAX( N, Z, 1 )
       JMAX = ISAMAX( N, D, 1 )
diff --git a/lapack-netlib/SRC/slagtf.f b/lapack-netlib/SRC/slagtf.f
index d3f0b6813..59ef097a7 100644
--- a/lapack-netlib/SRC/slagtf.f
+++ b/lapack-netlib/SRC/slagtf.f
@@ -125,7 +125,7 @@
 *>          then IN(k) = 1, otherwise IN(k) = 0. The element IN(n)
 *>          returns the smallest positive integer j such that
 *>
-*>             abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL,
+*>             abs( u(j,j) ) <= norm( (T - lambda*I)(j) )*TOL,
 *>
 *>          where norm( A(j) ) denotes the sum of the absolute values of
 *>          the jth row of the matrix A. If no such j exists then IN(n)
@@ -137,8 +137,8 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0   : successful exit
-*>          .lt. 0: if INFO = -k, the kth argument had an illegal value
+*>          = 0: successful exit
+*>          < 0: if INFO = -k, the kth argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
diff --git a/lapack-netlib/SRC/slagts.f b/lapack-netlib/SRC/slagts.f
index 0c3c5239f..e0c8892d7 100644
--- a/lapack-netlib/SRC/slagts.f
+++ b/lapack-netlib/SRC/slagts.f
@@ -122,12 +122,12 @@
 *> \param[in,out] TOL
 *> \verbatim
 *>          TOL is REAL
-*>          On entry, with  JOB .lt. 0, TOL should be the minimum
+*>          On entry, with  JOB < 0, TOL should be the minimum
 *>          perturbation to be made to very small diagonal elements of U.
 *>          TOL should normally be chosen as about eps*norm(U), where eps
 *>          is the relative machine precision, but if TOL is supplied as
 *>          non-positive, then it is reset to eps*max( abs( u(i,j) ) ).
-*>          If  JOB .gt. 0  then TOL is not referenced.
+*>          If  JOB > 0  then TOL is not referenced.
 *>
 *>          On exit, TOL is changed as described above, only if TOL is
 *>          non-positive on entry. Otherwise TOL is unchanged.
@@ -136,14 +136,14 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0   : successful exit
-*>          .lt. 0: if INFO = -i, the i-th argument had an illegal value
-*>          .gt. 0: overflow would occur when computing the INFO(th)
-*>                  element of the solution vector x. This can only occur
-*>                  when JOB is supplied as positive and either means
-*>                  that a diagonal element of U is very small, or that
-*>                  the elements of the right-hand side vector y are very
-*>                  large.
+*>          = 0: successful exit
+*>          < 0: if INFO = -i, the i-th argument had an illegal value
+*>          > 0: overflow would occur when computing the INFO(th)
+*>               element of the solution vector x. This can only occur
+*>               when JOB is supplied as positive and either means
+*>               that a diagonal element of U is very small, or that
+*>               the elements of the right-hand side vector y are very
+*>               large.
 *> \endverbatim
 *
 *  Authors:
diff --git a/lapack-netlib/SRC/slahqr.f b/lapack-netlib/SRC/slahqr.f
index d91826e61..e5642d2bf 100644
--- a/lapack-netlib/SRC/slahqr.f
+++ b/lapack-netlib/SRC/slahqr.f
@@ -150,26 +150,26 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>           =   0: successful exit
-*>          .GT. 0: If INFO = i, SLAHQR failed to compute all the
+*>           = 0:   successful exit
+*>           > 0:   If INFO = i, SLAHQR failed to compute all the
 *>                  eigenvalues ILO to IHI in a total of 30 iterations
 *>                  per eigenvalue; elements i+1:ihi of WR and WI
 *>                  contain those eigenvalues which have been
 *>                  successfully computed.
 *>
-*>                  If INFO .GT. 0 and WANTT is .FALSE., then on exit,
+*>                  If INFO > 0 and WANTT is .FALSE., then on exit,
 *>                  the remaining unconverged eigenvalues are the
 *>                  eigenvalues of the upper Hessenberg matrix rows
-*>                  and columns ILO thorugh INFO of the final, output
+*>                  and columns ILO through INFO of the final, output
 *>                  value of H.
 *>
-*>                  If INFO .GT. 0 and WANTT is .TRUE., then on exit
+*>                  If INFO > 0 and WANTT is .TRUE., then on exit
 *>          (*)       (initial value of H)*U  = U*(final value of H)
-*>                  where U is an orthognal matrix.    The final
+*>                  where U is an orthogonal matrix.    The final
 *>                  value of H is upper Hessenberg and triangular in
 *>                  rows and columns INFO+1 through IHI.
 *>
-*>                  If INFO .GT. 0 and WANTZ is .TRUE., then on exit
+*>                  If INFO > 0 and WANTZ is .TRUE., then on exit
 *>                      (final value of Z)  = (initial value of Z)*U
 *>                  where U is the orthogonal matrix in (*)
 *>                  (regardless of the value of WANTT.)
diff --git a/lapack-netlib/SRC/slaln2.f b/lapack-netlib/SRC/slaln2.f
index f9ceee7b7..4c6a55ec7 100644
--- a/lapack-netlib/SRC/slaln2.f
+++ b/lapack-netlib/SRC/slaln2.f
@@ -49,7 +49,7 @@
 *> the first column of each being the real part and the second
 *> being the imaginary part.
 *>
-*> "s" is a scaling factor (.LE. 1), computed by SLALN2, which is
+*> "s" is a scaling factor (<= 1), computed by SLALN2, which is
 *> so chosen that X can be computed without overflow.  X is further
 *> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
 *> than overflow.
diff --git a/lapack-netlib/SRC/slamswlq.f b/lapack-netlib/SRC/slamswlq.f
index b13d02b6c..59ab1a6ee 100644
--- a/lapack-netlib/SRC/slamswlq.f
+++ b/lapack-netlib/SRC/slamswlq.f
@@ -1,3 +1,4 @@
+*> \brief \b SLAMSWLQ
 *
 *  Definition:
 *  ===========
diff --git a/lapack-netlib/SRC/slamtsqr.f b/lapack-netlib/SRC/slamtsqr.f
index 84ac86ee2..58905ab46 100644
--- a/lapack-netlib/SRC/slamtsqr.f
+++ b/lapack-netlib/SRC/slamtsqr.f
@@ -1,3 +1,4 @@
+*> \brief \b SLAMTSQR
 *
 *  Definition:
 *  ===========
diff --git a/lapack-netlib/SRC/slangb.f b/lapack-netlib/SRC/slangb.f
index fd538b1b7..706e07501 100644
--- a/lapack-netlib/SRC/slangb.f
+++ b/lapack-netlib/SRC/slangb.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            KL, KU, LDAB, N
@@ -139,22 +140,24 @@
 *
 * =====================================================================
 *
-*
 *     .. Parameters ..
       REAL               ONE, ZERO
       PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, K, L
-      REAL               SCALE, SUM, VALUE, TEMP
+      REAL               SUM, VALUE, TEMP
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -206,15 +209,22 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
             L = MAX( 1, J-KU )
             K = KU + 1 - J + L
-            CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1,
+     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL SCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANGB = VALUE
diff --git a/lapack-netlib/SRC/slange.f b/lapack-netlib/SRC/slange.f
index 2eb8d7d14..0c80f1d40 100644
--- a/lapack-netlib/SRC/slange.f
+++ b/lapack-netlib/SRC/slange.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            LDA, M, N
@@ -135,10 +136,13 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      REAL               SCALE, SUM, VALUE, TEMP
+      REAL               SUM, VALUE, TEMP
+*     ..
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+      EXTERNAL           SLASSQ, SCOMBSSQ
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
@@ -194,13 +198,19 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
-            CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL SLASSQ( M, A( 1, J ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL SCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANGE = VALUE
diff --git a/lapack-netlib/SRC/slanhs.f b/lapack-netlib/SRC/slanhs.f
index c5a077fbf..8913031a2 100644
--- a/lapack-netlib/SRC/slanhs.f
+++ b/lapack-netlib/SRC/slanhs.f
@@ -113,6 +113,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            LDA, N
@@ -129,15 +130,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
+      REAL               SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -188,13 +192,20 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
-            CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1,
+     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL SCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANHS = VALUE
diff --git a/lapack-netlib/SRC/slansb.f b/lapack-netlib/SRC/slansb.f
index 8f3fe1eb9..23519025d 100644
--- a/lapack-netlib/SRC/slansb.f
+++ b/lapack-netlib/SRC/slansb.f
@@ -134,6 +134,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            K, LDAB, N
@@ -150,15 +151,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, L
-      REAL               ABSA, SCALE, SUM, VALUE
+      REAL               ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -225,29 +229,47 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          IF( K.GT.0 ) THEN
             IF( LSAME( UPLO, 'U' ) ) THEN
                DO 110 J = 2, N
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
                   CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
+     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   110          CONTINUE
                L = K + 1
             ELSE
                DO 120 J = 1, N - 1
-                  CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   120          CONTINUE
                L = 1
             END IF
-            SUM = 2*SUM
+            SSQ( 2 ) = 2*SSQ( 2 )
          ELSE
             L = 1
          END IF
-         CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
+*
+*        Sum diagonal
+*
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
+         CALL SLASSQ( N, AB( L, 1 ), LDAB, COLSSQ( 1 ), COLSSQ( 2 ) )
+         CALL SCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANSB = VALUE
diff --git a/lapack-netlib/SRC/slansp.f b/lapack-netlib/SRC/slansp.f
index 35390cd1c..7e29d778b 100644
--- a/lapack-netlib/SRC/slansp.f
+++ b/lapack-netlib/SRC/slansp.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            N
@@ -135,15 +136,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, K
-      REAL               ABSA, SCALE, SUM, VALUE
+      REAL               ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -217,31 +221,48 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          K = 2
          IF( LSAME( UPLO, 'U' ) ) THEN
             DO 110 J = 2, N
-               CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL SLASSQ( J-1, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+               CALL SCOMBSSQ( SSQ, COLSSQ )
                K = K + J
   110       CONTINUE
          ELSE
             DO 120 J = 1, N - 1
-               CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL SLASSQ( N-J, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+               CALL SCOMBSSQ( SSQ, COLSSQ )
                K = K + N - J + 1
   120       CONTINUE
          END IF
-         SUM = 2*SUM
+         SSQ( 2 ) = 2*SSQ( 2 )
+*
+*        Sum diagonal
+*
          K = 1
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
          DO 130 I = 1, N
             IF( AP( K ).NE.ZERO ) THEN
                ABSA = ABS( AP( K ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
+               IF( COLSSQ( 1 ).LT.ABSA ) THEN
+                  COLSSQ( 2 ) = ONE + COLSSQ(2)*( COLSSQ(1) / ABSA )**2
+                  COLSSQ( 1 ) = ABSA
                ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
+                  COLSSQ( 2 ) = COLSSQ( 2 ) + ( ABSA / COLSSQ( 1 ) )**2
                END IF
             END IF
             IF( LSAME( UPLO, 'U' ) ) THEN
@@ -250,7 +271,8 @@
                K = K + N - I + 1
             END IF
   130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         CALL SCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANSP = VALUE
diff --git a/lapack-netlib/SRC/slansy.f b/lapack-netlib/SRC/slansy.f
index c8400e530..66ff1c5c7 100644
--- a/lapack-netlib/SRC/slansy.f
+++ b/lapack-netlib/SRC/slansy.f
@@ -127,6 +127,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            LDA, N
@@ -143,15 +144,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      REAL               ABSA, SCALE, SUM, VALUE
+      REAL               ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -216,21 +220,39 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          IF( LSAME( UPLO, 'U' ) ) THEN
             DO 110 J = 2, N
-               CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL SLASSQ( J-1, A( 1, J ), 1, COLSSQ(1), COLSSQ(2) )
+               CALL SCOMBSSQ( SSQ, COLSSQ )
   110       CONTINUE
          ELSE
             DO 120 J = 1, N - 1
-               CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL SLASSQ( N-J, A( J+1, J ), 1, COLSSQ(1), COLSSQ(2) )
+               CALL SCOMBSSQ( SSQ, COLSSQ )
   120       CONTINUE
          END IF
-         SUM = 2*SUM
-         CALL SLASSQ( N, A, LDA+1, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
+         SSQ( 2 ) = 2*SSQ( 2 )
+*
+*        Sum diagonal
+*
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
+         CALL SLASSQ( N, A, LDA+1, COLSSQ( 1 ), COLSSQ( 2 ) )
+         CALL SCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANSY = VALUE
diff --git a/lapack-netlib/SRC/slantb.f b/lapack-netlib/SRC/slantb.f
index 3588779cb..5b94618e1 100644
--- a/lapack-netlib/SRC/slantb.f
+++ b/lapack-netlib/SRC/slantb.f
@@ -145,6 +145,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            K, LDAB, N
@@ -162,15 +163,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J, L
-      REAL               SCALE, SUM, VALUE
+      REAL               SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -311,46 +315,61 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                IF( K.GT.0 ) THEN
                   DO 280 J = 2, N
+                     COLSSQ( 1 ) = ZERO
+                     COLSSQ( 2 ) = ONE
                      CALL SLASSQ( MIN( J-1, K ),
-     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
-     $                            SUM )
+     $                            AB( MAX( K+2-J, 1 ), J ), 1,
+     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
+                     CALL SCOMBSSQ( SSQ, COLSSQ )
   280             CONTINUE
                END IF
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 290 J = 1, N
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
                   CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
+     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   290          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                IF( K.GT.0 ) THEN
                   DO 300 J = 1, N - 1
-                     CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                            SUM )
+                     COLSSQ( 1 ) = ZERO
+                     COLSSQ( 2 ) = ONE
+                     CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
+     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
+                     CALL SCOMBSSQ( SSQ, COLSSQ )
   300             CONTINUE
                END IF
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 310 J = 1, N
-                  CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   310          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANTB = VALUE
diff --git a/lapack-netlib/SRC/slantp.f b/lapack-netlib/SRC/slantp.f
index 1423f5ca3..ab781deac 100644
--- a/lapack-netlib/SRC/slantp.f
+++ b/lapack-netlib/SRC/slantp.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            N
@@ -146,15 +147,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J, K
-      REAL               SCALE, SUM, VALUE
+      REAL               SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -306,45 +310,64 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                K = 2
                DO 280 J = 2, N
-                  CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( J-1, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
                   K = K + J
   280          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                K = 1
                DO 290 J = 1, N
-                  CALL SLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( J, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
                   K = K + J
   290          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                K = 2
                DO 300 J = 1, N - 1
-                  CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( N-J, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
                   K = K + N - J + 1
   300          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                K = 1
                DO 310 J = 1, N
-                  CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( N-J+1, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
                   K = K + N - J + 1
   310          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANTP = VALUE
diff --git a/lapack-netlib/SRC/slantr.f b/lapack-netlib/SRC/slantr.f
index 63b855892..04d29f537 100644
--- a/lapack-netlib/SRC/slantr.f
+++ b/lapack-netlib/SRC/slantr.f
@@ -146,6 +146,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            LDA, M, N
@@ -163,15 +164,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J
-      REAL               SCALE, SUM, VALUE
+      REAL               SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           SLASSQ
+*     .. Local Arrays ..
+      REAL               SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, SISNAN
       EXTERNAL           LSAME, SISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ, SCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -281,7 +285,7 @@
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
+               DO 210 I = 1, MIN( M, N )
                   WORK( I ) = ONE
   210          CONTINUE
                DO 220 I = N + 1, M
@@ -311,38 +315,56 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = MIN( M, N )
                DO 290 J = 2, N
-                  CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   290          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 300 J = 1, N
-                  CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( MIN( M, J ), A( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   300          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = MIN( M, N )
                DO 310 J = 1, N
-                  CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   310          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 320 J = 1, N
-                  CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL SLASSQ( M-J+1, A( J, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL SCOMBSSQ( SSQ, COLSSQ )
   320          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       SLANTR = VALUE
diff --git a/lapack-netlib/SRC/slanv2.f b/lapack-netlib/SRC/slanv2.f
index e73e5455c..1163446fa 100644
--- a/lapack-netlib/SRC/slanv2.f
+++ b/lapack-netlib/SRC/slanv2.f
@@ -161,7 +161,6 @@
       IF( C.EQ.ZERO ) THEN
          CS = ONE
          SN = ZERO
-         GO TO 10
 *
       ELSE IF( B.EQ.ZERO ) THEN
 *
@@ -174,12 +173,12 @@
          A = TEMP
          B = -C
          C = ZERO
-         GO TO 10
+*
       ELSE IF( (A-D).EQ.ZERO .AND. SIGN( ONE, B ).NE.
      $   SIGN( ONE, C ) ) THEN
          CS = ONE
          SN = ZERO
-         GO TO 10
+*
       ELSE
 *
          TEMP = A - D
@@ -207,6 +206,7 @@
             SN = C / TAU
             B = B - C
             C = ZERO
+*
          ELSE
 *
 *           Complex eigenvalues, or real (almost) equal eigenvalues.
@@ -268,8 +268,6 @@
          END IF
 *
       END IF
-*
-   10 CONTINUE
 *
 *     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
 *
diff --git a/lapack-netlib/SRC/slaorhr_col_getrfnp.f b/lapack-netlib/SRC/slaorhr_col_getrfnp.f
new file mode 100644
index 000000000..6cc59e538
--- /dev/null
+++ b/lapack-netlib/SRC/slaorhr_col_getrfnp.f
@@ -0,0 +1,248 @@
+*> \brief \b SLAORHR_COL_GETRFNP
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download SLAORHR_COL_GETRFNP + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaorhr_col_getrfnp.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaorhr_col_getrfnp.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaorhr_col_getrfnp.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, M, N
+*       ..
+*       .. Array Arguments ..
+*       REAL               A( LDA, * ), D( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SLAORHR_COL_GETRFNP computes the modified LU factorization without
+*> pivoting of a real general M-by-N matrix A. The factorization has
+*> the form:
+*>
+*>     A - S = L * U,
+*>
+*> where:
+*>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
+*>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
+*>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
+*>    i-1 steps of Gaussian elimination. This means that the diagonal
+*>    element at each step of "modified" Gaussian elimination is
+*>    at least one in absolute value (so that division-by-zero not
+*>    not possible during the division by the diagonal element);
+*>
+*>    L is a M-by-N lower triangular matrix with unit diagonal elements
+*>    (lower trapezoidal if M > N);
+*>
+*>    and U is a M-by-N upper triangular matrix
+*>    (upper trapezoidal if M < N).
+*>
+*> This routine is an auxiliary routine used in the Householder
+*> reconstruction routine SORHR_COL. In SORHR_COL, this routine is
+*> applied to an M-by-N matrix A with orthonormal columns, where each
+*> element is bounded by one in absolute value. With the choice of
+*> the matrix S above, one can show that the diagonal element at each
+*> step of Gaussian elimination is the largest (in absolute value) in
+*> the column on or below the diagonal, so that no pivoting is required
+*> for numerical stability [1].
+*>
+*> For more details on the Householder reconstruction algorithm,
+*> including the modified LU factorization, see [1].
+*>
+*> This is the blocked right-looking version of the algorithm,
+*> calling Level 3 BLAS to update the submatrix. To factorize a block,
+*> this routine calls the recursive routine SLAORHR_COL_GETRFNP2.
+*>
+*> [1] "Reconstructing Householder vectors from tall-skinny QR",
+*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
+*>     E. Solomonik, J. Parallel Distrib. Comput.,
+*>     vol. 85, pp. 3-31, 2015.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is REAL array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix to be factored.
+*>          On exit, the factors L and U from the factorization
+*>          A-S=L*U; the unit diagonal elements of L are not stored.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is REAL array, dimension min(M,N)
+*>          The diagonal elements of the diagonal M-by-N sign matrix S,
+*>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
+*>          be only plus or minus one.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup realGEcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November 2019, Igor Kozachenko,
+*>                Computer Science Division,
+*>                University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      SUBROUTINE SLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLAORHR_COL_GETRFNP2, STRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAORHR_COL_GETRFNP', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+
+      NB = ILAENV( 1, 'SLAORHR_COL_GETRFNP', ' ', M, N, -1, -1 )
+
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks.
+*
+            CALL SLAORHR_COL_GETRFNP2( M-J+1, JB, A( J, J ), LDA,
+     $                                 D( J ), IINFO )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Compute block row of U.
+*
+               CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of SLAORHR_COL_GETRFNP
+*
+      END
\ No newline at end of file
diff --git a/lapack-netlib/SRC/slaorhr_col_getrfnp2.f b/lapack-netlib/SRC/slaorhr_col_getrfnp2.f
new file mode 100644
index 000000000..de604602f
--- /dev/null
+++ b/lapack-netlib/SRC/slaorhr_col_getrfnp2.f
@@ -0,0 +1,305 @@
+*> \brief \b SLAORHR_COL_GETRFNP2
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLAORHR_GETRF2NP + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaorhr_col_getrfnp2.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       RECURSIVE SUBROUTINE SLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, M, N
+*       ..
+*       .. Array Arguments ..
+*       REAL               A( LDA, * ), D( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SLAORHR_COL_GETRFNP2 computes the modified LU factorization without
+*> pivoting of a real general M-by-N matrix A. The factorization has
+*> the form:
+*>
+*>     A - S = L * U,
+*>
+*> where:
+*>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
+*>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
+*>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
+*>    i-1 steps of Gaussian elimination. This means that the diagonal
+*>    element at each step of "modified" Gaussian elimination is at
+*>    least one in absolute value (so that division-by-zero not
+*>    possible during the division by the diagonal element);
+*>
+*>    L is a M-by-N lower triangular matrix with unit diagonal elements
+*>    (lower trapezoidal if M > N);
+*>
+*>    and U is a M-by-N upper triangular matrix
+*>    (upper trapezoidal if M < N).
+*>
+*> This routine is an auxiliary routine used in the Householder
+*> reconstruction routine SORHR_COL. In SORHR_COL, this routine is
+*> applied to an M-by-N matrix A with orthonormal columns, where each
+*> element is bounded by one in absolute value. With the choice of
+*> the matrix S above, one can show that the diagonal element at each
+*> step of Gaussian elimination is the largest (in absolute value) in
+*> the column on or below the diagonal, so that no pivoting is required
+*> for numerical stability [1].
+*>
+*> For more details on the Householder reconstruction algorithm,
+*> including the modified LU factorization, see [1].
+*>
+*> This is the recursive version of the LU factorization algorithm.
+*> Denote A - S by B. The algorithm divides the matrix B into four
+*> submatrices:
+*>
+*>        [  B11 | B12  ]  where B11 is n1 by n1,
+*>    B = [ -----|----- ]        B21 is (m-n1) by n1,
+*>        [  B21 | B22  ]        B12 is n1 by n2,
+*>                               B22 is (m-n1) by n2,
+*>                               with n1 = min(m,n)/2, n2 = n-n1.
+*>
+*>
+*> The subroutine calls itself to factor B11, solves for B21,
+*> solves for B12, updates B22, then calls itself to factor B22.
+*>
+*> For more details on the recursive LU algorithm, see [2].
+*>
+*> SLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
+*> routine SLAORHR_COL_GETRFNP, which uses blocked code calling
+*. Level 3 BLAS to update the submatrix. However, SLAORHR_COL_GETRFNP2
+*> is self-sufficient and can be used without SLAORHR_COL_GETRFNP.
+*>
+*> [1] "Reconstructing Householder vectors from tall-skinny QR",
+*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
+*>     E. Solomonik, J. Parallel Distrib. Comput.,
+*>     vol. 85, pp. 3-31, 2015.
+*>
+*> [2] "Recursion leads to automatic variable blocking for dense linear
+*>     algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
+*>     vol. 41, no. 6, pp. 737-755, 1997.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is REAL array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix to be factored.
+*>          On exit, the factors L and U from the factorization
+*>          A-S=L*U; the unit diagonal elements of L are not stored.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is REAL array, dimension min(M,N)
+*>          The diagonal elements of the diagonal M-by-N sign matrix S,
+*>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
+*>          be only plus or minus one.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup realGEcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November 2019, Igor Kozachenko,
+*>                Computer Science Division,
+*>                University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      RECURSIVE SUBROUTINE SLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               SFMIN
+      INTEGER            I, IINFO, N1, N2
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SSCAL, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAORHR_COL_GETRFNP2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 )
+     $   RETURN
+
+      IF ( M.EQ.1 ) THEN
+*
+*        One row case, (also recursion termination case),
+*        use unblocked code
+*
+*        Transfer the sign
+*
+         D( 1 ) = -SIGN( ONE, A( 1, 1 ) )
+*
+*        Construct the row of U
+*
+         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
+*
+      ELSE IF( N.EQ.1 ) THEN
+*
+*        One column case, (also recursion termination case),
+*        use unblocked code
+*
+*        Transfer the sign
+*
+         D( 1 ) = -SIGN( ONE, A( 1, 1 ) )
+*
+*        Construct the row of U
+*
+         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
+*
+*        Scale the elements 2:M of the column
+*
+*        Determine machine safe minimum
+*
+         SFMIN = SLAMCH('S')
+*
+*        Construct the subdiagonal elements of L
+*
+         IF( ABS( A( 1, 1 ) ) .GE. SFMIN ) THEN
+            CALL SSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
+         ELSE
+            DO I = 2, M
+               A( I, 1 ) = A( I, 1 ) / A( 1, 1 )
+            END DO
+         END IF
+*
+      ELSE
+*
+*        Divide the matrix B into four submatrices
+*
+         N1 = MIN( M, N ) / 2
+         N2 = N-N1
+
+*
+*        Factor B11, recursive call
+*
+         CALL SLAORHR_COL_GETRFNP2( N1, N1, A, LDA, D, IINFO )
+*
+*        Solve for B21
+*
+         CALL STRSM( 'R', 'U', 'N', 'N', M-N1, N1, ONE, A, LDA,
+     $               A( N1+1, 1 ), LDA )
+*
+*        Solve for B12
+*
+         CALL STRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
+     $               A( 1, N1+1 ), LDA )
+*
+*        Update B22, i.e. compute the Schur complement
+*        B22 := B22 - B21*B12
+*
+         CALL SGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
+     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
+*
+*        Factor B22, recursive call
+*
+         CALL SLAORHR_COL_GETRFNP2( M-N1, N2, A( N1+1, N1+1 ), LDA,
+     $                          D( N1+1 ), IINFO )
+*
+      END IF
+      RETURN
+*
+*     End of SLAORHR_COL_GETRFNP2
+*
+      END