diff --git a/lapack-netlib/SRC/dlaed4.f b/lapack-netlib/SRC/dlaed4.f
index e7dc839df..033438d73 100644
--- a/lapack-netlib/SRC/dlaed4.f
+++ b/lapack-netlib/SRC/dlaed4.f
@@ -82,7 +82,7 @@
 *> \param[out] DELTA
 *> \verbatim
 *>          DELTA is DOUBLE PRECISION array, dimension (N)
-*>         If N .GT. 2, DELTA contains (D(j) - lambda_I) in its  j-th
+*>         If N > 2, DELTA contains (D(j) - lambda_I) in its  j-th
 *>         component.  If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
 *>         for detail. The vector DELTA contains the information necessary
 *>         to construct the eigenvectors by DLAED3 and DLAED9.
diff --git a/lapack-netlib/SRC/dlaed8.f b/lapack-netlib/SRC/dlaed8.f
index c053347b1..f64679dc0 100644
--- a/lapack-netlib/SRC/dlaed8.f
+++ b/lapack-netlib/SRC/dlaed8.f
@@ -353,7 +353,7 @@
          Z( I ) = W( INDX( I ) )
    40 CONTINUE
 *
-*     Calculate the allowable deflation tolerence
+*     Calculate the allowable deflation tolerance
 *
       IMAX = IDAMAX( N, Z, 1 )
       JMAX = IDAMAX( N, D, 1 )
diff --git a/lapack-netlib/SRC/dlagtf.f b/lapack-netlib/SRC/dlagtf.f
index 4b257c64f..b92c84f39 100644
--- a/lapack-netlib/SRC/dlagtf.f
+++ b/lapack-netlib/SRC/dlagtf.f
@@ -125,7 +125,7 @@
 *>          then IN(k) = 1, otherwise IN(k) = 0. The element IN(n)
 *>          returns the smallest positive integer j such that
 *>
-*>             abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL,
+*>             abs( u(j,j) ) <= norm( (T - lambda*I)(j) )*TOL,
 *>
 *>          where norm( A(j) ) denotes the sum of the absolute values of
 *>          the jth row of the matrix A. If no such j exists then IN(n)
@@ -137,8 +137,8 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0   : successful exit
-*>          .lt. 0: if INFO = -k, the kth argument had an illegal value
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -k, the kth argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
diff --git a/lapack-netlib/SRC/dlagts.f b/lapack-netlib/SRC/dlagts.f
index 926075827..cbd35ae14 100644
--- a/lapack-netlib/SRC/dlagts.f
+++ b/lapack-netlib/SRC/dlagts.f
@@ -122,12 +122,12 @@
 *> \param[in,out] TOL
 *> \verbatim
 *>          TOL is DOUBLE PRECISION
-*>          On entry, with  JOB .lt. 0, TOL should be the minimum
+*>          On entry, with  JOB < 0, TOL should be the minimum
 *>          perturbation to be made to very small diagonal elements of U.
 *>          TOL should normally be chosen as about eps*norm(U), where eps
 *>          is the relative machine precision, but if TOL is supplied as
 *>          non-positive, then it is reset to eps*max( abs( u(i,j) ) ).
-*>          If  JOB .gt. 0  then TOL is not referenced.
+*>          If  JOB > 0  then TOL is not referenced.
 *>
 *>          On exit, TOL is changed as described above, only if TOL is
 *>          non-positive on entry. Otherwise TOL is unchanged.
@@ -136,14 +136,14 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>          = 0   : successful exit
-*>          .lt. 0: if INFO = -i, the i-th argument had an illegal value
-*>          .gt. 0: overflow would occur when computing the INFO(th)
-*>                  element of the solution vector x. This can only occur
-*>                  when JOB is supplied as positive and either means
-*>                  that a diagonal element of U is very small, or that
-*>                  the elements of the right-hand side vector y are very
-*>                  large.
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*>          > 0:  overflow would occur when computing the INFO(th)
+*>                element of the solution vector x. This can only occur
+*>                when JOB is supplied as positive and either means
+*>                that a diagonal element of U is very small, or that
+*>                the elements of the right-hand side vector y are very
+*>                large.
 *> \endverbatim
 *
 *  Authors:
diff --git a/lapack-netlib/SRC/dlahqr.f b/lapack-netlib/SRC/dlahqr.f
index f7365d21e..e863829ec 100644
--- a/lapack-netlib/SRC/dlahqr.f
+++ b/lapack-netlib/SRC/dlahqr.f
@@ -150,26 +150,26 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>           =   0: successful exit
-*>          .GT. 0: If INFO = i, DLAHQR failed to compute all the
+*>           = 0:  successful exit
+*>           > 0:  If INFO = i, DLAHQR failed to compute all the
 *>                  eigenvalues ILO to IHI in a total of 30 iterations
 *>                  per eigenvalue; elements i+1:ihi of WR and WI
 *>                  contain those eigenvalues which have been
 *>                  successfully computed.
 *>
-*>                  If INFO .GT. 0 and WANTT is .FALSE., then on exit,
+*>                  If INFO > 0 and WANTT is .FALSE., then on exit,
 *>                  the remaining unconverged eigenvalues are the
 *>                  eigenvalues of the upper Hessenberg matrix rows
-*>                  and columns ILO thorugh INFO of the final, output
+*>                  and columns ILO through INFO of the final, output
 *>                  value of H.
 *>
-*>                  If INFO .GT. 0 and WANTT is .TRUE., then on exit
+*>                  If INFO > 0 and WANTT is .TRUE., then on exit
 *>          (*)       (initial value of H)*U  = U*(final value of H)
-*>                  where U is an orthognal matrix.    The final
+*>                  where U is an orthogonal matrix.    The final
 *>                  value of H is upper Hessenberg and triangular in
 *>                  rows and columns INFO+1 through IHI.
 *>
-*>                  If INFO .GT. 0 and WANTZ is .TRUE., then on exit
+*>                  If INFO > 0 and WANTZ is .TRUE., then on exit
 *>                      (final value of Z)  = (initial value of Z)*U
 *>                  where U is the orthogonal matrix in (*)
 *>                  (regardless of the value of WANTT.)
diff --git a/lapack-netlib/SRC/dlaln2.f b/lapack-netlib/SRC/dlaln2.f
index a094b737b..0c94ea308 100644
--- a/lapack-netlib/SRC/dlaln2.f
+++ b/lapack-netlib/SRC/dlaln2.f
@@ -49,7 +49,7 @@
 *> the first column of each being the real part and the second
 *> being the imaginary part.
 *>
-*> "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
+*> "s" is a scaling factor (<= 1), computed by DLALN2, which is
 *> so chosen that X can be computed without overflow.  X is further
 *> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
 *> than overflow.
diff --git a/lapack-netlib/SRC/dlamswlq.f b/lapack-netlib/SRC/dlamswlq.f
index 19e32f888..306c3d3de 100644
--- a/lapack-netlib/SRC/dlamswlq.f
+++ b/lapack-netlib/SRC/dlamswlq.f
@@ -1,3 +1,4 @@
+*> \brief \b DLAMSWLQ
 *
 *  Definition:
 *  ===========
diff --git a/lapack-netlib/SRC/dlamtsqr.f b/lapack-netlib/SRC/dlamtsqr.f
index 6af89d28e..41a067780 100644
--- a/lapack-netlib/SRC/dlamtsqr.f
+++ b/lapack-netlib/SRC/dlamtsqr.f
@@ -1,3 +1,4 @@
+*> \brief \b DLAMTSQR
 *
 *  Definition:
 *  ===========
diff --git a/lapack-netlib/SRC/dlangb.f b/lapack-netlib/SRC/dlangb.f
index 078573b87..0c4f938f7 100644
--- a/lapack-netlib/SRC/dlangb.f
+++ b/lapack-netlib/SRC/dlangb.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            KL, KU, LDAB, N
@@ -139,22 +140,24 @@
 *
 * =====================================================================
 *
-*
 *     .. Parameters ..
       DOUBLE PRECISION   ONE, ZERO
       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, K, L
-      DOUBLE PRECISION   SCALE, SUM, VALUE, TEMP
+      DOUBLE PRECISION   SUM, VALUE, TEMP
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -206,15 +209,22 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
             L = MAX( 1, J-KU )
             K = KU + 1 - J + L
-            CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1,
+     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL DCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANGB = VALUE
diff --git a/lapack-netlib/SRC/dlange.f b/lapack-netlib/SRC/dlange.f
index 9dbf45e81..6b32fbefd 100644
--- a/lapack-netlib/SRC/dlange.f
+++ b/lapack-netlib/SRC/dlange.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            LDA, M, N
@@ -135,10 +136,13 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      DOUBLE PRECISION   SCALE, SUM, VALUE, TEMP
+      DOUBLE PRECISION   SUM, VALUE, TEMP
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
@@ -194,13 +198,19 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
-            CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL DLASSQ( M, A( 1, J ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL DCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANGE = VALUE
diff --git a/lapack-netlib/SRC/dlanhs.f b/lapack-netlib/SRC/dlanhs.f
index 691dbc21e..a859d2216 100644
--- a/lapack-netlib/SRC/dlanhs.f
+++ b/lapack-netlib/SRC/dlanhs.f
@@ -113,6 +113,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM
       INTEGER            LDA, N
@@ -129,15 +130,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      DOUBLE PRECISION   SCALE, SUM, VALUE
+      DOUBLE PRECISION   SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -188,13 +192,20 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
-         SCALE = ZERO
-         SUM = ONE
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
          DO 90 J = 1, N
-            CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+            COLSSQ( 1 ) = ZERO
+            COLSSQ( 2 ) = ONE
+            CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1,
+     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
+            CALL DCOMBSSQ( SSQ, COLSSQ )
    90    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANHS = VALUE
diff --git a/lapack-netlib/SRC/dlansb.f b/lapack-netlib/SRC/dlansb.f
index 4ccf5f27e..a82dc41b1 100644
--- a/lapack-netlib/SRC/dlansb.f
+++ b/lapack-netlib/SRC/dlansb.f
@@ -134,6 +134,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            K, LDAB, N
@@ -150,15 +151,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, L
-      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -225,29 +229,47 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          IF( K.GT.0 ) THEN
             IF( LSAME( UPLO, 'U' ) ) THEN
                DO 110 J = 2, N
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
                   CALL DLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
+     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   110          CONTINUE
                L = K + 1
             ELSE
                DO 120 J = 1, N - 1
-                  CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   120          CONTINUE
                L = 1
             END IF
-            SUM = 2*SUM
+            SSQ( 2 ) = 2*SSQ( 2 )
          ELSE
             L = 1
          END IF
-         CALL DLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
+*
+*        Sum diagonal
+*
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
+         CALL DLASSQ( N, AB( L, 1 ), LDAB, COLSSQ( 1 ), COLSSQ( 2 ) )
+         CALL DCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANSB = VALUE
diff --git a/lapack-netlib/SRC/dlansp.f b/lapack-netlib/SRC/dlansp.f
index a1829db75..b6ad1ffcf 100644
--- a/lapack-netlib/SRC/dlansp.f
+++ b/lapack-netlib/SRC/dlansp.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            N
@@ -135,15 +136,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J, K
-      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -217,31 +221,48 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          K = 2
          IF( LSAME( UPLO, 'U' ) ) THEN
             DO 110 J = 2, N
-               CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL DLASSQ( J-1, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+               CALL DCOMBSSQ( SSQ, COLSSQ )
                K = K + J
   110       CONTINUE
          ELSE
             DO 120 J = 1, N - 1
-               CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL DLASSQ( N-J, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
+               CALL DCOMBSSQ( SSQ, COLSSQ )
                K = K + N - J + 1
   120       CONTINUE
          END IF
-         SUM = 2*SUM
+         SSQ( 2 ) = 2*SSQ( 2 )
+*
+*        Sum diagonal
+*
          K = 1
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
          DO 130 I = 1, N
             IF( AP( K ).NE.ZERO ) THEN
                ABSA = ABS( AP( K ) )
-               IF( SCALE.LT.ABSA ) THEN
-                  SUM = ONE + SUM*( SCALE / ABSA )**2
-                  SCALE = ABSA
+               IF( COLSSQ( 1 ).LT.ABSA ) THEN
+                  COLSSQ( 2 ) = ONE + COLSSQ(2)*( COLSSQ(1) / ABSA )**2
+                  COLSSQ( 1 ) = ABSA
                ELSE
-                  SUM = SUM + ( ABSA / SCALE )**2
+                  COLSSQ( 2 ) = COLSSQ( 2 ) + ( ABSA / COLSSQ( 1 ) )**2
                END IF
             END IF
             IF( LSAME( UPLO, 'U' ) ) THEN
@@ -250,7 +271,8 @@
                K = K + N - I + 1
             END IF
   130    CONTINUE
-         VALUE = SCALE*SQRT( SUM )
+         CALL DCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANSP = VALUE
diff --git a/lapack-netlib/SRC/dlansy.f b/lapack-netlib/SRC/dlansy.f
index 2372fce0a..87d514c11 100644
--- a/lapack-netlib/SRC/dlansy.f
+++ b/lapack-netlib/SRC/dlansy.f
@@ -127,6 +127,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          NORM, UPLO
       INTEGER            LDA, N
@@ -143,15 +144,18 @@
 *     ..
 *     .. Local Scalars ..
       INTEGER            I, J
-      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -216,21 +220,39 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
+*
+         SSQ( 1 ) = ZERO
+         SSQ( 2 ) = ONE
+*
+*        Sum off-diagonals
 *
-         SCALE = ZERO
-         SUM = ONE
          IF( LSAME( UPLO, 'U' ) ) THEN
             DO 110 J = 2, N
-               CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL DLASSQ( J-1, A( 1, J ), 1, COLSSQ(1), COLSSQ(2) )
+               CALL DCOMBSSQ( SSQ, COLSSQ )
   110       CONTINUE
          ELSE
             DO 120 J = 1, N - 1
-               CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+               COLSSQ( 1 ) = ZERO
+               COLSSQ( 2 ) = ONE
+               CALL DLASSQ( N-J, A( J+1, J ), 1, COLSSQ(1), COLSSQ(2) )
+               CALL DCOMBSSQ( SSQ, COLSSQ )
   120       CONTINUE
          END IF
-         SUM = 2*SUM
-         CALL DLASSQ( N, A, LDA+1, SCALE, SUM )
-         VALUE = SCALE*SQRT( SUM )
+         SSQ( 2 ) = 2*SSQ( 2 )
+*
+*        Sum diagonal
+*
+         COLSSQ( 1 ) = ZERO
+         COLSSQ( 2 ) = ONE
+         CALL DLASSQ( N, A, LDA+1, COLSSQ( 1 ), COLSSQ( 2 ) )
+         CALL DCOMBSSQ( SSQ, COLSSQ )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANSY = VALUE
diff --git a/lapack-netlib/SRC/dlantb.f b/lapack-netlib/SRC/dlantb.f
index 3d2bfe7e4..0d46f6cc8 100644
--- a/lapack-netlib/SRC/dlantb.f
+++ b/lapack-netlib/SRC/dlantb.f
@@ -145,6 +145,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            K, LDAB, N
@@ -162,15 +163,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J, L
-      DOUBLE PRECISION   SCALE, SUM, VALUE
+      DOUBLE PRECISION   SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -311,46 +315,61 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                IF( K.GT.0 ) THEN
                   DO 280 J = 2, N
+                     COLSSQ( 1 ) = ZERO
+                     COLSSQ( 2 ) = ONE
                      CALL DLASSQ( MIN( J-1, K ),
-     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
-     $                            SUM )
+     $                            AB( MAX( K+2-J, 1 ), J ), 1,
+     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
+                     CALL DCOMBSSQ( SSQ, COLSSQ )
   280             CONTINUE
                END IF
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 290 J = 1, N
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
                   CALL DLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
-     $                         1, SCALE, SUM )
+     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   290          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                IF( K.GT.0 ) THEN
                   DO 300 J = 1, N - 1
-                     CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
-     $                            SUM )
+                     COLSSQ( 1 ) = ZERO
+                     COLSSQ( 2 ) = ONE
+                     CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
+     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
+                     CALL DCOMBSSQ( SSQ, COLSSQ )
   300             CONTINUE
                END IF
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 310 J = 1, N
-                  CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   310          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANTB = VALUE
diff --git a/lapack-netlib/SRC/dlantp.f b/lapack-netlib/SRC/dlantp.f
index f84a9e9d7..a7b89dec7 100644
--- a/lapack-netlib/SRC/dlantp.f
+++ b/lapack-netlib/SRC/dlantp.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            N
@@ -146,15 +147,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J, K
-      DOUBLE PRECISION   SCALE, SUM, VALUE
+      DOUBLE PRECISION   SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, SQRT
 *     ..
@@ -306,45 +310,64 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                K = 2
                DO 280 J = 2, N
-                  CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( J-1, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
                   K = K + J
   280          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                K = 1
                DO 290 J = 1, N
-                  CALL DLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( J, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
                   K = K + J
   290          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = N
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = N
                K = 2
                DO 300 J = 1, N - 1
-                  CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( N-J, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
                   K = K + N - J + 1
   300          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                K = 1
                DO 310 J = 1, N
-                  CALL DLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( N-J+1, AP( K ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
                   K = K + N - J + 1
   310          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANTP = VALUE
diff --git a/lapack-netlib/SRC/dlantr.f b/lapack-netlib/SRC/dlantr.f
index 8585b2f68..adc7da4c4 100644
--- a/lapack-netlib/SRC/dlantr.f
+++ b/lapack-netlib/SRC/dlantr.f
@@ -146,6 +146,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
+      IMPLICIT NONE
 *     .. Scalar Arguments ..
       CHARACTER          DIAG, NORM, UPLO
       INTEGER            LDA, M, N
@@ -163,15 +164,18 @@
 *     .. Local Scalars ..
       LOGICAL            UDIAG
       INTEGER            I, J
-      DOUBLE PRECISION   SCALE, SUM, VALUE
+      DOUBLE PRECISION   SUM, VALUE
 *     ..
-*     .. External Subroutines ..
-      EXTERNAL           DLASSQ
+*     .. Local Arrays ..
+      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
       LOGICAL            LSAME, DISNAN
       EXTERNAL           LSAME, DISNAN
 *     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, DCOMBSSQ
+*     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -281,7 +285,7 @@
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               DO 210 I = 1, N
+               DO 210 I = 1, MIN( M, N )
                   WORK( I ) = ONE
   210          CONTINUE
                DO 220 I = N + 1, M
@@ -311,38 +315,56 @@
       ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
+*        SSQ(1) is scale
+*        SSQ(2) is sum-of-squares
+*        For better accuracy, sum each column separately.
 *
          IF( LSAME( UPLO, 'U' ) ) THEN
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = MIN( M, N )
                DO 290 J = 2, N
-                  CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   290          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 300 J = 1, N
-                  CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( MIN( M, J ), A( 1, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   300          CONTINUE
             END IF
          ELSE
             IF( LSAME( DIAG, 'U' ) ) THEN
-               SCALE = ONE
-               SUM = MIN( M, N )
+               SSQ( 1 ) = ONE
+               SSQ( 2 ) = MIN( M, N )
                DO 310 J = 1, N
-                  CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
-     $                         SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   310          CONTINUE
             ELSE
-               SCALE = ZERO
-               SUM = ONE
+               SSQ( 1 ) = ZERO
+               SSQ( 2 ) = ONE
                DO 320 J = 1, N
-                  CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+                  COLSSQ( 1 ) = ZERO
+                  COLSSQ( 2 ) = ONE
+                  CALL DLASSQ( M-J+1, A( J, J ), 1,
+     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
+                  CALL DCOMBSSQ( SSQ, COLSSQ )
   320          CONTINUE
             END IF
          END IF
-         VALUE = SCALE*SQRT( SUM )
+         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
       END IF
 *
       DLANTR = VALUE
diff --git a/lapack-netlib/SRC/dlanv2.f b/lapack-netlib/SRC/dlanv2.f
index 91fa14ff2..d68481f7e 100644
--- a/lapack-netlib/SRC/dlanv2.f
+++ b/lapack-netlib/SRC/dlanv2.f
@@ -161,7 +161,6 @@
       IF( C.EQ.ZERO ) THEN
          CS = ONE
          SN = ZERO
-         GO TO 10
 *
       ELSE IF( B.EQ.ZERO ) THEN
 *
@@ -174,12 +173,12 @@
          A = TEMP
          B = -C
          C = ZERO
-         GO TO 10
+*
       ELSE IF( ( A-D ).EQ.ZERO .AND. SIGN( ONE, B ).NE.SIGN( ONE, C ) )
      $          THEN
          CS = ONE
          SN = ZERO
-         GO TO 10
+*
       ELSE
 *
          TEMP = A - D
@@ -207,6 +206,7 @@
             SN = C / TAU
             B = B - C
             C = ZERO
+*
          ELSE
 *
 *           Complex eigenvalues, or real (almost) equal eigenvalues.
@@ -268,8 +268,6 @@
          END IF
 *
       END IF
-*
-   10 CONTINUE
 *
 *     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
 *
diff --git a/lapack-netlib/SRC/dlaorhr_col_getrfnp.f b/lapack-netlib/SRC/dlaorhr_col_getrfnp.f
new file mode 100644
index 000000000..6a7c629e8
--- /dev/null
+++ b/lapack-netlib/SRC/dlaorhr_col_getrfnp.f
@@ -0,0 +1,248 @@
+*> \brief \b DLAORHR_COL_GETRFNP
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLAORHR_COL_GETRFNP + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, M, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   A( LDA, * ), D( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLAORHR_COL_GETRFNP computes the modified LU factorization without
+*> pivoting of a real general M-by-N matrix A. The factorization has
+*> the form:
+*>
+*>     A - S = L * U,
+*>
+*> where:
+*>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
+*>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
+*>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
+*>    i-1 steps of Gaussian elimination. This means that the diagonal
+*>    element at each step of "modified" Gaussian elimination is
+*>    at least one in absolute value (so that division-by-zero not
+*>    not possible during the division by the diagonal element);
+*>
+*>    L is a M-by-N lower triangular matrix with unit diagonal elements
+*>    (lower trapezoidal if M > N);
+*>
+*>    and U is a M-by-N upper triangular matrix
+*>    (upper trapezoidal if M < N).
+*>
+*> This routine is an auxiliary routine used in the Householder
+*> reconstruction routine DORHR_COL. In DORHR_COL, this routine is
+*> applied to an M-by-N matrix A with orthonormal columns, where each
+*> element is bounded by one in absolute value. With the choice of
+*> the matrix S above, one can show that the diagonal element at each
+*> step of Gaussian elimination is the largest (in absolute value) in
+*> the column on or below the diagonal, so that no pivoting is required
+*> for numerical stability [1].
+*>
+*> For more details on the Householder reconstruction algorithm,
+*> including the modified LU factorization, see [1].
+*>
+*> This is the blocked right-looking version of the algorithm,
+*> calling Level 3 BLAS to update the submatrix. To factorize a block,
+*> this routine calls the recursive routine DLAORHR_COL_GETRFNP2.
+*>
+*> [1] "Reconstructing Householder vectors from tall-skinny QR",
+*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
+*>     E. Solomonik, J. Parallel Distrib. Comput.,
+*>     vol. 85, pp. 3-31, 2015.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix to be factored.
+*>          On exit, the factors L and U from the factorization
+*>          A-S=L*U; the unit diagonal elements of L are not stored.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension min(M,N)
+*>          The diagonal elements of the diagonal M-by-N sign matrix S,
+*>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
+*>          be only plus or minus one.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup doubleGEcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November 2019, Igor Kozachenko,
+*>                Computer Science Division,
+*>                University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      SUBROUTINE DLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLAORHR_COL_GETRFNP2, DTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAORHR_COL_GETRFNP', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+
+      NB = ILAENV( 1, 'DLAORHR_COL_GETRFNP', ' ', M, N, -1, -1 )
+
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks.
+*
+            CALL DLAORHR_COL_GETRFNP2( M-J+1, JB, A( J, J ), LDA,
+     $                                 D( J ), IINFO )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Compute block row of U.
+*
+               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of DLAORHR_COL_GETRFNP
+*
+      END
diff --git a/lapack-netlib/SRC/dlaorhr_col_getrfnp2.f b/lapack-netlib/SRC/dlaorhr_col_getrfnp2.f
new file mode 100644
index 000000000..f7781f2e5
--- /dev/null
+++ b/lapack-netlib/SRC/dlaorhr_col_getrfnp2.f
@@ -0,0 +1,305 @@
+*> \brief \b DLAORHR_COL_GETRFNP2
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLAORHR_GETRF2NP + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, M, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   A( LDA, * ), D( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLAORHR_COL_GETRFNP2 computes the modified LU factorization without
+*> pivoting of a real general M-by-N matrix A. The factorization has
+*> the form:
+*>
+*>     A - S = L * U,
+*>
+*> where:
+*>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
+*>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
+*>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
+*>    i-1 steps of Gaussian elimination. This means that the diagonal
+*>    element at each step of "modified" Gaussian elimination is at
+*>    least one in absolute value (so that division-by-zero not
+*>    possible during the division by the diagonal element);
+*>
+*>    L is a M-by-N lower triangular matrix with unit diagonal elements
+*>    (lower trapezoidal if M > N);
+*>
+*>    and U is a M-by-N upper triangular matrix
+*>    (upper trapezoidal if M < N).
+*>
+*> This routine is an auxiliary routine used in the Householder
+*> reconstruction routine DORHR_COL. In DORHR_COL, this routine is
+*> applied to an M-by-N matrix A with orthonormal columns, where each
+*> element is bounded by one in absolute value. With the choice of
+*> the matrix S above, one can show that the diagonal element at each
+*> step of Gaussian elimination is the largest (in absolute value) in
+*> the column on or below the diagonal, so that no pivoting is required
+*> for numerical stability [1].
+*>
+*> For more details on the Householder reconstruction algorithm,
+*> including the modified LU factorization, see [1].
+*>
+*> This is the recursive version of the LU factorization algorithm.
+*> Denote A - S by B. The algorithm divides the matrix B into four
+*> submatrices:
+*>
+*>        [  B11 | B12  ]  where B11 is n1 by n1,
+*>    B = [ -----|----- ]        B21 is (m-n1) by n1,
+*>        [  B21 | B22  ]        B12 is n1 by n2,
+*>                               B22 is (m-n1) by n2,
+*>                               with n1 = min(m,n)/2, n2 = n-n1.
+*>
+*>
+*> The subroutine calls itself to factor B11, solves for B21,
+*> solves for B12, updates B22, then calls itself to factor B22.
+*>
+*> For more details on the recursive LU algorithm, see [2].
+*>
+*> DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
+*> routine DLAORHR_COL_GETRFNP, which uses blocked code calling
+*. Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
+*> is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
+*>
+*> [1] "Reconstructing Householder vectors from tall-skinny QR",
+*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
+*>     E. Solomonik, J. Parallel Distrib. Comput.,
+*>     vol. 85, pp. 3-31, 2015.
+*>
+*> [2] "Recursion leads to automatic variable blocking for dense linear
+*>     algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
+*>     vol. 41, no. 6, pp. 737-755, 1997.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix to be factored.
+*>          On exit, the factors L and U from the factorization
+*>          A-S=L*U; the unit diagonal elements of L are not stored.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension min(M,N)
+*>          The diagonal elements of the diagonal M-by-N sign matrix S,
+*>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
+*>          be only plus or minus one.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup doubleGEcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November 2019, Igor Kozachenko,
+*>                Computer Science Division,
+*>                University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   SFMIN
+      INTEGER            I, IINFO, N1, N2
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DSCAL, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DSIGN, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAORHR_COL_GETRFNP2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 )
+     $   RETURN
+
+      IF ( M.EQ.1 ) THEN
+*
+*        One row case, (also recursion termination case),
+*        use unblocked code
+*
+*        Transfer the sign
+*
+         D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
+*
+*        Construct the row of U
+*
+         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
+*
+      ELSE IF( N.EQ.1 ) THEN
+*
+*        One column case, (also recursion termination case),
+*        use unblocked code
+*
+*        Transfer the sign
+*
+         D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
+*
+*        Construct the row of U
+*
+         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
+*
+*        Scale the elements 2:M of the column
+*
+*        Determine machine safe minimum
+*
+         SFMIN = DLAMCH('S')
+*
+*        Construct the subdiagonal elements of L
+*
+         IF( ABS( A( 1, 1 ) ) .GE. SFMIN ) THEN
+            CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
+         ELSE
+            DO I = 2, M
+               A( I, 1 ) = A( I, 1 ) / A( 1, 1 )
+            END DO
+         END IF
+*
+      ELSE
+*
+*        Divide the matrix B into four submatrices
+*
+         N1 = MIN( M, N ) / 2
+         N2 = N-N1
+
+*
+*        Factor B11, recursive call
+*
+         CALL DLAORHR_COL_GETRFNP2( N1, N1, A, LDA, D, IINFO )
+*
+*        Solve for B21
+*
+         CALL DTRSM( 'R', 'U', 'N', 'N', M-N1, N1, ONE, A, LDA,
+     $               A( N1+1, 1 ), LDA )
+*
+*        Solve for B12
+*
+         CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
+     $               A( 1, N1+1 ), LDA )
+*
+*        Update B22, i.e. compute the Schur complement
+*        B22 := B22 - B21*B12
+*
+         CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
+     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
+*
+*        Factor B22, recursive call
+*
+         CALL DLAORHR_COL_GETRFNP2( M-N1, N2, A( N1+1, N1+1 ), LDA,
+     $                              D( N1+1 ), IINFO )
+*
+      END IF
+      RETURN
+*
+*     End of DLAORHR_COL_GETRFNP2
+*
+      END
diff --git a/lapack-netlib/SRC/dlaqps.f b/lapack-netlib/SRC/dlaqps.f
index 395d8e0b1..0009de951 100644
--- a/lapack-netlib/SRC/dlaqps.f
+++ b/lapack-netlib/SRC/dlaqps.f
@@ -127,7 +127,7 @@
 *> \param[in,out] AUXV
 *> \verbatim
 *>          AUXV is DOUBLE PRECISION array, dimension (NB)
-*>          Auxiliar vector.
+*>          Auxiliary vector.
 *> \endverbatim
 *>
 *> \param[in,out] F
diff --git a/lapack-netlib/SRC/dlaqr0.f b/lapack-netlib/SRC/dlaqr0.f
index 247d4ef30..f362c096c 100644
--- a/lapack-netlib/SRC/dlaqr0.f
+++ b/lapack-netlib/SRC/dlaqr0.f
@@ -67,7 +67,7 @@
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
-*>           The order of the matrix H.  N .GE. 0.
+*>           The order of the matrix H.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] ILO
@@ -79,12 +79,12 @@
 *> \verbatim
 *>          IHI is INTEGER
 *>           It is assumed that H is already upper triangular in rows
-*>           and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1,
+*>           and columns 1:ILO-1 and IHI+1:N and, if ILO > 1,
 *>           H(ILO,ILO-1) is zero. ILO and IHI are normally set by a
 *>           previous call to DGEBAL, and then passed to DGEHRD when the
 *>           matrix output by DGEBAL is reduced to Hessenberg form.
 *>           Otherwise, ILO and IHI should be set to 1 and N,
-*>           respectively.  If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
+*>           respectively.  If N > 0, then 1 <= ILO <= IHI <= N.
 *>           If N = 0, then ILO = 1 and IHI = 0.
 *> \endverbatim
 *>
@@ -97,19 +97,19 @@
 *>           decomposition (the Schur form); 2-by-2 diagonal blocks
 *>           (corresponding to complex conjugate pairs of eigenvalues)
 *>           are returned in standard form, with H(i,i) = H(i+1,i+1)
-*>           and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is
+*>           and H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and WANTT is
 *>           .FALSE., then the contents of H are unspecified on exit.
-*>           (The output value of H when INFO.GT.0 is given under the
+*>           (The output value of H when INFO > 0 is given under the
 *>           description of INFO below.)
 *>
-*>           This subroutine may explicitly set H(i,j) = 0 for i.GT.j and
+*>           This subroutine may explicitly set H(i,j) = 0 for i > j and
 *>           j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
-*>           The leading dimension of the array H. LDH .GE. max(1,N).
+*>           The leading dimension of the array H. LDH >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] WR
@@ -125,7 +125,7 @@
 *>           and WI(ILO:IHI). If two eigenvalues are computed as a
 *>           complex conjugate pair, they are stored in consecutive
 *>           elements of WR and WI, say the i-th and (i+1)th, with
-*>           WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then
+*>           WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., then
 *>           the eigenvalues are stored in the same order as on the
 *>           diagonal of the Schur form returned in H, with
 *>           WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal
@@ -143,7 +143,7 @@
 *>          IHIZ is INTEGER
 *>           Specify the rows of Z to which transformations must be
 *>           applied if WANTZ is .TRUE..
-*>           1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N.
+*>           1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -153,7 +153,7 @@
 *>           If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is
 *>           replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the
 *>           orthogonal Schur factor of H(ILO:IHI,ILO:IHI).
-*>           (The output value of Z when INFO.GT.0 is given under
+*>           (The output value of Z when INFO > 0 is given under
 *>           the description of INFO below.)
 *> \endverbatim
 *>
@@ -161,7 +161,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>           The leading dimension of the array Z.  if WANTZ is .TRUE.
-*>           then LDZ.GE.MAX(1,IHIZ).  Otherwize, LDZ.GE.1.
+*>           then LDZ >= MAX(1,IHIZ).  Otherwise, LDZ >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
@@ -174,7 +174,7 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
-*>           The dimension of the array WORK.  LWORK .GE. max(1,N)
+*>           The dimension of the array WORK.  LWORK >= max(1,N)
 *>           is sufficient, but LWORK typically as large as 6*N may
 *>           be required for optimal performance.  A workspace query
 *>           to determine the optimal workspace size is recommended.
@@ -190,19 +190,19 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>             =  0:  successful exit
-*>           .GT. 0:  if INFO = i, DLAQR0 failed to compute all of
+*>             = 0:  successful exit
+*>             > 0:  if INFO = i, DLAQR0 failed to compute all of
 *>                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR
 *>                and WI contain those eigenvalues which have been
 *>                successfully computed.  (Failures are rare.)
 *>
-*>                If INFO .GT. 0 and WANT is .FALSE., then on exit,
+*>                If INFO > 0 and WANT is .FALSE., then on exit,
 *>                the remaining unconverged eigenvalues are the eigen-
 *>                values of the upper Hessenberg matrix rows and
 *>                columns ILO through INFO of the final, output
 *>                value of H.
 *>
-*>                If INFO .GT. 0 and WANTT is .TRUE., then on exit
+*>                If INFO > 0 and WANTT is .TRUE., then on exit
 *>
 *>           (*)  (initial value of H)*U  = U*(final value of H)
 *>
@@ -210,7 +210,7 @@
 *>                value of H is upper Hessenberg and quasi-triangular
 *>                in rows and columns INFO+1 through IHI.
 *>
-*>                If INFO .GT. 0 and WANTZ is .TRUE., then on exit
+*>                If INFO > 0 and WANTZ is .TRUE., then on exit
 *>
 *>                  (final value of Z(ILO:IHI,ILOZ:IHIZ)
 *>                   =  (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U
@@ -218,7 +218,7 @@
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of WANTT.)
 *>
-*>                If INFO .GT. 0 and WANTZ is .FALSE., then Z is not
+*>                If INFO > 0 and WANTZ is .FALSE., then Z is not
 *>                accessed.
 *> \endverbatim
 *
@@ -678,7 +678,7 @@
                   END IF
                END IF
 *
-*              ==== Use up to NS of the the smallest magnatiude
+*              ==== Use up to NS of the the smallest magnitude
 *              .    shifts.  If there aren't NS shifts available,
 *              .    then use them all, possibly dropping one to
 *              .    make the number of shifts even. ====
diff --git a/lapack-netlib/SRC/dlaqr1.f b/lapack-netlib/SRC/dlaqr1.f
index 795b072ab..4ccf997e7 100644
--- a/lapack-netlib/SRC/dlaqr1.f
+++ b/lapack-netlib/SRC/dlaqr1.f
@@ -69,7 +69,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>              The leading dimension of H as declared in
-*>              the calling procedure.  LDH.GE.N
+*>              the calling procedure.  LDH >= N
 *> \endverbatim
 *>
 *> \param[in] SR1
diff --git a/lapack-netlib/SRC/dlaqr2.f b/lapack-netlib/SRC/dlaqr2.f
index 431b3f123..01fdf3046 100644
--- a/lapack-netlib/SRC/dlaqr2.f
+++ b/lapack-netlib/SRC/dlaqr2.f
@@ -103,7 +103,7 @@
 *> \param[in] NW
 *> \verbatim
 *>          NW is INTEGER
-*>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
+*>          Deflation window size.  1 <= NW <= (KBOT-KTOP+1).
 *> \endverbatim
 *>
 *> \param[in,out] H
@@ -121,7 +121,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>          Leading dimension of H just as declared in the calling
-*>          subroutine.  N .LE. LDH
+*>          subroutine.  N <= LDH
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -133,7 +133,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>          Specify the rows of Z to which transformations must be
-*>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
+*>          applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -149,7 +149,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>          The leading dimension of Z just as declared in the
-*>          calling subroutine.  1 .LE. LDZ.
+*>          calling subroutine.  1 <= LDZ.
 *> \endverbatim
 *>
 *> \param[out] NS
@@ -194,13 +194,13 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of V just as declared in the
-*>          calling subroutine.  NW .LE. LDV
+*>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
-*>          The number of columns of T.  NH.GE.NW.
+*>          The number of columns of T.  NH >= NW.
 *> \endverbatim
 *>
 *> \param[out] T
@@ -212,14 +212,14 @@
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of T just as declared in the
-*>          calling subroutine.  NW .LE. LDT
+*>          calling subroutine.  NW <= LDT
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>          The number of rows of work array WV available for
-*>          workspace.  NV.GE.NW.
+*>          workspace.  NV >= NW.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -231,7 +231,7 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>          The leading dimension of W just as declared in the
-*>          calling subroutine.  NW .LE. LDV
+*>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[out] WORK
diff --git a/lapack-netlib/SRC/dlaqr3.f b/lapack-netlib/SRC/dlaqr3.f
index aa23617c3..1dbf55c9e 100644
--- a/lapack-netlib/SRC/dlaqr3.f
+++ b/lapack-netlib/SRC/dlaqr3.f
@@ -100,7 +100,7 @@
 *> \param[in] NW
 *> \verbatim
 *>          NW is INTEGER
-*>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
+*>          Deflation window size.  1 <= NW <= (KBOT-KTOP+1).
 *> \endverbatim
 *>
 *> \param[in,out] H
@@ -118,7 +118,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>          Leading dimension of H just as declared in the calling
-*>          subroutine.  N .LE. LDH
+*>          subroutine.  N <= LDH
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -130,7 +130,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>          Specify the rows of Z to which transformations must be
-*>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
+*>          applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -146,7 +146,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>          The leading dimension of Z just as declared in the
-*>          calling subroutine.  1 .LE. LDZ.
+*>          calling subroutine.  1 <= LDZ.
 *> \endverbatim
 *>
 *> \param[out] NS
@@ -191,13 +191,13 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of V just as declared in the
-*>          calling subroutine.  NW .LE. LDV
+*>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
-*>          The number of columns of T.  NH.GE.NW.
+*>          The number of columns of T.  NH >= NW.
 *> \endverbatim
 *>
 *> \param[out] T
@@ -209,14 +209,14 @@
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of T just as declared in the
-*>          calling subroutine.  NW .LE. LDT
+*>          calling subroutine.  NW <= LDT
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>          The number of rows of work array WV available for
-*>          workspace.  NV.GE.NW.
+*>          workspace.  NV >= NW.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -228,7 +228,7 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>          The leading dimension of W just as declared in the
-*>          calling subroutine.  NW .LE. LDV
+*>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[out] WORK
diff --git a/lapack-netlib/SRC/dlaqr4.f b/lapack-netlib/SRC/dlaqr4.f
index 89b9b7f20..454bf9608 100644
--- a/lapack-netlib/SRC/dlaqr4.f
+++ b/lapack-netlib/SRC/dlaqr4.f
@@ -74,7 +74,7 @@
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
-*>           The order of the matrix H.  N .GE. 0.
+*>           The order of the matrix H.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] ILO
@@ -86,12 +86,12 @@
 *> \verbatim
 *>          IHI is INTEGER
 *>           It is assumed that H is already upper triangular in rows
-*>           and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1,
+*>           and columns 1:ILO-1 and IHI+1:N and, if ILO > 1,
 *>           H(ILO,ILO-1) is zero. ILO and IHI are normally set by a
 *>           previous call to DGEBAL, and then passed to DGEHRD when the
 *>           matrix output by DGEBAL is reduced to Hessenberg form.
 *>           Otherwise, ILO and IHI should be set to 1 and N,
-*>           respectively.  If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
+*>           respectively.  If N > 0, then 1 <= ILO <= IHI <= N.
 *>           If N = 0, then ILO = 1 and IHI = 0.
 *> \endverbatim
 *>
@@ -104,19 +104,19 @@
 *>           decomposition (the Schur form); 2-by-2 diagonal blocks
 *>           (corresponding to complex conjugate pairs of eigenvalues)
 *>           are returned in standard form, with H(i,i) = H(i+1,i+1)
-*>           and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is
+*>           and H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and WANTT is
 *>           .FALSE., then the contents of H are unspecified on exit.
-*>           (The output value of H when INFO.GT.0 is given under the
+*>           (The output value of H when INFO > 0 is given under the
 *>           description of INFO below.)
 *>
-*>           This subroutine may explicitly set H(i,j) = 0 for i.GT.j and
+*>           This subroutine may explicitly set H(i,j) = 0 for i > j and
 *>           j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
-*>           The leading dimension of the array H. LDH .GE. max(1,N).
+*>           The leading dimension of the array H. LDH >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] WR
@@ -132,7 +132,7 @@
 *>           and WI(ILO:IHI). If two eigenvalues are computed as a
 *>           complex conjugate pair, they are stored in consecutive
 *>           elements of WR and WI, say the i-th and (i+1)th, with
-*>           WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then
+*>           WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., then
 *>           the eigenvalues are stored in the same order as on the
 *>           diagonal of the Schur form returned in H, with
 *>           WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal
@@ -150,7 +150,7 @@
 *>          IHIZ is INTEGER
 *>           Specify the rows of Z to which transformations must be
 *>           applied if WANTZ is .TRUE..
-*>           1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N.
+*>           1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -160,7 +160,7 @@
 *>           If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is
 *>           replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the
 *>           orthogonal Schur factor of H(ILO:IHI,ILO:IHI).
-*>           (The output value of Z when INFO.GT.0 is given under
+*>           (The output value of Z when INFO > 0 is given under
 *>           the description of INFO below.)
 *> \endverbatim
 *>
@@ -168,7 +168,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>           The leading dimension of the array Z.  if WANTZ is .TRUE.
-*>           then LDZ.GE.MAX(1,IHIZ).  Otherwize, LDZ.GE.1.
+*>           then LDZ >= MAX(1,IHIZ).  Otherwise, LDZ >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
@@ -181,7 +181,7 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
-*>           The dimension of the array WORK.  LWORK .GE. max(1,N)
+*>           The dimension of the array WORK.  LWORK >= max(1,N)
 *>           is sufficient, but LWORK typically as large as 6*N may
 *>           be required for optimal performance.  A workspace query
 *>           to determine the optimal workspace size is recommended.
@@ -197,19 +197,19 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
-*>             =  0:  successful exit
-*>           .GT. 0:  if INFO = i, DLAQR4 failed to compute all of
+*>             = 0:  successful exit
+*>             > 0:  if INFO = i, DLAQR4 failed to compute all of
 *>                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR
 *>                and WI contain those eigenvalues which have been
 *>                successfully computed.  (Failures are rare.)
 *>
-*>                If INFO .GT. 0 and WANT is .FALSE., then on exit,
+*>                If INFO > 0 and WANT is .FALSE., then on exit,
 *>                the remaining unconverged eigenvalues are the eigen-
 *>                values of the upper Hessenberg matrix rows and
 *>                columns ILO through INFO of the final, output
 *>                value of H.
 *>
-*>                If INFO .GT. 0 and WANTT is .TRUE., then on exit
+*>                If INFO > 0 and WANTT is .TRUE., then on exit
 *>
 *>           (*)  (initial value of H)*U  = U*(final value of H)
 *>
@@ -217,7 +217,7 @@
 *>                value of  H is upper Hessenberg and triangular in
 *>                rows and columns INFO+1 through IHI.
 *>
-*>                If INFO .GT. 0 and WANTZ is .TRUE., then on exit
+*>                If INFO > 0 and WANTZ is .TRUE., then on exit
 *>
 *>                  (final value of Z(ILO:IHI,ILOZ:IHIZ)
 *>                   =  (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U
@@ -225,7 +225,7 @@
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of WANTT.)
 *>
-*>                If INFO .GT. 0 and WANTZ is .FALSE., then Z is not
+*>                If INFO > 0 and WANTZ is .FALSE., then Z is not
 *>                accessed.
 *> \endverbatim
 *
@@ -677,7 +677,7 @@
                   END IF
                END IF
 *
-*              ==== Use up to NS of the the smallest magnatiude
+*              ==== Use up to NS of the the smallest magnitude
 *              .    shifts.  If there aren't NS shifts available,
 *              .    then use them all, possibly dropping one to
 *              .    make the number of shifts even. ====
diff --git a/lapack-netlib/SRC/dlaqr5.f b/lapack-netlib/SRC/dlaqr5.f
index 5cc4eda1a..f58db9c89 100644
--- a/lapack-netlib/SRC/dlaqr5.f
+++ b/lapack-netlib/SRC/dlaqr5.f
@@ -133,7 +133,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>             LDH is the leading dimension of H just as declared in the
-*>             calling procedure.  LDH.GE.MAX(1,N).
+*>             calling procedure.  LDH >= MAX(1,N).
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -145,7 +145,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>             Specify the rows of Z to which transformations must be
-*>             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
+*>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -161,7 +161,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>             LDA is the leading dimension of Z just as declared in
-*>             the calling procedure. LDZ.GE.N.
+*>             the calling procedure. LDZ >= N.
 *> \endverbatim
 *>
 *> \param[out] V
@@ -173,7 +173,7 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>             LDV is the leading dimension of V as declared in the
-*>             calling procedure.  LDV.GE.3.
+*>             calling procedure.  LDV >= 3.
 *> \endverbatim
 *>
 *> \param[out] U
@@ -185,33 +185,14 @@
 *> \verbatim
 *>          LDU is INTEGER
 *>             LDU is the leading dimension of U just as declared in the
-*>             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
-*> \endverbatim
-*>
-*> \param[in] NH
-*> \verbatim
-*>          NH is INTEGER
-*>             NH is the number of columns in array WH available for
-*>             workspace. NH.GE.1.
-*> \endverbatim
-*>
-*> \param[out] WH
-*> \verbatim
-*>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
-*> \endverbatim
-*>
-*> \param[in] LDWH
-*> \verbatim
-*>          LDWH is INTEGER
-*>             Leading dimension of WH just as declared in the
-*>             calling procedure.  LDWH.GE.3*NSHFTS-3.
+*>             in the calling subroutine.  LDU >= 3*NSHFTS-3.
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>             NV is the number of rows in WV agailable for workspace.
-*>             NV.GE.1.
+*>             NV >= 1.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -223,9 +204,28 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>             LDWV is the leading dimension of WV as declared in the
-*>             in the calling subroutine.  LDWV.GE.NV.
+*>             in the calling subroutine.  LDWV >= NV.
 *> \endverbatim
 *
+*> \param[in] NH
+*> \verbatim
+*>          NH is INTEGER
+*>             NH is the number of columns in array WH available for
+*>             workspace. NH >= 1.
+*> \endverbatim
+*>
+*> \param[out] WH
+*> \verbatim
+*>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
+*> \endverbatim
+*>
+*> \param[in] LDWH
+*> \verbatim
+*>          LDWH is INTEGER
+*>             Leading dimension of WH just as declared in the
+*>             calling procedure.  LDWH >= 3*NSHFTS-3.
+*> \endverbatim
+*>
 *  Authors:
 *  ========
 *
diff --git a/lapack-netlib/SRC/dlarfb.f b/lapack-netlib/SRC/dlarfb.f
index 5b2cc2ba8..e63641213 100644
--- a/lapack-netlib/SRC/dlarfb.f
+++ b/lapack-netlib/SRC/dlarfb.f
@@ -92,6 +92,8 @@
 *>          K is INTEGER
 *>          The order of the matrix T (= the number of elementary
 *>          reflectors whose product defines the block reflector).
+*>          If SIDE = 'L', M >= K >= 0;
+*>          if SIDE = 'R', N >= K >= 0.
 *> \endverbatim
 *>
 *> \param[in] V
diff --git a/lapack-netlib/SRC/dlarfx.f b/lapack-netlib/SRC/dlarfx.f
index 260d367d4..a9e4496f9 100644
--- a/lapack-netlib/SRC/dlarfx.f
+++ b/lapack-netlib/SRC/dlarfx.f
@@ -94,7 +94,7 @@
 *> \param[in] LDC
 *> \verbatim
 *>          LDC is INTEGER
-*>          The leading dimension of the array C. LDA >= (1,M).
+*>          The leading dimension of the array C. LDC >= (1,M).
 *> \endverbatim
 *>
 *> \param[out] WORK
diff --git a/lapack-netlib/SRC/dlarfy.f b/lapack-netlib/SRC/dlarfy.f
index a0b0ebb31..3000b38bc 100644
--- a/lapack-netlib/SRC/dlarfy.f
+++ b/lapack-netlib/SRC/dlarfy.f
@@ -103,7 +103,7 @@
 *
 *> \date December 2016
 *
-*> \ingroup double_eig
+*> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
       SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK )
diff --git a/lapack-netlib/SRC/dlarrb.f b/lapack-netlib/SRC/dlarrb.f
index 2b6389e25..ddf3888b9 100644
--- a/lapack-netlib/SRC/dlarrb.f
+++ b/lapack-netlib/SRC/dlarrb.f
@@ -91,7 +91,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>          Tolerance for the convergence of the bisection intervals.
 *>          An interval [LEFT,RIGHT] has converged if
-*>          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
+*>          RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *>          where GAP is the (estimated) distance to the nearest
 *>          eigenvalue.
 *> \endverbatim
@@ -117,7 +117,7 @@
 *>          WGAP is DOUBLE PRECISION array, dimension (N-1)
 *>          On input, the (estimated) gaps between consecutive
 *>          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between
-*>          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST
+*>          eigenvalues I and I+1. Note that if IFIRST = ILAST
 *>          then WGAP(IFIRST-OFFSET) must be set to ZERO.
 *>          On output, these gaps are refined.
 *> \endverbatim
diff --git a/lapack-netlib/SRC/dlarre.f b/lapack-netlib/SRC/dlarre.f
index 0613efbc3..ce55442e2 100644
--- a/lapack-netlib/SRC/dlarre.f
+++ b/lapack-netlib/SRC/dlarre.f
@@ -150,7 +150,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>           Parameters for bisection.
 *>           An interval [LEFT,RIGHT] has converged if
-*>           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
+*>           RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *> \endverbatim
 *>
 *> \param[in] SPLTOL
diff --git a/lapack-netlib/SRC/dlarrj.f b/lapack-netlib/SRC/dlarrj.f
index 097ba9f77..a4bfb210c 100644
--- a/lapack-netlib/SRC/dlarrj.f
+++ b/lapack-netlib/SRC/dlarrj.f
@@ -85,7 +85,7 @@
 *>          RTOL is DOUBLE PRECISION
 *>          Tolerance for the convergence of the bisection intervals.
 *>          An interval [LEFT,RIGHT] has converged if
-*>          RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|).
+*>          RIGHT-LEFT < RTOL*MAX(|LEFT|,|RIGHT|).
 *> \endverbatim
 *>
 *> \param[in] OFFSET
diff --git a/lapack-netlib/SRC/dlarrv.f b/lapack-netlib/SRC/dlarrv.f
index cace17c0e..4a59a2bbf 100644
--- a/lapack-netlib/SRC/dlarrv.f
+++ b/lapack-netlib/SRC/dlarrv.f
@@ -149,7 +149,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>           Parameters for bisection.
 *>           An interval [LEFT,RIGHT] has converged if
-*>           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
+*>           RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *> \endverbatim
 *>
 *> \param[in,out] W
diff --git a/lapack-netlib/SRC/dlasd7.f b/lapack-netlib/SRC/dlasd7.f
index e0ddedeb5..66f665cf8 100644
--- a/lapack-netlib/SRC/dlasd7.f
+++ b/lapack-netlib/SRC/dlasd7.f
@@ -400,7 +400,7 @@
          VL( I ) = VLW( IDXI )
    50 CONTINUE
 *
-*     Calculate the allowable deflation tolerence
+*     Calculate the allowable deflation tolerance
 *
       EPS = DLAMCH( 'Epsilon' )
       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
diff --git a/lapack-netlib/SRC/dlasr.f b/lapack-netlib/SRC/dlasr.f
index 6059c6293..f707970e4 100644
--- a/lapack-netlib/SRC/dlasr.f
+++ b/lapack-netlib/SRC/dlasr.f
@@ -175,7 +175,7 @@
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The M-by-N matrix A.  On exit, A is overwritten by P*A if
-*>          SIDE = 'R' or by A*P**T if SIDE = 'L'.
+*>          SIDE = 'L' or by A*P**T if SIDE = 'R'.
 *> \endverbatim
 *>
 *> \param[in] LDA
diff --git a/lapack-netlib/SRC/dlassq.f b/lapack-netlib/SRC/dlassq.f
index 885395e3c..5922360f9 100644
--- a/lapack-netlib/SRC/dlassq.f
+++ b/lapack-netlib/SRC/dlassq.f
@@ -60,7 +60,7 @@
 *>
 *> \param[in] X
 *> \verbatim
-*>          X is DOUBLE PRECISION array, dimension (N)
+*>          X is DOUBLE PRECISION array, dimension (1+(N-1)*INCX)
 *>          The vector for which a scaled sum of squares is computed.
 *>             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
 *> \endverbatim
diff --git a/lapack-netlib/SRC/dlaswlq.f b/lapack-netlib/SRC/dlaswlq.f
index 6e4ca20fd..619a1f1a2 100644
--- a/lapack-netlib/SRC/dlaswlq.f
+++ b/lapack-netlib/SRC/dlaswlq.f
@@ -1,3 +1,4 @@
+*> \brief \b DLASWLQ
 *
 *  Definition:
 *  ===========
@@ -18,9 +19,20 @@
 *>
 *> \verbatim
 *>
-*>          DLASWLQ computes a blocked Short-Wide LQ factorization of a
-*>          M-by-N matrix A, where N >= M:
-*>          A = L * Q
+*> DLASWLQ computes a blocked Tall-Skinny LQ factorization of
+*> a real M-by-N matrix A for M <= N:
+*>
+*>    A = ( L 0 ) *  Q,
+*>
+*> where:
+*>
+*>    Q is a n-by-N orthogonal matrix, stored on exit in an implicit
+*>    form in the elements above the digonal of the array A and in
+*>    the elemenst of the array T;
+*>    L is an lower-triangular M-by-M matrix stored on exit in
+*>    the elements on and below the diagonal of the array A.
+*>    0 is a M-by-(N-M) zero matrix, if M < N, and is not stored.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -150,7 +162,7 @@
       SUBROUTINE DLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
      $                  INFO)
 *
-*  -- LAPACK computational routine (version 3.7.1) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
 *     June 2017
diff --git a/lapack-netlib/SRC/dlasyf_aa.f b/lapack-netlib/SRC/dlasyf_aa.f
index 6b75e46e0..793537e04 100644
--- a/lapack-netlib/SRC/dlasyf_aa.f
+++ b/lapack-netlib/SRC/dlasyf_aa.f
@@ -284,8 +284,9 @@
 *
 *              Swap A(I1, I2+1:M) with A(I2, I2+1:M)
 *
-               CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA,
-     $                           A( J1+I2-1, I2+1 ), LDA )
+               IF( I2.LT.M )
+     $            CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA,
+     $                              A( J1+I2-1, I2+1 ), LDA )
 *
 *              Swap A(I1, I1) with A(I2,I2)
 *
@@ -325,13 +326,15 @@
 *           Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1),
 *            where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1)
 *
-            IF( A( K, J+1 ).NE.ZERO ) THEN
-               ALPHA = ONE / A( K, J+1 )
-               CALL DCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA )
-               CALL DSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA )
-            ELSE
-               CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO,
-     $                      A( K, J+2 ), LDA)
+            IF( J.LT.(M-1) ) THEN
+               IF( A( K, J+1 ).NE.ZERO ) THEN
+                  ALPHA = ONE / A( K, J+1 )
+                  CALL DCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA )
+                  CALL DSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA )
+               ELSE
+                  CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO,
+     $                         A( K, J+2 ), LDA)
+               END IF
             END IF
          END IF
          J = J + 1
@@ -432,8 +435,9 @@
 *
 *              Swap A(I2+1:M, I1) with A(I2+1:M, I2)
 *
-               CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1,
-     $                           A( I2+1, J1+I2-1 ), 1 )
+               IF( I2.LT.M )
+     $            CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1,
+     $                              A( I2+1, J1+I2-1 ), 1 )
 *
 *              Swap A(I1, I1) with A(I2, I2)
 *
@@ -473,13 +477,15 @@
 *           Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1),
 *            where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1)
 *
-            IF( A( J+1, K ).NE.ZERO ) THEN
-               ALPHA = ONE / A( J+1, K )
-               CALL DCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 )
-               CALL DSCAL( M-J-1, ALPHA, A( J+2, K ), 1 )
-            ELSE
-               CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO,
-     $                      A( J+2, K ), LDA )
+            IF( J.LT.(M-1) ) THEN
+               IF( A( J+1, K ).NE.ZERO ) THEN
+                  ALPHA = ONE / A( J+1, K )
+                  CALL DCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 )
+                  CALL DSCAL( M-J-1, ALPHA, A( J+2, K ), 1 )
+               ELSE
+                  CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO,
+     $                         A( J+2, K ), LDA )
+               END IF
             END IF
          END IF
          J = J + 1
diff --git a/lapack-netlib/SRC/dlasyf_rk.f b/lapack-netlib/SRC/dlasyf_rk.f
index 209b4c89d..d581eeedc 100644
--- a/lapack-netlib/SRC/dlasyf_rk.f
+++ b/lapack-netlib/SRC/dlasyf_rk.f
@@ -321,7 +321,7 @@
 *        of A and working backwards, and compute the matrix W = U12*D
 *        for use in updating A11
 *
-*        Initilize the first entry of array E, where superdiagonal
+*        Initialize the first entry of array E, where superdiagonal
 *        elements of D are stored
 *
          E( 1 ) = ZERO
@@ -649,7 +649,7 @@
 *        of A and working forwards, and compute the matrix W = L21*D
 *        for use in updating A22
 *
-*        Initilize the unused last entry of the subdiagonal array E.
+*        Initialize the unused last entry of the subdiagonal array E.
 *
          E( N ) = ZERO
 *
diff --git a/lapack-netlib/SRC/dlasyf_rook.f b/lapack-netlib/SRC/dlasyf_rook.f
index 49ee7a6c9..557032104 100644
--- a/lapack-netlib/SRC/dlasyf_rook.f
+++ b/lapack-netlib/SRC/dlasyf_rook.f
@@ -21,7 +21,7 @@
 *       SUBROUTINE DLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
 *
 *       .. Scalar Arguments ..
-*       CHARADLATER          UPLO
+*       CHARACTER          UPLO
 *       INTEGER            INFO, KB, LDA, LDW, N, NB
 *       ..
 *       .. Array Arguments ..
diff --git a/lapack-netlib/SRC/dlatdf.f b/lapack-netlib/SRC/dlatdf.f
index fd05059b3..8001e0830 100644
--- a/lapack-netlib/SRC/dlatdf.f
+++ b/lapack-netlib/SRC/dlatdf.f
@@ -85,7 +85,7 @@
 *>          RHS is DOUBLE PRECISION array, dimension (N)
 *>          On entry, RHS contains contributions from other subsystems.
 *>          On exit, RHS contains the solution of the subsystem with
-*>          entries acoording to the value of IJOB (see above).
+*>          entries according to the value of IJOB (see above).
 *> \endverbatim
 *>
 *> \param[in,out] RDSUM
@@ -260,7 +260,7 @@
 *
 *        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done
 *        in BSOLVE and will hopefully give us a better estimate because
-*        any ill-conditioning of the original matrix is transfered to U
+*        any ill-conditioning of the original matrix is transferred to U
 *        and not to L. U(N, N) is an approximation to sigma_min(LU).
 *
          CALL DCOPY( N-1, RHS, 1, XP, 1 )
diff --git a/lapack-netlib/SRC/dlatsqr.f b/lapack-netlib/SRC/dlatsqr.f
index 1ce7c4de0..598d2938e 100644
--- a/lapack-netlib/SRC/dlatsqr.f
+++ b/lapack-netlib/SRC/dlatsqr.f
@@ -1,3 +1,4 @@
+*> \brief \b DLATSQR
 *
 *  Definition:
 *  ===========
@@ -19,8 +20,22 @@
 *> \verbatim
 *>
 *> DLATSQR computes a blocked Tall-Skinny QR factorization of
-*> an M-by-N matrix A, where M >= N:
-*> A = Q * R .
+*> a real M-by-N matrix A for M >= N:
+*>
+*>    A = Q * ( R ),
+*>            ( 0 )
+*>
+*> where:
+*>
+*>    Q is a M-by-M orthogonal matrix, stored on exit in an implicit
+*>    form in the elements below the digonal of the array A and in
+*>    the elemenst of the array T;
+*>
+*>    R is an upper-triangular N-by-N matrix, stored on exit in
+*>    the elements on and above the diagonal of the array A.
+*>
+*>    0 is a (M-N)-by-N zero matrix, and is not stored.
+*>
 *> \endverbatim
 *
 *  Arguments:
@@ -149,10 +164,10 @@
       SUBROUTINE DLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK,
      $                    LWORK, INFO)
 *
-*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
-*     December 2016
+*     November 2019
 *
 *     .. Scalar Arguments ..
       INTEGER           INFO, LDA, M, N, MB, NB, LDT, LWORK
diff --git a/lapack-netlib/SRC/dorgtsqr.f b/lapack-netlib/SRC/dorgtsqr.f
new file mode 100644
index 000000000..85b05b6b5
--- /dev/null
+++ b/lapack-netlib/SRC/dorgtsqr.f
@@ -0,0 +1,306 @@
+*> \brief \b DORGTSQR
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DORGTSQR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtsqr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtsqr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtsqr.f">
+*> [TXT]</a>
+*>
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DORGTSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
+*      $                     INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION  A( LDA, * ), T( LDT, * ), WORK( * )
+*       ..
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
+*> which are the first N columns of a product of real orthogonal
+*> matrices of order M which are returned by DLATSQR
+*>
+*>      Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
+*>
+*> See the documentation for DLATSQR.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. M >= N >= 0.
+*> \endverbatim
+*>
+*> \param[in] MB
+*> \verbatim
+*>          MB is INTEGER
+*>          The row block size used by DLATSQR to return
+*>          arrays A and T. MB > N.
+*>          (Note that if MB > M, then M is used instead of MB
+*>          as the row block size).
+*> \endverbatim
+*>
+*> \param[in] NB
+*> \verbatim
+*>          NB is INTEGER
+*>          The column block size used by DLATSQR to return
+*>          arrays A and T. NB >= 1.
+*>          (Note that if NB > N, then N is used instead of NB
+*>          as the column block size).
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>
+*>          On entry:
+*>
+*>             The elements on and above the diagonal are not accessed.
+*>             The elements below the diagonal represent the unit
+*>             lower-trapezoidal blocked matrix V computed by DLATSQR
+*>             that defines the input matrices Q_in(k) (ones on the
+*>             diagonal are not stored) (same format as the output A
+*>             below the diagonal in DLATSQR).
+*>
+*>          On exit:
+*>
+*>             The array A contains an M-by-N orthonormal matrix Q_out,
+*>             i.e the columns of A are orthogonal unit vectors.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in] T
+*> \verbatim
+*>          T is DOUBLE PRECISION array,
+*>          dimension (LDT, N * NIRB)
+*>          where NIRB = Number_of_input_row_blocks
+*>                     = MAX( 1, CEIL((M-N)/(MB-N)) )
+*>          Let NICB = Number_of_input_col_blocks
+*>                   = CEIL(N/NB)
+*>
+*>          The upper-triangular block reflectors used to define the
+*>          input matrices Q_in(k), k=(1:NIRB*NICB). The block
+*>          reflectors are stored in compact form in NIRB block
+*>          reflector sequences. Each of NIRB block reflector sequences
+*>          is stored in a larger NB-by-N column block of T and consists
+*>          of NICB smaller NB-by-NB upper-triangular column blocks.
+*>          (same format as the output T in DLATSQR).
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T.
+*>          LDT >= max(1,min(NB1,N)).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          (workspace) DOUBLE PRECISION array, dimension (MAX(2,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          The dimension of the array WORK.  LWORK >= (M+NB)*N.
+*>          If LWORK = -1, then a workspace query is assumed.
+*>          The routine only calculates the optimal size of the WORK
+*>          array, returns this value as the first entry of the WORK
+*>          array, and no error message related to LWORK is issued
+*>          by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November 2019, Igor Kozachenko,
+*>                Computer Science Division,
+*>                University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      SUBROUTINE DORGTSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
+     $                     INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION  A( LDA, * ), T( LDT, * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IINFO, LDC, LWORKOPT, LC, LW, NBLOCAL, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMTSQR, DLASET, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      LQUERY  = LWORK.EQ.-1
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
+         INFO = -2
+      ELSE IF( MB.LE.N ) THEN
+         INFO = -3
+      ELSE IF( NB.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
+         INFO = -8
+      ELSE
+*
+*        Test the input LWORK for the dimension of the array WORK.
+*        This workspace is used to store array C(LDC, N) and WORK(LWORK)
+*        in the call to DLAMTSQR. See the documentation for DLAMTSQR.
+*
+         IF( LWORK.LT.2 .AND. (.NOT.LQUERY) ) THEN
+            INFO = -10
+         ELSE
+*
+*           Set block size for column blocks
+*
+            NBLOCAL = MIN( NB, N )
+*
+*           LWORK = -1, then set the size for the array C(LDC,N)
+*           in DLAMTSQR call and set the optimal size of the work array
+*           WORK(LWORK) in DLAMTSQR call.
+*
+            LDC = M
+            LC = LDC*N
+            LW = N * NBLOCAL
+*
+            LWORKOPT = LC+LW
+*
+            IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
+               INFO = -10
+            END IF
+         END IF
+*
+      END IF
+*
+*     Handle error in the input parameters and return workspace query.
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGTSQR', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+         WORK( 1 ) = DBLE( LWORKOPT )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         WORK( 1 ) = DBLE( LWORKOPT )
+         RETURN
+      END IF
+*
+*     (1) Form explicitly the tall-skinny M-by-N left submatrix Q1_in
+*     of M-by-M orthogonal matrix Q_in, which is implicitly stored in
+*     the subdiagonal part of input array A and in the input array T.
+*     Perform by the following operation using the routine DLAMTSQR.
+*
+*         Q1_in = Q_in * ( I ), where I is a N-by-N identity matrix,
+*                        ( 0 )        0 is a (M-N)-by-N zero matrix.
+*
+*     (1a) Form M-by-N matrix in the array WORK(1:LDC*N) with ones
+*     on the diagonal and zeros elsewhere.
+*
+      CALL DLASET( 'F', M, N, ZERO, ONE, WORK, LDC )
+*
+*     (1b)  On input, WORK(1:LDC*N) stores ( I );
+*                                          ( 0 )
+*
+*           On output, WORK(1:LDC*N) stores Q1_in.
+*
+      CALL DLAMTSQR( 'L', 'N', M, N, N, MB, NBLOCAL, A, LDA, T, LDT,
+     $               WORK, LDC, WORK( LC+1 ), LW, IINFO )
+*
+*     (2) Copy the result from the part of the work array (1:M,1:N)
+*     with the leading dimension LDC that starts at WORK(1) into
+*     the output array A(1:M,1:N) column-by-column.
+*
+      DO J = 1, N
+         CALL DCOPY( M, WORK( (J-1)*LDC + 1 ), 1, A( 1, J ), 1 )
+      END DO
+*
+      WORK( 1 ) = DBLE( LWORKOPT )
+      RETURN
+*
+*     End of DORGTSQR
+*
+      END
\ No newline at end of file
diff --git a/lapack-netlib/SRC/dorhr_col.f b/lapack-netlib/SRC/dorhr_col.f
new file mode 100644
index 000000000..b5a65973d
--- /dev/null
+++ b/lapack-netlib/SRC/dorhr_col.f
@@ -0,0 +1,440 @@
+*> \brief \b DORHR_COL
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DORHR_COL + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorhr_col.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorhr_col.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorhr_col.f">
+*> [TXT]</a>
+*>
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DORHR_COL( M, N, NB, A, LDA, T, LDT, D, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER           INFO, LDA, LDT, M, N, NB
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION  A( LDA, * ), D( * ), T( LDT, * )
+*       ..
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*>  DORHR_COL takes an M-by-N real matrix Q_in with orthonormal columns
+*>  as input, stored in A, and performs Householder Reconstruction (HR),
+*>  i.e. reconstructs Householder vectors V(i) implicitly representing
+*>  another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
+*>  where S is an N-by-N diagonal matrix with diagonal entries
+*>  equal to +1 or -1. The Householder vectors (columns V(i) of V) are
+*>  stored in A on output, and the diagonal entries of S are stored in D.
+*>  Block reflectors are also returned in T
+*>  (same output format as DGEQRT).
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. M >= N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NB
+*> \verbatim
+*>          NB is INTEGER
+*>          The column block size to be used in the reconstruction
+*>          of Householder column vector blocks in the array A and
+*>          corresponding block reflectors in the array T. NB >= 1.
+*>          (Note that if NB > N, then N is used instead of NB
+*>          as the column block size.)
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>
+*>          On entry:
+*>
+*>             The array A contains an M-by-N orthonormal matrix Q_in,
+*>             i.e the columns of A are orthogonal unit vectors.
+*>
+*>          On exit:
+*>
+*>             The elements below the diagonal of A represent the unit
+*>             lower-trapezoidal matrix V of Householder column vectors
+*>             V(i). The unit diagonal entries of V are not stored
+*>             (same format as the output below the diagonal in A from
+*>             DGEQRT). The matrix T and the matrix V stored on output
+*>             in A implicitly define Q_out.
+*>
+*>             The elements above the diagonal contain the factor U
+*>             of the "modified" LU-decomposition:
+*>                Q_in - ( S ) = V * U
+*>                       ( 0 )
+*>             where 0 is a (M-N)-by-(M-N) zero matrix.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is DOUBLE PRECISION array,
+*>          dimension (LDT, N)
+*>
+*>          Let NOCB = Number_of_output_col_blocks
+*>                   = CEIL(N/NB)
+*>
+*>          On exit, T(1:NB, 1:N) contains NOCB upper-triangular
+*>          block reflectors used to define Q_out stored in compact
+*>          form as a sequence of upper-triangular NB-by-NB column
+*>          blocks (same format as the output T in DGEQRT).
+*>          The matrix T and the matrix V stored on output in A
+*>          implicitly define Q_out. NOTE: The lower triangles
+*>          below the upper-triangular blcoks will be filled with
+*>          zeros. See Further Details.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T.
+*>          LDT >= max(1,min(NB,N)).
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension min(M,N).
+*>          The elements can be only plus or minus one.
+*>
+*>          D(i) is constructed as D(i) = -SIGN(Q_in_i(i,i)), where
+*>          1 <= i <= min(M,N), and Q_in_i is Q_in after performing
+*>          i-1 steps of “modified” Gaussian elimination.
+*>          See Further Details.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*>
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*> The computed M-by-M orthogonal factor Q_out is defined implicitly as
+*> a product of orthogonal matrices Q_out(i). Each Q_out(i) is stored in
+*> the compact WY-representation format in the corresponding blocks of
+*> matrices V (stored in A) and T.
+*>
+*> The M-by-N unit lower-trapezoidal matrix V stored in the M-by-N
+*> matrix A contains the column vectors V(i) in NB-size column
+*> blocks VB(j). For example, VB(1) contains the columns
+*> V(1), V(2), ... V(NB). NOTE: The unit entries on
+*> the diagonal of Y are not stored in A.
+*>
+*> The number of column blocks is
+*>
+*>     NOCB = Number_of_output_col_blocks = CEIL(N/NB)
+*>
+*> where each block is of order NB except for the last block, which
+*> is of order LAST_NB = N - (NOCB-1)*NB.
+*>
+*> For example, if M=6,  N=5 and NB=2, the matrix V is
+*>
+*>
+*>     V = (    VB(1),   VB(2), VB(3) ) =
+*>
+*>       = (   1                      )
+*>         ( v21    1                 )
+*>         ( v31  v32    1            )
+*>         ( v41  v42  v43   1        )
+*>         ( v51  v52  v53  v54    1  )
+*>         ( v61  v62  v63  v54   v65 )
+*>
+*>
+*> For each of the column blocks VB(i), an upper-triangular block
+*> reflector TB(i) is computed. These blocks are stored as
+*> a sequence of upper-triangular column blocks in the NB-by-N
+*> matrix T. The size of each TB(i) block is NB-by-NB, except
+*> for the last block, whose size is LAST_NB-by-LAST_NB.
+*>
+*> For example, if M=6,  N=5 and NB=2, the matrix T is
+*>
+*>     T  = (    TB(1),    TB(2), TB(3) ) =
+*>
+*>        = ( t11  t12  t13  t14   t15  )
+*>          (      t22       t24        )
+*>
+*>
+*> The M-by-M factor Q_out is given as a product of NOCB
+*> orthogonal M-by-M matrices Q_out(i).
+*>
+*>     Q_out = Q_out(1) * Q_out(2) * ... * Q_out(NOCB),
+*>
+*> where each matrix Q_out(i) is given by the WY-representation
+*> using corresponding blocks from the matrices V and T:
+*>
+*>     Q_out(i) = I - VB(i) * TB(i) * (VB(i))**T,
+*>
+*> where I is the identity matrix. Here is the formula with matrix
+*> dimensions:
+*>
+*>  Q(i){M-by-M} = I{M-by-M} -
+*>    VB(i){M-by-INB} * TB(i){INB-by-INB} * (VB(i))**T {INB-by-M},
+*>
+*> where INB = NB, except for the last block NOCB
+*> for which INB=LAST_NB.
+*>
+*> =====
+*> NOTE:
+*> =====
+*>
+*> If Q_in is the result of doing a QR factorization
+*> B = Q_in * R_in, then:
+*>
+*> B = (Q_out*S) * R_in = Q_out * (S * R_in) = O_out * R_out.
+*>
+*> So if one wants to interpret Q_out as the result
+*> of the QR factorization of B, then corresponding R_out
+*> should be obtained by R_out = S * R_in, i.e. some rows of R_in
+*> should be multiplied by -1.
+*>
+*> For the details of the algorithm, see [1].
+*>
+*> [1] "Reconstructing Householder vectors from tall-skinny QR",
+*>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
+*>     E. Solomonik, J. Parallel Distrib. Comput.,
+*>     vol. 85, pp. 3-31, 2015.
+*> \endverbatim
+*>
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date November 2019
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*> \verbatim
+*>
+*> November   2019, Igor Kozachenko,
+*>            Computer Science Division,
+*>            University of California, Berkeley
+*>
+*> \endverbatim
+*
+*  =====================================================================
+      SUBROUTINE DORHR_COL( M, N, NB, A, LDA, T, LDT, D, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK computational routine (version 3.9.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     November 2019
+*
+*     .. Scalar Arguments ..
+      INTEGER           INFO, LDA, LDT, M, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION  A( LDA, * ), D( * ), T( LDT, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, JBTEMP1, JBTEMP2, JNB,
+     $                   NPLUSONE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAORHR_COL_GETRFNP, DSCAL, DTRSM,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( NB.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Handle error in the input parameters.
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORHR_COL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         RETURN
+      END IF
+*
+*     On input, the M-by-N matrix A contains the orthogonal
+*     M-by-N matrix Q_in.
+*
+*     (1) Compute the unit lower-trapezoidal V (ones on the diagonal
+*     are not stored) by performing the "modified" LU-decomposition.
+*
+*     Q_in - ( S ) = V * U = ( V1 ) * U,
+*            ( 0 )           ( V2 )
+*
+*     where 0 is an (M-N)-by-N zero matrix.
+*
+*     (1-1) Factor V1 and U.
+
+      CALL DLAORHR_COL_GETRFNP( N, N, A, LDA, D, IINFO )
+*
+*     (1-2) Solve for V2.
+*
+      IF( M.GT.N ) THEN
+         CALL DTRSM( 'R', 'U', 'N', 'N', M-N, N, ONE, A, LDA,
+     $               A( N+1, 1 ), LDA )
+      END IF
+*
+*     (2) Reconstruct the block reflector T stored in T(1:NB, 1:N)
+*     as a sequence of upper-triangular blocks with NB-size column
+*     blocking.
+*
+*     Loop over the column blocks of size NB of the array A(1:M,1:N)
+*     and the array T(1:NB,1:N), JB is the column index of a column
+*     block, JNB is the column block size at each step JB.
+*
+      NPLUSONE = N + 1
+      DO JB = 1, N, NB
+*
+*        (2-0) Determine the column block size JNB.
+*
+         JNB = MIN( NPLUSONE-JB, NB )
+*
+*        (2-1) Copy the upper-triangular part of the current JNB-by-JNB
+*        diagonal block U(JB) (of the N-by-N matrix U) stored
+*        in A(JB:JB+JNB-1,JB:JB+JNB-1) into the upper-triangular part
+*        of the current JNB-by-JNB block T(1:JNB,JB:JB+JNB-1)
+*        column-by-column, total JNB*(JNB+1)/2 elements.
+*
+         JBTEMP1 = JB - 1
+         DO J = JB, JB+JNB-1
+            CALL DCOPY( J-JBTEMP1, A( JB, J ), 1, T( 1, J ), 1 )
+         END DO
+*
+*        (2-2) Perform on the upper-triangular part of the current
+*        JNB-by-JNB diagonal block U(JB) (of the N-by-N matrix U) stored
+*        in T(1:JNB,JB:JB+JNB-1) the following operation in place:
+*        (-1)*U(JB)*S(JB), i.e the result will be stored in the upper-
+*        triangular part of T(1:JNB,JB:JB+JNB-1). This multiplication
+*        of the JNB-by-JNB diagonal block U(JB) by the JNB-by-JNB
+*        diagonal block S(JB) of the N-by-N sign matrix S from the
+*        right means changing the sign of each J-th column of the block
+*        U(JB) according to the sign of the diagonal element of the block
+*        S(JB), i.e. S(J,J) that is stored in the array element D(J).
+*
+         DO J = JB, JB+JNB-1
+            IF( D( J ).EQ.ONE ) THEN
+               CALL DSCAL( J-JBTEMP1, -ONE, T( 1, J ), 1 )
+            END IF
+         END DO
+*
+*        (2-3) Perform the triangular solve for the current block
+*        matrix X(JB):
+*
+*               X(JB) * (A(JB)**T) = B(JB), where:
+*
+*               A(JB)**T  is a JNB-by-JNB unit upper-triangular
+*                         coefficient block, and A(JB)=V1(JB), which
+*                         is a JNB-by-JNB unit lower-triangular block
+*                         stored in A(JB:JB+JNB-1,JB:JB+JNB-1).
+*                         The N-by-N matrix V1 is the upper part
+*                         of the M-by-N lower-trapezoidal matrix V
+*                         stored in A(1:M,1:N);
+*
+*               B(JB)     is a JNB-by-JNB  upper-triangular right-hand
+*                         side block, B(JB) = (-1)*U(JB)*S(JB), and
+*                         B(JB) is stored in T(1:JNB,JB:JB+JNB-1);
+*
+*               X(JB)     is a JNB-by-JNB upper-triangular solution
+*                         block, X(JB) is the upper-triangular block
+*                         reflector T(JB), and X(JB) is stored
+*                         in T(1:JNB,JB:JB+JNB-1).
+*
+*             In other words, we perform the triangular solve for the
+*             upper-triangular block T(JB):
+*
+*               T(JB) * (V1(JB)**T) = (-1)*U(JB)*S(JB).
+*
+*             Even though the blocks X(JB) and B(JB) are upper-
+*             triangular, the routine DTRSM will access all JNB**2
+*             elements of the square T(1:JNB,JB:JB+JNB-1). Therefore,
+*             we need to set to zero the elements of the block
+*             T(1:JNB,JB:JB+JNB-1) below the diagonal before the call
+*             to DTRSM.
+*
+*        (2-3a) Set the elements to zero.
+*
+         JBTEMP2 = JB - 2
+         DO J = JB, JB+JNB-2
+            DO I = J-JBTEMP2, NB
+               T( I, J ) = ZERO
+            END DO
+         END DO
+*
+*        (2-3b) Perform the triangular solve.
+*
+         CALL DTRSM( 'R', 'L', 'T', 'U', JNB, JNB, ONE,
+     $               A( JB, JB ), LDA, T( 1, JB ), LDT )
+*
+      END DO
+*
+      RETURN
+*
+*     End of DORHR_COL
+*
+      END
\ No newline at end of file