Merge pull request #3834 from martin-frbg/lapack631

Use new algorithms for computing Givens rotations (Reference-LAPACK PR631)
2022-11-21 08:30:14 +01:00 · 2022-11-21 08:30:14 +01:00 · 0b68dd6a9b
parent 9343499256 7ae4269add
commit 0b68dd6a9b
4 changed files with 247 additions and 133 deletions
--- a/lapack-netlib/SRC/clartg.f90
+++ b/lapack-netlib/SRC/clartg.f90
@ -30,7 +30,7 @@
 !> The mathematical formulas used for C and S are
 !>
 !>    sgn(x) = {  x / |x|,   x != 0
-!>             {  1,         x = 0
+!>             {  1,         x  = 0
 !>
 !>    R = sgn(F) * sqrt(|F|**2 + |G|**2)
 !>
@ -38,19 +38,20 @@
 !>
 !>    S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
 !>
+!> Special conditions:
+!>    If G=0, then C=1 and S=0.
+!>    If F=0, then C=0 and S is chosen so that R is real.
+!>
 !> When F and G are real, the formulas simplify to C = F/R and
 !> S = G/R, and the returned values of C, S, and R should be
-!> identical to those returned by CLARTG.
+!> identical to those returned by SLARTG.
 !>
 !> The algorithm used to compute these quantities incorporates scaling
 !> to avoid overflow or underflow in computing the square root of the
 !> sum of squares.
 !>
-!> This is a faster version of the BLAS1 routine CROTG, except for
-!> the following differences:
-!>    F and G are unchanged on return.
-!>    If G=0, then C=1 and S=0.
-!>    If F=0, then C=0 and S is chosen so that R is real.
+!> This is the same routine as CROTG from BLAS1, except that
+!> F and G are unchanged on return.
 !>
 !> Below, wp=>sp stands for single precision from LA_CONSTANTS module.
 !> \endverbatim
@ -91,22 +92,19 @@
 !  Authors:
 !  ========
 !
-!> \author Edward Anderson, Lockheed Martin
+!> \author Weslley Pereira, University of Colorado Denver, USA
 !
-!> \date August 2016
+!> \date December 2021
 !
 !> \ingroup OTHERauxiliary
 !
-!> \par Contributors:
-!  ==================
-!>
-!> Weslley Pereira, University of Colorado Denver, USA
-!
 !> \par Further Details:
 !  =====================
 !>
 !> \verbatim
 !>
+!> Based on the algorithm from
+!>
 !>  Anderson E. (2017)
 !>  Algorithm 978: Safe Scaling in the Level 1 BLAS
 !>  ACM Trans Math Softw 44:1--28
@ -117,7 +115,7 @@
 subroutine CLARTG( f, g, c, s, r )
   use LA_CONSTANTS, &
   only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, &
-         rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax
+         safmin=>ssafmin, safmax=>ssafmax
 !
 !  -- LAPACK auxiliary routine --
 !  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r )
   complex(wp)        f, g, r, s
 !  ..
 !  .. Local Scalars ..
-   real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
+   real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
   complex(wp) :: fs, gs, t
 !  ..
 !  .. Intrinsic Functions ..
@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r )
 !  .. Statement Function definitions ..
   ABSSQ( t ) = real( t )**2 + aimag( t )**2
 !  ..
+!  .. Constants ..
+   rtmin = sqrt( safmin )
+!  ..
 !  .. Executable Statements ..
 !
   if( g == czero ) then
@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r )
      r = f
   else if( f == czero ) then
      c = zero
-      g1 = max( abs(real(g)), abs(aimag(g)) )
-      if( g1 > rtmin .and. g1 < rtmax ) then
+      if( real(g) == zero ) then
+         r = abs(aimag(g))
+         s = conjg( g ) / r
+      elseif( aimag(g) == zero ) then
+         r = abs(real(g))
+         s = conjg( g ) / r
+      else
+         g1 = max( abs(real(g)), abs(aimag(g)) )
+         rtmax = sqrt( safmax/2 )
+         if( g1 > rtmin .and. g1 < rtmax ) then
 !
 !        Use unscaled algorithm
 !
-         g2 = ABSSQ( g )
-         d = sqrt( g2 )
-         s = conjg( g ) / d
-         r = d
-      else
+!           The following two lines can be replaced by `d = abs( g )`.
+!           This algorithm does not use the intrinsic complex abs.
+            g2 = ABSSQ( g )
+            d = sqrt( g2 )
+            s = conjg( g ) / d
+            r = d
+         else
 !
 !        Use scaled algorithm
 !
-         u = min( safmax, max( safmin, g1 ) )
-         uu = one / u
-         gs = g*uu
-         g2 = ABSSQ( gs )
-         d = sqrt( g2 )
-         s = conjg( gs ) / d
-         r = d*u
+            u = min( safmax, max( safmin, g1 ) )
+            gs = g / u
+!           The following two lines can be replaced by `d = abs( gs )`.
+!           This algorithm does not use the intrinsic complex abs.
+            g2 = ABSSQ( gs )
+            d = sqrt( g2 )
+            s = conjg( gs ) / d
+            r = d*u
+         end if
      end if
   else
      f1 = max( abs(real(f)), abs(aimag(f)) )
      g1 = max( abs(real(g)), abs(aimag(g)) )
+      rtmax = sqrt( safmax/4 )
      if( f1 > rtmin .and. f1 < rtmax .and. &
          g1 > rtmin .and. g1 < rtmax ) then
 !
@ -181,32 +195,51 @@ subroutine CLARTG( f, g, c, s, r )
         f2 = ABSSQ( f )
         g2 = ABSSQ( g )
         h2 = f2 + g2
-         if( f2 > rtmin .and. h2 < rtmax ) then
-            d = sqrt( f2*h2 )
+         ! safmin <= f2 <= h2 <= safmax 
+         if( f2 >= h2 * safmin ) then
+            ! safmin <= f2/h2 <= 1, and h2/f2 is finite
+            c = sqrt( f2 / h2 )
+            r = f / c
+            rtmax = rtmax * 2
+            if( f2 > rtmin .and. h2 < rtmax ) then
+               ! safmin <= sqrt( f2*h2 ) <= safmax
+               s = conjg( g ) * ( f / sqrt( f2*h2 ) )
+            else
+               s = conjg( g ) * ( r / h2 )
+            end if
         else
-            d = sqrt( f2 )*sqrt( h2 )
+            ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
+            ! Moreover,
+            !  safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
+            !  sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
+            ! Also,
+            !  g2 >> f2, which means that h2 = g2.
+            d = sqrt( f2 * h2 )
+            c = f2 / d
+            if( c >= safmin ) then
+               r = f / c
+            else
+               ! f2 / sqrt(f2 * h2) < safmin, then
+               !  sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
+               r = f * ( h2 / d )
+            end if
+            s = conjg( g ) * ( f / d )
         end if
-         p = 1 / d
-         c = f2*p
-         s = conjg( g )*( f*p )
-         r = f*( h2*p )
      else
 !
 !        Use scaled algorithm
 !
         u = min( safmax, max( safmin, f1, g1 ) )
-         uu = one / u
-         gs = g*uu
+         gs = g / u
         g2 = ABSSQ( gs )
-         if( f1*uu < rtmin ) then
+         if( f1 / u < rtmin ) then
 !
 !           f is not well-scaled when scaled by g1.
 !           Use a different scaling for f.
 !
            v = min( safmax, max( safmin, f1 ) )
-            vv = one / v
-            w = v * uu
-            fs = f*vv
+            w = v / u
+            fs = f / v
            f2 = ABSSQ( fs )
            h2 = f2*w**2 + g2
         else
@ -214,19 +247,43 @@ subroutine CLARTG( f, g, c, s, r )
 !           Otherwise use the same scaling for f and g.
 !
            w = one
-            fs = f*uu
+            fs = f / u
            f2 = ABSSQ( fs )
            h2 = f2 + g2
         end if
-         if( f2 > rtmin .and. h2 < rtmax ) then
-            d = sqrt( f2*h2 )
+         ! safmin <= f2 <= h2 <= safmax 
+         if( f2 >= h2 * safmin ) then
+            ! safmin <= f2/h2 <= 1, and h2/f2 is finite
+            c = sqrt( f2 / h2 )
+            r = fs / c
+            rtmax = rtmax * 2
+            if( f2 > rtmin .and. h2 < rtmax ) then
+               ! safmin <= sqrt( f2*h2 ) <= safmax
+               s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
+            else
+               s = conjg( gs ) * ( r / h2 )
+            end if
         else
-            d = sqrt( f2 )*sqrt( h2 )
+            ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
+            ! Moreover,
+            !  safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
+            !  sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
+            ! Also,
+            !  g2 >> f2, which means that h2 = g2.
+            d = sqrt( f2 * h2 )
+            c = f2 / d
+            if( c >= safmin ) then
+               r = fs / c
+            else
+               ! f2 / sqrt(f2 * h2) < safmin, then
+               !  sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
+               r = fs * ( h2 / d )
+            end if
+            s = conjg( gs ) * ( fs / d )
         end if
-         p = 1 / d
-         c = ( f2*p )*w
-         s = conjg( gs )*( fs*p )
-         r = ( fs*( h2*p ) )*u
+         ! Rescale c and r
+         c = c * w
+         r = r * u
      end if
   end if
   return
--- a/lapack-netlib/SRC/dlartg.f90
+++ b/lapack-netlib/SRC/dlartg.f90
@ -11,7 +11,7 @@
 !       SUBROUTINE DLARTG( F, G, C, S, R )
 !
 !       .. Scalar Arguments ..
-!       REAL(wp)      C, F, G, R, S
+!       REAL(wp)          C, F, G, R, S
 !       ..
 !
 !> \par Purpose:
@ -45,8 +45,6 @@
 !>       floating point operations (saves work in DBDSQR when
 !>       there are zeros on the diagonal).
 !>
-!> If F exceeds G in magnitude, C will be positive.
-!>
 !> Below, wp=>dp stands for double precision from LA_CONSTANTS module.
 !> \endverbatim
 !
@ -112,7 +110,7 @@
 subroutine DLARTG( f, g, c, s, r )
   use LA_CONSTANTS, &
   only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, &
-         rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax
+         safmin=>dsafmin, safmax=>dsafmax
 !
 !  -- LAPACK auxiliary routine --
 !  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r )
   real(wp) :: c, f, g, r, s
 !  ..
 !  .. Local Scalars ..
-   real(wp) :: d, f1, fs, g1, gs, p, u, uu
+   real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax
 !  ..
 !  .. Intrinsic Functions ..
   intrinsic :: abs, sign, sqrt
 !  ..
+!  .. Constants ..
+   rtmin = sqrt( safmin )
+   rtmax = sqrt( safmax/2 )
+!  ..
 !  .. Executable Statements ..
 !
   f1 = abs( f )
@ -143,20 +145,18 @@ subroutine DLARTG( f, g, c, s, r )
   else if( f1 > rtmin .and. f1 < rtmax .and. &
            g1 > rtmin .and. g1 < rtmax ) then
      d = sqrt( f*f + g*g )
-      p = one / d
-      c = f1*p
-      s = g*sign( p, f )
+      c = f1 / d
      r = sign( d, f )
+      s = g / r
   else
      u = min( safmax, max( safmin, f1, g1 ) )
-      uu = one / u
-      fs = f*uu
-      gs = g*uu
+      fs = f / u
+      gs = g / u
      d = sqrt( fs*fs + gs*gs )
-      p = one / d
-      c = abs( fs )*p
-      s = gs*sign( p, f )
-      r = sign( d, f )*u
+      c = abs( fs ) / d
+      r = sign( d, f )
+      s = gs / r
+      r = r*u
   end if
   return
 end subroutine
--- a/lapack-netlib/SRC/slartg.f90
+++ b/lapack-netlib/SRC/slartg.f90
@ -35,7 +35,7 @@
 !> square root of the sum of squares.
 !>
 !> This version is discontinuous in R at F = 0 but it returns the same
-!> C and S as SLARTG for complex inputs (F,0) and (G,0).
+!> C and S as CLARTG for complex inputs (F,0) and (G,0).
 !>
 !> This is a more accurate version of the BLAS1 routine SROTG,
 !> with the following other differences:
@ -45,8 +45,6 @@
 !>       floating point operations (saves work in SBDSQR when
 !>       there are zeros on the diagonal).
 !>
-!> If F exceeds G in magnitude, C will be positive.
-!>
 !> Below, wp=>sp stands for single precision from LA_CONSTANTS module.
 !> \endverbatim
 !
@ -112,7 +110,7 @@
 subroutine SLARTG( f, g, c, s, r )
   use LA_CONSTANTS, &
   only: wp=>sp, zero=>szero, half=>shalf, one=>sone, &
-         rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax
+         safmin=>ssafmin, safmax=>ssafmax
 !
 !  -- LAPACK auxiliary routine --
 !  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@ -123,11 +121,15 @@ subroutine SLARTG( f, g, c, s, r )
   real(wp) :: c, f, g, r, s
 !  ..
 !  .. Local Scalars ..
-   real(wp) :: d, f1, fs, g1, gs, p, u, uu
+   real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax
 !  ..
 !  .. Intrinsic Functions ..
   intrinsic :: abs, sign, sqrt
 !  ..
+!  .. Constants ..
+   rtmin = sqrt( safmin )
+   rtmax = sqrt( safmax/2 )
+!  ..
 !  .. Executable Statements ..
 !
   f1 = abs( f )
@ -143,20 +145,18 @@ subroutine SLARTG( f, g, c, s, r )
   else if( f1 > rtmin .and. f1 < rtmax .and. &
            g1 > rtmin .and. g1 < rtmax ) then
      d = sqrt( f*f + g*g )
-      p = one / d
-      c = f1*p
-      s = g*sign( p, f )
+      c = f1 / d
      r = sign( d, f )
+      s = g / r
   else
      u = min( safmax, max( safmin, f1, g1 ) )
-      uu = one / u
-      fs = f*uu
-      gs = g*uu
+      fs = f / u
+      gs = g / u
      d = sqrt( fs*fs + gs*gs )
-      p = one / d
-      c = abs( fs )*p
-      s = gs*sign( p, f )
-      r = sign( d, f )*u
+      c = abs( fs ) / d
+      r = sign( d, f )
+      s = gs / r
+      r = r*u
   end if
   return
 end subroutine
--- a/lapack-netlib/SRC/zlartg.f90
+++ b/lapack-netlib/SRC/zlartg.f90
@ -11,8 +11,8 @@
 !       SUBROUTINE ZLARTG( F, G, C, S, R )
 !
 !       .. Scalar Arguments ..
-!       REAL(wp)           C
-!       COMPLEX(wp)        F, G, R, S
+!       REAL(wp)              C
+!       COMPLEX(wp)           F, G, R, S
 !       ..
 !
 !> \par Purpose:
@ -30,7 +30,7 @@
 !> The mathematical formulas used for C and S are
 !>
 !>    sgn(x) = {  x / |x|,   x != 0
-!>             {  1,         x = 0
+!>             {  1,         x  = 0
 !>
 !>    R = sgn(F) * sqrt(|F|**2 + |G|**2)
 !>
@ -38,6 +38,10 @@
 !>
 !>    S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
 !>
+!> Special conditions:
+!>    If G=0, then C=1 and S=0.
+!>    If F=0, then C=0 and S is chosen so that R is real.
+!>
 !> When F and G are real, the formulas simplify to C = F/R and
 !> S = G/R, and the returned values of C, S, and R should be
 !> identical to those returned by DLARTG.
@ -46,11 +50,8 @@
 !> to avoid overflow or underflow in computing the square root of the
 !> sum of squares.
 !>
-!> This is a faster version of the BLAS1 routine ZROTG, except for
-!> the following differences:
-!>    F and G are unchanged on return.
-!>    If G=0, then C=1 and S=0.
-!>    If F=0, then C=0 and S is chosen so that R is real.
+!> This is the same routine as ZROTG from BLAS1, except that
+!> F and G are unchanged on return.
 !>
 !> Below, wp=>dp stands for double precision from LA_CONSTANTS module.
 !> \endverbatim
@ -91,22 +92,19 @@
 !  Authors:
 !  ========
 !
-!> \author Edward Anderson, Lockheed Martin
+!> \author Weslley Pereira, University of Colorado Denver, USA
 !
-!> \date August 2016
+!> \date December 2021
 !
 !> \ingroup OTHERauxiliary
 !
-!> \par Contributors:
-!  ==================
-!>
-!> Weslley Pereira, University of Colorado Denver, USA
-!
 !> \par Further Details:
 !  =====================
 !>
 !> \verbatim
 !>
+!> Based on the algorithm from
+!>
 !>  Anderson E. (2017)
 !>  Algorithm 978: Safe Scaling in the Level 1 BLAS
 !>  ACM Trans Math Softw 44:1--28
@ -117,7 +115,7 @@
 subroutine ZLARTG( f, g, c, s, r )
   use LA_CONSTANTS, &
   only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, &
-         rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax
+         safmin=>dsafmin, safmax=>dsafmax
 !
 !  -- LAPACK auxiliary routine --
 !  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@ -129,7 +127,7 @@ subroutine ZLARTG( f, g, c, s, r )
   complex(wp)        f, g, r, s
 !  ..
 !  .. Local Scalars ..
-   real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
+   real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
   complex(wp) :: fs, gs, t
 !  ..
 !  .. Intrinsic Functions ..
@ -141,6 +139,9 @@ subroutine ZLARTG( f, g, c, s, r )
 !  .. Statement Function definitions ..
   ABSSQ( t ) = real( t )**2 + aimag( t )**2
 !  ..
+!  .. Constants ..
+   rtmin = sqrt( safmin )
+!  ..
 !  .. Executable Statements ..
 !
   if( g == czero ) then
@ -149,30 +150,43 @@ subroutine ZLARTG( f, g, c, s, r )
      r = f
   else if( f == czero ) then
      c = zero
-      g1 = max( abs(real(g)), abs(aimag(g)) )
-      if( g1 > rtmin .and. g1 < rtmax ) then
+      if( real(g) == zero ) then
+         r = abs(aimag(g))
+         s = conjg( g ) / r
+      elseif( aimag(g) == zero ) then
+         r = abs(real(g))
+         s = conjg( g ) / r
+      else
+         g1 = max( abs(real(g)), abs(aimag(g)) )
+         rtmax = sqrt( safmax/2 )
+         if( g1 > rtmin .and. g1 < rtmax ) then
 !
 !        Use unscaled algorithm
 !
-         g2 = ABSSQ( g )
-         d = sqrt( g2 )
-         s = conjg( g ) / d
-         r = d
-      else
+!           The following two lines can be replaced by `d = abs( g )`.
+!           This algorithm does not use the intrinsic complex abs.
+            g2 = ABSSQ( g )
+            d = sqrt( g2 )
+            s = conjg( g ) / d
+            r = d
+         else
 !
 !        Use scaled algorithm
 !
-         u = min( safmax, max( safmin, g1 ) )
-         uu = one / u
-         gs = g*uu
-         g2 = ABSSQ( gs )
-         d = sqrt( g2 )
-         s = conjg( gs ) / d
-         r = d*u
+            u = min( safmax, max( safmin, g1 ) )
+            gs = g / u
+!           The following two lines can be replaced by `d = abs( gs )`.
+!           This algorithm does not use the intrinsic complex abs.
+            g2 = ABSSQ( gs )
+            d = sqrt( g2 )
+            s = conjg( gs ) / d
+            r = d*u
+         end if
      end if
   else
      f1 = max( abs(real(f)), abs(aimag(f)) )
      g1 = max( abs(real(g)), abs(aimag(g)) )
+      rtmax = sqrt( safmax/4 )
      if( f1 > rtmin .and. f1 < rtmax .and. &
          g1 > rtmin .and. g1 < rtmax ) then
 !
@ -181,32 +195,51 @@ subroutine ZLARTG( f, g, c, s, r )
         f2 = ABSSQ( f )
         g2 = ABSSQ( g )
         h2 = f2 + g2
-         if( f2 > rtmin .and. h2 < rtmax ) then
-            d = sqrt( f2*h2 )
+         ! safmin <= f2 <= h2 <= safmax 
+         if( f2 >= h2 * safmin ) then
+            ! safmin <= f2/h2 <= 1, and h2/f2 is finite
+            c = sqrt( f2 / h2 )
+            r = f / c
+            rtmax = rtmax * 2
+            if( f2 > rtmin .and. h2 < rtmax ) then
+               ! safmin <= sqrt( f2*h2 ) <= safmax
+               s = conjg( g ) * ( f / sqrt( f2*h2 ) )
+            else
+               s = conjg( g ) * ( r / h2 )
+            end if
         else
-            d = sqrt( f2 )*sqrt( h2 )
+            ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
+            ! Moreover,
+            !  safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
+            !  sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
+            ! Also,
+            !  g2 >> f2, which means that h2 = g2.
+            d = sqrt( f2 * h2 )
+            c = f2 / d
+            if( c >= safmin ) then
+               r = f / c
+            else
+               ! f2 / sqrt(f2 * h2) < safmin, then
+               !  sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
+               r = f * ( h2 / d )
+            end if
+            s = conjg( g ) * ( f / d )
         end if
-         p = 1 / d
-         c = f2*p
-         s = conjg( g )*( f*p )
-         r = f*( h2*p )
      else
 !
 !        Use scaled algorithm
 !
         u = min( safmax, max( safmin, f1, g1 ) )
-         uu = one / u
-         gs = g*uu
+         gs = g / u
         g2 = ABSSQ( gs )
-         if( f1*uu < rtmin ) then
+         if( f1 / u < rtmin ) then
 !
 !           f is not well-scaled when scaled by g1.
 !           Use a different scaling for f.
 !
            v = min( safmax, max( safmin, f1 ) )
-            vv = one / v
-            w = v * uu
-            fs = f*vv
+            w = v / u
+            fs = f / v
            f2 = ABSSQ( fs )
            h2 = f2*w**2 + g2
         else
@ -214,19 +247,43 @@ subroutine ZLARTG( f, g, c, s, r )
 !           Otherwise use the same scaling for f and g.
 !
            w = one
-            fs = f*uu
+            fs = f / u
            f2 = ABSSQ( fs )
            h2 = f2 + g2
         end if
-         if( f2 > rtmin .and. h2 < rtmax ) then
-            d = sqrt( f2*h2 )
+         ! safmin <= f2 <= h2 <= safmax 
+         if( f2 >= h2 * safmin ) then
+            ! safmin <= f2/h2 <= 1, and h2/f2 is finite
+            c = sqrt( f2 / h2 )
+            r = fs / c
+            rtmax = rtmax * 2
+            if( f2 > rtmin .and. h2 < rtmax ) then
+               ! safmin <= sqrt( f2*h2 ) <= safmax
+               s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
+            else
+               s = conjg( gs ) * ( r / h2 )
+            end if
         else
-            d = sqrt( f2 )*sqrt( h2 )
+            ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
+            ! Moreover,
+            !  safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
+            !  sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
+            ! Also,
+            !  g2 >> f2, which means that h2 = g2.
+            d = sqrt( f2 * h2 )
+            c = f2 / d
+            if( c >= safmin ) then
+               r = fs / c
+            else
+               ! f2 / sqrt(f2 * h2) < safmin, then
+               !  sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
+               r = fs * ( h2 / d )
+            end if
+            s = conjg( gs ) * ( fs / d )
         end if
-         p = 1 / d
-         c = ( f2*p )*w
-         s = conjg( gs )*( fs*p )
-         r = ( fs*( h2*p ) )*u
+         ! Rescale c and r
+         c = c * w
+         r = r * u
      end if
   end if
   return