Merge pull request #3834 from martin-frbg/lapack631
Use new algorithms for computing Givens rotations (Reference-LAPACK PR631)
This commit is contained in:
commit
0b68dd6a9b
|
@ -30,7 +30,7 @@
|
|||
!> The mathematical formulas used for C and S are
|
||||
!>
|
||||
!> sgn(x) = { x / |x|, x != 0
|
||||
!> { 1, x = 0
|
||||
!> { 1, x = 0
|
||||
!>
|
||||
!> R = sgn(F) * sqrt(|F|**2 + |G|**2)
|
||||
!>
|
||||
|
@ -38,19 +38,20 @@
|
|||
!>
|
||||
!> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
|
||||
!>
|
||||
!> Special conditions:
|
||||
!> If G=0, then C=1 and S=0.
|
||||
!> If F=0, then C=0 and S is chosen so that R is real.
|
||||
!>
|
||||
!> When F and G are real, the formulas simplify to C = F/R and
|
||||
!> S = G/R, and the returned values of C, S, and R should be
|
||||
!> identical to those returned by CLARTG.
|
||||
!> identical to those returned by SLARTG.
|
||||
!>
|
||||
!> The algorithm used to compute these quantities incorporates scaling
|
||||
!> to avoid overflow or underflow in computing the square root of the
|
||||
!> sum of squares.
|
||||
!>
|
||||
!> This is a faster version of the BLAS1 routine CROTG, except for
|
||||
!> the following differences:
|
||||
!> F and G are unchanged on return.
|
||||
!> If G=0, then C=1 and S=0.
|
||||
!> If F=0, then C=0 and S is chosen so that R is real.
|
||||
!> This is the same routine as CROTG from BLAS1, except that
|
||||
!> F and G are unchanged on return.
|
||||
!>
|
||||
!> Below, wp=>sp stands for single precision from LA_CONSTANTS module.
|
||||
!> \endverbatim
|
||||
|
@ -91,22 +92,19 @@
|
|||
! Authors:
|
||||
! ========
|
||||
!
|
||||
!> \author Edward Anderson, Lockheed Martin
|
||||
!> \author Weslley Pereira, University of Colorado Denver, USA
|
||||
!
|
||||
!> \date August 2016
|
||||
!> \date December 2021
|
||||
!
|
||||
!> \ingroup OTHERauxiliary
|
||||
!
|
||||
!> \par Contributors:
|
||||
! ==================
|
||||
!>
|
||||
!> Weslley Pereira, University of Colorado Denver, USA
|
||||
!
|
||||
!> \par Further Details:
|
||||
! =====================
|
||||
!>
|
||||
!> \verbatim
|
||||
!>
|
||||
!> Based on the algorithm from
|
||||
!>
|
||||
!> Anderson E. (2017)
|
||||
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
|
||||
!> ACM Trans Math Softw 44:1--28
|
||||
|
@ -117,7 +115,7 @@
|
|||
subroutine CLARTG( f, g, c, s, r )
|
||||
use LA_CONSTANTS, &
|
||||
only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, &
|
||||
rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax
|
||||
safmin=>ssafmin, safmax=>ssafmax
|
||||
!
|
||||
! -- LAPACK auxiliary routine --
|
||||
! -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
|
@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r )
|
|||
complex(wp) f, g, r, s
|
||||
! ..
|
||||
! .. Local Scalars ..
|
||||
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
|
||||
real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
|
||||
complex(wp) :: fs, gs, t
|
||||
! ..
|
||||
! .. Intrinsic Functions ..
|
||||
|
@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r )
|
|||
! .. Statement Function definitions ..
|
||||
ABSSQ( t ) = real( t )**2 + aimag( t )**2
|
||||
! ..
|
||||
! .. Constants ..
|
||||
rtmin = sqrt( safmin )
|
||||
! ..
|
||||
! .. Executable Statements ..
|
||||
!
|
||||
if( g == czero ) then
|
||||
|
@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r )
|
|||
r = f
|
||||
else if( f == czero ) then
|
||||
c = zero
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
if( g1 > rtmin .and. g1 < rtmax ) then
|
||||
if( real(g) == zero ) then
|
||||
r = abs(aimag(g))
|
||||
s = conjg( g ) / r
|
||||
elseif( aimag(g) == zero ) then
|
||||
r = abs(real(g))
|
||||
s = conjg( g ) / r
|
||||
else
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
rtmax = sqrt( safmax/2 )
|
||||
if( g1 > rtmin .and. g1 < rtmax ) then
|
||||
!
|
||||
! Use unscaled algorithm
|
||||
!
|
||||
g2 = ABSSQ( g )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( g ) / d
|
||||
r = d
|
||||
else
|
||||
! The following two lines can be replaced by `d = abs( g )`.
|
||||
! This algorithm does not use the intrinsic complex abs.
|
||||
g2 = ABSSQ( g )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( g ) / d
|
||||
r = d
|
||||
else
|
||||
!
|
||||
! Use scaled algorithm
|
||||
!
|
||||
u = min( safmax, max( safmin, g1 ) )
|
||||
uu = one / u
|
||||
gs = g*uu
|
||||
g2 = ABSSQ( gs )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( gs ) / d
|
||||
r = d*u
|
||||
u = min( safmax, max( safmin, g1 ) )
|
||||
gs = g / u
|
||||
! The following two lines can be replaced by `d = abs( gs )`.
|
||||
! This algorithm does not use the intrinsic complex abs.
|
||||
g2 = ABSSQ( gs )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( gs ) / d
|
||||
r = d*u
|
||||
end if
|
||||
end if
|
||||
else
|
||||
f1 = max( abs(real(f)), abs(aimag(f)) )
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
rtmax = sqrt( safmax/4 )
|
||||
if( f1 > rtmin .and. f1 < rtmax .and. &
|
||||
g1 > rtmin .and. g1 < rtmax ) then
|
||||
!
|
||||
|
@ -181,32 +195,51 @@ subroutine CLARTG( f, g, c, s, r )
|
|||
f2 = ABSSQ( f )
|
||||
g2 = ABSSQ( g )
|
||||
h2 = f2 + g2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
d = sqrt( f2*h2 )
|
||||
! safmin <= f2 <= h2 <= safmax
|
||||
if( f2 >= h2 * safmin ) then
|
||||
! safmin <= f2/h2 <= 1, and h2/f2 is finite
|
||||
c = sqrt( f2 / h2 )
|
||||
r = f / c
|
||||
rtmax = rtmax * 2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
! safmin <= sqrt( f2*h2 ) <= safmax
|
||||
s = conjg( g ) * ( f / sqrt( f2*h2 ) )
|
||||
else
|
||||
s = conjg( g ) * ( r / h2 )
|
||||
end if
|
||||
else
|
||||
d = sqrt( f2 )*sqrt( h2 )
|
||||
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
|
||||
! Moreover,
|
||||
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
|
||||
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
|
||||
! Also,
|
||||
! g2 >> f2, which means that h2 = g2.
|
||||
d = sqrt( f2 * h2 )
|
||||
c = f2 / d
|
||||
if( c >= safmin ) then
|
||||
r = f / c
|
||||
else
|
||||
! f2 / sqrt(f2 * h2) < safmin, then
|
||||
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
|
||||
r = f * ( h2 / d )
|
||||
end if
|
||||
s = conjg( g ) * ( f / d )
|
||||
end if
|
||||
p = 1 / d
|
||||
c = f2*p
|
||||
s = conjg( g )*( f*p )
|
||||
r = f*( h2*p )
|
||||
else
|
||||
!
|
||||
! Use scaled algorithm
|
||||
!
|
||||
u = min( safmax, max( safmin, f1, g1 ) )
|
||||
uu = one / u
|
||||
gs = g*uu
|
||||
gs = g / u
|
||||
g2 = ABSSQ( gs )
|
||||
if( f1*uu < rtmin ) then
|
||||
if( f1 / u < rtmin ) then
|
||||
!
|
||||
! f is not well-scaled when scaled by g1.
|
||||
! Use a different scaling for f.
|
||||
!
|
||||
v = min( safmax, max( safmin, f1 ) )
|
||||
vv = one / v
|
||||
w = v * uu
|
||||
fs = f*vv
|
||||
w = v / u
|
||||
fs = f / v
|
||||
f2 = ABSSQ( fs )
|
||||
h2 = f2*w**2 + g2
|
||||
else
|
||||
|
@ -214,19 +247,43 @@ subroutine CLARTG( f, g, c, s, r )
|
|||
! Otherwise use the same scaling for f and g.
|
||||
!
|
||||
w = one
|
||||
fs = f*uu
|
||||
fs = f / u
|
||||
f2 = ABSSQ( fs )
|
||||
h2 = f2 + g2
|
||||
end if
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
d = sqrt( f2*h2 )
|
||||
! safmin <= f2 <= h2 <= safmax
|
||||
if( f2 >= h2 * safmin ) then
|
||||
! safmin <= f2/h2 <= 1, and h2/f2 is finite
|
||||
c = sqrt( f2 / h2 )
|
||||
r = fs / c
|
||||
rtmax = rtmax * 2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
! safmin <= sqrt( f2*h2 ) <= safmax
|
||||
s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
|
||||
else
|
||||
s = conjg( gs ) * ( r / h2 )
|
||||
end if
|
||||
else
|
||||
d = sqrt( f2 )*sqrt( h2 )
|
||||
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
|
||||
! Moreover,
|
||||
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
|
||||
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
|
||||
! Also,
|
||||
! g2 >> f2, which means that h2 = g2.
|
||||
d = sqrt( f2 * h2 )
|
||||
c = f2 / d
|
||||
if( c >= safmin ) then
|
||||
r = fs / c
|
||||
else
|
||||
! f2 / sqrt(f2 * h2) < safmin, then
|
||||
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
|
||||
r = fs * ( h2 / d )
|
||||
end if
|
||||
s = conjg( gs ) * ( fs / d )
|
||||
end if
|
||||
p = 1 / d
|
||||
c = ( f2*p )*w
|
||||
s = conjg( gs )*( fs*p )
|
||||
r = ( fs*( h2*p ) )*u
|
||||
! Rescale c and r
|
||||
c = c * w
|
||||
r = r * u
|
||||
end if
|
||||
end if
|
||||
return
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
! SUBROUTINE DLARTG( F, G, C, S, R )
|
||||
!
|
||||
! .. Scalar Arguments ..
|
||||
! REAL(wp) C, F, G, R, S
|
||||
! REAL(wp) C, F, G, R, S
|
||||
! ..
|
||||
!
|
||||
!> \par Purpose:
|
||||
|
@ -45,8 +45,6 @@
|
|||
!> floating point operations (saves work in DBDSQR when
|
||||
!> there are zeros on the diagonal).
|
||||
!>
|
||||
!> If F exceeds G in magnitude, C will be positive.
|
||||
!>
|
||||
!> Below, wp=>dp stands for double precision from LA_CONSTANTS module.
|
||||
!> \endverbatim
|
||||
!
|
||||
|
@ -112,7 +110,7 @@
|
|||
subroutine DLARTG( f, g, c, s, r )
|
||||
use LA_CONSTANTS, &
|
||||
only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, &
|
||||
rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax
|
||||
safmin=>dsafmin, safmax=>dsafmax
|
||||
!
|
||||
! -- LAPACK auxiliary routine --
|
||||
! -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
|
@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r )
|
|||
real(wp) :: c, f, g, r, s
|
||||
! ..
|
||||
! .. Local Scalars ..
|
||||
real(wp) :: d, f1, fs, g1, gs, p, u, uu
|
||||
real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax
|
||||
! ..
|
||||
! .. Intrinsic Functions ..
|
||||
intrinsic :: abs, sign, sqrt
|
||||
! ..
|
||||
! .. Constants ..
|
||||
rtmin = sqrt( safmin )
|
||||
rtmax = sqrt( safmax/2 )
|
||||
! ..
|
||||
! .. Executable Statements ..
|
||||
!
|
||||
f1 = abs( f )
|
||||
|
@ -143,20 +145,18 @@ subroutine DLARTG( f, g, c, s, r )
|
|||
else if( f1 > rtmin .and. f1 < rtmax .and. &
|
||||
g1 > rtmin .and. g1 < rtmax ) then
|
||||
d = sqrt( f*f + g*g )
|
||||
p = one / d
|
||||
c = f1*p
|
||||
s = g*sign( p, f )
|
||||
c = f1 / d
|
||||
r = sign( d, f )
|
||||
s = g / r
|
||||
else
|
||||
u = min( safmax, max( safmin, f1, g1 ) )
|
||||
uu = one / u
|
||||
fs = f*uu
|
||||
gs = g*uu
|
||||
fs = f / u
|
||||
gs = g / u
|
||||
d = sqrt( fs*fs + gs*gs )
|
||||
p = one / d
|
||||
c = abs( fs )*p
|
||||
s = gs*sign( p, f )
|
||||
r = sign( d, f )*u
|
||||
c = abs( fs ) / d
|
||||
r = sign( d, f )
|
||||
s = gs / r
|
||||
r = r*u
|
||||
end if
|
||||
return
|
||||
end subroutine
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
!> square root of the sum of squares.
|
||||
!>
|
||||
!> This version is discontinuous in R at F = 0 but it returns the same
|
||||
!> C and S as SLARTG for complex inputs (F,0) and (G,0).
|
||||
!> C and S as CLARTG for complex inputs (F,0) and (G,0).
|
||||
!>
|
||||
!> This is a more accurate version of the BLAS1 routine SROTG,
|
||||
!> with the following other differences:
|
||||
|
@ -45,8 +45,6 @@
|
|||
!> floating point operations (saves work in SBDSQR when
|
||||
!> there are zeros on the diagonal).
|
||||
!>
|
||||
!> If F exceeds G in magnitude, C will be positive.
|
||||
!>
|
||||
!> Below, wp=>sp stands for single precision from LA_CONSTANTS module.
|
||||
!> \endverbatim
|
||||
!
|
||||
|
@ -112,7 +110,7 @@
|
|||
subroutine SLARTG( f, g, c, s, r )
|
||||
use LA_CONSTANTS, &
|
||||
only: wp=>sp, zero=>szero, half=>shalf, one=>sone, &
|
||||
rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax
|
||||
safmin=>ssafmin, safmax=>ssafmax
|
||||
!
|
||||
! -- LAPACK auxiliary routine --
|
||||
! -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
|
@ -123,11 +121,15 @@ subroutine SLARTG( f, g, c, s, r )
|
|||
real(wp) :: c, f, g, r, s
|
||||
! ..
|
||||
! .. Local Scalars ..
|
||||
real(wp) :: d, f1, fs, g1, gs, p, u, uu
|
||||
real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax
|
||||
! ..
|
||||
! .. Intrinsic Functions ..
|
||||
intrinsic :: abs, sign, sqrt
|
||||
! ..
|
||||
! .. Constants ..
|
||||
rtmin = sqrt( safmin )
|
||||
rtmax = sqrt( safmax/2 )
|
||||
! ..
|
||||
! .. Executable Statements ..
|
||||
!
|
||||
f1 = abs( f )
|
||||
|
@ -143,20 +145,18 @@ subroutine SLARTG( f, g, c, s, r )
|
|||
else if( f1 > rtmin .and. f1 < rtmax .and. &
|
||||
g1 > rtmin .and. g1 < rtmax ) then
|
||||
d = sqrt( f*f + g*g )
|
||||
p = one / d
|
||||
c = f1*p
|
||||
s = g*sign( p, f )
|
||||
c = f1 / d
|
||||
r = sign( d, f )
|
||||
s = g / r
|
||||
else
|
||||
u = min( safmax, max( safmin, f1, g1 ) )
|
||||
uu = one / u
|
||||
fs = f*uu
|
||||
gs = g*uu
|
||||
fs = f / u
|
||||
gs = g / u
|
||||
d = sqrt( fs*fs + gs*gs )
|
||||
p = one / d
|
||||
c = abs( fs )*p
|
||||
s = gs*sign( p, f )
|
||||
r = sign( d, f )*u
|
||||
c = abs( fs ) / d
|
||||
r = sign( d, f )
|
||||
s = gs / r
|
||||
r = r*u
|
||||
end if
|
||||
return
|
||||
end subroutine
|
||||
|
|
|
@ -11,8 +11,8 @@
|
|||
! SUBROUTINE ZLARTG( F, G, C, S, R )
|
||||
!
|
||||
! .. Scalar Arguments ..
|
||||
! REAL(wp) C
|
||||
! COMPLEX(wp) F, G, R, S
|
||||
! REAL(wp) C
|
||||
! COMPLEX(wp) F, G, R, S
|
||||
! ..
|
||||
!
|
||||
!> \par Purpose:
|
||||
|
@ -30,7 +30,7 @@
|
|||
!> The mathematical formulas used for C and S are
|
||||
!>
|
||||
!> sgn(x) = { x / |x|, x != 0
|
||||
!> { 1, x = 0
|
||||
!> { 1, x = 0
|
||||
!>
|
||||
!> R = sgn(F) * sqrt(|F|**2 + |G|**2)
|
||||
!>
|
||||
|
@ -38,6 +38,10 @@
|
|||
!>
|
||||
!> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
|
||||
!>
|
||||
!> Special conditions:
|
||||
!> If G=0, then C=1 and S=0.
|
||||
!> If F=0, then C=0 and S is chosen so that R is real.
|
||||
!>
|
||||
!> When F and G are real, the formulas simplify to C = F/R and
|
||||
!> S = G/R, and the returned values of C, S, and R should be
|
||||
!> identical to those returned by DLARTG.
|
||||
|
@ -46,11 +50,8 @@
|
|||
!> to avoid overflow or underflow in computing the square root of the
|
||||
!> sum of squares.
|
||||
!>
|
||||
!> This is a faster version of the BLAS1 routine ZROTG, except for
|
||||
!> the following differences:
|
||||
!> F and G are unchanged on return.
|
||||
!> If G=0, then C=1 and S=0.
|
||||
!> If F=0, then C=0 and S is chosen so that R is real.
|
||||
!> This is the same routine as ZROTG from BLAS1, except that
|
||||
!> F and G are unchanged on return.
|
||||
!>
|
||||
!> Below, wp=>dp stands for double precision from LA_CONSTANTS module.
|
||||
!> \endverbatim
|
||||
|
@ -91,22 +92,19 @@
|
|||
! Authors:
|
||||
! ========
|
||||
!
|
||||
!> \author Edward Anderson, Lockheed Martin
|
||||
!> \author Weslley Pereira, University of Colorado Denver, USA
|
||||
!
|
||||
!> \date August 2016
|
||||
!> \date December 2021
|
||||
!
|
||||
!> \ingroup OTHERauxiliary
|
||||
!
|
||||
!> \par Contributors:
|
||||
! ==================
|
||||
!>
|
||||
!> Weslley Pereira, University of Colorado Denver, USA
|
||||
!
|
||||
!> \par Further Details:
|
||||
! =====================
|
||||
!>
|
||||
!> \verbatim
|
||||
!>
|
||||
!> Based on the algorithm from
|
||||
!>
|
||||
!> Anderson E. (2017)
|
||||
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
|
||||
!> ACM Trans Math Softw 44:1--28
|
||||
|
@ -117,7 +115,7 @@
|
|||
subroutine ZLARTG( f, g, c, s, r )
|
||||
use LA_CONSTANTS, &
|
||||
only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, &
|
||||
rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax
|
||||
safmin=>dsafmin, safmax=>dsafmax
|
||||
!
|
||||
! -- LAPACK auxiliary routine --
|
||||
! -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
|
@ -129,7 +127,7 @@ subroutine ZLARTG( f, g, c, s, r )
|
|||
complex(wp) f, g, r, s
|
||||
! ..
|
||||
! .. Local Scalars ..
|
||||
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
|
||||
real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
|
||||
complex(wp) :: fs, gs, t
|
||||
! ..
|
||||
! .. Intrinsic Functions ..
|
||||
|
@ -141,6 +139,9 @@ subroutine ZLARTG( f, g, c, s, r )
|
|||
! .. Statement Function definitions ..
|
||||
ABSSQ( t ) = real( t )**2 + aimag( t )**2
|
||||
! ..
|
||||
! .. Constants ..
|
||||
rtmin = sqrt( safmin )
|
||||
! ..
|
||||
! .. Executable Statements ..
|
||||
!
|
||||
if( g == czero ) then
|
||||
|
@ -149,30 +150,43 @@ subroutine ZLARTG( f, g, c, s, r )
|
|||
r = f
|
||||
else if( f == czero ) then
|
||||
c = zero
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
if( g1 > rtmin .and. g1 < rtmax ) then
|
||||
if( real(g) == zero ) then
|
||||
r = abs(aimag(g))
|
||||
s = conjg( g ) / r
|
||||
elseif( aimag(g) == zero ) then
|
||||
r = abs(real(g))
|
||||
s = conjg( g ) / r
|
||||
else
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
rtmax = sqrt( safmax/2 )
|
||||
if( g1 > rtmin .and. g1 < rtmax ) then
|
||||
!
|
||||
! Use unscaled algorithm
|
||||
!
|
||||
g2 = ABSSQ( g )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( g ) / d
|
||||
r = d
|
||||
else
|
||||
! The following two lines can be replaced by `d = abs( g )`.
|
||||
! This algorithm does not use the intrinsic complex abs.
|
||||
g2 = ABSSQ( g )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( g ) / d
|
||||
r = d
|
||||
else
|
||||
!
|
||||
! Use scaled algorithm
|
||||
!
|
||||
u = min( safmax, max( safmin, g1 ) )
|
||||
uu = one / u
|
||||
gs = g*uu
|
||||
g2 = ABSSQ( gs )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( gs ) / d
|
||||
r = d*u
|
||||
u = min( safmax, max( safmin, g1 ) )
|
||||
gs = g / u
|
||||
! The following two lines can be replaced by `d = abs( gs )`.
|
||||
! This algorithm does not use the intrinsic complex abs.
|
||||
g2 = ABSSQ( gs )
|
||||
d = sqrt( g2 )
|
||||
s = conjg( gs ) / d
|
||||
r = d*u
|
||||
end if
|
||||
end if
|
||||
else
|
||||
f1 = max( abs(real(f)), abs(aimag(f)) )
|
||||
g1 = max( abs(real(g)), abs(aimag(g)) )
|
||||
rtmax = sqrt( safmax/4 )
|
||||
if( f1 > rtmin .and. f1 < rtmax .and. &
|
||||
g1 > rtmin .and. g1 < rtmax ) then
|
||||
!
|
||||
|
@ -181,32 +195,51 @@ subroutine ZLARTG( f, g, c, s, r )
|
|||
f2 = ABSSQ( f )
|
||||
g2 = ABSSQ( g )
|
||||
h2 = f2 + g2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
d = sqrt( f2*h2 )
|
||||
! safmin <= f2 <= h2 <= safmax
|
||||
if( f2 >= h2 * safmin ) then
|
||||
! safmin <= f2/h2 <= 1, and h2/f2 is finite
|
||||
c = sqrt( f2 / h2 )
|
||||
r = f / c
|
||||
rtmax = rtmax * 2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
! safmin <= sqrt( f2*h2 ) <= safmax
|
||||
s = conjg( g ) * ( f / sqrt( f2*h2 ) )
|
||||
else
|
||||
s = conjg( g ) * ( r / h2 )
|
||||
end if
|
||||
else
|
||||
d = sqrt( f2 )*sqrt( h2 )
|
||||
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
|
||||
! Moreover,
|
||||
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
|
||||
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
|
||||
! Also,
|
||||
! g2 >> f2, which means that h2 = g2.
|
||||
d = sqrt( f2 * h2 )
|
||||
c = f2 / d
|
||||
if( c >= safmin ) then
|
||||
r = f / c
|
||||
else
|
||||
! f2 / sqrt(f2 * h2) < safmin, then
|
||||
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
|
||||
r = f * ( h2 / d )
|
||||
end if
|
||||
s = conjg( g ) * ( f / d )
|
||||
end if
|
||||
p = 1 / d
|
||||
c = f2*p
|
||||
s = conjg( g )*( f*p )
|
||||
r = f*( h2*p )
|
||||
else
|
||||
!
|
||||
! Use scaled algorithm
|
||||
!
|
||||
u = min( safmax, max( safmin, f1, g1 ) )
|
||||
uu = one / u
|
||||
gs = g*uu
|
||||
gs = g / u
|
||||
g2 = ABSSQ( gs )
|
||||
if( f1*uu < rtmin ) then
|
||||
if( f1 / u < rtmin ) then
|
||||
!
|
||||
! f is not well-scaled when scaled by g1.
|
||||
! Use a different scaling for f.
|
||||
!
|
||||
v = min( safmax, max( safmin, f1 ) )
|
||||
vv = one / v
|
||||
w = v * uu
|
||||
fs = f*vv
|
||||
w = v / u
|
||||
fs = f / v
|
||||
f2 = ABSSQ( fs )
|
||||
h2 = f2*w**2 + g2
|
||||
else
|
||||
|
@ -214,19 +247,43 @@ subroutine ZLARTG( f, g, c, s, r )
|
|||
! Otherwise use the same scaling for f and g.
|
||||
!
|
||||
w = one
|
||||
fs = f*uu
|
||||
fs = f / u
|
||||
f2 = ABSSQ( fs )
|
||||
h2 = f2 + g2
|
||||
end if
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
d = sqrt( f2*h2 )
|
||||
! safmin <= f2 <= h2 <= safmax
|
||||
if( f2 >= h2 * safmin ) then
|
||||
! safmin <= f2/h2 <= 1, and h2/f2 is finite
|
||||
c = sqrt( f2 / h2 )
|
||||
r = fs / c
|
||||
rtmax = rtmax * 2
|
||||
if( f2 > rtmin .and. h2 < rtmax ) then
|
||||
! safmin <= sqrt( f2*h2 ) <= safmax
|
||||
s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
|
||||
else
|
||||
s = conjg( gs ) * ( r / h2 )
|
||||
end if
|
||||
else
|
||||
d = sqrt( f2 )*sqrt( h2 )
|
||||
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
|
||||
! Moreover,
|
||||
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
|
||||
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
|
||||
! Also,
|
||||
! g2 >> f2, which means that h2 = g2.
|
||||
d = sqrt( f2 * h2 )
|
||||
c = f2 / d
|
||||
if( c >= safmin ) then
|
||||
r = fs / c
|
||||
else
|
||||
! f2 / sqrt(f2 * h2) < safmin, then
|
||||
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
|
||||
r = fs * ( h2 / d )
|
||||
end if
|
||||
s = conjg( gs ) * ( fs / d )
|
||||
end if
|
||||
p = 1 / d
|
||||
c = ( f2*p )*w
|
||||
s = conjg( gs )*( fs*p )
|
||||
r = ( fs*( h2*p ) )*u
|
||||
! Rescale c and r
|
||||
c = c * w
|
||||
r = r * u
|
||||
end if
|
||||
end if
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue