/* OpenBLAS/lapack-netlib/SRC/slarrv.c */

#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
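/* The f2c "complex"/"doublecomplex" structs above are assumed to be layout-compatible */
/* with C99 _Complex float/double (or MSVC _Fcomplex/_Dcomplex); the reinterpreting */
/* casts in _pCf/_pCd rely on exactly that. Cf/Cd read such a struct as a native */
/* complex value, and pCf(z)/pCd(z) are writable, e.g. pCf(&w) = conjf(Cf(&w)) */
/* conjugates w in place. */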
typedef blasint logical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {_Fcomplex a_ = Cf(a), b_ = Cf(b); float d_ = b_._Val[0]*b_._Val[0] + b_._Val[1]*b_._Val[1]; _pCf(c)->_Val[0] = (a_._Val[0]*b_._Val[0] + a_._Val[1]*b_._Val[1])/d_; _pCf(c)->_Val[1] = (a_._Val[1]*b_._Val[0] - a_._Val[0]*b_._Val[1])/d_;}
#define z_div(c, a, b) {_Dcomplex a_ = Cd(a), b_ = Cd(b); double d_ = b_._Val[0]*b_._Val[0] + b_._Val[1]*b_._Val[1]; _pCd(c)->_Val[0] = (a_._Val[0]*b_._Val[0] + a_._Val[1]*b_._Val[1])/d_; _pCd(c)->_Val[1] = (a_._Val[1]*b_._Val[0] - a_._Val[0]*b_._Val[1])/d_;}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle() continue;
#define myceiling(w) (ceil(w))
#define myhuge(w) (HUGE_VAL)
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
/* procedure parameter types for -A and -C++ */
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
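/* spow_ui and dpow_ui above implement exponentiation by squaring: the exponent is */
/* scanned bit by bit, so only about log2(|n|) multiplications are needed, and a */
/* negative exponent is handled by inverting the base first. A minimal usage sketch */
/* (the SLARRV_POW_EXAMPLE guard is hypothetical and never defined by the build): */
#ifdef SLARRV_POW_EXAMPLE
static void pow_ui_example_(void) {
double a = dpow_ui(2.0, 10); /* 1024.0, computed with three squarings */
double b = dpow_ui(2.0, -2); /* 0.25: base inverted to 0.5, then squared */
(void)a; (void)b;
}
#endif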
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
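/* pow_ii mirrors Fortran integer exponentiation: for n <= 0 the result is 1 when */
/* n == 0 or x == 1, +/-1 (by the parity of n) when x == -1, 0 when |x| > 1, and */
/* for x == 0 with a negative exponent it deliberately evaluates 1/x, i.e. an */
/* integer division by zero, mimicking the runtime error of 0**(-n). */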
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
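/* dmaxloc_ and smaxloc_ emulate the Fortran MAXLOC intrinsic on the slice w(s:e): */
/* they return the 1-based position of the first maximum relative to s (a result of */
/* 1 means w(s) itself is largest); the trailing n argument is unused. For example, */
/* w = {3,7,5} with s = 1, e = 3 gives 2. */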
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
_Fcomplex xi_ = Cf(&x[i]), yi_ = Cf(&y[i]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] + xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] - xi_._Val[1]*yi_._Val[0];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
_Fcomplex xi_ = Cf(&x[i*incx]), yi_ = Cf(&y[i*incy]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] + xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] - xi_._Val[1]*yi_._Val[0];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
_Dcomplex xi_ = Cd(&x[i]), yi_ = Cd(&y[i]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] + xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] - xi_._Val[1]*yi_._Val[0];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
_Dcomplex xi_ = Cd(&x[i*incx]), yi_ = Cd(&y[i*incy]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] + xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] - xi_._Val[1]*yi_._Val[0];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
_Fcomplex xi_ = Cf(&x[i]), yi_ = Cf(&y[i]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] - xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] + xi_._Val[1]*yi_._Val[0];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
_Fcomplex xi_ = Cf(&x[i*incx]), yi_ = Cf(&y[i*incy]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] - xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] + xi_._Val[1]*yi_._Val[0];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
_Dcomplex xi_ = Cd(&x[i]), yi_ = Cd(&y[i]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] - xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] + xi_._Val[1]*yi_._Val[0];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
_Dcomplex xi_ = Cd(&x[i*incx]), yi_ = Cd(&y[i*incy]);
zdotc._Val[0] += xi_._Val[0]*yi_._Val[0] - xi_._Val[1]*yi_._Val[1];
zdotc._Val[1] += xi_._Val[0]*yi_._Val[1] + xi_._Val[1]*yi_._Val[0];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)*y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
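/* The four helpers above follow the Fortran calling convention of the BLAS */
/* routines CDOTC/ZDOTC and CDOTU/ZDOTU: every argument is passed by pointer and */
/* the result is returned through the first argument. They are not needed by this */
/* real-arithmetic routine and are kept for uniformity with the other translated */
/* sources. A minimal, hypothetical usage sketch (the SLARRV_DOT_EXAMPLE guard is */
/* never defined by the build): */
#ifdef SLARRV_DOT_EXAMPLE
static void cdotc_example_(void) {
complex x[2] = {{1.f, 2.f}, {3.f, -1.f}};
complex y[2] = {{0.f, 1.f}, {2.f, 2.f}};
complex r;
integer n = 2, inc = 1;
cdotc_(&r, &n, x, &inc, y, &inc); /* r = conj(x(1))*y(1) + conj(x(2))*y(2) */
(void)r;
}
#endif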
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Table of constant values */
static real c_b5 = 0.f;
static integer c__1 = 1;
static integer c__2 = 2;
/* > \brief \b SLARRV computes the eigenvectors of the tridiagonal matrix T = L D LT given L, D and the eigenvalues of L D LT. */
/* =========== DOCUMENTATION =========== */
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
/* > \htmlonly */
/* > Download SLARRV + dependencies */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarrv.
f"> */
/* > [TGZ]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarrv.
f"> */
/* > [ZIP]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarrv.
f"> */
/* > [TXT]</a> */
/* > \endhtmlonly */
/* Definition: */
/* =========== */
/* SUBROUTINE SLARRV( N, VL, VU, D, L, PIVMIN, */
/* ISPLIT, M, DOL, DOU, MINRGP, */
/* RTOL1, RTOL2, W, WERR, WGAP, */
/* IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, */
/* WORK, IWORK, INFO ) */
/* INTEGER DOL, DOU, INFO, LDZ, M, N */
/* REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU */
/* INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), */
/* $ ISUPPZ( * ), IWORK( * ) */
/* REAL D( * ), GERS( * ), L( * ), W( * ), WERR( * ), */
/* $ WGAP( * ), WORK( * ) */
/* REAL Z( LDZ, * ) */
/* > \par Purpose: */
/* ============= */
/* > */
/* > \verbatim */
/* > */
/* > SLARRV computes the eigenvectors of the tridiagonal matrix */
/* > T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. */
/* > The input eigenvalues should have been computed by SLARRE. */
/* > \endverbatim */
/* Arguments: */
/* ========== */
/* > \param[in] N */
/* > \verbatim */
/* > N is INTEGER */
/* > The order of the matrix. N >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] VL */
/* > \verbatim */
/* > VL is REAL */
/* > Lower bound of the interval that contains the desired */
/* > eigenvalues. VL < VU. Needed to compute gaps on the left or right */
/* > end of the extremal eigenvalues in the desired RANGE. */
/* > \endverbatim */
/* > */
/* > \param[in] VU */
/* > \verbatim */
/* > VU is REAL */
/* > Upper bound of the interval that contains the desired */
/* > eigenvalues. VL < VU. */
/* > Note: VU is currently not used by this implementation of SLARRV; VU is */
/* > passed to SLARRV because it could be used to compute gaps on the right end */
/* > of the extremal eigenvalues. However, with not much initial accuracy in */
/* > LAMBDA and VU, the formula can lead to an overestimation of the right gap */
/* > and thus to inadequately early RQI 'convergence'. This is currently */
/* > prevented by forcing a small right gap. And so it turns out that VU */
/* > is currently not used by this implementation of SLARRV. */
/* > \endverbatim */
/* > */
/* > \param[in,out] D */
/* > \verbatim */
/* > D is REAL array, dimension (N) */
/* > On entry, the N diagonal elements of the diagonal matrix D. */
/* > On exit, D may be overwritten. */
/* > \endverbatim */
/* > */
/* > \param[in,out] L */
/* > \verbatim */
/* > L is REAL array, dimension (N) */
/* > On entry, the (N-1) subdiagonal elements of the unit */
/* > bidiagonal matrix L are in elements 1 to N-1 of L */
/* > (if the matrix is not split.) At the end of each block */
/* > is stored the corresponding shift as given by SLARRE. */
/* > On exit, L is overwritten. */
/* > \endverbatim */
/* > */
/* > \param[in] PIVMIN */
/* > \verbatim */
/* > PIVMIN is REAL */
/* > The minimum pivot allowed in the Sturm sequence. */
/* > \endverbatim */
/* > */
/* > \param[in] ISPLIT */
/* > \verbatim */
/* > ISPLIT is INTEGER array, dimension (N) */
/* > The splitting points, at which T breaks up into blocks. */
/* > The first block consists of rows/columns 1 to */
/* > ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
/* > through ISPLIT( 2 ), etc. */
/* > \endverbatim */
/* > */
/* > \param[in] M */
/* > \verbatim */
/* > M is INTEGER */
/* > The total number of input eigenvalues. 0 <= M <= N. */
/* > \endverbatim */
/* > */
/* > \param[in] DOL */
/* > \verbatim */
/* > DOL is INTEGER */
/* > \endverbatim */
/* > */
/* > \param[in] DOU */
/* > \verbatim */
/* > DOU is INTEGER */
/* > If only selected eigenvectors from all the supplied eigenvalues are */
/* > to be computed, an index range DOL:DOU can be specified. */
/* > Otherwise the setting DOL=1, DOU=M should be applied. */
/* > Note that DOL and DOU refer to the order in which the eigenvalues */
/* > are stored in W. */
/* > If the user wants to compute only selected eigenpairs, then */
/* > the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
/* > computed eigenvectors. All other columns of Z are set to zero. */
/* > \endverbatim */
/* > */
/* > \param[in] MINRGP */
/* > \verbatim */
/* > MINRGP is REAL */
/* > The minimum relative gap threshold: an eigenvalue is treated as a */
/* > well-separated singleton only if its relative gap to the neighboring */
/* > eigenvalues is at least MINRGP; otherwise a new RRR is computed for */
/* > the containing cluster. */
/* > \endverbatim */
/* > */
/* > \param[in] RTOL1 */
/* > \verbatim */
/* > RTOL1 is REAL */
/* > \endverbatim */
/* > */
/* > \param[in] RTOL2 */
/* > \verbatim */
/* > RTOL2 is REAL */
/* > Parameters for bisection. */
/* > An interval [LEFT,RIGHT] has converged if */
/* > RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
/* > \endverbatim */
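/* For illustration (all numbers hypothetical): with GAP = 1e-2, RTOL1 = 1e-3, */
/* RTOL2 = 4*EPS ~ 4.8e-7 and |LEFT|, |RIGHT| ~ 1, the threshold is */
/* MAX(1e-5, 4.8e-7) = 1e-5, so any interval narrower than 1e-5 is accepted */
/* as converged. */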
/* > */
/* > \param[in,out] W */
/* > \verbatim */
/* > W is REAL array, dimension (N) */
/* > The first M elements of W contain the APPROXIMATE eigenvalues for */
/* > which eigenvectors are to be computed. The eigenvalues */
/* > should be grouped by split-off block and ordered from */
/* > smallest to largest within the block ( The output array */
/* > W from SLARRE is expected here ). Furthermore, they are with */
/* > respect to the shift of the corresponding root representation */
/* > for their block. On exit, W holds the eigenvalues of the */
/* > UNshifted matrix. */
/* > \endverbatim */
/* > */
/* > \param[in,out] WERR */
/* > \verbatim */
/* > WERR is REAL array, dimension (N) */
/* > The first M elements contain the semiwidth of the uncertainty */
/* > interval of the corresponding eigenvalue in W */
/* > \endverbatim */
/* > */
/* > \param[in,out] WGAP */
/* > \verbatim */
/* > WGAP is REAL array, dimension (N) */
/* > The separation from the right neighbor eigenvalue in W. */
/* > \endverbatim */
/* > */
/* > \param[in] IBLOCK */
/* > \verbatim */
/* > IBLOCK is INTEGER array, dimension (N) */
/* > The indices of the blocks (submatrices) associated with the */
/* > corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
/* > W(i) belongs to the first block from the top, =2 if W(i) */
/* > belongs to the second block, etc. */
/* > \endverbatim */
/* > */
/* > \param[in] INDEXW */
/* > \verbatim */
/* > INDEXW is INTEGER array, dimension (N) */
/* > The indices of the eigenvalues within each block (submatrix); */
/* > for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
/* > i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
/* > \endverbatim */
/* > */
/* > \param[in] GERS */
/* > \verbatim */
/* > GERS is REAL array, dimension (2*N) */
/* > The N Gerschgorin intervals (the i-th Gerschgorin interval */
/* > is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
/* > be computed from the original UNshifted matrix. */
/* > \endverbatim */
/* > */
/* > \param[out] Z */
/* > \verbatim */
/* > Z is REAL array, dimension (LDZ, f2cmax(1,M) ) */
/* > If INFO = 0, the first M columns of Z contain the */
/* > orthonormal eigenvectors of the matrix T */
/* > corresponding to the input eigenvalues, with the i-th */
/* > column of Z holding the eigenvector associated with W(i). */
/* > Note: the user must ensure that at least f2cmax(1,M) columns are */
/* > supplied in the array Z. */
/* > \endverbatim */
/* > */
/* > \param[in] LDZ */
/* > \verbatim */
/* > LDZ is INTEGER */
/* > The leading dimension of the array Z. LDZ >= 1, and if */
/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */
/* > \endverbatim */
/* > */
/* > \param[out] ISUPPZ */
/* > \verbatim */
/* > ISUPPZ is INTEGER array, dimension ( 2*f2cmax(1,M) ) */
/* > The support of the eigenvectors in Z, i.e., the indices */
/* > indicating the nonzero elements in Z. The I-th eigenvector */
/* > is nonzero only in elements ISUPPZ( 2*I-1 ) through */
/* > ISUPPZ( 2*I ). */
/* > \endverbatim */
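/* For example, ISUPPZ(2*I-1) = 3 and ISUPPZ(2*I) = 7 state that only rows 3 */
/* through 7 of the I-th eigenvector can be nonzero, so subsequent operations */
/* on Z(:,I) may be restricted to that range. */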
/* > */
/* > \param[out] WORK */
/* > \verbatim */
/* > WORK is REAL array, dimension (12*N) */
/* > \endverbatim */
/* > */
/* > \param[out] IWORK */
/* > \verbatim */
/* > IWORK is INTEGER array, dimension (7*N) */
/* > \endverbatim */
/* > */
/* > \param[out] INFO */
/* > \verbatim */
/* > INFO is INTEGER */
/* > = 0: successful exit */
/* > */
/* > > 0: A problem occurred in SLARRV. */
/* > < 0: One of the called subroutines signaled an internal problem. */
/* > Needs inspection of the corresponding parameter IINFO */
/* > for further information. */
/* > */
/* > =-1: Problem in SLARRB when refining a child's eigenvalues. */
/* > =-2: Problem in SLARRF when computing the RRR of a child. */
/* > When a child is inside a tight cluster, it can be difficult */
/* > to find an RRR. A partial remedy from the user's point of */
/* > view is to make the parameter MINRGP smaller and recompile. */
/* > However, as the orthogonality of the computed vectors is */
/* > proportional to 1/MINRGP, the user should be aware that */
/* > precision may be traded off when MINRGP is decreased. */
/* > =-3: Problem in SLARRB when refining a single eigenvalue */
/* > after the Rayleigh correction was rejected. */
/* > = 5: The Rayleigh Quotient Iteration failed to converge to */
/* > full accuracy in MAXITR steps. */
/* > \endverbatim */
/* Authors: */
/* ======== */
/* > \author Univ. of Tennessee */
/* > \author Univ. of California Berkeley */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
/* > \date June 2016 */
/* > \ingroup realOTHERauxiliary */
/* > \par Contributors: */
/* ================== */
/* > */
/* > Beresford Parlett, University of California, Berkeley, USA \n */
/* > Jim Demmel, University of California, Berkeley, USA \n */
/* > Inderjit Dhillon, University of Texas, Austin, USA \n */
/* > Osni Marques, LBNL/NERSC, USA \n */
/* > Christof Voemel, University of California, Berkeley, USA */
/* ===================================================================== */
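/* Calling convention note: as in all f2c-translated LAPACK sources, every */
/* argument (including scalars such as N, DOL, DOU and the tolerances) is */
/* passed by pointer. The pointer decrements in the "Parameter adjustments" */
/* block below shift the C arrays so that the body can keep the original */
/* 1-based Fortran indexing. */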
/* Subroutine */ void slarrv_(integer *n, real *vl, real *vu, real *d__, real *
l, real *pivmin, integer *isplit, integer *m, integer *dol, integer *
dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr,
real *wgap, integer *iblock, integer *indexw, real *gers, real *z__,
integer *ldz, integer *isuppz, real *work, integer *iwork, integer *
info)
{
/* System generated locals */
integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
real r__1, r__2;
logical L__1;
/* Local variables */
integer iend, jblk;
real lgap;
integer done;
real rgap, left;
integer wend, iter;
real bstw;
integer minwsize, itmp1, i__, j, k, p, q, indld;
real fudge;
integer idone;
real sigma;
integer iinfo, iindr;
real resid;
extern /* Subroutine */ void sscal_(integer *, real *, real *, integer *);
logical eskip;
real right;
integer nclus, zfrom;
extern /* Subroutine */ void scopy_(integer *, real *, integer *, real *,
integer *);
real rqtol;
integer iindc1, iindc2, miniwsize;
extern /* Subroutine */ void slar1v_(integer *, integer *, integer *, real
*, real *, real *, real *, real *, real *, real *, real *,
logical *, integer *, real *, real *, integer *, integer *, real *
, real *, real *, real *);
logical stp2ii;
real lambda;
integer ii;
real gl;
integer im, in;
real gu;
integer ibegin, indeig;
logical needbs;
integer indlld;
real sgndef, mingma;
extern real slamch_(char *);
integer oldien, oldncl, wbegin, negcnt;
real spdiam;
integer oldcls;
real savgap;
integer ndepth;
real ssigma;
logical usedbs;
integer iindwk, offset;
real gaptol;
extern /* Subroutine */ void slarrb_(integer *, real *, real *, integer *,
integer *, real *, real *, integer *, real *, real *, real *,
real *, integer *, real *, real *, integer *, integer *), slarrf_(
integer *, real *, real *, real *, integer *, integer *, real *,
real *, real *, real *, real *, real *, real *, real *, real *,
real *, real *, integer *);
integer newcls, oldfst, indwrk, windex, oldlst;
logical usedrq;
integer newfst, newftt, parity, windmn, isupmn, newlst, windpl, zusedl,
newsiz, zusedu, zusedw;
real bstres, nrminv;
logical tryrqc;
integer isupmx;
real rqcorr;
extern /* Subroutine */ void slaset_(char *, integer *, integer *, real *,
real *, real *, integer *);
real gap, eps, tau, tol, tmp;
integer zto;
real ztz;
/* -- LAPACK auxiliary routine (version 3.8.0) -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/* June 2016 */
/* ===================================================================== */
/* Parameter adjustments */
--d__;
--l;
--isplit;
--w;
--werr;
--wgap;
--iblock;
--indexw;
--gers;
z_dim1 = *ldz;
z_offset = 1 + z_dim1 * 1;
z__ -= z_offset;
--isuppz;
--work;
--iwork;
/* Function Body */
*info = 0;
/* Quick return if possible */
if (*n <= 0 || *m <= 0) {
return;
}
/* The first N entries of WORK are reserved for the eigenvalues */
indld = *n + 1;
indlld = (*n << 1) + 1;
indwrk = *n * 3 + 1;
minwsize = *n * 12;
i__1 = minwsize;
for (i__ = 1; i__ <= i__1; ++i__) {
work[i__] = 0.f;
/* L5: */
}
/* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
/* factorization used to compute the FP vector */
iindr = 0;
/* IWORK(IINDC1+1:IINDC2+N) are used to store the clusters of the current */
/* layer and the one above. */
iindc1 = *n;
iindc2 = *n << 1;
iindwk = *n * 3 + 1;
miniwsize = *n * 7;
i__1 = miniwsize;
for (i__ = 1; i__ <= i__1; ++i__) {
iwork[i__] = 0;
/* L10: */
}
zusedl = 1;
if (*dol > 1) {
/* Set lower bound for use of Z */
zusedl = *dol - 1;
}
zusedu = *m;
if (*dou < *m) {
/* Set upper bound for use of Z */
zusedu = *dou + 1;
}
/* The width of the part of Z that is used */
zusedw = zusedu - zusedl + 1;
slaset_("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
eps = slamch_("Precision");
rqtol = eps * 2.f;
/* Set expert flags for standard code. */
tryrqc = TRUE_;
if (*dol == 1 && *dou == *m) {
} else {
/* Only selected eigenpairs are computed. Since the other evalues */
/* are not refined by RQ iteration, bisection has to compute to full */
/* accuracy. */
*rtol1 = eps * 4.f;
*rtol2 = eps * 4.f;
}
/* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
/* desired eigenvalues. The support of the nonzero eigenvector */
/* entries is contained in the interval IBEGIN:IEND. */
/* Remark that if k eigenpairs are desired, then the eigenvectors */
/* are stored in k contiguous columns of Z. */
/* DONE is the number of eigenvectors already computed */
done = 0;
ibegin = 1;
wbegin = 1;
i__1 = iblock[*m];
for (jblk = 1; jblk <= i__1; ++jblk) {
iend = isplit[jblk];
sigma = l[iend];
/* Find the eigenvectors of the submatrix indexed IBEGIN */
/* through IEND. */
wend = wbegin - 1;
L15:
if (wend < *m) {
if (iblock[wend + 1] == jblk) {
++wend;
goto L15;
}
}
if (wend < wbegin) {
ibegin = iend + 1;
goto L170;
} else if (wend < *dol || wbegin > *dou) {
ibegin = iend + 1;
wbegin = wend + 1;
goto L170;
}
/* Find local spectral diameter of the block */
gl = gers[(ibegin << 1) - 1];
gu = gers[ibegin * 2];
i__2 = iend;
for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
/* Computing MIN */
r__1 = gers[(i__ << 1) - 1];
gl = f2cmin(r__1,gl);
/* Computing MAX */
r__1 = gers[i__ * 2];
gu = f2cmax(r__1,gu);
/* L20: */
}
spdiam = gu - gl;
/* OLDIEN is the last index of the previous block */
oldien = ibegin - 1;
/* Calculate the size of the current block */
in = iend - ibegin + 1;
/* The number of eigenvalues in the current block */
im = wend - wbegin + 1;
/* This is for a 1x1 block */
if (ibegin == iend) {
++done;
z__[ibegin + wbegin * z_dim1] = 1.f;
isuppz[(wbegin << 1) - 1] = ibegin;
isuppz[wbegin * 2] = ibegin;
w[wbegin] += sigma;
work[wbegin] = w[wbegin];
ibegin = iend + 1;
++wbegin;
goto L170;
}
/* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
/* Note that these can be approximations; in this case, the corresponding */
/* entries of WERR give the size of the uncertainty interval. */
/* The eigenvalue approximations will be refined when necessary as */
/* high relative accuracy is required for the computation of the */
/* corresponding eigenvectors. */
scopy_(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
/* We store in W the eigenvalue approximations w.r.t. the original */
/* matrix T. */
i__2 = im;
for (i__ = 1; i__ <= i__2; ++i__) {
w[wbegin + i__ - 1] += sigma;
/* L30: */
}
/* NDEPTH is the current depth of the representation tree */
ndepth = 0;
/* PARITY is either 1 or 0 */
parity = 1;
/* NCLUS is the number of clusters for the next level of the */
/* representation tree, we start with NCLUS = 1 for the root */
nclus = 1;
iwork[iindc1 + 1] = 1;
iwork[iindc1 + 2] = im;
/* IDONE is the number of eigenvectors already computed in the current */
/* block */
idone = 0;
/* loop while( IDONE.LT.IM ) */
/* generate the representation tree for the current block and */
/* compute the eigenvectors */
L40:
if (idone < im) {
/* This is a crude protection against infinitely deep trees */
if (ndepth > *m) {
*info = -2;
return;
}
/* breadth first processing of the current level of the representation */
/* tree: OLDNCL = number of clusters on current level */
oldncl = nclus;
/* reset NCLUS to count the number of child clusters */
nclus = 0;
parity = 1 - parity;
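/* The cluster lists are double-buffered in IWORK: depending on PARITY, the */
/* clusters of the current level are read from the IINDC1 half and the child */
/* clusters are written to the IINDC2 half, or vice versa. */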
if (parity == 0) {
oldcls = iindc1;
newcls = iindc2;
} else {
oldcls = iindc2;
newcls = iindc1;
}
/* Process the clusters on the current level */
i__2 = oldncl;
for (i__ = 1; i__ <= i__2; ++i__) {
j = oldcls + (i__ << 1);
/* OLDFST, OLDLST = first, last index of current cluster. */
/* cluster indices start with 1 and are relative */
/* to WBEGIN when accessing W, WGAP, WERR, Z */
oldfst = iwork[j - 1];
oldlst = iwork[j];
if (ndepth > 0) {
/* Retrieve relatively robust representation (RRR) of cluster */
/* that has been computed at the previous level */
/* The RRR is stored in Z and overwritten once the eigenvectors */
/* have been computed or when the cluster is refined */
if (*dol == 1 && *dou == *m) {
/* Get representation from location of the leftmost evalue */
/* of the cluster */
j = wbegin + oldfst - 1;
} else {
if (wbegin + oldfst - 1 < *dol) {
/* Get representation from the left end of Z array */
j = *dol - 1;
} else if (wbegin + oldfst - 1 > *dou) {
/* Get representation from the right end of Z array */
j = *dou;
} else {
j = wbegin + oldfst - 1;
}
}
scopy_(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
, &c__1);
i__3 = in - 1;
scopy_(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
ibegin], &c__1);
sigma = z__[iend + (j + 1) * z_dim1];
/* Set the corresponding entries in Z to zero */
slaset_("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
* z_dim1], ldz);
}
/* Compute DL and DLL of current RRR */
i__3 = iend - 1;
for (j = ibegin; j <= i__3; ++j) {
tmp = d__[j] * l[j];
work[indld - 1 + j] = tmp;
work[indlld - 1 + j] = tmp * l[j];
/* L50: */
}
if (ndepth > 0) {
/* P and Q are the indices of the first and last eigenvalue to compute */
/* within the current block */
p = indexw[wbegin - 1 + oldfst];
q = indexw[wbegin - 1 + oldlst];
/* Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET */
/* through the Q-OFFSET elements of these arrays are to be used. */
/* OFFSET = P-OLDFST */
offset = indexw[wbegin] - 1;
/* perform limited bisection (if necessary) to get approximate */
/* eigenvalues to the precision needed. */
slarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
&q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
wbegin], &werr[wbegin], &work[indwrk], &iwork[
iindwk], pivmin, &spdiam, &in, &iinfo);
if (iinfo != 0) {
*info = -1;
return;
}
/* We also recompute the extremal gaps. W holds all eigenvalues */
/* of the unshifted matrix and must be used for computation */
/* of WGAP, the entries of WORK might stem from RRRs with */
/* different shifts. The gaps from WBEGIN-1+OLDFST to */
/* WBEGIN-1+OLDLST are correctly computed in SLARRB. */
/* However, we only allow the gaps to become greater since */
/* this is what should happen when we decrease WERR */
if (oldfst > 1) {
/* Computing MAX */
r__1 = wgap[wbegin + oldfst - 2], r__2 = w[wbegin +
oldfst - 1] - werr[wbegin + oldfst - 1] - w[
wbegin + oldfst - 2] - werr[wbegin + oldfst -
2];
wgap[wbegin + oldfst - 2] = f2cmax(r__1,r__2);
}
if (wbegin + oldlst - 1 < wend) {
/* Computing MAX */
r__1 = wgap[wbegin + oldlst - 1], r__2 = w[wbegin +
oldlst] - werr[wbegin + oldlst] - w[wbegin +
oldlst - 1] - werr[wbegin + oldlst - 1];
wgap[wbegin + oldlst - 1] = f2cmax(r__1,r__2);
}
/* Each time the eigenvalues in WORK get refined, we store */
/* the newly found approximation with all shifts applied in W */
i__3 = oldlst;
for (j = oldfst; j <= i__3; ++j) {
w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
/* L53: */
}
}
/* Process the current node. */
newfst = oldfst;
i__3 = oldlst;
for (j = oldfst; j <= i__3; ++j) {
if (j == oldlst) {
/* we are at the right end of the cluster, this is also the */
/* boundary of the child cluster */
newlst = j;
} else if (wgap[wbegin + j - 1] >= *minrgp * (r__1 = work[
wbegin + j - 1], abs(r__1))) {
/* the right relative gap is big enough, the child cluster */
/* (NEWFST,..,NEWLST) is well separated from the following */
newlst = j;
} else {
/* inside a child cluster, the relative gap is not */
/* big enough. */
goto L140;
}
/* Compute size of child cluster found */
newsiz = newlst - newfst + 1;
/* NEWFTT is the place in Z where the new RRR or the computed */
/* eigenvector is to be stored */
if (*dol == 1 && *dou == *m) {
/* Store representation at location of the leftmost evalue */
/* of the cluster */
newftt = wbegin + newfst - 1;
} else {
if (wbegin + newfst - 1 < *dol) {
/* Store representation at the left end of Z array */
newftt = *dol - 1;
} else if (wbegin + newfst - 1 > *dou) {
/* Store representation at the right end of Z array */
newftt = *dou;
} else {
newftt = wbegin + newfst - 1;
}
}
if (newsiz > 1) {
/* Current child is not a singleton but a cluster. */
/* Compute and store new representation of child. */
/* Compute left and right cluster gap. */
/* LGAP and RGAP are not computed from WORK because */
/* the eigenvalue approximations may stem from RRRs with */
/* different shifts. However, W holds all eigenvalues */
/* of the unshifted matrix. Still, the entries in WGAP */
/* have to be computed from WORK since the entries */
/* in W might be of the same order so that gaps are not */
/* exhibited correctly for very close eigenvalues. */
if (newfst == 1) {
/* Computing MAX */
r__1 = 0.f, r__2 = w[wbegin] - werr[wbegin] - *vl;
lgap = f2cmax(r__1,r__2);
} else {
lgap = wgap[wbegin + newfst - 2];
}
rgap = wgap[wbegin + newlst - 1];
/* Compute left- and rightmost eigenvalue of child */
/* to high precision in order to shift as close */
/* as possible and obtain as large relative gaps */
/* as possible */
for (k = 1; k <= 2; ++k) {
if (k == 1) {
p = indexw[wbegin - 1 + newfst];
} else {
p = indexw[wbegin - 1 + newlst];
}
offset = indexw[wbegin] - 1;
slarrb_(&in, &d__[ibegin], &work[indlld + ibegin
- 1], &p, &p, &rqtol, &rqtol, &offset, &
work[wbegin], &wgap[wbegin], &werr[wbegin]
, &work[indwrk], &iwork[iindwk], pivmin, &
spdiam, &in, &iinfo);
/* L55: */
}
if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
> *dou) {
/* if the cluster contains no desired eigenvalues */
/* skip the computation of that branch of the rep. tree */
/* We could skip before the refinement of the extremal */
/* eigenvalues of the child, but then the representation */
/* tree could be different from the one when nothing is */
/* skipped. For this reason we skip at this place. */
idone = idone + newlst - newfst + 1;
goto L139;
}
/* Compute RRR of child cluster. */
/* Note that the new RRR is stored in Z */
/* SLARRF needs LWORK = 2*N */
slarrf_(&in, &d__[ibegin], &l[ibegin], &work[indld +
ibegin - 1], &newfst, &newlst, &work[wbegin],
&wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
&rgap, pivmin, &tau, &z__[ibegin + newftt *
z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
&work[indwrk], &iinfo);
if (iinfo == 0) {
/* a new RRR for the cluster was found by SLARRF */
/* update shift and store it */
ssigma = sigma + tau;
z__[iend + (newftt + 1) * z_dim1] = ssigma;
/* WORK() are the midpoints and WERR() the semi-width */
/* Note that the entries in W are unchanged. */
i__4 = newlst;
for (k = newfst; k <= i__4; ++k) {
fudge = eps * 3.f * (r__1 = work[wbegin + k -
1], abs(r__1));
work[wbegin + k - 1] -= tau;
fudge += eps * 4.f * (r__1 = work[wbegin + k
- 1], abs(r__1));
/* Fudge errors */
werr[wbegin + k - 1] += fudge;
/* Gaps are not fudged. Provided that WERR is small */
/* when eigenvalues are close, a zero gap indicates */
/* that a new representation is needed for resolving */
/* the cluster. A fudge could lead to a wrong decision */
/* of judging eigenvalues 'separated' which in */
/* reality are not. This could have a negative impact */
/* on the orthogonality of the computed eigenvectors. */
/* L116: */
}
++nclus;
k = newcls + (nclus << 1);
iwork[k - 1] = newfst;
iwork[k] = newlst;
} else {
*info = -2;
return;
}
} else {
/* Compute eigenvector of singleton */
iter = 0;
tol = log((real) in) * 4.f * eps;
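/* TOL ~ 4*EPS*LOG(IN) is the residual tolerance used (together with a test */
/* on RQCORR) in the RQI convergence check below: iteration continues while */
/* RESID > TOL*GAP. It grows only logarithmically with the block size IN. */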
k = newfst;
windex = wbegin + k - 1;
/* Computing MAX */
i__4 = windex - 1;
windmn = f2cmax(i__4,1);
/* Computing MIN */
i__4 = windex + 1;
windpl = f2cmin(i__4,*m);
lambda = work[windex];
++done;
/* Check if eigenvector computation is to be skipped */
if (windex < *dol || windex > *dou) {
eskip = TRUE_;
goto L125;
} else {
eskip = FALSE_;
}
left = work[windex] - werr[windex];
right = work[windex] + werr[windex];
indeig = indexw[windex];
/* Note that since we compute the eigenpairs for a child, */
/* all eigenvalue approximations are w.r.t the same shift. */
/* In this case, the entries in WORK should be used for */
/* computing the gaps since they exhibit even very small */
/* differences in the eigenvalues, as opposed to the */
/* entries in W which might "look" the same. */
if (k == 1) {
/* In the case RANGE='I' and with not much initial */
/* accuracy in LAMBDA and VL, the formula */
/* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
/* can lead to an overestimation of the left gap and */
/* thus to inadequately early RQI 'convergence'. */
/* Prevent this by forcing a small left gap. */
/* Computing MAX */
r__1 = abs(left), r__2 = abs(right);
lgap = eps * f2cmax(r__1,r__2);
} else {
lgap = wgap[windmn];
}
if (k == im) {
/* In the case RANGE='I' and with not much initial */
/* accuracy in LAMBDA and VU, the formula */
/* can lead to an overestimation of the right gap and */
/* thus to inadequately early RQI 'convergence'. */
/* Prevent this by forcing a small right gap. */
/* Computing MAX */
r__1 = abs(left), r__2 = abs(right);
rgap = eps * f2cmax(r__1,r__2);
} else {
rgap = wgap[windex];
}
gap = f2cmin(lgap,rgap);
if (k == 1 || k == im) {
/* The eigenvector support can become wrong */
/* because significant entries could be cut off due to a */
/* large GAPTOL parameter in LAR1V. Prevent this. */
gaptol = 0.f;
} else {
gaptol = gap * eps;
}
isupmn = in;
isupmx = 1;
/* Update WGAP so that it holds the minimum gap */
/* to the left or the right. This is crucial in the */
/* case where bisection is used to ensure that the */
/* eigenvalue is refined up to the required precision. */
/* The correct value is restored afterwards. */
savgap = wgap[windex];
wgap[windex] = gap;
/* We want to use the Rayleigh Quotient Correction */
/* as often as possible since it converges quadratically */
/* when we are close enough to the desired eigenvalue. */
/* However, the Rayleigh Quotient can have the wrong sign */
/* and lead us away from the desired eigenvalue. In this */
/* case, the best we can do is to use bisection. */
usedbs = FALSE_;
usedrq = FALSE_;
/* Bisection is initially turned off unless it is forced */
needbs = ! tryrqc;
L120:
/* Check if bisection should be used to refine eigenvalue */
if (needbs) {
/* Take the bisection as new iterate */
usedbs = TRUE_;
itmp1 = iwork[iindr + windex];
offset = indexw[wbegin] - 1;
r__1 = eps * 2.f;
slarrb_(&in, &d__[ibegin], &work[indlld + ibegin
- 1], &indeig, &indeig, &c_b5, &r__1, &
offset, &work[wbegin], &wgap[wbegin], &
werr[wbegin], &work[indwrk], &iwork[
iindwk], pivmin, &spdiam, &itmp1, &iinfo);
if (iinfo != 0) {
*info = -3;
return;
}
lambda = work[windex];
/* Reset twist index from inaccurate LAMBDA to */
/* force computation of true MINGMA */
iwork[iindr + windex] = 0;
}
/* Given LAMBDA, compute the eigenvector. */
L__1 = ! usedbs;
slar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
ibegin], &work[indld + ibegin - 1], &work[
indlld + ibegin - 1], pivmin, &gaptol, &z__[
ibegin + windex * z_dim1], &L__1, &negcnt, &
ztz, &mingma, &iwork[iindr + windex], &isuppz[
(windex << 1) - 1], &nrminv, &resid, &rqcorr,
&work[indwrk]);
if (iter == 0) {
bstres = resid;
bstw = lambda;
} else if (resid < bstres) {
bstres = resid;
bstw = lambda;
}
/* Computing MIN */
i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
isupmn = f2cmin(i__4,i__5);
/* Computing MAX */
i__4 = isupmx, i__5 = isuppz[windex * 2];
isupmx = f2cmax(i__4,i__5);
++iter;
/* sin alpha <= |resid|/gap */
/* Note that both the residual and the gap are */
/* proportional to the matrix, so ||T|| doesn't play */
/* a role in the quotient */
/* Convergence test for Rayleigh-Quotient iteration */
/* (omitted when Bisection has been used) */
if (resid > tol * gap && abs(rqcorr) > rqtol * abs(
lambda) && ! usedbs) {
/* We need to check that the RQCORR update doesn't */
/* move the eigenvalue away from the desired one and */
/* towards a neighbor. -> protection with bisection */
if (indeig <= negcnt) {
/* The wanted eigenvalue lies to the left */
sgndef = -1.f;
} else {
/* The wanted eigenvalue lies to the right */
sgndef = 1.f;
}
/* We only use the RQCORR if it improves the */
/* iterate reasonably. */
if (rqcorr * sgndef >= 0.f && lambda + rqcorr <=
right && lambda + rqcorr >= left) {
usedrq = TRUE_;
/* Store new midpoint of bisection interval in WORK */
if (sgndef == 1.f) {
/* The current LAMBDA is on the left of the true */
/* eigenvalue */
left = lambda;
/* We prefer to assume that the error estimate */
/* is correct. We could treat the interval not */
/* as a strict bracket but modify it if the RQCORR */
/* suggests so. In this case, the RIGHT side should */
/* be modified as follows: */
/* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
} else {
/* The current LAMBDA is on the right of the true */
/* eigenvalue */
right = lambda;
/* See comment about assuming the error estimate is */
/* correct above. */
/* LEFT = MIN(LEFT, LAMBDA + RQCORR) */
}
work[windex] = (right + left) * .5f;
/* Take RQCORR since it has the correct sign and */
/* improves the iterate reasonably */
lambda += rqcorr;
/* Update width of error interval */
werr[windex] = (right - left) * .5f;
} else {
needbs = TRUE_;
}
if (right - left < rqtol * abs(lambda)) {
/* The eigenvalue is computed to bisection accuracy */
/* compute eigenvector and stop */
usedbs = TRUE_;
goto L120;
} else if (iter < 10) {
goto L120;
} else if (iter == 10) {
needbs = TRUE_;
goto L120;
} else {
*info = 5;
return;
}
} else {
stp2ii = FALSE_;
if (usedrq && usedbs && bstres <= resid) {
lambda = bstw;
stp2ii = TRUE_;
}
if (stp2ii) {
/* improve error angle by second step */
L__1 = ! usedbs;
slar1v_(&in, &c__1, &in, &lambda, &d__[ibegin]
, &l[ibegin], &work[indld + ibegin -
1], &work[indlld + ibegin - 1],
pivmin, &gaptol, &z__[ibegin + windex
* z_dim1], &L__1, &negcnt, &ztz, &
mingma, &iwork[iindr + windex], &
isuppz[(windex << 1) - 1], &nrminv, &
resid, &rqcorr, &work[indwrk]);
}
work[windex] = lambda;
}
/* Compute FP-vector support w.r.t. whole matrix */
isuppz[(windex << 1) - 1] += oldien;
isuppz[windex * 2] += oldien;
zfrom = isuppz[(windex << 1) - 1];
zto = isuppz[windex * 2];
isupmn += oldien;
isupmx += oldien;
/* Ensure vector is ok if support in the RQI has changed */
if (isupmn < zfrom) {
i__4 = zfrom - 1;
for (ii = isupmn; ii <= i__4; ++ii) {
z__[ii + windex * z_dim1] = 0.f;
/* L122: */
}
}
if (isupmx > zto) {
i__4 = isupmx;
for (ii = zto + 1; ii <= i__4; ++ii) {
z__[ii + windex * z_dim1] = 0.f;
/* L123: */
}
}
i__4 = zto - zfrom + 1;
sscal_(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
&c__1);
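/* Scale the nonzero part of the vector by NRMINV, the reciprocal norm */
/* returned by SLAR1V, so that the stored eigenvector has unit 2-norm. */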
L125:
/* Update W */
w[windex] = lambda + sigma;
/* Recompute the gaps on the left and right */
/* But only allow them to become larger and not */
/* smaller (which can only happen through "bad" */
/* cancellation and doesn't reflect the theory */
/* where the initial gaps are underestimated due */
/* to WERR being too crude.) */
if (! eskip) {
if (k > 1) {
/* Computing MAX */
r__1 = wgap[windmn], r__2 = w[windex] - werr[
windex] - w[windmn] - werr[windmn];
wgap[windmn] = f2cmax(r__1,r__2);
}
if (windex < wend) {
/* Computing MAX */
r__1 = savgap, r__2 = w[windpl] - werr[windpl]
- w[windex] - werr[windex];
wgap[windex] = f2cmax(r__1,r__2);
}
}
++idone;
}
/* here ends the code for the current child */
L139:
/* Proceed to any remaining child nodes */
newfst = j + 1;
L140:
;
}
/* L150: */
}
++ndepth;
goto L40;
}
ibegin = iend + 1;
wbegin = wend + 1;
L170:
;
}
return;
/* End of SLARRV */
} /* slarrv_ */