diff --git a/lapack-netlib/SRC/ctrsyl3.c b/lapack-netlib/SRC/ctrsyl3.c index d05923a46..70f265a14 100644 --- a/lapack-netlib/SRC/ctrsyl3.c +++ b/lapack-netlib/SRC/ctrsyl3.c @@ -157,6 +157,7 @@ struct Namelist { }; typedef struct Namelist Namelist; +#define exponent(x) #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (fabs(x)) #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) @@ -233,7 +234,9 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define myhuge_(w) HUGE_VAL //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 @@ -379,3 +382,1518 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ pCd(z) = zdotc; } #endif +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static complex c_b1 = {1.f,0.f}; +static integer c__1 = 1; +static integer c_n1 = -1; +static real c_b18 = 2.f; +static real c_b106 = 1.f; + +/* > \brief \b CTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CTRSYL3 solves the complex Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**H, and A and B are both upper triangular. A is */ +/* > M-by-M and B is N-by-N; the right hand side C and the solution X are */ +/* > M-by-N; and scale is an output scale factor, set <= 1 to avoid */ +/* > overflow in X. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,M) */ +/* > The upper triangular matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > The upper triangular matrix B. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is REAL array, dimension (MAX(2, ROWS), MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* > \ingroup complexSYcomputational */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
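+
+/* Editorial usage sketch (added in review; an illustration under stated */
+/* assumptions, not part of the LAPACK sources): the LDSWORK = -1        */
+/* workspace query described above, followed by the actual solve.  The   */
+/* helper name example_ctrsyl3_call is hypothetical; the f2c typedefs    */
+/* (integer, real, complex) are those defined earlier in this file, and  */
+/* A, B, C with their leading dimensions are assumed to be set up by the */
+/* caller.  Error handling is omitted.                                   */
+#include <stdlib.h>
+
+extern int ctrsyl3_(char *trana, char *tranb, integer *isgn, integer *m,
+                    integer *n, complex *a, integer *lda, complex *b,
+                    integer *ldb, complex *c__, integer *ldc, real *scale,
+                    real *swork, integer *ldswork, integer *info);
+
+static int example_ctrsyl3_call(integer m, integer n, complex *a, integer lda,
+                                complex *b, integer ldb, complex *c, integer ldc)
+{
+    integer isgn = 1, info = 0, ldswork = -1, rows, cols;
+    real scale, query[2], *swork;
+
+    /* Workspace query: SWORK(1) and SWORK(2) return the optimal ROWS and COLS. */
+    ctrsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
+             &scale, query, &ldswork, &info);
+    rows = (integer) query[0];
+    cols = (integer) query[1];
+
+    /* Allocate SWORK as a ROWS-by-COLS matrix and solve A*X + X*B = scale*C. */
+    swork = (real *) malloc((size_t) rows * (size_t) cols * sizeof(real));
+    ldswork = rows;
+    ctrsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
+             &scale, swork, &ldswork, &info);
+    free(swork);
+    return (int) info;
+}
+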
*/ + +/* ===================================================================== */ +/* Subroutine */ int ctrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, complex *a, integer *lda, complex *b, integer + *ldb, complex *c__, integer *ldc, real *scale, real *swork, integer * + ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + real scal; + complex csgn; + real anrm, bnrm, cnrm; + integer awrk, bwrk; + real *wnrm, xnrm; + integer i__, j, k, l; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer iinfo, i1, i2, j1, j2, k1, k2, l1, l2; +// extern integer myexp_(real *); + integer nb, jj, ll; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + real scaloc; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern real slarmm_(real *, real *, real *); + logical notrna, notrnb; + real smlnum; + extern /* Subroutine */ int ctrsyl_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, real *, integer *); + logical lquery; + integer nba, nbb; + real buf, sgn; + + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "CTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *ldswork == -1; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "C")) { + *info = -1; + } else if (! notrnb && ! 
lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (real*)malloc(f2cmax(*m,*n)*sizeof(real)); +/* Use unblocked code for small problems or if insufficient */ +/* workspace is provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb)) { + ctrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + +/* Set local scaling factors. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.f; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.f; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*m) + 1; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = clange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = clange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*n) + 1; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = clange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = clange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (real) (*isgn); + q__1.r = sgn, q__1.i = 0.f; + csgn.r = q__1.r, csgn.i = q__1.i; + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. 
*/ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = clange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = clange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + csscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("N", "N", &i__2, &i__3, &i__4, &q__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = clange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "N", &i__3, &i__4, &i__5, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**H *X + ISGN*X*B = scale*C. 
*/ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + + i__3 = k2 - k1; + i__4 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = clange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__4 = i__ * nb; + i2 = f2cmin(i__4,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = clange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
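+
+/* Note added in review: MYEXP_ (defined near the top of this file) uses    */
+/* frexpf() to return the binary exponent E of SCALOC, and C_B18 is the     */
+/* constant 2, so the statements below absorb the factor 2**E into BUF and  */
+/* divide the SWORK entries (and, further down, SCAMIN and SCALOC) by the   */
+/* same power of two.  Multiplying or dividing by a power of two does not   */
+/* introduce rounding error as long as no overflow or underflow occurs.     */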
*/ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + csscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("C", "N", &i__4, &i__5, &i__6, &q__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = clange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "N", &i__4, &i__5, &i__6, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**H *X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = clange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = clange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + csscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("C", "N", &i__3, &i__4, &i__5, &q__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = clange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "C", &i__3, &i__4, &i__5, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__1 = l * nb; + l2 = f2cmin(i__1,*n) + 1; + + i__1 = k2 - k1; + i__2 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = clange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = clange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + csscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("N", "N", &i__2, &i__3, &i__4, &q__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = clange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "C", &i__2, &i__3, &i__4, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + + } + + free(wnrm); + +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + r__1 = *scale, r__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(r__1,r__2); + } + } + if (*scale == 0.f) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is REAL. Set SCALE to */ +/* zero and give up. */ + + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1.f && buf > 0.f) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + r__1 = *scale / smlnum, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + *scale /= scaloc; + } + + if (buf != 1.f && buf > 0.f) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + +/* Computing MAX */ + i__1 = c_dim1 + 1; + r__3 = (r__1 = c__[i__1].r, abs(r__1)), r__4 = (r__2 = r_imag(&c__[ + c_dim1 + 1]), abs(r__2)); + scal = f2cmax(r__3,r__4); + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + i__3 = k + l * c_dim1; + r__3 = scal, r__4 = (r__1 = c__[i__3].r, abs(r__1)), r__3 = + f2cmax(r__3,r__4), r__4 = (r__2 = r_imag(&c__[k + l * + c_dim1]), abs(r__2)); + scal = f2cmax(r__3,r__4); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + r__1 = bignum / scal, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + clascl_("G", &c_n1, &c_n1, &c_b106, &scaloc, m, n, &c__[c_offset], + ldc, &iinfo); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. 
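+
+/* Note added in review: the statement below folds BUF into SCALE, so the    */
+/* returned solution X satisfies op(A)*X + ISGN*X*op(B) = SCALE*C.  A caller */
+/* that needs the unscaled solution can divide X by SCALE afterwards,        */
+/* provided SCALE > 0 and the rescaled entries remain representable.         */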
*/ + + *scale *= buf; + +/* Restore workspace dimensions */ + + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + + return 0; + +/* End of CTRSYL3 */ + +} /* ctrsyl3_ */ + diff --git a/lapack-netlib/SRC/dtrsyl3.c b/lapack-netlib/SRC/dtrsyl3.c index d05923a46..199baab75 100644 --- a/lapack-netlib/SRC/dtrsyl3.c +++ b/lapack-netlib/SRC/dtrsyl3.c @@ -157,6 +157,7 @@ struct Namelist { }; typedef struct Namelist Namelist; +#define exponent(x) #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (fabs(x)) #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) @@ -233,7 +234,9 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define myhuge_(w) HUGE_VAL //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 @@ -379,3 +382,1556 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ pCd(z) = zdotc; } #endif +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b19 = 2.; +static doublereal c_b31 = -1.; +static doublereal c_b32 = 1.; + +/* > \brief \b DTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRSYL3 solves the real Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**T, and A and B are both upper quasi- */ +/* > triangular. A is M-by-M and B is N-by-N; the right hand side C and */ +/* > the solution X are M-by-N; and scale is an output scale factor, set */ +/* > <= 1 to avoid overflow in X. */ +/* > */ +/* > A and B must be in Schur canonical form (as returned by DHSEQR), that */ +/* > is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ +/* > each 2-by-2 diagonal block has its diagonal elements equal and its */ +/* > off-diagonal elements of opposite sign. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'T': op(A) = A**T (Transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'T': op(B) = B**T (Transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,M) */ +/* > The upper quasi-triangular matrix A, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,N) */ +/* > The upper quasi-triangular matrix B, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) */ +/* > + ((N + NB - 1) / NB + 1), where NB is the optimal block size. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimension of the IWORK array, */ +/* > returns this value as the first entry of the IWORK array, and */ +/* > no error message related to LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). 
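+
+/* Editorial sketch (added in review; an illustration under stated           */
+/* assumptions, not part of the LAPACK sources): DTRSYL3 below partitions    */
+/* A and B so that no NB boundary splits a 2-by-2 diagonal block of the      */
+/* quasi-triangular matrices, recording the block start indices in IWORK.    */
+/* The helper example_partition is a hypothetical, condensed version of that */
+/* idea: it keeps 1-based block starts as IWORK does, uses 0-based C         */
+/* indexing into the column-major array A, and is simplified to test only    */
+/* the subdiagonal entry, which suffices for a matrix in Schur canonical     */
+/* form.  The actual routine additionally checks the superdiagonal entry.    */
+static integer example_partition(const doublereal *a, integer lda, integer m,
+                                 integer nb, integer *start)
+{
+    integer nblk = 0, p = 1;                   /* 1-based row index           */
+    while (p <= m) {
+        start[nblk++] = p;                     /* first row of this block     */
+        p += nb;                               /* tentative next block start  */
+        /* If A(p, p-1) is nonzero, the boundary would split a 2-by-2 block;  */
+        /* move it down by one row so the block stays in one partition.       */
+        if (p <= m && a[(p - 1) + (p - 2) * lda] != 0.)
+            ++p;
+    }
+    start[nblk] = m + 1;                       /* sentinel, like IWORK(NBA+1) */
+    return nblk;                               /* number of blocks (NBA)      */
+}
+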
*/ +/* > \endverbatim */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int dtrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *c__, integer *ldc, doublereal *scale, + integer *iwork, integer *liwork, doublereal *swork, integer *ldswork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3; + + /* Local variables */ + doublereal scal, anrm, bnrm, cnrm; + integer awrk, bwrk; + logical skip; + doublereal *wnrm, xnrm; + integer i__, j, k, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer iinfo, i1, i2, j1, j2, k1, k2, l1; +// extern integer myexp_(doublereal *); + integer l2, nb, pc, jj, ll; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + logical notrna, notrnb; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int dtrsyl_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *); + integer nba, nbb; + doublereal buf, sgn; + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --iwork; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. 
*/ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "DTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *liwork == -1 || *ldswork == -1; + iwork[1] = nba + nbb + 2; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "T") && ! lsame_( + trana, "C")) { + *info = -1; + } else if (! notrnb && ! lsame_(tranb, "T") && ! + lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (doublereal*)malloc(f2cmax(*m,*n)*sizeof(doublereal)); +/* Use unblocked code for small problems or if insufficient */ +/* workspaces are provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb) || *liwork < iwork[1]) { + dtrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Partition A such that 2-by-2 blocks on the diagonal are not split */ + + skip = FALSE_; + i__1 = nba; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = (i__ - 1) * nb + 1; + } + iwork[nba + 1] = *m + 1; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[k]; + l2 = iwork[k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *m) { +/* A( M, M ) is a 1-by-1 block */ + mycycle_(); + } + if (a[l + (l + 1) * a_dim1] != 0. && a[l + 1 + l * a_dim1] != 0.) + { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[k + 1]) { + ++iwork[k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[nba + 1] = *m + 1; + if (iwork[nba] >= iwork[nba + 1]) { + iwork[nba] = iwork[nba + 1]; + --nba; + } + +/* Partition B such that 2-by-2 blocks on the diagonal are not split */ + + pc = nba + 1; + skip = FALSE_; + i__1 = nbb; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[pc + i__] = (i__ - 1) * nb + 1; + } + iwork[pc + nbb + 1] = *n + 1; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[pc + k]; + l2 = iwork[pc + k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *n) { +/* B( N, N ) is a 1-by-1 block */ + mycycle_(); + } + if (b[l + (l + 1) * b_dim1] != 0. && b[l + 1 + l * b_dim1] != 0.) + { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[pc + k + 1]) { + ++iwork[pc + k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[pc + nbb + 1] = *n + 1; + if (iwork[pc + nbb] >= iwork[pc + nbb + 1]) { + iwork[pc + nbb] = iwork[pc + nbb + 1]; + --nbb; + } + +/* Set local scaling factors - must never attain zero. 
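+
+/* Note added in review: at this point IWORK(1:NBA+1) holds the starting     */
+/* indices of the diagonal blocks of A, with IWORK(NBA+1) = M+1 as sentinel, */
+/* and IWORK(PC+1:PC+NBB+1), where PC = NBA+1, holds those of B, with        */
+/* IWORK(PC+NBB+1) = N+1; the loops above have already adjusted both         */
+/* partitions so that no 2-by-2 diagonal block is split.                     */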
*/ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = iwork[l]; + l2 = iwork[l + 1]; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = dlange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = dlange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[pc + k]; + k2 = iwork[pc + k + 1]; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = dlange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = dlange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (doublereal) (*isgn); + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = dlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = dlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + dscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + dgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = dlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
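+
+   As a small numeric illustration of the consistency scaling a few
+   lines above: if SWORK( K, J ) = 1 and SWORK( K, L ) = 2**(-8), then
+   SCAMIN = 2**(-8); CNRM is multiplied by 2**(-8) because C( K, J )
+   will be brought down to the common factor, while XNRM is left
+   unchanged. After the update both blocks record SCAMIN * SCALOC. */
+
+/*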
*/ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "N", &i__3, &i__4, &i__5, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**T*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__3 = k2 - k1; + i__4 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = dlange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = dlange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + dscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + dgemm_("T", "N", &i__4, &i__5, &i__6, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = dlange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
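+
+   DLARMM( ANRM, XNRM, CNRM ) above returns a factor in ( 0, 1 ] such
+   that, once both operands are scaled by it, a linear update of the
+   form C := C - op( A ) * X, respectively C - SGN * X * op( B ),
+   cannot overflow; this is an informal summary of the LARMM contract. */
+
+/*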
*/ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "N", &i__4, &i__5, &i__6, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**T*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = dlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = dlange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + dscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + dgemm_("T", "N", &i__3, &i__4, &i__5, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = dlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
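+
+   The consistency factors are applied one block column at a time: each
+   DSCAL call below touches the K2 - K1 contiguous entries of a single
+   column of C, the unit-stride direction of this column-major layout. */
+
+/*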
*/ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "T", &i__3, &i__4, &i__5, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__1 = k2 - k1; + i__2 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = dlange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = dlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + dscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + dgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = dlange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "T", &i__2, &i__3, &i__4, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + + } + free(wnrm); +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + d__1 = *scale, d__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(d__1,d__2); + } + } + + if (*scale == 0.) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to */ +/* zero and give up. */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1. && buf > 0.) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + d__1 = *scale / smlnum, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + *scale /= scaloc; + } + if (buf != 1. && buf > 0.) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + + scal = c__[c_dim1 + 1]; + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + d__2 = scal, d__3 = (d__1 = c__[k + l * c_dim1], abs(d__1)); + scal = f2cmax(d__2,d__3); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + d__1 = bignum / scal, d__2 = 1. 
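+/* SCAL now holds the magnitude of the largest entry of C. The factor */
+/* chosen by the f2cmin below is capped by BIGNUM / SCAL, so that no */
+/* entry can overflow, and by 1 / BUF, which steers BUF back towards */
+/* one. */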
/ buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + dlascl_("G", &c_n1, &c_n1, &c_b32, &scaloc, m, n, &c__[c_offset], ldc, + &iwork[1]); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. */ + + *scale *= buf; + +/* Restore workspace dimensions */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + + return 0; + +/* End of DTRSYL3 */ + +} /* dtrsyl3_ */ + diff --git a/lapack-netlib/SRC/strsyl3.c b/lapack-netlib/SRC/strsyl3.c index d05923a46..85d68e017 100644 --- a/lapack-netlib/SRC/strsyl3.c +++ b/lapack-netlib/SRC/strsyl3.c @@ -157,6 +157,7 @@ struct Namelist { }; typedef struct Namelist Namelist; +#define exponent(x) #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (fabs(x)) #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) @@ -233,7 +234,9 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define myhuge_(w) HUGE_VAL //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) +static int my_expfunc(float* x) {int e; (void)frexpf(*x,&e); return e;} /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 @@ -379,3 +382,1561 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ pCd(z) = zdotc; } #endif +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static real c_b19 = 2.f; +static real c_b31 = -1.f; +static real c_b32 = 1.f; + +/* > \brief \b STRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > STRSYL3 solves the real Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**T, and A and B are both upper quasi- */ +/* > triangular. A is M-by-M and B is N-by-N; the right hand side C and */ +/* > the solution X are M-by-N; and scale is an output scale factor, set */ +/* > <= 1 to avoid overflow in X. */ +/* > */ +/* > A and B must be in Schur canonical form (as returned by SHSEQR), that */ +/* > is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ +/* > each 2-by-2 diagonal block has its diagonal elements equal and its */ +/* > off-diagonal elements of opposite sign. */ +/* > */ +/* > This is the block version of the algorithm. 
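+ */
+/* > */
+/* > A typical call sequence first queries the workspace sizes and */
+/* > then calls the routine again. An illustrative sketch (the */
+/* > variable names are the caller's, not part of this interface): */
+/* > */
+/* >   integer liwork = -1, ldswork = -1, iquery[1], info; */
+/* >   real squery[2], scale; */
+/* >   strsyl3_(trana, tranb, &isgn, &m, &n, a, &lda, b, &ldb, */
+/* >            c, &ldc, &scale, iquery, &liwork, squery, &ldswork, */
+/* >            &info); */
+/* >   .. allocate IWORK with iquery[0] entries and SWORK with */
+/* >   .. (integer) squery[0] rows and (integer) squery[1] columns, */
+/* >   .. then repeat the call with the true LIWORK and LDSWORK. */
+/* > */
+/*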
*/ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'T': op(A) = A**T (Transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'T': op(B) = B**T (Transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,M) */ +/* > The upper quasi-triangular matrix A, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > The upper quasi-triangular matrix B, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) */ +/* > + ((N + NB - 1) / NB + 1), where NB is the optimal block size. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimension of the IWORK array, */ +/* > returns this value as the first entry of the IWORK array, and */ +/* > no error message related to LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is REAL array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int strsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb, + real *c__, integer *ldc, real *scale, integer *iwork, integer *liwork, + real *swork, integer *ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2, r__3; + + /* Local variables */ + real scal, anrm, bnrm, cnrm; + integer awrk, bwrk; + logical skip; + real *wnrm, xnrm; + integer i__, j, k, l; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer i1, i2, j1, j2, k1, k2, l1; +// extern integer myexp_(real *); + integer l2, nb, pc, jj, ll; + real scaloc; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern real slarmm_(real *, real *, real *); + logical notrna, notrnb; + real smlnum; + logical lquery; + extern /* Subroutine */ int strsyl_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *); + integer nba, nbb; + real buf, sgn; + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + 
c_dim1 * 1; + c__ -= c_offset; + --iwork; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "STRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *liwork == -1 || *ldswork == -1; + iwork[1] = nba + nbb + 2; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "T") && ! lsame_( + trana, "C")) { + *info = -1; + } else if (! notrnb && ! lsame_(tranb, "T") && ! + lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (! lquery && *liwork < iwork[1]) { + *info = -14; + } else if (! lquery && *ldswork < f2cmax(nba,nbb)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + +/* Use unblocked code for small problems or if insufficient */ +/* workspaces are provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb) || *liwork < iwork[1]) { + strsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + + +/* REAL WNRM( MAX( M, N ) ) */ + wnrm=(real*)malloc (f2cmax(*m,*n)*sizeof(real)); + +/* Set constants to control overflow */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + +/* Partition A such that 2-by-2 blocks on the diagonal are not split */ + + skip = FALSE_; + i__1 = nba; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = (i__ - 1) * nb + 1; + } + iwork[nba + 1] = *m + 1; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[k]; + l2 = iwork[k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *m) { +/* A( M, M ) is a 1-by-1 block */ + mycycle_(); + } + if (a[l + (l + 1) * a_dim1] != 0.f && a[l + 1 + l * a_dim1] != + 0.f) { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[k + 1]) { + ++iwork[k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[nba + 1] = *m + 1; + if (iwork[nba] >= iwork[nba + 1]) { + iwork[nba] = iwork[nba + 1]; + --nba; + } + +/* Partition B such that 2-by-2 blocks on the diagonal are not split */ + + pc = nba + 1; + skip = FALSE_; + i__1 = nbb; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[pc + i__] = (i__ - 1) * nb + 1; + } + iwork[pc + nbb + 1] = *n + 1; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[pc + k]; + l2 = iwork[pc + k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *n) { +/* B( N, N ) is a 1-by-1 block */ + mycycle_(); + } + if (b[l + (l + 1) * b_dim1] != 0.f && b[l + 1 + l * 
b_dim1] != + 0.f) { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[pc + k + 1]) { + ++iwork[pc + k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[pc + nbb + 1] = *n + 1; + if (iwork[pc + nbb] >= iwork[pc + nbb + 1]) { + iwork[pc + nbb] = iwork[pc + nbb + 1]; + --nbb; + } + +/* Set local scaling factors - must never attain zero. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.f; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.f; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = iwork[l]; + l2 = iwork[l + 1]; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = slange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = slange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[pc + k]; + k2 = iwork[pc + k + 1]; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = slange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = slange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (real) (*isgn); + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. 
The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = slange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = slange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + sscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + sgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = slange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
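+
+   XNRM tracks the infinity norm of the just solved block C( K, L ) and
+   is rescaled together with it, by SCAMIN / SWORK( K, L ) and by
+   SCALOC, so that the remaining updates driven by this block see the
+   magnitude it will actually have. */
+
+/*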
*/ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "N", &i__3, &i__4, &i__5, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**T*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__3 = k2 - k1; + i__4 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = slange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = slange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + sscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + sgemm_("T", "N", &i__4, &i__5, &i__6, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = slange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
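+
+   For op( A ) = A**T the update of C( I, L ) above needs A( K, I )**T
+   with K < I; the block is addressed at its stored position, starting
+   at A( K1, I1 ) in the upper triangle, and the transposition is
+   delegated to SGEMM via TRANSA = 'T'. */
+
+/*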
*/ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "N", &i__4, &i__5, &i__6, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**T*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = slange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = slange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + sscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + sgemm_("T", "N", &i__3, &i__4, &i__5, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = slange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "T", &i__3, &i__4, &i__5, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__1 = k2 - k1; + i__2 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = slange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = slange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + sscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + sgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = slange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "T", &i__2, &i__3, &i__4, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + + } + + free(wnrm); +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + r__1 = *scale, r__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(r__1,r__2); + } + } + + if (*scale == 0.f) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is REAL. Set SCALE to zero and give up. */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1.f && buf > 0.f) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + r__1 = *scale / smlnum, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + *scale /= scaloc; + } + if (buf != 1.f && buf > 0.f) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + + scal = c__[c_dim1 + 1]; + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + r__2 = scal, r__3 = (r__1 = c__[k + l * c_dim1], abs(r__1)); + scal = f2cmax(r__2,r__3); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. 
*/ + +/* Computing MIN */ + r__1 = bignum / scal, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + slascl_("G", &c_n1, &c_n1, &c_b32, &scaloc, m, n, &c__[c_offset], ldc, + &iwork[1]); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. */ + + *scale *= buf; + +/* Restore workspace dimensions */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + + return 0; + +/* End of STRSYL3 */ + +} /* strsyl3_ */ + diff --git a/lapack-netlib/SRC/ztrsyl3.c b/lapack-netlib/SRC/ztrsyl3.c index d05923a46..c1be7d589 100644 --- a/lapack-netlib/SRC/ztrsyl3.c +++ b/lapack-netlib/SRC/ztrsyl3.c @@ -157,6 +157,7 @@ struct Namelist { }; typedef struct Namelist Namelist; +#define exponent(x) #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (fabs(x)) #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) @@ -233,7 +234,9 @@ static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define myhuge_(w) HUGE_VAL //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 @@ -379,3 +382,1519 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ pCd(z) = zdotc; } #endif +/* -- translated by f2c (version 20000121). + You must link the resulting object file with the libraries: + -lf2c -lm (in that order) +*/ + + + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b18 = 2.; +static doublereal c_b106 = 1.; + +/* > \brief \b ZTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTRSYL3 solves the complex Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**H, and A and B are both upper triangular. A is */ +/* > M-by-M and B is N-by-N; the right hand side C and the solution X are */ +/* > M-by-N; and scale is an output scale factor, set <= 1 to avoid */ +/* > overflow in X. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,M) */ +/* > The upper triangular matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > The upper triangular matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* > \ingroup complex16SYcomputational */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
*/ + +/* ===================================================================== */ +/* Subroutine */ int ztrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex + *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, + doublereal *swork, integer *ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + doublereal scal; + doublecomplex csgn; + doublereal anrm, bnrm, cnrm; + integer awrk, bwrk; + doublereal *wnrm, xnrm; + integer i__, j, k, l; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer i1, i2, j1, j2, k1, k2, l1, l2; +// extern integer myexp_(doublereal *); + integer nb, jj, ll; + extern doublereal dlamch_(char *); + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex * + , integer *, integer *); + logical notrna, notrnb; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int ztrsyl_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *); + integer nba, nbb; + doublereal buf, sgn; + + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "ZTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *ldswork == -1; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "C")) { + *info = -1; + } else if (! notrnb && ! 
lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (doublereal*)malloc(f2cmax(*m,*n)*sizeof(doublereal)); +/* Use unblocked code for small problems or if insufficient */ +/* workspace is provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb)) { + ztrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Set local scaling factors. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*m) + 1; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = zlange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = zlange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*n) + 1; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = zlange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = zlange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (doublereal) (*isgn); + z__1.r = sgn, z__1.i = 0.; + csgn.r = z__1.r, csgn.i = z__1.i; + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. 
*/ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = zlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = zlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
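+/* SCAL = ( SCAMIN * SCALOC ) / SWORK( K, L ) is the ratio of the new */
+/* common scaling factor of C( I, L ) and C( K, L ) to the factor */
+/* currently recorded for C( K, L ); rescale only if it differs from one. */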
{ + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + zdscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__2, &i__3, &i__4, &z__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = zlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "N", &i__3, &i__4, &i__5, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**H *X + ISGN*X*B = scale*C. 
*/ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + + i__3 = k2 - k1; + i__4 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = zlange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__4 = i__ * nb; + i2 = f2cmin(i__4,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = zlange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + zdscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("C", "N", &i__4, &i__5, &i__6, &z__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = zlange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) 
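+/* Bring C( K, J ) to the common factor SCAMIN * SCALOC as well; its */
+/* columns J1:J2-1 are rescaled one at a time, K2 - K1 entries each. */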
{ + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "N", &i__4, &i__5, &i__6, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**H *X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = zlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = zlange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + zdscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("C", "N", &i__3, &i__4, &i__5, &z__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = zlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
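+/* The block is a submatrix of C, so the real factor SCAL is applied */
+/* with ZDSCAL to the K2 - K1 contiguous entries of each column */
+/* L1:L2-1 of C( K, L ) separately. */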
{ + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "C", &i__3, &i__4, &i__5, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__1 = l * nb; + l2 = f2cmin(i__1,*n) + 1; + + i__1 = k2 - k1; + i__2 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = zlange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = zlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + zdscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__2, &i__3, &i__4, &z__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = zlange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
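+/* As in the other variants, C( K, L ) is rescaled to the common */
+/* factor before the GEMM update; here JJ indexes its columns L1:L2-1. */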
{ + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "C", &i__2, &i__3, &i__4, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + + } + + free(wnrm); + +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + d__1 = *scale, d__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(d__1,d__2); + } + } + if (*scale == 0.) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to */ +/* zero and give up. */ + + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1. && buf > 0.) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + d__1 = *scale / smlnum, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + *scale /= scaloc; + } + + if (buf != 1. && buf > 0.) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + +/* Computing MAX */ + i__1 = c_dim1 + 1; + d__3 = (d__1 = c__[i__1].r, abs(d__1)), d__4 = (d__2 = d_imag(&c__[ + c_dim1 + 1]), abs(d__2)); + scal = f2cmax(d__3,d__4); + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + i__3 = k + l * c_dim1; + d__3 = scal, d__4 = (d__1 = c__[i__3].r, abs(d__1)), d__3 = + f2cmax(d__3,d__4), d__4 = (d__2 = d_imag(&c__[k + l * + c_dim1]), abs(d__2)); + scal = f2cmax(d__3,d__4); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + d__1 = bignum / scal, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + zlascl_("G", &c_n1, &c_n1, &c_b106, &scaloc, m, n, &c__[c_offset], + ldc, &iinfo); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. 
*/ + + *scale *= buf; + +/* Restore workspace dimensions */ + + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + + return 0; + +/* End of ZTRSYL3 */ + +} /* ztrsyl3_ */ +
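The second scaling factor BUF used throughout the routines above relies on two f2c idioms: myexp_() extracts the binary exponent of a scale factor via frexp()/frexpf(), and pow_ri()/pow_di() raise 2 to that exponent. The standalone sketch below is illustrative only and is not part of the patch; binexp, buf, swork, scaloc and bignum are stand-in names, and ldexp/fmin from C99 <math.h> play the roles of pow_di and f2cmin. It shows the same power-of-two rescaling: the exponent of a nearly underflowing SCALOC is absorbed into BUF, while the recorded block scaling is divided by the same power of two and bounded by BIGNUM.

#include <float.h>
#include <math.h>
#include <stdio.h>

/* Binary exponent of x: x = f * 2^e with 0.5 <= |f| < 1, which is what */
/* myexp_ returns via frexp.                                            */
static int binexp(double x)
{
    int e;
    (void)frexp(x, &e);
    return e;
}

int main(void)
{
    double bignum = 1. / DBL_MIN;   /* stand-in for 1. / dlamch("S")        */
    double buf    = 1.;             /* global power-of-two buffer           */
    double swork  = 1.;             /* one recorded block scaling factor    */
    double scaloc = 1e-300;         /* a scale factor that would flush      */
                                    /* BUF * SWORK to zero                  */

    /* Move the exponent of SCALOC into BUF and divide the recorded        */
    /* scaling by the same power of two, bounded by BIGNUM, as the         */
    /* routines above do.                                                   */
    int e = binexp(scaloc);
    double p = ldexp(1., e);        /* 2**e, analogue of pow_di with base 2 */
    buf *= p;
    swork = fmin(bignum, swork / p);
    scaloc /= p;

    printf("buf = %g, swork = %g, scaloc = %g\n", buf, swork, scaloc);
    return 0;
}

Because the adjustment is an exact power of two, the product BUF * SWORK is unchanged (up to the BIGNUM cap), which is why the routines can fold BUF back into SCALE with a single multiplication at the end.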