From f157d6d6718493215ae9ab915a9202a1018bbaf0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 19 Nov 2022 22:50:57 +0100 Subject: [PATCH] Add C equivalents of ?GELST (for Reference-LAPACK PR739) --- lapack-netlib/SRC/cgelst.c | 1108 +++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgelst.c | 1104 +++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/sgelst.c | 1099 +++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/zgelst.c | 1115 ++++++++++++++++++++++++++++++++++++ 4 files changed, 4426 insertions(+) create mode 100644 lapack-netlib/SRC/cgelst.c create mode 100644 lapack-netlib/SRC/dgelst.c create mode 100644 lapack-netlib/SRC/sgelst.c create mode 100644 lapack-netlib/SRC/zgelst.c diff --git a/lapack-netlib/SRC/cgelst.c b/lapack-netlib/SRC/cgelst.c new file mode 100644 index 000000000..48ded643d --- /dev/null +++ b/lapack-netlib/SRC/cgelst.c @@ -0,0 +1,1108 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its conjugate-transpose, using a QR */ +/* > or LQ factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'C' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'C' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'C': the linear system involves A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by CGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by CGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'C'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > modulus of elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of the modulus of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complexGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cgelst_(char *trans, integer *m, integer *n, integer * + nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + real anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + real rwork[1]; + integer lwopt, nb; + extern /* Subroutine */ int slabad_(real *, real *); + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer mn; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cgelqt_(integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *); + integer scllen; + real bignum; + extern /* Subroutine */ int cgeqrt_(integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *); + integer mnnrhs; + real smlnum; + logical lquery; + extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), cgemlqt_(char *, char *, integer *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *), cgemqrt_(char *, char *, integer *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "C"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "CGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = clange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + clascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + clascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + cgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemqrt_("Left", "Conjugate transpose", m, nrhs, n, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + ctrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + ctrtrs_("Upper", "Conjugate transpose", "Non-unit", n, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + cgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + ctrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemlqt_("Left", "Conjugate transpose", n, nrhs, m, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + ctrtrs_("Lower", "Conjugate transpose", "Non-unit", m, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + clascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + clascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + clascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + clascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + + return 0; + +/* End of CGELST */ + +} /* cgelst_ */ + diff --git a/lapack-netlib/SRC/dgelst.c b/lapack-netlib/SRC/dgelst.c new file mode 100644 index 000000000..9327da4dd --- /dev/null +++ b/lapack-netlib/SRC/dgelst.c @@ -0,0 +1,1104 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by DGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by DGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int dgelst_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + doublereal rwork[1]; + integer lwopt; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer mn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + doublereal bignum; + extern /* Subroutine */ int dgelqt_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dgeqrt_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *); + integer mnnrhs; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), dgemlqt_(char *, char *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dgemqrt_(char *, char *, integer *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "DGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + work[1] = (doublereal) lwopt; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + dlaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (doublereal) lwopt; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (doublereal) lwopt; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + dgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemqrt_("Left", "Transpose", m, nrhs, n, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + dgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemlqt_("Left", "Transpose", n, nrhs, m, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + work[1] = (doublereal) lwopt; + + return 0; + +/* End of DGELST */ + +} /* dgelst_ */ + diff --git a/lapack-netlib/SRC/sgelst.c b/lapack-netlib/SRC/sgelst.c new file mode 100644 index 000000000..e0cd84cd9 --- /dev/null +++ b/lapack-netlib/SRC/sgelst.c @@ -0,0 +1,1099 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by SGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by SGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup realGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int sgelst_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + real anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + real rwork[1]; + integer lwopt, nb; + extern /* Subroutine */ int slabad_(real *, real *); + integer mn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sgelqt_(integer *, integer *, integer + *, real *, integer *, real *, integer *, real *, integer *); + integer mnnrhs; + extern /* Subroutine */ int sgeqrt_(integer *, integer *, integer *, real + *, integer *, real *, integer *, real *, integer *); + real smlnum; + logical lquery; + extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgemlqt_(char *, char *, integer *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), + sgemqrt_(char *, char *, integer *, integer *, integer *, integer + *, real *, integer *, real *, integer *, real *, integer *, real * + , integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "SGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + work[1] = (real) lwopt; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + slaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (real) lwopt; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + slaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (real) lwopt; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + sgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemqrt_("Left", "Transpose", m, nrhs, n, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + sgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemlqt_("Left", "Transpose", n, nrhs, m, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + work[1] = (real) lwopt; + + return 0; + +/* End of SGELST */ + +} /* sgelst_ */ + diff --git a/lapack-netlib/SRC/zgelst.c b/lapack-netlib/SRC/zgelst.c new file mode 100644 index 000000000..447cd30bb --- /dev/null +++ b/lapack-netlib/SRC/zgelst.c @@ -0,0 +1,1115 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its conjugate-transpose, using a QR */ +/* > or LQ factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'C' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'C' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'C': the linear system involves A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by ZGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by ZGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'C'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > modulus of elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of the modulus of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16GEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zgelst_(char *trans, integer *m, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + doublereal rwork[1]; + integer lwopt; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *); + integer mn; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *); + integer mnnrhs; + extern /* Subroutine */ int zgelqt_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + doublereal smlnum; + extern /* Subroutine */ int zgeqrt_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + logical lquery; + extern /* Subroutine */ int ztrtrs_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *), zgemlqt_(char *, char *, + integer *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemqrt_(char *, + char *, integer *, integer *, integer *, integer *, doublecomplex + *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "C"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "ZGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + zlaset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + zlaset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = zlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + zgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemqrt_("Left", "Conjugate transpose", m, nrhs, n, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + ztrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + ztrtrs_("Upper", "Conjugate transpose", "Non-unit", n, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + zgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + ztrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemlqt_("Left", "Conjugate transpose", n, nrhs, m, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + ztrtrs_("Lower", "Conjugate transpose", "Non-unit", m, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + zlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + zlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + + return 0; + +/* End of ZGELST */ + +} /* zgelst_ */ +