diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index ca3a1e184..17ff8d0a0 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -124,7 +124,7 @@ set(SLASRC ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f sgesvdq.f slaorhr_col_getrfnp.f slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f - slarmm.f slatrs3.f strsyl3.f) + slarmm.f slatrs3.f strsyl3.f sgelst.f) set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f @@ -223,7 +223,7 @@ set(CLASRC chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f cungtsqr.f cungtsqr_row.f cunhr_col.f - clatrs3.f ctrsyl3.f ) + clatrs3.f ctrsyl3.f cgelst.f) set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f @@ -316,7 +316,7 @@ set(DLASRC dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f - dlarmm.f dlatrs3.f dtrsyl3.f) + dlarmm.f dlatrs3.f dtrsyl3.f dgelst.f) set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f @@ -419,7 +419,7 @@ set(ZLASRC zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f zungtsqr.f zungtsqr_row.f zunhr_col.f - zlatrs3.f ztrsyl3.f) + zlatrs3.f ztrsyl3.f zgelst.f) set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f @@ -622,7 +622,7 @@ set(SLASRC ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c sgesvdq.c slaorhr_col_getrfnp.c slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c - slarmm.c slatrs3.c strsyl3.c) + slarmm.c slatrs3.c strsyl3.c sgelst.c) set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c @@ -720,7 +720,7 @@ set(CLASRC chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c cungtsqr.c cungtsqr_row.c cunhr_col.c - clatrs3.c ctrsyl3.c) + clatrs3.c ctrsyl3.c cgelst.c) set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c @@ -812,7 +812,7 @@ set(DLASRC dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c - dlarmm.c dlatrs3.c dtrsyl3.c) + dlarmm.c dlatrs3.c dtrsyl3.c dgelst.c) set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c @@ -913,7 +913,7 @@ set(ZLASRC zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c - zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c) + zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c) set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 49eb69cfe..49798b0c5 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -207,7 +207,7 @@ SLASRC_O = \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ - sgesvdq.o slarmm.o slatrs3.o strsyl3.o + sgesvdq.o slarmm.o slatrs3.o strsyl3.o sgelst.o endif @@ -316,7 +316,7 @@ CLASRC_O = \ chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ - cgesvdq.o clatrs3.o ctrsyl3.o + cgesvdq.o clatrs3.o ctrsyl3.o cgelst.o endif ifdef USEXBLAS @@ -417,7 +417,7 @@ DLASRC_O = \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ - dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o + dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o dgelst.o endif ifdef USEXBLAS @@ -526,7 +526,7 @@ ZLASRC_O = \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ - zgesvdq.o zlatrs3.o ztrsyl3.o + zgesvdq.o zlatrs3.o ztrsyl3.o zgelst.o endif ifdef USEXBLAS diff --git a/lapack-netlib/SRC/cgelss.f b/lapack-netlib/SRC/cgelss.f index 04defbb2e..da6b9092f 100644 --- a/lapack-netlib/SRC/cgelss.f +++ b/lapack-netlib/SRC/cgelss.f @@ -266,11 +266,11 @@ * * Compute space needed for CGEQRF CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) - LWORK_CGEQRF = REAL( DUM(1) ) + LWORK_CGEQRF = INT( DUM(1) ) * Compute space needed for CUNMQR CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, DUM(1), B, $ LDB, DUM(1), -1, INFO ) - LWORK_CUNMQR = REAL( DUM(1) ) + LWORK_CUNMQR = INT( DUM(1) ) MM = N MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'CGEQRF', ' ', M, $ N, -1, -1 ) ) @@ -284,15 +284,15 @@ * Compute space needed for CGEBRD CALL CGEBRD( MM, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), $ -1, INFO ) - LWORK_CGEBRD = REAL( DUM(1) ) + LWORK_CGEBRD = INT( DUM(1) ) * Compute space needed for CUNMBR CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_CUNMBR = REAL( DUM(1) ) + LWORK_CUNMBR = INT( DUM(1) ) * Compute space needed for CUNGBR CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_CUNGBR = REAL( DUM(1) ) + LWORK_CUNGBR = INT( DUM(1) ) * Compute total workspace needed MAXWRK = MAX( MAXWRK, 2*N + LWORK_CGEBRD ) MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR ) @@ -310,23 +310,23 @@ * Compute space needed for CGELQF CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), $ -1, INFO ) - LWORK_CGELQF = REAL( DUM(1) ) + LWORK_CGELQF = INT( DUM(1) ) * Compute space needed for CGEBRD CALL CGEBRD( M, M, A, LDA, S, S, DUM(1), DUM(1), $ DUM(1), -1, INFO ) - LWORK_CGEBRD = REAL( DUM(1) ) + LWORK_CGEBRD = INT( DUM(1) ) * Compute space needed for CUNMBR CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_CUNMBR = REAL( DUM(1) ) + LWORK_CUNMBR = INT( DUM(1) ) * Compute space needed for CUNGBR CALL CUNGBR( 'P', M, M, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_CUNGBR = REAL( DUM(1) ) + LWORK_CUNGBR = INT( DUM(1) ) * Compute space needed for CUNMLQ CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_CUNMLQ = REAL( DUM(1) ) + LWORK_CUNMLQ = INT( DUM(1) ) * Compute total workspace needed MAXWRK = M + LWORK_CGELQF MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_CGEBRD ) @@ -345,15 +345,15 @@ * Compute space needed for CGEBRD CALL CGEBRD( M, N, A, LDA, S, S, DUM(1), DUM(1), $ DUM(1), -1, INFO ) - LWORK_CGEBRD = REAL( DUM(1) ) + LWORK_CGEBRD = INT( DUM(1) ) * Compute space needed for CUNMBR CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_CUNMBR = REAL( DUM(1) ) + LWORK_CUNMBR = INT( DUM(1) ) * Compute space needed for CUNGBR CALL CUNGBR( 'P', M, N, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_CUNGBR = REAL( DUM(1) ) + LWORK_CUNGBR = INT( DUM(1) ) MAXWRK = 2*M + LWORK_CGEBRD MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR ) MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR ) diff --git a/lapack-netlib/SRC/cgelst.c b/lapack-netlib/SRC/cgelst.c new file mode 100644 index 000000000..48ded643d --- /dev/null +++ b/lapack-netlib/SRC/cgelst.c @@ -0,0 +1,1108 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its conjugate-transpose, using a QR */ +/* > or LQ factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'C' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'C' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'C': the linear system involves A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by CGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by CGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'C'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > modulus of elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of the modulus of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complexGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cgelst_(char *trans, integer *m, integer *n, integer * + nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + real anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + real rwork[1]; + integer lwopt, nb; + extern /* Subroutine */ int slabad_(real *, real *); + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer mn; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cgelqt_(integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *); + integer scllen; + real bignum; + extern /* Subroutine */ int cgeqrt_(integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *); + integer mnnrhs; + real smlnum; + logical lquery; + extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), cgemlqt_(char *, char *, integer *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *), cgemqrt_(char *, char *, integer *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "C"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "CGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + claset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = clange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + clascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + clascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + cgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemqrt_("Left", "Conjugate transpose", m, nrhs, n, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + ctrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + ctrtrs_("Upper", "Conjugate transpose", "Non-unit", n, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + cgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + ctrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemlqt_("Left", "Conjugate transpose", n, nrhs, m, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + cgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + ctrtrs_("Lower", "Conjugate transpose", "Non-unit", m, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + clascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + clascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + clascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + clascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + r__1 = (real) lwopt; + work[1].r = r__1, work[1].i = 0.f; + + return 0; + +/* End of CGELST */ + +} /* cgelst_ */ + diff --git a/lapack-netlib/SRC/cgelst.f b/lapack-netlib/SRC/cgelst.f new file mode 100644 index 000000000..7d8e44ddf --- /dev/null +++ b/lapack-netlib/SRC/cgelst.f @@ -0,0 +1,533 @@ +*> \brief CGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by CGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by CGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CGELQT, CGEQRT, CGEMLQT, CGEMQRT, SLABAD, + $ CLASCL, CLASET, CTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'CGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'CGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL CGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL CGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL CGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of CGELST +* + END diff --git a/lapack-netlib/SRC/dgelst.c b/lapack-netlib/SRC/dgelst.c new file mode 100644 index 000000000..9327da4dd --- /dev/null +++ b/lapack-netlib/SRC/dgelst.c @@ -0,0 +1,1104 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by DGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by DGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int dgelst_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + doublereal rwork[1]; + integer lwopt; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer mn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + doublereal bignum; + extern /* Subroutine */ int dgelqt_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dgeqrt_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *); + integer mnnrhs; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), dgemlqt_(char *, char *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dgemqrt_(char *, char *, integer *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "DGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + work[1] = (doublereal) lwopt; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + dlaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (doublereal) lwopt; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (doublereal) lwopt; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + dgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemqrt_("Left", "Transpose", m, nrhs, n, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + dgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemlqt_("Left", "Transpose", n, nrhs, m, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + dgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + work[1] = (doublereal) lwopt; + + return 0; + +/* End of DGELST */ + +} /* dgelst_ */ + diff --git a/lapack-netlib/SRC/dgelst.f b/lapack-netlib/SRC/dgelst.f new file mode 100644 index 000000000..ca0e04a9b --- /dev/null +++ b/lapack-netlib/SRC/dgelst.f @@ -0,0 +1,531 @@ +*> \brief DGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by DGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by DGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGELQT, DGEQRT, DGEMLQT, DGEMQRT, DLABAD, + $ DLASCL, DLASET, DTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'DGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'DGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL DGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL DGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL DGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of DGELST +* + END diff --git a/lapack-netlib/SRC/sgelss.f b/lapack-netlib/SRC/sgelss.f index be9e2ea11..9aed4329f 100644 --- a/lapack-netlib/SRC/sgelss.f +++ b/lapack-netlib/SRC/sgelss.f @@ -253,11 +253,11 @@ * * Compute space needed for SGEQRF CALL SGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) - LWORK_SGEQRF=DUM(1) + LWORK_SGEQRF = INT( DUM(1) ) * Compute space needed for SORMQR CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B, $ LDB, DUM(1), -1, INFO ) - LWORK_SORMQR=DUM(1) + LWORK_SORMQR = INT( DUM(1) ) MM = N MAXWRK = MAX( MAXWRK, N + LWORK_SGEQRF ) MAXWRK = MAX( MAXWRK, N + LWORK_SORMQR ) @@ -272,15 +272,15 @@ * Compute space needed for SGEBRD CALL SGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1), $ DUM(1), DUM(1), -1, INFO ) - LWORK_SGEBRD=DUM(1) + LWORK_SGEBRD = INT( DUM(1) ) * Compute space needed for SORMBR CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_SORMBR=DUM(1) + LWORK_SORMBR = INT( DUM(1) ) * Compute space needed for SORGBR CALL SORGBR( 'P', N, N, N, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_SORGBR=DUM(1) + LWORK_SORGBR = INT( DUM(1) ) * Compute total workspace needed MAXWRK = MAX( MAXWRK, 3*N + LWORK_SGEBRD ) MAXWRK = MAX( MAXWRK, 3*N + LWORK_SORMBR ) @@ -304,19 +304,19 @@ * Compute space needed for SGEBRD CALL SGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), $ DUM(1), DUM(1), -1, INFO ) - LWORK_SGEBRD=DUM(1) + LWORK_SGEBRD = INT( DUM(1) ) * Compute space needed for SORMBR CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_SORMBR=DUM(1) + LWORK_SORMBR = INT( DUM(1) ) * Compute space needed for SORGBR CALL SORGBR( 'P', M, M, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_SORGBR=DUM(1) + LWORK_SORGBR = INT( DUM(1) ) * Compute space needed for SORMLQ CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_SORMLQ=DUM(1) + LWORK_SORMLQ = INT( DUM(1) ) * Compute total workspace needed MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, $ -1 ) @@ -337,15 +337,15 @@ * Compute space needed for SGEBRD CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), $ DUM(1), DUM(1), -1, INFO ) - LWORK_SGEBRD=DUM(1) + LWORK_SGEBRD = INT( DUM(1) ) * Compute space needed for SORMBR CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_SORMBR=DUM(1) + LWORK_SORMBR = INT( DUM(1) ) * Compute space needed for SORGBR CALL SORGBR( 'P', M, N, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_SORGBR=DUM(1) + LWORK_SORGBR = INT( DUM(1) ) MAXWRK = 3*M + LWORK_SGEBRD MAXWRK = MAX( MAXWRK, 3*M + LWORK_SORMBR ) MAXWRK = MAX( MAXWRK, 3*M + LWORK_SORGBR ) diff --git a/lapack-netlib/SRC/sgelst.c b/lapack-netlib/SRC/sgelst.c new file mode 100644 index 000000000..e0cd84cd9 --- /dev/null +++ b/lapack-netlib/SRC/sgelst.c @@ -0,0 +1,1099 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by SGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by SGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup realGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int sgelst_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + real anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + real rwork[1]; + integer lwopt, nb; + extern /* Subroutine */ int slabad_(real *, real *); + integer mn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sgelqt_(integer *, integer *, integer + *, real *, integer *, real *, integer *, real *, integer *); + integer mnnrhs; + extern /* Subroutine */ int sgeqrt_(integer *, integer *, integer *, real + *, integer *, real *, integer *, real *, integer *); + real smlnum; + logical lquery; + extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgemlqt_(char *, char *, integer *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), + sgemqrt_(char *, char *, integer *, integer *, integer *, integer + *, real *, integer *, real *, integer *, real *, integer *, real * + , integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "SGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + work[1] = (real) lwopt; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + slaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (real) lwopt; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + slaset_("Full", &i__1, nrhs, &c_b12, &c_b12, &b[b_offset], ldb); + work[1] = (real) lwopt; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + sgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemqrt_("Left", "Transpose", m, nrhs, n, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + sgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemlqt_("Left", "Transpose", n, nrhs, m, &nb, &a[a_offset], lda, + &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + sgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + work[1] = (real) lwopt; + + return 0; + +/* End of SGELST */ + +} /* sgelst_ */ + diff --git a/lapack-netlib/SRC/sgelst.f b/lapack-netlib/SRC/sgelst.f new file mode 100644 index 000000000..5377bc720 --- /dev/null +++ b/lapack-netlib/SRC/sgelst.f @@ -0,0 +1,531 @@ +*> \brief SGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by SGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by SGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup realGEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGELQT, SGEQRT, SGEMLQT, SGEMQRT, SLABAD, + $ SLASCL, SLASET, STRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'SGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = REAL( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'SGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + WORK( 1 ) = REAL( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL SGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'Transpose', M, NRHS, N, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, WORK( MN*NB+1 ), + $ INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL SGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'Transpose', N, NRHS, M, NB, A, LDA, + $ WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL SGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = REAL( LWOPT ) +* + RETURN +* +* End of SGELST +* + END diff --git a/lapack-netlib/SRC/zgelss.f b/lapack-netlib/SRC/zgelss.f index e4aba6497..be53ba95b 100644 --- a/lapack-netlib/SRC/zgelss.f +++ b/lapack-netlib/SRC/zgelss.f @@ -266,11 +266,11 @@ * * Compute space needed for ZGEQRF CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) - LWORK_ZGEQRF = DBLE( DUM(1) ) + LWORK_ZGEQRF = INT( DUM(1) ) * Compute space needed for ZUNMQR CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, DUM(1), B, $ LDB, DUM(1), -1, INFO ) - LWORK_ZUNMQR = DBLE( DUM(1) ) + LWORK_ZUNMQR = INT( DUM(1) ) MM = N MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'ZGEQRF', ' ', M, $ N, -1, -1 ) ) @@ -284,15 +284,15 @@ * Compute space needed for ZGEBRD CALL ZGEBRD( MM, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), $ -1, INFO ) - LWORK_ZGEBRD = DBLE( DUM(1) ) + LWORK_ZGEBRD = INT( DUM(1) ) * Compute space needed for ZUNMBR CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_ZUNMBR = DBLE( DUM(1) ) + LWORK_ZUNMBR = INT( DUM(1) ) * Compute space needed for ZUNGBR CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_ZUNGBR = DBLE( DUM(1) ) + LWORK_ZUNGBR = INT( DUM(1) ) * Compute total workspace needed MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZGEBRD ) MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR ) @@ -310,23 +310,23 @@ * Compute space needed for ZGELQF CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), $ -1, INFO ) - LWORK_ZGELQF = DBLE( DUM(1) ) + LWORK_ZGELQF = INT( DUM(1) ) * Compute space needed for ZGEBRD CALL ZGEBRD( M, M, A, LDA, S, S, DUM(1), DUM(1), $ DUM(1), -1, INFO ) - LWORK_ZGEBRD = DBLE( DUM(1) ) + LWORK_ZGEBRD = INT( DUM(1) ) * Compute space needed for ZUNMBR CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_ZUNMBR = DBLE( DUM(1) ) + LWORK_ZUNMBR = INT( DUM(1) ) * Compute space needed for ZUNGBR CALL ZUNGBR( 'P', M, M, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_ZUNGBR = DBLE( DUM(1) ) + LWORK_ZUNGBR = INT( DUM(1) ) * Compute space needed for ZUNMLQ CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, DUM(1), $ B, LDB, DUM(1), -1, INFO ) - LWORK_ZUNMLQ = DBLE( DUM(1) ) + LWORK_ZUNMLQ = INT( DUM(1) ) * Compute total workspace needed MAXWRK = M + LWORK_ZGELQF MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_ZGEBRD ) @@ -345,15 +345,15 @@ * Compute space needed for ZGEBRD CALL ZGEBRD( M, N, A, LDA, S, S, DUM(1), DUM(1), $ DUM(1), -1, INFO ) - LWORK_ZGEBRD = DBLE( DUM(1) ) + LWORK_ZGEBRD = INT( DUM(1) ) * Compute space needed for ZUNMBR CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, A, LDA, $ DUM(1), B, LDB, DUM(1), -1, INFO ) - LWORK_ZUNMBR = DBLE( DUM(1) ) + LWORK_ZUNMBR = INT( DUM(1) ) * Compute space needed for ZUNGBR CALL ZUNGBR( 'P', M, N, M, A, LDA, DUM(1), $ DUM(1), -1, INFO ) - LWORK_ZUNGBR = DBLE( DUM(1) ) + LWORK_ZUNGBR = INT( DUM(1) ) MAXWRK = 2*M + LWORK_ZGEBRD MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR ) MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR ) diff --git a/lapack-netlib/SRC/zgelst.c b/lapack-netlib/SRC/zgelst.c new file mode 100644 index 000000000..447cd30bb --- /dev/null +++ b/lapack-netlib/SRC/zgelst.c @@ -0,0 +1,1115 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factori +zation with compact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGELST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGELST solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its conjugate-transpose, using a QR */ +/* > or LQ factorization of A with compact WY representation of Q. */ +/* > It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'C' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'C' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'C': the linear system involves A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by ZGEQRT; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by ZGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'C'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > modulus of elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of the modulus of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, (MN + f2cmax( MN, NRHS ))*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16GEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2022, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zgelst_(char *trans, integer *m, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer nbmin; + doublereal rwork[1]; + integer lwopt; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *); + integer mn; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *); + integer mnnrhs; + extern /* Subroutine */ int zgelqt_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + doublereal smlnum; + extern /* Subroutine */ int zgeqrt_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + logical lquery; + extern /* Subroutine */ int ztrtrs_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *), zgemlqt_(char *, char *, + integer *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemqrt_(char *, + char *, integer *, integer *, integer *, integer *, doublecomplex + *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "C"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size and optimal workspace size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + nb = ilaenv_(&c__1, "ZGELST", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + mnnrhs = f2cmax(mn,*nrhs); +/* Computing MAX */ + i__1 = 1, i__2 = (mn + mnnrhs) * nb; + lwopt = f2cmax(i__1,i__2); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELST ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + zlaset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + return 0; + } + +/* *GEQRT and *GELQT routines cannot accept NB larger than f2cmin(M,N) */ + + if (nb > mn) { + nb = mn; + } + +/* Determine the block size from the supplied LWORK */ +/* ( at this stage we know that LWORK >= (minimum required workspace, */ +/* but it may be less than optimal) */ + +/* Computing MIN */ + i__1 = nb, i__2 = *lwork / (mn + mnnrhs); + nb = f2cmin(i__1,i__2); + +/* The minimum value of NB, when blocked code is used */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELST", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + if (nb < nbmin) { + nb = 1; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + zlaset_("Full", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + return 0; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = zlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* M > N: */ +/* Compute the blocked QR factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least N, optimally N*NB. */ + + zgeqrt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M > N, A is not transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A * X - B ||. */ + +/* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemqrt_("Left", "Conjugate transpose", m, nrhs, n, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + +/* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + ztrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* M > N, A is transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A**T * X = B. */ + +/* Compute B := inv(R**T) * B in two row blocks of B. */ + +/* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + ztrtrs_("Upper", "Conjugate transpose", "Non-unit", n, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the N-th row in B: */ +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + +/* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemqrt_("Left", "No transpose", m, nrhs, n, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + + scllen = *m; + + } + + } else { + +/* M < N: */ +/* Compute the blocked LQ factorization of A, */ +/* using the compact WY representation of Q, */ +/* workspace at least M, optimally M*NB. */ + + zgelqt_(m, n, &nb, &a[a_offset], lda, &work[1], &nb, &work[mn * nb + + 1], info); + + if (! tpsd) { + +/* M < N, A is not transposed: */ +/* Underdetermined system of equations, */ +/* minimum norm solution of A * X = B. */ + +/* Compute B := inv(L) * B in two row blocks of B. */ + +/* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + ztrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* Block 2: Zero out all rows below the M-th row in B: */ +/* B(M+1:N,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + +/* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemlqt_("Left", "Conjugate transpose", n, nrhs, m, &nb, &a[ + a_offset], lda, &work[1], &nb, &b[b_offset], ldb, &work[ + mn * nb + 1], info); + + scllen = *n; + + } else { + +/* M < N, A is transposed: */ +/* Overdetermined system of equations, */ +/* least-squares problem, f2cmin || A**T * X - B ||. */ + +/* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), */ +/* using the compact WY representation of Q, */ +/* workspace at least NRHS, optimally NRHS*NB. */ + + zgemlqt_("Left", "No transpose", n, nrhs, m, &nb, &a[a_offset], + lda, &work[1], &nb, &b[b_offset], ldb, &work[mn * nb + 1], + info); + +/* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + ztrtrs_("Lower", "Conjugate transpose", "Non-unit", m, nrhs, &a[ + a_offset], lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + zlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + zlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + + d__1 = (doublereal) lwopt; + work[1].r = d__1, work[1].i = 0.; + + return 0; + +/* End of ZGELST */ + +} /* zgelst_ */ + diff --git a/lapack-netlib/SRC/zgelst.f b/lapack-netlib/SRC/zgelst.f new file mode 100644 index 000000000..4dabdc91e --- /dev/null +++ b/lapack-netlib/SRC/zgelst.f @@ -0,0 +1,533 @@ +*> \brief ZGELST solves overdetermined or underdetermined systems for GE matrices using QR or LQ factorization with compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELST solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A with compact WY representation of Q. +*> It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by ZGEQRT; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by ZGELQT. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, (MN + max( MN, NRHS ))*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16GEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2022, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZGELST( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, LWOPT, MN, MNNRHS, + $ NB, NBMIN, SCLLEN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL ZGELQT, ZGEQRT, ZGEMLQT, ZGEMQRT, DLABAD, + $ ZLASCL, ZLASET, ZTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size and optimal workspace size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + NB = ILAENV( 1, 'ZGELST', ' ', M, N, -1, -1 ) +* + MNNRHS = MAX( MN, NRHS ) + LWOPT = MAX( 1, (MN+MNNRHS)*NB ) + WORK( 1 ) = DBLE( LWOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELST ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* +* *GEQRT and *GELQT routines cannot accept NB larger than min(M,N) +* + IF( NB.GT.MN ) NB = MN +* +* Determine the block size from the supplied LWORK +* ( at this stage we know that LWORK >= (minimum required workspace, +* but it may be less than optimal) +* + NB = MIN( NB, LWORK/( MN + MNNRHS ) ) +* +* The minimum value of NB, when blocked code is used +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGELST', ' ', M, N, -1, -1 ) ) +* + IF( NB.LT.NBMIN ) THEN + NB = 1 + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + WORK( 1 ) = DBLE( LWOPT ) + RETURN + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* M > N: +* Compute the blocked QR factorization of A, +* using the compact WY representation of Q, +* workspace at least N, optimally N*NB. +* + CALL ZGEQRT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M > N, A is not transposed: +* Overdetermined system of equations, +* least-squares problem, min || A * X - B ||. +* +* Compute B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'Conjugate transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* +* Compute B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* M > N, A is transposed: +* Underdetermined system of equations, +* minimum norm solution of A**T * X = B. +* +* Compute B := inv(R**T) * B in two row blocks of B. +* +* Block 1: B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the N-th row in B: +* B(N+1:M,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = N + 1, M + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMQRT( 'Left', 'No transpose', M, NRHS, N, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* M < N: +* Compute the blocked LQ factorization of A, +* using the compact WY representation of Q, +* workspace at least M, optimally M*NB. +* + CALL ZGELQT( M, N, NB, A, LDA, WORK( 1 ), NB, + $ WORK( MN*NB+1 ), INFO ) +* + IF( .NOT.TPSD ) THEN +* +* M < N, A is not transposed: +* Underdetermined system of equations, +* minimum norm solution of A * X = B. +* +* Compute B := inv(L) * B in two row blocks of B. +* +* Block 1: B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* Block 2: Zero out all rows below the M-th row in B: +* B(M+1:N,1:NRHS) = ZERO +* + DO J = 1, NRHS + DO I = M + 1, N + B( I, J ) = ZERO + END DO + END DO +* +* Compute B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'Conjugate transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1 ), INFO ) +* + SCLLEN = N +* + ELSE +* +* M < N, A is transposed: +* Overdetermined system of equations, +* least-squares problem, min || A**T * X - B ||. +* +* Compute B(1:N,1:NRHS) := Q * B(1:N,1:NRHS), +* using the compact WY representation of Q, +* workspace at least NRHS, optimally NRHS*NB. +* + CALL ZGEMLQT( 'Left', 'No transpose', N, NRHS, M, NB, + $ A, LDA, WORK( 1 ), NB, B, LDB, + $ WORK( MN*NB+1), INFO ) +* +* Compute B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + WORK( 1 ) = DBLE( LWOPT ) +* + RETURN +* +* End of ZGELST +* + END diff --git a/lapack-netlib/TESTING/LIN/alahd.f b/lapack-netlib/TESTING/LIN/alahd.f index 2cc0fba06..f0423a23b 100644 --- a/lapack-netlib/TESTING/LIN/alahd.f +++ b/lapack-netlib/TESTING/LIN/alahd.f @@ -608,17 +608,18 @@ ELSE IF( LSAMEN( 2, P2, 'LS' ) ) THEN * * LS: Least Squares driver routines for -* LS, LSD, LSS, LSX and LSY. +* LS, LST, TSLS, LSD, LSS, LSX and LSY. * WRITE( IOUNIT, FMT = 9984 )PATH WRITE( IOUNIT, FMT = 9967 ) - WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1 + WRITE( IOUNIT, FMT = 9921 )C1, C1, C1, C1, C1, C1 WRITE( IOUNIT, FMT = 9935 )1 WRITE( IOUNIT, FMT = 9931 )2 - WRITE( IOUNIT, FMT = 9933 )3 - WRITE( IOUNIT, FMT = 9935 )4 - WRITE( IOUNIT, FMT = 9934 )5 - WRITE( IOUNIT, FMT = 9932 )6 + WRITE( IOUNIT, FMT = 9919 ) + WRITE( IOUNIT, FMT = 9933 )7 + WRITE( IOUNIT, FMT = 9935 )8 + WRITE( IOUNIT, FMT = 9934 )9 + WRITE( IOUNIT, FMT = 9932 )10 WRITE( IOUNIT, FMT = 9920 ) WRITE( IOUNIT, FMT = '( '' Messages:'' )' ) * @@ -1048,10 +1049,11 @@ $ 'check if X is in the row space of A or A'' ', $ '(overdetermined case)' ) 9929 FORMAT( ' Test ratios (1-3: ', A1, 'TZRZF):' ) - 9920 FORMAT( 3X, ' 7-10: same as 3-6', 3X, ' 11-14: same as 3-6' ) - 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-6: ', A1, - $ 'GELSY, 7-10: ', A1, 'GELSS, 11-14: ', A1, 'GELSD, 15-16: ', - $ A1, 'GETSLS)') + 9919 FORMAT( 3X, ' 3-4: same as 1-2', 3X, ' 5-6: same as 1-2' ) + 9920 FORMAT( 3X, ' 11-14: same as 7-10', 3X, ' 15-18: same as 7-10' ) + 9921 FORMAT( ' Test ratios:', / ' (1-2: ', A1, 'GELS, 3-4: ', A1, + $ 'GELST, 5-6: ', A1, 'GETSLS, 7-10: ', A1, 'GELSY, 11-14: ', + $ A1, 'GETSS, 15-18: ', A1, 'GELSD)' ) 9928 FORMAT( 7X, 'where ALPHA = ( 1 + SQRT( 17 ) ) / 8' ) 9927 FORMAT( 3X, I2, ': ABS( Largest element in L )', / 12X, $ ' - ( 1 / ( 1 - ALPHA ) ) + THRESH' ) diff --git a/lapack-netlib/TESTING/LIN/cdrvls.f b/lapack-netlib/TESTING/LIN/cdrvls.f index 7fe189e5f..ecba705d5 100644 --- a/lapack-netlib/TESTING/LIN/cdrvls.f +++ b/lapack-netlib/TESTING/LIN/cdrvls.f @@ -31,7 +31,8 @@ *> *> \verbatim *> -*> CDRVLS tests the least squares driver routines CGELS, CGETSLS, CGELSS, CGELSY +*> CDRVLS tests the least squares driver routines CGELS, CGELST, +*> CGETSLS, CGELSS, CGELSY *> and CGELSD. *> \endverbatim * @@ -211,7 +212,7 @@ * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, ZERO @@ -228,8 +229,8 @@ $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_CGELS, LWORK_CGETSLS, LWORK_CGELSS, - $ LWORK_CGELSY, LWORK_CGELSD, + $ LWORK_CGELS, LWORK_CGELST, LWORK_CGETSLS, + $ LWORK_CGELSS, LWORK_CGELSY, LWORK_CGELSD, $ LRWORK_CGELSY, LRWORK_CGELSS, LRWORK_CGELSD REAL EPS, NORMA, NORMB, RCOND * .. @@ -249,7 +250,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, CERRLS, CGELS, CGELSD, - $ CGELSS, CGELSY, CGEMM, CGETSLS, CLACPY, + $ CGELSS, CGELST, CGELSY, CGEMM, CGETSLS, CLACPY, $ CLARNV, CQRT13, CQRT15, CQRT16, CSSCAL, $ SAXPY, XLAENV * .. @@ -334,7 +335,8 @@ LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +363,10 @@ CALL CGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_CGELS = INT( WQ( 1 ) ) +* Compute workspace needed for CGELST + CALL CGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_CGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for CGETSLS CALL CGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -425,21 +431,26 @@ ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test CGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test CGELS -* * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +495,20 @@ $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for CGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for CGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +531,7 @@ * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +540,34 @@ $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test CGETSLS + END DO + END DO + END IF +* ===================================================== +* End test CGELS +* ===================================================== +* ===================================================== +* Begin test CGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +584,9 @@ IF( NCOLS.GT.0 ) THEN CALL CLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL CSCAL( NCOLS*NRHS, - $ CONE / REAL( NCOLS ), WORK, - $ 1 ) + CALL CSSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), WORK, + $ 1 ) END IF CALL CGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +602,37 @@ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'CGETSLS ' - CALL CGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'CGELST' + CALL CGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'CGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for CGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL CLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL CQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for CGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = CQRT17( TRANS, 1, M, N, + RESULT( 4 ) = CQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +640,7 @@ * * Solving overdetermined system * - RESULT( 16 ) = CQRT14( TRANS, M, N, + RESULT( 4 ) = CQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +648,151 @@ * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test CGELST +* ===================================================== +* ===================================================== +* Begin test CGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL CQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL CLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL CSCAL( NCOLS*NRHS, + $ CONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL CGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL CLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'CGETSLS ' + CALL CGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'CGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for CGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL CLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL CQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for CGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = CQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = CQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test CGELSTSLS +* ==================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +840,37 @@ * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = CQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = CQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = CQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = CQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,62 +896,6 @@ * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd -* - IF( RANK.GT.0 ) THEN - CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / - $ SASUM( MNMIN, COPYS, 1 ) / - $ ( EPS*REAL( MNMIN ) ) - ELSE - RESULT( 7 ) = ZERO - END IF -* -* Test 8: Compute error in solution -* - CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, - $ LDWORK ) - CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, - $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) -* -* Test 9: Check norm of r'*A -* - RESULT( 9 ) = ZERO - IF( M.GT.CRANK ) - $ RESULT( 9 ) = CQRT17( 'No transpose', 1, M, - $ N, NRHS, COPYA, LDA, B, LDB, - $ COPYB, LDB, C, WORK, LWORK ) -* -* Test 10: Check if x is in the rowspace of A -* - RESULT( 10 ) = ZERO - IF( N.GT.CRANK ) - $ RESULT( 10 ) = CQRT14( 'No transpose', M, N, - $ NRHS, COPYA, LDA, B, LDB, - $ WORK, LWORK ) -* -* Test CGELSD -* -* CGELSD: Compute the minimum-norm solution X -* to min( norm( A * X - B ) ) using a -* divide and conquer SVD. -* - CALL XLAENV( 9, 25 ) -* - CALL CLACPY( 'Full', M, N, COPYA, LDA, A, LDA ) - CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, B, - $ LDB ) -* - SRNAMT = 'CGELSD' - CALL CGELSD( M, N, NRHS, A, LDA, B, LDB, S, - $ RCOND, CRANK, WORK, LWORK, RWORK, - $ IWORK, INFO ) - IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'CGELSD', INFO, 0, ' ', M, - $ N, NRHS, -1, NB, ITYPE, NFAIL, - $ NERRS, NOUT ) -* * Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN @@ -827,10 +931,66 @@ $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * +* Test CGELSD +* +* CGELSD: Compute the minimum-norm solution X +* to min( norm( A * X - B ) ) using a +* divide and conquer SVD. +* + CALL XLAENV( 9, 25 ) +* + CALL CLACPY( 'Full', M, N, COPYA, LDA, A, LDA ) + CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, B, + $ LDB ) +* + SRNAMT = 'CGELSD' + CALL CGELSD( M, N, NRHS, A, LDA, B, LDB, S, + $ RCOND, CRANK, WORK, LWORK, RWORK, + $ IWORK, INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'CGELSD', INFO, 0, ' ', M, + $ N, NRHS, -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 15: Compute relative error in svd +* + IF( RANK.GT.0 ) THEN + CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / + $ SASUM( MNMIN, COPYS, 1 ) / + $ ( EPS*REAL( MNMIN ) ) + ELSE + RESULT( 15 ) = ZERO + END IF +* +* Test 16: Compute error in solution +* + CALL CLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, + $ LDWORK ) + CALL CQRT16( 'No transpose', M, N, NRHS, COPYA, + $ LDA, B, LDB, WORK, LDWORK, RWORK, + $ RESULT( 16 ) ) +* +* Test 17: Check norm of r'*A +* + RESULT( 17 ) = ZERO + IF( M.GT.CRANK ) + $ RESULT( 17 ) = CQRT17( 'No transpose', 1, M, + $ N, NRHS, COPYA, LDA, B, LDB, + $ COPYB, LDB, C, WORK, LWORK ) +* +* Test 18: Check if x is in the rowspace of A +* + RESULT( 18 ) = ZERO + IF( N.GT.CRANK ) + $ RESULT( 18 ) = CQRT14( 'No transpose', M, N, + $ NRHS, COPYA, LDA, B, LDB, + $ WORK, LWORK ) +* * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/lapack-netlib/TESTING/LIN/cerrls.f b/lapack-netlib/TESTING/LIN/cerrls.f index 48e44ad86..fca943918 100644 --- a/lapack-netlib/TESTING/LIN/cerrls.f +++ b/lapack-netlib/TESTING/LIN/cerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> CERRLS tests the error exits for the COMPLEX least squares -*> driver routines (CGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (CGELS, CGELST, CGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CGELS, CGELSD, CGELSS, CGELSY, CHKXER + EXTERNAL ALAESM, CHKXER, CGELS, CGELSD, CGELSS, CGELST, + $ CGELSY, CGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ INFOT = 8 CALL CGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'CGELS ', INFOT, NOUT, LERR, OK ) * +* CGELST +* + SRNAMT = 'CGELST' + INFOT = 1 + CALL CGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGELST', INFOT, NOUT, LERR, OK ) +* +* CGETSLS +* + SRNAMT = 'CGETSLS' + INFOT = 1 + CALL CGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'CGETSLS', INFOT, NOUT, LERR, OK ) +* * CGELSS * SRNAMT = 'CGELSS' diff --git a/lapack-netlib/TESTING/LIN/ddrvls.f b/lapack-netlib/TESTING/LIN/ddrvls.f index b64930c10..b3d07d67f 100644 --- a/lapack-netlib/TESTING/LIN/ddrvls.f +++ b/lapack-netlib/TESTING/LIN/ddrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> DDRVLS tests the least squares driver routines DGELS, DGETSLS, DGELSS, DGELSY, -*> and DGELSD. +*> DDRVLS tests the least squares driver routines DGELS, DGELST, +*> DGETSLS, DGELSS, DGELSY, and DGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, TWO, ZERO @@ -225,8 +225,8 @@ $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_DGELS, LWORK_DGETSLS, LWORK_DGELSS, - $ LWORK_DGELSY, LWORK_DGELSD + $ LWORK_DGELS, LWORK_DGELST, LWORK_DGETSLS, + $ LWORK_DGELSS, LWORK_DGELSY, LWORK_DGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DERRLS, DGELS, - $ DGELSD, DGELSS, DGELSY, DGEMM, DLACPY, - $ DLARNV, DLASRT, DQRT13, DQRT15, DQRT16, DSCAL, - $ XLAENV + $ DGELSD, DGELSS, DGELST, DGELSY, DGEMM, + $ DGETSLS, DLACPY, DLARNV, DQRT13, DQRT15, + $ DQRT16, DSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC DBLE, INT, LOG, MAX, MIN, SQRT + INTRINSIC DBLE, INT, MAX, MIN, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ CALL DGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_DGELS = INT ( WQ ( 1 ) ) +* Compute workspace needed for DGELST + CALL DGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_DGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for DGETSLS CALL DGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -378,9 +383,9 @@ * Compute LIWORK workspace needed for DGELSY and DGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGETSLS, - $ LWORK_DGELSY, LWORK_DGELSS, - $ LWORK_DGELSD ) + LWORK = MAX( LWORK, LWORK_DGELS, LWORK_DGELST, + $ LWORK_DGETSLS, LWORK_DGELSY, + $ LWORK_DGELSS, LWORK_DGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test DGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test DGELS -* * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for DGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for DGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,35 +517,42 @@ * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9999 )TRANS, M, + WRITE( NOUT, FMT = 9999 ) TRANS, M, $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test DGETSLS + END DO + END DO + END IF +* ===================================================== +* End test DGELS +* ===================================================== +* ===================================================== +* Begin test DGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'DGETSLS ' - CALL DGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'DGELST' + CALL DGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'DGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'DGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for DGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL DLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL DQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for DGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = DQRT17( TRANS, 1, M, N, + RESULT( 4 ) = DQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ * * Solving overdetermined system * - RESULT( 16 ) = DQRT14( TRANS, M, N, + RESULT( 4 ) = DQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test DGELST +* ===================================================== +* ===================================================== +* Begin test DGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL DQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL DLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL DSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL DGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL DLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'DGETSLS' + CALL DGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'DGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for DGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL DLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL DQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for DGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = DQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = DQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test DGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = DQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = DQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL DLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL DQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = DQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = DQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = DQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = DQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -826,6 +987,12 @@ NRUN = NRUN + 12 * 100 CONTINUE + + + + + + 110 CONTINUE 120 CONTINUE 130 CONTINUE diff --git a/lapack-netlib/TESTING/LIN/derrls.f b/lapack-netlib/TESTING/LIN/derrls.f index a1f74dec2..09d745238 100644 --- a/lapack-netlib/TESTING/LIN/derrls.f +++ b/lapack-netlib/TESTING/LIN/derrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> DERRLS tests the error exits for the DOUBLE PRECISION least squares -*> driver routines (DGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (DGELS, DGELST, DGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELSY + EXTERNAL ALAESM, CHKXER, DGELS, DGELSD, DGELSS, DGELST, + $ DGELSY, DGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ INFOT = 8 CALL DGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'DGELS ', INFOT, NOUT, LERR, OK ) * +* DGELST +* + SRNAMT = 'DGELST' + INFOT = 1 + CALL DGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGELST', INFOT, NOUT, LERR, OK ) +* +* DGETSLS +* + SRNAMT = 'DGETSLS' + INFOT = 1 + CALL DGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGETSLS', INFOT, NOUT, LERR, OK ) +* * DGELSS * SRNAMT = 'DGELSS' diff --git a/lapack-netlib/TESTING/LIN/sdrvls.f b/lapack-netlib/TESTING/LIN/sdrvls.f index b96451503..2baf9a3fb 100644 --- a/lapack-netlib/TESTING/LIN/sdrvls.f +++ b/lapack-netlib/TESTING/LIN/sdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> SDRVLS tests the least squares driver routines SGELS, SGETSLS, SGELSS, SGELSY, -*> and SGELSD. +*> SDRVLS tests the least squares driver routines SGELS, SGELST, +*> SGETSLS, SGELSS, SGELSY and SGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) REAL ONE, TWO, ZERO @@ -225,8 +225,8 @@ $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, - $ LWORK_SGELS, LWORK_SGETSLS, LWORK_SGELSS, - $ LWORK_SGELSY, LWORK_SGELSD + $ LWORK_SGELS, LWORK_SGELST, LWORK_SGETSLS, + $ LWORK_SGELSS, LWORK_SGELSY, LWORK_SGELSD REAL EPS, NORMA, NORMB, RCOND * .. * .. Local Arrays .. @@ -243,12 +243,12 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASVM, SAXPY, SERRLS, SGELS, - $ SGELSD, SGELSS, SGELSY, SGEMM, SLACPY, - $ SLARNV, SQRT13, SQRT15, SQRT16, SSCAL, - $ XLAENV, SGETSLS + $ SGELSD, SGELSS, SGELST, SGELSY, SGEMM, + $ SGETSLS, SLACPY, SLARNV, SQRT13, SQRT15, + $ SQRT16, SSCAL, XLAENV * .. * .. Intrinsic Functions .. - INTRINSIC INT, LOG, MAX, MIN, REAL, SQRT + INTRINSIC INT, MAX, MIN, REAL, SQRT * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -330,7 +330,8 @@ LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -357,6 +358,10 @@ CALL SGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) LWORK_SGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for SGELST + CALL SGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_SGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for SGETSLS CALL SGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ( 1 ), -1, INFO ) @@ -378,9 +383,9 @@ * Compute LIWORK workspace needed for SGELSY and SGELSD LIWORK = MAX( LIWORK, N, IWQ( 1 ) ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGETSLS, - $ LWORK_SGELSY, LWORK_SGELSS, - $ LWORK_SGELSD ) + LWORK = MAX( LWORK, LWORK_SGELS, LWORK_SGELST, + $ LWORK_SGETSLS, LWORK_SGELSY, + $ LWORK_SGELSS, LWORK_SGELSD ) END IF ENDDO ENDDO @@ -411,21 +416,26 @@ ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 110 -* +* ===================================================== +* Begin test SGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test SGELS -* * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -469,20 +479,27 @@ $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for SGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for SGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * RESULT( 2 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, @@ -500,7 +517,7 @@ * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -509,26 +526,33 @@ $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test SGETSLS + END DO + END DO + END IF +* ===================================================== +* End test SGELS +* ===================================================== +* ===================================================== +* Begin test SGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -563,31 +587,38 @@ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'SGETSLS ' - CALL SGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'SGELST' + CALL SGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'SGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'SGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for SGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL SLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL SQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, WORK, - $ RESULT( 15 ) ) + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for SGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * -* Solving LS system +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) * - RESULT( 16 ) = SQRT17( TRANS, 1, M, N, + RESULT( 4 ) = SQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -595,7 +626,7 @@ * * Solving overdetermined system * - RESULT( 16 ) = SQRT14( TRANS, M, N, + RESULT( 4 ) = SQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -603,21 +634,151 @@ * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 ) TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test SGELST +* ===================================================== +* ===================================================== +* Begin test SGETSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL SQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO IMB = 1, NNB + MB = NBVAL( IMB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'T' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL SLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL SSCAL( NCOLS*NRHS, + $ ONE / REAL( NCOLS ), + $ WORK, 1 ) + END IF + CALL SGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, ONE, + $ COPYA, LDA, WORK, LDWORK, + $ ZERO, B, LDB ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL SLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'SGETSLS' + CALL SGETSLS( TRANS, M, N, NRHS, + $ A, LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'SGETSLS', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for SGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL SLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL SQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for SGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = SQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = SQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, + $ B, LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 ) TRANS, + $ M, N, NRHS, MB, NB, ITYPE, + $ K, RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test SGETSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -662,37 +823,37 @@ $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = SQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = SQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 4 ) ) + $ WORK( M*NRHS+1 ), RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -716,38 +877,38 @@ * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 8 ) ) + $ WORK( M*NRHS+1 ), RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -776,45 +937,45 @@ $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL SAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = SASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = SASUM( MNMIN, S, 1 ) / $ SASUM( MNMIN, COPYS, 1 ) / $ ( EPS*REAL( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL SLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL SQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, - $ WORK( M*NRHS+1 ), RESULT( 12 ) ) + $ WORK( M*NRHS+1 ), RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = SQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = SQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = SQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = SQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 90 K = 3, 14 + DO 90 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/lapack-netlib/TESTING/LIN/serrls.f b/lapack-netlib/TESTING/LIN/serrls.f index e6ee4360f..6c4820066 100644 --- a/lapack-netlib/TESTING/LIN/serrls.f +++ b/lapack-netlib/TESTING/LIN/serrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> SERRLS tests the error exits for the REAL least squares -*> driver routines (SGELS, SGELSS, SGELSY, SGELSD). +*> driver routines (SGELS, SGELST, SGETSLS, SGELSS, SGELSY, SGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELSY + EXTERNAL ALAESM, CHKXER, SGELS, SGELSD, SGELSS, SGELST, + $ SGELSY, SGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ INFOT = 8 CALL SGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'DGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'SGELS ', INFOT, NOUT, LERR, OK ) * +* SGELST +* + SRNAMT = 'SGELST' + INFOT = 1 + CALL SGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGELST', INFOT, NOUT, LERR, OK ) +* +* SGETSLS +* + SRNAMT = 'SGETSLS' + INFOT = 1 + CALL SGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'SGETSLS', INFOT, NOUT, LERR, OK ) +* * SGELSS * SRNAMT = 'SGELSS' diff --git a/lapack-netlib/TESTING/LIN/zdrvls.f b/lapack-netlib/TESTING/LIN/zdrvls.f index 2eab97905..b21345d30 100644 --- a/lapack-netlib/TESTING/LIN/zdrvls.f +++ b/lapack-netlib/TESTING/LIN/zdrvls.f @@ -31,8 +31,8 @@ *> *> \verbatim *> -*> ZDRVLS tests the least squares driver routines ZGELS, ZGETSLS, ZGELSS, ZGELSY -*> and ZGELSD. +*> ZDRVLS tests the least squares driver routines ZGELS, ZGELST, +*> ZGETSLS, ZGELSS, ZGELSY and ZGELSD. *> \endverbatim * * Arguments: @@ -211,7 +211,7 @@ * * .. Parameters .. INTEGER NTESTS - PARAMETER ( NTESTS = 16 ) + PARAMETER ( NTESTS = 18 ) INTEGER SMLSIZ PARAMETER ( SMLSIZ = 25 ) DOUBLE PRECISION ONE, ZERO @@ -228,8 +228,8 @@ $ LWLSY, LWORK, M, MNMIN, N, NB, NCOLS, NERRS, $ NFAIL, NRHS, NROWS, NRUN, RANK, MB, $ MMAX, NMAX, NSMAX, LIWORK, LRWORK, - $ LWORK_ZGELS, LWORK_ZGETSLS, LWORK_ZGELSS, - $ LWORK_ZGELSY, LWORK_ZGELSD, + $ LWORK_ZGELS, LWORK_ZGELST, LWORK_ZGETSLS, + $ LWORK_ZGELSS, LWORK_ZGELSY, LWORK_ZGELSD, $ LRWORK_ZGELSY, LRWORK_ZGELSS, LRWORK_ZGELSD DOUBLE PRECISION EPS, NORMA, NORMB, RCOND * .. @@ -248,10 +248,10 @@ EXTERNAL DASUM, DLAMCH, ZQRT12, ZQRT14, ZQRT17 * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, DLASRT, XLAENV, - $ ZDSCAL, ZERRLS, ZGELS, ZGELSD, ZGELSS, - $ ZGELSY, ZGEMM, ZLACPY, ZLARNV, ZQRT13, ZQRT15, - $ ZQRT16, ZGETSLS + EXTERNAL ALAERH, ALAHD, ALASVM, DAXPY, ZERRLS, ZGELS, + $ ZGELSD, ZGELSS, ZGELST, ZGELSY, ZGEMM, + $ ZGETSLS, ZLACPY, ZLARNV, ZQRT13, ZQRT15, + $ ZQRT16, ZDSCAL, XLAENV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, INT, SQRT @@ -334,7 +334,8 @@ LIWORK = 1 * * Iterate through all test cases and compute necessary workspace -* sizes for ?GELS, ?GETSLS, ?GELSY, ?GELSS and ?GELSD routines. +* sizes for ?GELS, ?GELST, ?GETSLS, ?GELSY, ?GELSS and ?GELSD +* routines. * DO IM = 1, NM M = MVAL( IM ) @@ -361,6 +362,10 @@ CALL ZGELS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) LWORK_ZGELS = INT ( WQ( 1 ) ) +* Compute workspace needed for ZGELST + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, + $ B, LDB, WQ, -1, INFO ) + LWORK_ZGELST = INT ( WQ ( 1 ) ) * Compute workspace needed for ZGETSLS CALL ZGETSLS( TRANS, M, N, NRHS, A, LDA, $ B, LDB, WQ, -1, INFO ) @@ -390,9 +395,9 @@ LRWORK = MAX( LRWORK, LRWORK_ZGELSY, $ LRWORK_ZGELSS, LRWORK_ZGELSD ) * Compute LWORK workspace needed for all functions - LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGETSLS, - $ LWORK_ZGELSY, LWORK_ZGELSS, - $ LWORK_ZGELSD ) + LWORK = MAX( LWORK, LWORK_ZGELS, LWORK_ZGELST, + $ LWORK_ZGETSLS, LWORK_ZGELSY, + $ LWORK_ZGELSS, LWORK_ZGELSD ) END IF ENDDO ENDDO @@ -425,21 +430,26 @@ ITYPE = ( IRANK-1 )*3 + ISCALE IF( .NOT.DOTYPE( ITYPE ) ) $ GO TO 100 -* +* ===================================================== +* Begin test ZGELS +* ===================================================== IF( IRANK.EQ.1 ) THEN * -* Test ZGELS -* * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 40 INB = 1, NNB +* +* Loop for testing different block sizes. +* + DO INB = 1, NNB NB = NBVAL( INB ) CALL XLAENV( 1, NB ) CALL XLAENV( 3, NXVAL( INB ) ) * - DO 30 ITRAN = 1, 2 +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -484,15 +494,20 @@ $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 1: Check correctness of results +* for ZGELS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, $ LDA, B, LDB, C, LDB, RWORK, $ RESULT( 1 ) ) +* +* Test 2: Check correctness of results +* for ZGELS. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN @@ -515,7 +530,7 @@ * Print information about the tests that * did not pass the threshold. * - DO 20 K = 1, 2 + DO K = 1, 2 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) @@ -524,26 +539,34 @@ $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 20 CONTINUE + END DO NRUN = NRUN + 2 - 30 CONTINUE - 40 CONTINUE -* -* -* Test ZGETSLS + END DO + END DO + END IF +* ===================================================== +* End test ZGELS +* ===================================================== +* ===================================================== +* Begin test ZGELST +* ===================================================== + IF( IRANK.EQ.1 ) THEN * * Generate a matrix of scaling type ISCALE * CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, $ ISEED ) - DO 65 INB = 1, NNB - MB = NBVAL( INB ) - CALL XLAENV( 1, MB ) - DO 62 IMB = 1, NNB - NB = NBVAL( IMB ) - CALL XLAENV( 2, NB ) * - DO 60 ITRAN = 1, 2 +* Loop for testing different block sizes. +* + DO INB = 1, NNB + NB = NBVAL( INB ) + CALL XLAENV( 1, NB ) + CALL XLAENV( 3, NXVAL( INB ) ) +* +* Loop for testing non-transposed and transposed. +* + DO ITRAN = 1, 2 IF( ITRAN.EQ.1 ) THEN TRANS = 'N' NROWS = M @@ -560,9 +583,9 @@ IF( NCOLS.GT.0 ) THEN CALL ZLARNV( 2, ISEED, NCOLS*NRHS, $ WORK ) - CALL ZSCAL( NCOLS*NRHS, - $ CONE / DBLE( NCOLS ), WORK, - $ 1 ) + CALL ZDSCAL( NCOLS*NRHS, + $ ONE / DBLE( NCOLS ), WORK, + $ 1 ) END IF CALL ZGEMM( TRANS, 'No transpose', NROWS, $ NRHS, NCOLS, CONE, COPYA, LDA, @@ -578,31 +601,37 @@ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, B, LDB ) END IF - SRNAMT = 'ZGETSLS ' - CALL ZGETSLS( TRANS, M, N, NRHS, A, - $ LDA, B, LDB, WORK, LWORK, INFO ) + SRNAMT = 'ZGELST' + CALL ZGELST( TRANS, M, N, NRHS, A, LDA, B, + $ LDB, WORK, LWORK, INFO ) +* IF( INFO.NE.0 ) - $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, 0, + $ CALL ALAERH( PATH, 'ZGELST', INFO, 0, $ TRANS, M, N, NRHS, -1, NB, $ ITYPE, NFAIL, NERRS, $ NOUT ) * -* Check correctness of results +* Test 3: Check correctness of results +* for ZGELST, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) * - LDWORK = MAX( 1, NROWS ) IF( NROWS.GT.0 .AND. NRHS.GT.0 ) $ CALL ZLACPY( 'Full', NROWS, NRHS, $ COPYB, LDB, C, LDB ) CALL ZQRT16( TRANS, M, N, NRHS, COPYA, - $ LDA, B, LDB, C, LDB, WORK2, - $ RESULT( 15 ) ) + $ LDA, B, LDB, C, LDB, RWORK, + $ RESULT( 3 ) ) +* +* Test 4: Check correctness of results +* for ZGELST. * IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN * * Solving LS system * - RESULT( 16 ) = ZQRT17( TRANS, 1, M, N, + RESULT( 4 ) = ZQRT17( TRANS, 1, M, N, $ NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, $ LWORK ) @@ -610,7 +639,7 @@ * * Solving overdetermined system * - RESULT( 16 ) = ZQRT14( TRANS, M, N, + RESULT( 4 ) = ZQRT14( TRANS, M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) END IF @@ -618,21 +647,151 @@ * Print information about the tests that * did not pass the threshold. * - DO 50 K = 15, 16 + DO K = 3, 4 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) - WRITE( NOUT, FMT = 9997 )TRANS, M, - $ N, NRHS, MB, NB, ITYPE, K, + WRITE( NOUT, FMT = 9999 )TRANS, M, + $ N, NRHS, NB, ITYPE, K, $ RESULT( K ) NFAIL = NFAIL + 1 END IF - 50 CONTINUE + END DO NRUN = NRUN + 2 - 60 CONTINUE - 62 CONTINUE - 65 CONTINUE + END DO + END DO END IF +* ===================================================== +* End test ZGELST +* ===================================================== +* ===================================================== +* Begin test ZGELSTSLS +* ===================================================== + IF( IRANK.EQ.1 ) THEN +* +* Generate a matrix of scaling type ISCALE +* + CALL ZQRT13( ISCALE, M, N, COPYA, LDA, NORMA, + $ ISEED ) +* +* Loop for testing different block sizes MB. +* + DO INB = 1, NNB + MB = NBVAL( INB ) + CALL XLAENV( 1, MB ) +* +* Loop for testing different block sizes NB. +* + DO IMB = 1, NNB + NB = NBVAL( IMB ) + CALL XLAENV( 2, NB ) +* +* Loop for testing non-transposed +* and transposed. +* + DO ITRAN = 1, 2 + IF( ITRAN.EQ.1 ) THEN + TRANS = 'N' + NROWS = M + NCOLS = N + ELSE + TRANS = 'C' + NROWS = N + NCOLS = M + END IF + LDWORK = MAX( 1, NCOLS ) +* +* Set up a consistent rhs +* + IF( NCOLS.GT.0 ) THEN + CALL ZLARNV( 2, ISEED, NCOLS*NRHS, + $ WORK ) + CALL ZSCAL( NCOLS*NRHS, + $ CONE / DBLE( NCOLS ), + $ WORK, 1 ) + END IF + CALL ZGEMM( TRANS, 'No transpose', + $ NROWS, NRHS, NCOLS, CONE, + $ COPYA, LDA, WORK, LDWORK, + $ CZERO, B, LDB ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ B, LDB, COPYB, LDB ) +* +* Solve LS or overdetermined system +* + IF( M.GT.0 .AND. N.GT.0 ) THEN + CALL ZLACPY( 'Full', M, N, + $ COPYA, LDA, A, LDA ) + CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, B, LDB ) + END IF + SRNAMT = 'ZGETSLS ' + CALL ZGETSLS( TRANS, M, N, NRHS, A, + $ LDA, B, LDB, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'ZGETSLS ', INFO, + $ 0, TRANS, M, N, NRHS, + $ -1, NB, ITYPE, NFAIL, + $ NERRS, NOUT ) +* +* Test 5: Check correctness of results +* for ZGETSLS, compute the residual: +* RESID = norm(B - A*X) / +* / ( max(m,n) * norm(A) * norm(X) * EPS ) +* + IF( NROWS.GT.0 .AND. NRHS.GT.0 ) + $ CALL ZLACPY( 'Full', NROWS, NRHS, + $ COPYB, LDB, C, LDB ) + CALL ZQRT16( TRANS, M, N, NRHS, + $ COPYA, LDA, B, LDB, + $ C, LDB, WORK2, + $ RESULT( 5 ) ) +* +* Test 6: Check correctness of results +* for ZGETSLS. +* + IF( ( ITRAN.EQ.1 .AND. M.GE.N ) .OR. + $ ( ITRAN.EQ.2 .AND. M.LT.N ) ) THEN +* +* Solving LS system, compute: +* r = norm((B- A*X)**T * A) / +* / (norm(A)*norm(B)*max(M,N,NRHS)*EPS) +* + RESULT( 6 ) = ZQRT17( TRANS, 1, M, + $ N, NRHS, COPYA, LDA, + $ B, LDB, COPYB, LDB, + $ C, WORK, LWORK ) + ELSE +* +* Solving overdetermined system +* + RESULT( 6 ) = ZQRT14( TRANS, M, N, + $ NRHS, COPYA, LDA, B, + $ LDB, WORK, LWORK ) + END IF +* +* Print information about the tests that +* did not pass the threshold. +* + DO K = 5, 6 + IF( RESULT( K ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9997 )TRANS, + $ M, N, NRHS, MB, NB, ITYPE, K, + $ RESULT( K ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + 2 + END DO + END DO + END DO + END IF +* ===================================================== +* End test ZGELSTSLS +* ===================================================== * * Generate a matrix of scaling type ISCALE and rank * type IRANK. @@ -680,37 +839,37 @@ * * workspace used: 2*MNMIN+NB*NB+NB*MAX(N,NRHS) * -* Test 3: Compute relative error in svd +* Test 7: Compute relative error in svd * workspace: M*N + 4*MIN(M,N) + MAX(M,N) * - RESULT( 3 ) = ZQRT12( CRANK, CRANK, A, LDA, + RESULT( 7 ) = ZQRT12( CRANK, CRANK, A, LDA, $ COPYS, WORK, LWORK, RWORK ) * -* Test 4: Compute error in solution +* Test 8: Compute error in solution * workspace: M*NRHS + M * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 4 ) ) + $ RESULT( 8 ) ) * -* Test 5: Check norm of r'*A +* Test 9: Check norm of r'*A * workspace: NRHS*(M+N) * - RESULT( 5 ) = ZERO + RESULT( 9 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 5 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 6: Check if x is in the rowspace of A +* Test 10: Check if x is in the rowspace of A * workspace: (M+NRHS)*(N+2) * - RESULT( 6 ) = ZERO + RESULT( 10 ) = ZERO * IF( N.GT.CRANK ) - $ RESULT( 6 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -736,38 +895,38 @@ * workspace used: 3*min(m,n) + * max(2*min(m,n),nrhs,max(m,n)) * -* Test 7: Compute relative error in svd +* Test 11: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 7 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 7 ) = ZERO + RESULT( 11 ) = ZERO END IF * -* Test 8: Compute error in solution +* Test 12: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 8 ) ) + $ RESULT( 12 ) ) * -* Test 9: Check norm of r'*A +* Test 13: Check norm of r'*A * - RESULT( 9 ) = ZERO + RESULT( 13 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 9 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 10: Check if x is in the rowspace of A +* Test 14: Check if x is in the rowspace of A * - RESULT( 10 ) = ZERO + RESULT( 14 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 10 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * @@ -792,45 +951,45 @@ $ N, NRHS, -1, NB, ITYPE, NFAIL, $ NERRS, NOUT ) * -* Test 11: Compute relative error in svd +* Test 15: Compute relative error in svd * IF( RANK.GT.0 ) THEN CALL DAXPY( MNMIN, -ONE, COPYS, 1, S, 1 ) - RESULT( 11 ) = DASUM( MNMIN, S, 1 ) / + RESULT( 15 ) = DASUM( MNMIN, S, 1 ) / $ DASUM( MNMIN, COPYS, 1 ) / $ ( EPS*DBLE( MNMIN ) ) ELSE - RESULT( 11 ) = ZERO + RESULT( 15 ) = ZERO END IF * -* Test 12: Compute error in solution +* Test 16: Compute error in solution * CALL ZLACPY( 'Full', M, NRHS, COPYB, LDB, WORK, $ LDWORK ) CALL ZQRT16( 'No transpose', M, N, NRHS, COPYA, $ LDA, B, LDB, WORK, LDWORK, RWORK, - $ RESULT( 12 ) ) + $ RESULT( 16 ) ) * -* Test 13: Check norm of r'*A +* Test 17: Check norm of r'*A * - RESULT( 13 ) = ZERO + RESULT( 17 ) = ZERO IF( M.GT.CRANK ) - $ RESULT( 13 ) = ZQRT17( 'No transpose', 1, M, + $ RESULT( 17 ) = ZQRT17( 'No transpose', 1, M, $ N, NRHS, COPYA, LDA, B, LDB, $ COPYB, LDB, C, WORK, LWORK ) * -* Test 14: Check if x is in the rowspace of A +* Test 18: Check if x is in the rowspace of A * - RESULT( 14 ) = ZERO + RESULT( 18 ) = ZERO IF( N.GT.CRANK ) - $ RESULT( 14 ) = ZQRT14( 'No transpose', M, N, + $ RESULT( 18 ) = ZQRT14( 'No transpose', M, N, $ NRHS, COPYA, LDA, B, LDB, $ WORK, LWORK ) * * Print information about the tests that did not * pass the threshold. * - DO 80 K = 3, 14 + DO 80 K = 7, 18 IF( RESULT( K ).GE.THRESH ) THEN IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) $ CALL ALAHD( NOUT, PATH ) diff --git a/lapack-netlib/TESTING/LIN/zerrls.f b/lapack-netlib/TESTING/LIN/zerrls.f index 66e56c8c6..22f049ee0 100644 --- a/lapack-netlib/TESTING/LIN/zerrls.f +++ b/lapack-netlib/TESTING/LIN/zerrls.f @@ -22,7 +22,7 @@ *> \verbatim *> *> ZERRLS tests the error exits for the COMPLEX*16 least squares -*> driver routines (ZGELS, CGELSS, CGELSY, CGELSD). +*> driver routines (ZGELS, ZGELST, ZGETSLS, CGELSS, CGELSY, CGELSD). *> \endverbatim * * Arguments: @@ -83,7 +83,8 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELSY + EXTERNAL ALAESM, CHKXER, ZGELS, ZGELSD, ZGELSS, ZGELST, + $ ZGELSY, ZGETSLS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -130,10 +131,66 @@ INFOT = 8 CALL ZGELS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELS', INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGELS( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) CALL CHKXER( 'ZGELS ', INFOT, NOUT, LERR, OK ) * +* ZGELST +* + SRNAMT = 'ZGELST' + INFOT = 1 + CALL ZGELST( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGELST( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGELST( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGELST( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGELST( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGELST( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZGELST( 'N', 1, 1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGELST', INFOT, NOUT, LERR, OK ) +* +* ZGETSLS +* + SRNAMT = 'ZGETSLS' + INFOT = 1 + CALL ZGETSLS( '/', 0, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZGETSLS( 'N', -1, 0, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZGETSLS( 'N', 0, -1, 0, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZGETSLS( 'N', 0, 0, -1, A, 1, B, 1, W, 1, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZGETSLS( 'N', 2, 0, 0, A, 1, B, 2, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 2, 0, 0, A, 2, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZGETSLS( 'N', 0, 2, 0, A, 1, B, 1, W, 2, INFO ) + CALL CHKXER( 'ZGETSLS', INFOT, NOUT, LERR, OK ) +* * ZGELSS * SRNAMT = 'ZGELSS'