Merge pull request #3214 from martin-frbg/lapack-3.9.1hrt

Add new Householder Reconstruction functions from LAPACK 3.9.1
This commit is contained in:
Martin Kroeker 2021-05-02 23:40:03 +02:00 committed by GitHub
commit e72420e8c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
48 changed files with 9053 additions and 158 deletions

View File

@ -66,7 +66,7 @@ set(SLASRC
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
slarrv.f slartv.f slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slarz.f slarzb.f slarzt.f slasy2.f
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
@ -112,14 +112,14 @@ set(SLASRC
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
sgelqt.f sgelqt3.f sgemlqt.f sgelqt.f sgelqt3.f sgemlqt.f
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f sgetsls.f sgetsqrhrt.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
sgelq.f slaswlq.f slamswlq.f sgemlq.f sgelq.f slaswlq.f slamswlq.f sgemlq.f
stplqt.f stplqt2.f stpmlqt.f stplqt.f stplqt2.f stpmlqt.f
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
sgesvdq.f slaorhr_col_getrfnp.f sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -171,7 +171,7 @@ set(CLASRC
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
@ -209,14 +209,14 @@ set(CLASRC
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
cgelqt.f cgelqt3.f cgemlqt.f cgelqt.f cgelqt3.f cgemlqt.f
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f cgetsls.f cgetsqrhrt.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
cgelq.f claswlq.f clamswlq.f cgemlq.f cgelq.f claswlq.f clamswlq.f cgemlq.f
ctplqt.f ctplqt2.f ctpmlqt.f ctplqt.f ctplqt2.f ctpmlqt.f
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
cungtsqr.f cunhr_col.f ) cungtsqr.f cungtsqr_row.f cunhr_col.f )
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@ -253,7 +253,7 @@ set(DLASRC
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
dlargv.f dlarrv.f dlartv.f dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlarz.f dlarzb.f dlarzt.f dlasy2.f
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
@ -300,14 +300,14 @@ set(DLASRC
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
dgelqt.f dgelqt3.f dgemlqt.f dgelqt.f dgelqt3.f dgemlqt.f
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f dgetsls.f dgetsqrhrt.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
dtplqt.f dtplqt2.f dtpmlqt.f dtplqt.f dtplqt2.f dtpmlqt.f
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f ) dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -360,7 +360,7 @@ set(ZLASRC
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
zlarfg.f zlarfgp.f zlarft.f zlarfg.f zlarfgp.f zlarft.f
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
@ -402,13 +402,13 @@ set(ZLASRC
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
ztplqt.f ztplqt2.f ztpmlqt.f ztplqt.f ztplqt2.f ztpmlqt.f
zgelqt.f zgelqt3.f zgemlqt.f zgelqt.f zgelqt3.f zgemlqt.f
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f zgetsls.f zgetsqrhrt.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
zungtsqr.f zunhr_col.f) zungtsqr.f zungtsqr_row.f zunhr_col.f)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

View File

@ -114,6 +114,8 @@ set(CSRC
lapacke_cgetrs_work.c lapacke_cgetrs_work.c
lapacke_cgetsls.c lapacke_cgetsls.c
lapacke_cgetsls_work.c lapacke_cgetsls_work.c
lapacke_cgetsqrhrt.c
lapacke_cgetsqrhrt_work.c
lapacke_cggbak.c lapacke_cggbak.c
lapacke_cggbak_work.c lapacke_cggbak_work.c
lapacke_cggbal.c lapacke_cggbal.c
@ -590,6 +592,8 @@ set(CSRC
lapacke_cungrq_work.c lapacke_cungrq_work.c
lapacke_cungtr.c lapacke_cungtr.c
lapacke_cungtr_work.c lapacke_cungtr_work.c
lapacke_cungtsqr_row.c
lapacke_cungtsqr_row_work.c
lapacke_cunmbr.c lapacke_cunmbr.c
lapacke_cunmbr_work.c lapacke_cunmbr_work.c
lapacke_cunmhr.c lapacke_cunmhr.c
@ -735,6 +739,8 @@ set(DSRC
lapacke_dgetrs_work.c lapacke_dgetrs_work.c
lapacke_dgetsls.c lapacke_dgetsls.c
lapacke_dgetsls_work.c lapacke_dgetsls_work.c
lapacke_dgetsqrhrt.c
lapacke_dgetsqrhrt_work.c
lapacke_dggbak.c lapacke_dggbak.c
lapacke_dggbak_work.c lapacke_dggbak_work.c
lapacke_dggbal.c lapacke_dggbal.c
@ -862,6 +868,8 @@ set(DSRC
lapacke_dorgrq_work.c lapacke_dorgrq_work.c
lapacke_dorgtr.c lapacke_dorgtr.c
lapacke_dorgtr_work.c lapacke_dorgtr_work.c
lapacke_dorgtsqr_row.c
lapacke_dorgtsqr_row_work.c
lapacke_dormbr.c lapacke_dormbr.c
lapacke_dormbr_work.c lapacke_dormbr_work.c
lapacke_dormhr.c lapacke_dormhr.c
@ -1309,6 +1317,8 @@ set(SSRC
lapacke_sgetrs_work.c lapacke_sgetrs_work.c
lapacke_sgetsls.c lapacke_sgetsls.c
lapacke_sgetsls_work.c lapacke_sgetsls_work.c
lapacke_sgetsqrhrt.c
lapacke_sgetsqrhrt_work.c
lapacke_sggbak.c lapacke_sggbak.c
lapacke_sggbak_work.c lapacke_sggbak_work.c
lapacke_sggbal.c lapacke_sggbal.c
@ -1435,6 +1445,8 @@ set(SSRC
lapacke_sorgrq_work.c lapacke_sorgrq_work.c
lapacke_sorgtr.c lapacke_sorgtr.c
lapacke_sorgtr_work.c lapacke_sorgtr_work.c
lapacke_sorgtsqr_row.c
lapacke_sorgtsqr_row_work.c
lapacke_sormbr.c lapacke_sormbr.c
lapacke_sormbr_work.c lapacke_sormbr_work.c
lapacke_sormhr.c lapacke_sormhr.c
@ -1877,6 +1889,8 @@ set(ZSRC
lapacke_zgetrs_work.c lapacke_zgetrs_work.c
lapacke_zgetsls.c lapacke_zgetsls.c
lapacke_zgetsls_work.c lapacke_zgetsls_work.c
lapacke_zgetsqrhrt.c
lapacke_zgetsqrhrt_work.c
lapacke_zggbak.c lapacke_zggbak.c
lapacke_zggbak_work.c lapacke_zggbak_work.c
lapacke_zggbal.c lapacke_zggbal.c
@ -2351,6 +2365,8 @@ set(ZSRC
lapacke_zungrq_work.c lapacke_zungrq_work.c
lapacke_zungtr.c lapacke_zungtr.c
lapacke_zungtr_work.c lapacke_zungtr_work.c
lapacke_zungtsqr_row.c
lapacke_zungtsqr_row_work.c
lapacke_zunmbr.c lapacke_zunmbr.c
lapacke_zunmbr_work.c lapacke_zunmbr_work.c
lapacke_zunmhr.c lapacke_zunmhr.c

View File

@ -2941,6 +2941,42 @@ void LAPACK_zgetsls(
lapack_complex_double* work, lapack_int const* lwork, lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info ); lapack_int* info );
#define LAPACK_cgetsqrhrt LAPACK_GLOBAL(cgetsqrhrt,CGETSQRHRT)
void LAPACK_cgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
lapack_complex_float* A, lapack_int const* lda,
lapack_complex_float* T, lapack_int const* ldt,
lapack_complex_float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_dgetsqrhrt LAPACK_GLOBAL(dgetsqrhrt,DGETSQRHRT)
void LAPACK_dgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
double* A, lapack_int const* lda,
double* T, lapack_int const* ldt,
double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_sgetsqrhrt LAPACK_GLOBAL(sgetsqrhrt,SGETSQRHRT)
void LAPACK_sgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
float* A, lapack_int const* lda,
float* T, lapack_int const* ldt,
float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_zgetsqrhrt LAPACK_GLOBAL(zgetsqrhrt,ZGETSQRHRT)
void LAPACK_zgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
lapack_complex_double* A, lapack_int const* lda,
lapack_complex_double* T, lapack_int const* ldt,
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK) #define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK)
void LAPACK_cggbak( void LAPACK_cggbak(
char const* job, char const* side, char const* job, char const* side,
@ -7251,6 +7287,24 @@ void LAPACK_sorgtr(
float* work, lapack_int const* lwork, float* work, lapack_int const* lwork,
lapack_int* info ); lapack_int* info );
#define LAPACK_dorgtsqr_row LAPACK_GLOBAL(dorgtsqr_row,DORGTSQR_ROW)
void LAPACK_dorgtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
double* A, lapack_int const* lda,
double const* T, lapack_int const* ldt,
double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_sorgtsqr_row LAPACK_GLOBAL(sorgtsqr_row,SORGTSQR_ROW)
void LAPACK_sorgtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
float* A, lapack_int const* lda,
float const* T, lapack_int const* ldt,
float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR) #define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR)
void LAPACK_dormbr( void LAPACK_dormbr(
char const* vect, char const* side, char const* trans, char const* vect, char const* side, char const* trans,
@ -13540,6 +13594,24 @@ void LAPACK_zungtr(
lapack_complex_double* work, lapack_int const* lwork, lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info ); lapack_int* info );
#define LAPACK_cungtsqr_row LAPACK_GLOBAL(cungtsqr_row,CUNGTSQR_ROW)
void LAPACK_cungtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
lapack_complex_float* A, lapack_int const* lda,
lapack_complex_float const* T, lapack_int const* ldt,
lapack_complex_float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_zungtsqr_row LAPACK_GLOBAL(zungtsqr_row,ZUNGTSQR_ROW)
void LAPACK_zungtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
lapack_complex_double* A, lapack_int const* lda,
lapack_complex_double const* T, lapack_int const* ldt,
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR) #define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR)
void LAPACK_cunmbr( void LAPACK_cunmbr(
char const* vect, char const* side, char const* trans, char const* vect, char const* side, char const* trans,

View File

@ -2598,6 +2598,15 @@ lapack_int LAPACKE_sorgtr( int matrix_layout, char uplo, lapack_int n, float* a,
lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a, lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a,
lapack_int lda, const double* tau ); lapack_int lda, const double* tau );
lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt );
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt );
lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans, lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const float* a, lapack_int lda, const float* tau, const float* a, lapack_int lda, const float* tau,
@ -4577,6 +4586,15 @@ lapack_int LAPACKE_zungtr( int matrix_layout, char uplo, lapack_int n,
lapack_complex_double* a, lapack_int lda, lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* tau ); const lapack_complex_double* tau );
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt );
lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt );
lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans, lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans,
lapack_int m, lapack_int n, lapack_int k, lapack_int m, lapack_int n, lapack_int k,
const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* a, lapack_int lda,
@ -7880,6 +7898,19 @@ lapack_int LAPACKE_dorgtr_work( int matrix_layout, char uplo, lapack_int n,
double* a, lapack_int lda, const double* tau, double* a, lapack_int lda, const double* tau,
double* work, lapack_int lwork ); double* work, lapack_int lwork );
lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt,
float* work, lapack_int lwork );
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt,
double* work, lapack_int lwork );
lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side, lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side,
char trans, lapack_int m, lapack_int n, char trans, lapack_int m, lapack_int n,
lapack_int k, const float* a, lapack_int lda, lapack_int k, const float* a, lapack_int lda,
@ -10281,6 +10312,19 @@ lapack_int LAPACKE_zungtr_work( int matrix_layout, char uplo, lapack_int n,
const lapack_complex_double* tau, const lapack_complex_double* tau,
lapack_complex_double* work, lapack_int lwork ); lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_cunmbr_work( int matrix_layout, char vect, char side, lapack_int LAPACKE_cunmbr_work( int matrix_layout, char vect, char side,
char trans, lapack_int m, lapack_int n, char trans, lapack_int m, lapack_int n,
lapack_int k, const lapack_complex_float* a, lapack_int k, const lapack_complex_float* a,
@ -12026,6 +12070,44 @@ lapack_int LAPACKE_zgetsls_work( int matrix_layout, char trans, lapack_int m,
lapack_complex_double* b, lapack_int ldb, lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* work, lapack_int lwork ); lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt );
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt );
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt );
lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt );
lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt,
float* work, lapack_int lwork );
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt,
double* work, lapack_int lwork );
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n, lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,
float* a, lapack_int lda, float* w ); float* a, lapack_int lda, float* w );
lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n, lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,

View File

@ -162,6 +162,8 @@ lapacke_cgetrs.o \
lapacke_cgetrs_work.o \ lapacke_cgetrs_work.o \
lapacke_cgetsls.o \ lapacke_cgetsls.o \
lapacke_cgetsls_work.o \ lapacke_cgetsls_work.o \
lapacke_cgetsqrhrt.o \
lapacke_cgetsqrhrt_work.o \
lapacke_cggbak.o \ lapacke_cggbak.o \
lapacke_cggbak_work.o \ lapacke_cggbak_work.o \
lapacke_cggbal.o \ lapacke_cggbal.o \
@ -634,6 +636,8 @@ lapacke_cungrq.o \
lapacke_cungrq_work.o \ lapacke_cungrq_work.o \
lapacke_cungtr.o \ lapacke_cungtr.o \
lapacke_cungtr_work.o \ lapacke_cungtr_work.o \
lapacke_cungtsqr_row.o \
lapacke_cungtsqr_row_work.o \
lapacke_cunmbr.o \ lapacke_cunmbr.o \
lapacke_cunmbr_work.o \ lapacke_cunmbr_work.o \
lapacke_cunmhr.o \ lapacke_cunmhr.o \
@ -778,6 +782,8 @@ lapacke_dgetrs.o \
lapacke_dgetrs_work.o \ lapacke_dgetrs_work.o \
lapacke_dgetsls.o \ lapacke_dgetsls.o \
lapacke_dgetsls_work.o \ lapacke_dgetsls_work.o \
lapacke_dgetsqrhrt.o \
lapacke_dgetsqrhrt_work.o \
lapacke_dggbak.o \ lapacke_dggbak.o \
lapacke_dggbak_work.o \ lapacke_dggbak_work.o \
lapacke_dggbal.o \ lapacke_dggbal.o \
@ -900,6 +906,8 @@ lapacke_dorgrq.o \
lapacke_dorgrq_work.o \ lapacke_dorgrq_work.o \
lapacke_dorgtr.o \ lapacke_dorgtr.o \
lapacke_dorgtr_work.o \ lapacke_dorgtr_work.o \
lapacke_dorgtsqr_row.o \
lapacke_dorgtsqr_row_work.o \
lapacke_dormbr.o \ lapacke_dormbr.o \
lapacke_dormbr_work.o \ lapacke_dormbr_work.o \
lapacke_dormhr.o \ lapacke_dormhr.o \
@ -1348,6 +1356,8 @@ lapacke_sgetrs.o \
lapacke_sgetrs_work.o \ lapacke_sgetrs_work.o \
lapacke_sgetsls.o \ lapacke_sgetsls.o \
lapacke_sgetsls_work.o \ lapacke_sgetsls_work.o \
lapacke_sgetsqrhrt.o \
lapacke_sgetsqrhrt_work.o \
lapacke_sggbak.o \ lapacke_sggbak.o \
lapacke_sggbak_work.o \ lapacke_sggbak_work.o \
lapacke_sggbal.o \ lapacke_sggbal.o \
@ -1468,6 +1478,8 @@ lapacke_sorgrq.o \
lapacke_sorgrq_work.o \ lapacke_sorgrq_work.o \
lapacke_sorgtr.o \ lapacke_sorgtr.o \
lapacke_sorgtr_work.o \ lapacke_sorgtr_work.o \
lapacke_sorgtsqr_row.o \
lapacke_sorgtsqr_row_work.o \
lapacke_sormbr.o \ lapacke_sormbr.o \
lapacke_sormbr_work.o \ lapacke_sormbr_work.o \
lapacke_sormhr.o \ lapacke_sormhr.o \
@ -1908,6 +1920,8 @@ lapacke_zgetrs.o \
lapacke_zgetrs_work.o \ lapacke_zgetrs_work.o \
lapacke_zgetsls.o \ lapacke_zgetsls.o \
lapacke_zgetsls_work.o \ lapacke_zgetsls_work.o \
lapacke_zgetsqrhrt.o \
lapacke_zgetsqrhrt_work.o \
lapacke_zggbak.o \ lapacke_zggbak.o \
lapacke_zggbak_work.o \ lapacke_zggbak_work.o \
lapacke_zggbal.o \ lapacke_zggbal.o \
@ -2380,6 +2394,8 @@ lapacke_zungrq.o \
lapacke_zungrq_work.o \ lapacke_zungrq_work.o \
lapacke_zungtr.o \ lapacke_zungtr.o \
lapacke_zungtr_work.o \ lapacke_zungtr_work.o \
lapacke_zungtsqr_row.o \
lapacke_zungtsqr_row_work.o \
lapacke_zunmbr.o \ lapacke_zunmbr.o \
lapacke_zunmbr_work.o \ lapacke_zunmbr_work.o \
lapacke_zunmhr.o \ lapacke_zunmhr.o \

View File

@ -0,0 +1,80 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function cgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
lapack_complex_float* work = NULL;
lapack_complex_float work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -7;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */
work = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", info );
}
return info;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_float* a_t = NULL;
lapack_int ldt_t = MAX(1,nb2);
lapack_complex_float* t_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
return info;
}
if( ldt < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
LAPACKE_cge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
}
return info;
}

View File

@ -0,0 +1,83 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function cungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
lapack_complex_float* work = NULL;
lapack_complex_float work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_cungtsqr_row", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -6;
}
if( LAPACKE_cge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
return -8;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */
work = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_cungtsqr_row", info );
}
return info;
}

View File

@ -0,0 +1,109 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork )
{
lapack_int info = 0;
if (matrix_layout == LAPACK_COL_MAJOR) {
/* Call LAPACK function and adjust info */
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
work, &lwork, &info);
if (info < 0) {
info = info - 1;
}
} else if (matrix_layout == LAPACK_ROW_MAJOR) {
lapack_int lda_t = MAX(1,m);
lapack_complex_float* a_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -7;
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
return info;
}
lapack_int ldt_t = MAX(1,nb);
lapack_complex_float* t_t = NULL;
/* Check leading dimension(s) */
if( ldt < n ) {
info = -9;
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
LAPACKE_cge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t );
/* Call LAPACK function and adjust info */
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
}
return info;
}

View File

@ -0,0 +1,79 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function dgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
double* work = NULL;
double work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -7;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = (lapack_int)work_query;
/* Allocate memory for work arrays */
work = (double*)LAPACKE_malloc( sizeof(double) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", info );
}
return info;
}

View File

@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function dgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt,
double* work, lapack_int lwork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
double* a_t = NULL;
lapack_int ldt_t = MAX(1,nb2);
double* t_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
return info;
}
if( ldt < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
LAPACKE_dge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
}
return info;
}

View File

@ -0,0 +1,82 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function dorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
double* work = NULL;
double work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -6;
}
if( LAPACKE_dge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
return -8;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = (lapack_int)work_query;
/* Allocate memory for work arrays */
work = (double*)LAPACKE_malloc( sizeof(double) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", info );
}
return info;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function dorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt,
double* work, lapack_int lwork )
{
lapack_int info = 0;
if (matrix_layout == LAPACK_COL_MAJOR) {
/* Call LAPACK function and adjust info */
LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
work, &lwork, &info);
if (info < 0) {
info = info - 1;
}
} else if (matrix_layout == LAPACK_ROW_MAJOR) {
lapack_int lda_t = MAX(1,m);
double* a_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -7;
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
return info;
}
lapack_int ldt_t = MAX(1,nb);
double* t_t = NULL;
/* Check leading dimension(s) */
if( ldt < n ) {
info = -9;
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t );
/* Call LAPACK function and adjust info */
LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
}
return info;
}

View File

@ -0,0 +1,79 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function sgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
float* work = NULL;
float work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -7;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_sgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = (lapack_int)work_query;
/* Allocate memory for work arrays */
work = (float*)LAPACKE_malloc( sizeof(float) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_sgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt", info );
}
return info;
}

View File

@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function sgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt,
float* work, lapack_int lwork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
float* a_t = NULL;
lapack_int ldt_t = MAX(1,nb2);
float* t_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info );
return info;
}
if( ldt < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
LAPACKE_sge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info );
}
return info;
}

View File

@ -0,0 +1,82 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function sorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
float* work = NULL;
float work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -6;
}
if( LAPACKE_sge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
return -8;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_sorgtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = (lapack_int)work_query;
/* Allocate memory for work arrays */
work = (float*)LAPACKE_malloc( sizeof(float) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_sorgtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row", info );
}
return info;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function sorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt,
float* work, lapack_int lwork )
{
lapack_int info = 0;
if (matrix_layout == LAPACK_COL_MAJOR) {
/* Call LAPACK function and adjust info */
LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
work, &lwork, &info);
if (info < 0) {
info = info - 1;
}
} else if (matrix_layout == LAPACK_ROW_MAJOR) {
lapack_int lda_t = MAX(1,m);
float* a_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -7;
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info );
return info;
}
lapack_int ldt_t = MAX(1,nb);
float* t_t = NULL;
/* Check leading dimension(s) */
if( ldt < n ) {
info = -9;
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
LAPACKE_sge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t );
/* Call LAPACK function and adjust info */
LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info );
}
return info;
}

View File

@ -0,0 +1,80 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function zgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
lapack_complex_double* work = NULL;
lapack_complex_double work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -7;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_zgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = LAPACK_Z2INT( work_query );
/* Allocate memory for work arrays */
work = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_zgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt", info );
}
return info;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function zgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_double* a_t = NULL;
lapack_int ldt_t = MAX(1,nb2);
lapack_complex_double* t_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info );
return info;
}
if( ldt < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
LAPACKE_zge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info );
}
return info;
}

View File

@ -0,0 +1,83 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function zungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt )
{
lapack_int info = 0;
lapack_int lwork = -1;
lapack_complex_double* work = NULL;
lapack_complex_double work_query;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_zungtsqr_row", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -6;
}
if( LAPACKE_zge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
return -8;
}
}
#endif
/* Query optimal working array(s) size */
info = LAPACKE_zungtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, &work_query, lwork );
if( info != 0 ) {
goto exit_level_0;
}
lwork = LAPACK_Z2INT( work_query );
/* Allocate memory for work arrays */
work = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_zungtsqr_row_work( matrix_layout, m, n, mb, nb,
a, lda, t, ldt, work, lwork );
/* Release memory and exit */
LAPACKE_free( work );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_zungtsqr_row", info );
}
return info;
}

View File

@ -0,0 +1,109 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function zungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork )
{
lapack_int info = 0;
if (matrix_layout == LAPACK_COL_MAJOR) {
/* Call LAPACK function and adjust info */
LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
work, &lwork, &info);
if (info < 0) {
info = info - 1;
}
} else if (matrix_layout == LAPACK_ROW_MAJOR) {
lapack_int lda_t = MAX(1,m);
lapack_complex_double* a_t = NULL;
/* Check leading dimension(s) */
if( lda < n ) {
info = -7;
LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info );
return info;
}
lapack_int ldt_t = MAX(1,nb);
lapack_complex_double* t_t = NULL;
/* Check leading dimension(s) */
if( ldt < n ) {
info = -9;
LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info );
return info;
}
/* Query optimal working array(s) size if requested */
if( lwork == -1 ) {
LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
work, &lwork, &info );
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
t_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,n) );
if( t_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
/* Transpose input matrices */
LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
LAPACKE_zge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t );
/* Call LAPACK function and adjust info */
LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
work, &lwork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( t_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info );
}
return info;
}

View File

@ -135,14 +135,14 @@ SLASRC_O = \
slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \ slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \
slarrv.o slartv.o \ slarrv.o slartv.o \
slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \
slasyf_rk.o \ slasyf_rk.o \
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \ slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \ slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \ sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
sorgrq.o sorgtr.o sorgtsqr.o sorm2l.o sorm2r.o sorm22.o \ sorgrq.o sorgtr.o sorgtsqr.o sorgtsqr_row.o sorm2l.o sorm2r.o sorm22.o \
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \ sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \ sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
spbstf.o spbsv.o spbsvx.o \ spbstf.o spbsv.o spbsvx.o \
@ -181,7 +181,7 @@ SLASRC_O = \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o \ stpqrt.o stpqrt2.o stpmqrt.o stprfb.o \
sgelqt.o sgelqt3.o sgemlqt.o \ sgelqt.o sgelqt3.o sgemlqt.o \
sgetsls.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \ sgetsls.o sgetsqrhrt.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \
sgelq.o slaswlq.o slamswlq.o sgemlq.o \ sgelq.o slaswlq.o slamswlq.o sgemlq.o \
stplqt.o stplqt2.o stpmlqt.o \ stplqt.o stplqt2.o stpmlqt.o \
sorhr_col.o slaorhr_col_getrfnp.o slaorhr_col_getrfnp2.o \ sorhr_col.o slaorhr_col_getrfnp.o slaorhr_col_getrfnp2.o \
@ -250,7 +250,7 @@ CLASRC_O = \
claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \ claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \
claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \ claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \ clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \
clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \ claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \
@ -278,7 +278,7 @@ CLASRC_O = \
ctptrs.o ctrcon.o ctrevc.o ctrevc3.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \ ctptrs.o ctrcon.o ctrevc.o ctrevc3.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrzf.o cung2l.o cung2r.o \ ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrzf.o cung2l.o cung2r.o \
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \ cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
cungrq.o cungtr.o cungtsqr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o cunm22.o \ cungrq.o cungtr.o cungtsqr.o cungtsqr_row.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o cunm22.o \
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \ cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \ cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \
chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \ chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \
@ -289,7 +289,7 @@ CLASRC_O = \
cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o \ ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o \
cgelqt.o cgelqt3.o cgemlqt.o \ cgelqt.o cgelqt3.o cgemlqt.o \
cgetsls.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \ cgetsls.o cgetsqrhrt.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \
cgelq.o claswlq.o clamswlq.o cgemlq.o \ cgelq.o claswlq.o clamswlq.o cgemlq.o \
ctplqt.o ctplqt2.o ctpmlqt.o \ ctplqt.o ctplqt2.o ctpmlqt.o \
cunhr_col.o claunhr_col_getrfnp.o claunhr_col_getrfnp2.o \ cunhr_col.o claunhr_col_getrfnp.o claunhr_col_getrfnp2.o \
@ -342,14 +342,14 @@ DLASRC_O = \
dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \
dlargv.o dlarrv.o dlartv.o \ dlargv.o dlarrv.o dlartv.o \
dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \
dlasyf.o dlasyf_rook.o dlasyf_rk.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \ dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
dorgrq.o dorgtr.o dorgtsqr.o dorm2l.o dorm2r.o dorm22.o \ dorgrq.o dorgtr.o dorgtsqr.o dorgtsqr_row.o dorm2l.o dorm2r.o dorm22.o \
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
dpbstf.o dpbsv.o dpbsvx.o \ dpbstf.o dpbsv.o dpbsvx.o \
@ -389,7 +389,7 @@ DLASRC_O = \
dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o \ dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o \
dgelqt.o dgelqt3.o dgemlqt.o \ dgelqt.o dgelqt3.o dgemlqt.o \
dgetsls.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \ dgetsls.o dgetsqrhrt.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \
dgelq.o dlaswlq.o dlamswlq.o dgemlq.o \ dgelq.o dlaswlq.o dlamswlq.o dgemlq.o \
dtplqt.o dtplqt2.o dtpmlqt.o \ dtplqt.o dtplqt2.o dtpmlqt.o \
dorhr_col.o dlaorhr_col_getrfnp.o dlaorhr_col_getrfnp2.o \ dorhr_col.o dlaorhr_col_getrfnp.o dlaorhr_col_getrfnp2.o \
@ -455,7 +455,7 @@ ZLASRC_O = \
zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \ zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \
zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \ zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \
zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
zlarcm.o zlarf.o zlarfb.o \ zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o \
zlarfg.o zlarft.o zlarfgp.o \ zlarfg.o zlarft.o zlarfgp.o \
zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
@ -484,7 +484,7 @@ ZLASRC_O = \
ztptrs.o ztrcon.o ztrevc.o ztrevc3.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \ ztptrs.o ztrcon.o ztrevc.o ztrevc3.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrzf.o zung2l.o \ ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrzf.o zung2l.o \
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \ zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
zungrq.o zungtr.o zungtsqr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o zunm22.o \ zungrq.o zungtr.o zungtsqr.o zungtsqr_row.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o zunm22.o \
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
zunmtr.o zupgtr.o \ zunmtr.o zupgtr.o \
zupmtr.o izmax1.o dzsum1.o zstemr.o \ zupmtr.o izmax1.o dzsum1.o zstemr.o \
@ -498,7 +498,7 @@ ZLASRC_O = \
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o \ ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o \
ztplqt.o ztplqt2.o ztpmlqt.o \ ztplqt.o ztplqt2.o ztpmlqt.o \
zgelqt.o zgelqt3.o zgemlqt.o \ zgelqt.o zgelqt3.o zgemlqt.o \
zgetsls.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \ zgetsls.o zgetsqrhrt.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \
zgelq.o zlaswlq.o zlamswlq.o zgemlq.o \ zgelq.o zlaswlq.o zlamswlq.o zgemlq.o \
zunhr_col.o zlaunhr_col_getrfnp.o zlaunhr_col_getrfnp2.o \ zunhr_col.o zlaunhr_col_getrfnp.o zlaunhr_col_getrfnp2.o \
zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \

View File

@ -0,0 +1,349 @@
*> \brief \b CGETSQRHRT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CGETSQRHRT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgetsqrhrt.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgetsqrhrt.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgetsqrhrt.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE CGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CGETSQRHRT computes a NB2-sized column blocked QR-factorization
*> of a complex M-by-N matrix A with M >= N,
*>
*> A = Q * R.
*>
*> The routine uses internally a NB1-sized column blocked and MB1-sized
*> row blocked TSQR-factorization and perfors the reconstruction
*> of the Householder vectors from the TSQR output. The routine also
*> converts the R_tsqr factor from the TSQR-factorization output into
*> the R factor that corresponds to the Householder QR-factorization,
*>
*> A = Q_tsqr * R_tsqr = Q * R.
*>
*> The output Q and R factors are stored in the same format as in CGEQRT
*> (Q is in blocked compact WY-representation). See the documentation
*> of CGEQRT for more details on the format.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> The row block size to be used in the blocked TSQR.
*> MB1 > N.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> The column block size to be used in the blocked TSQR.
*> N >= NB1 >= 1.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> The block size to be used in the blocked QR that is
*> output. NB2 >= 1.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX*16 array, dimension (LDA,N)
*>
*> On entry: an M-by-N matrix A.
*>
*> On exit:
*> a) the elements on and above the diagonal
*> of the array contain the N-by-N upper-triangular
*> matrix R corresponding to the Householder QR;
*> b) the elements below the diagonal represent Q by
*> the columns of blocked V (compact WY-representation).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] T
*> \verbatim
*> T is COMPLEX array, dimension (LDT,N))
*> The upper triangular block reflectors stored in compact form
*> as a sequence of upper triangular blocks.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= NB2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) COMPLEX array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ),
*> where
*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)),
*> NB1LOCAL = MIN(NB1,N).
*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL,
*> LW1 = NB1LOCAL * N,
*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ),
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup comlpexOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
COMPLEX A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX CONE
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT,
$ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS
* ..
* .. External Subroutines ..
EXTERNAL CCOPY, CLATSQR, CUNGTSQR_ROW, CUNHR_COL,
$ XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, REAL, CMPLX, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB1.LE.N ) THEN
INFO = -3
ELSE IF( NB1.LT.1 ) THEN
INFO = -4
ELSE IF( NB2.LT.1 ) THEN
INFO = -5
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -7
ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN
INFO = -9
ELSE
*
* Test the input LWORK for the dimension of the array WORK.
* This workspace is used to store array:
* a) Matrix T and WORK for CLATSQR;
* b) N-by-N upper-triangular factor R_tsqr;
* c) Matrix T and array WORK for CUNGTSQR_ROW;
* d) Diagonal D for CUNHR_COL.
*
IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN
INFO = -11
ELSE
*
* Set block size for column blocks
*
NB1LOCAL = MIN( NB1, N )
*
NUM_ALL_ROW_BLOCKS = MAX( 1,
$ CEILING( REAL( M - N ) / REAL( MB1 - N ) ) )
*
* Length and leading dimension of WORK array to place
* T array in TSQR.
*
LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL
LDWT = NB1LOCAL
*
* Length of TSQR work array
*
LW1 = NB1LOCAL * N
*
* Length of CUNGTSQR_ROW work array.
*
LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) )
*
LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) )
*
IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
INFO = -11
END IF
*
END IF
END IF
*
* Handle error in the input parameters and return workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CGETSQRHRT', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
END IF
*
NB2LOCAL = MIN( NB2, N )
*
*
* (1) Perform TSQR-factorization of the M-by-N matrix A.
*
CALL CLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK(LWT+1), LW1, IINFO )
*
* (2) Copy the factor R_tsqr stored in the upper-triangular part
* of A into the square matrix in the work array
* WORK(LWT+1:LWT+N*N) column-by-column.
*
DO J = 1, N
CALL CCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 )
END DO
*
* (3) Generate a M-by-N matrix Q with orthonormal columns from
* the result stored below the diagonal in the array A in place.
*
CALL CUNGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK( LWT+N*N+1 ), LW2, IINFO )
*
* (4) Perform the reconstruction of Householder vectors from
* the matrix Q (stored in A) in place.
*
CALL CUNHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT,
$ WORK( LWT+N*N+1 ), IINFO )
*
* (5) Copy the factor R_tsqr stored in the square matrix in the
* work array WORK(LWT+1:LWT+N*N) into the upper-triangular
* part of A.
*
* (6) Compute from R_tsqr the factor R_hr corresponding to
* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr.
* This multiplication by the sign matrix S on the left means
* changing the sign of I-th row of the matrix R_tsqr according
* to sign of the I-th diagonal element DIAG(I) of the matrix S.
* DIAG is stored in WORK( LWT+N*N+1 ) from the CUNHR_COL output.
*
* (5) and (6) can be combined in a single loop, so the rows in A
* are accessed only once.
*
DO I = 1, N
IF( WORK( LWT+N*N+I ).EQ.-CONE ) THEN
DO J = I, N
A( I, J ) = -CONE * WORK( LWT+N*(J-1)+I )
END DO
ELSE
CALL CCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA )
END IF
END DO
*
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
*
* End of CGETSQRHRT
*
END

View File

@ -0,0 +1,597 @@
*> \brief \b CLARFB_GETT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CLARFB_GETT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarfb_gett.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarfb_gett.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarfb_gett.f">
*> [TXT]</a>
*> \endhtmlonly
*>
* Definition:
* ===========
*
* SUBROUTINE CLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
* $ WORK, LDWORK )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER IDENT
* INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ),
* $ WORK( LDWORK, * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CLARFB_GETT applies a complex Householder block reflector H from the
*> left to a complex (K+M)-by-N "triangular-pentagonal" matrix
*> composed of two block matrices: an upper trapezoidal K-by-N matrix A
*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
*> in the array B. The block reflector H is stored in a compact
*> WY-representation, where the elementary reflectors are in the
*> arrays A, B and T. See Further Details section.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] IDENT
*> \verbatim
*> IDENT is CHARACTER*1
*> If IDENT = not 'I', or not 'i', then V1 is unit
*> lower-triangular and stored in the left K-by-K block of
*> the input matrix A,
*> If IDENT = 'I' or 'i', then V1 is an identity matrix and
*> not stored.
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix B.
*> M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrices A and B.
*> N >= 0.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number or rows of the matrix A.
*> K is also order of the matrix T, i.e. the number of
*> elementary reflectors whose product defines the block
*> reflector. 0 <= K <= N.
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is COMPLEX array, dimension (LDT,K)
*> The upper-triangular K-by-K matrix T in the representation
*> of the block reflector.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= K.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX array, dimension (LDA,N)
*>
*> On entry:
*> a) In the K-by-N upper-trapezoidal part A: input matrix A.
*> b) In the columns below the diagonal: columns of V1
*> (ones are not stored on the diagonal).
*>
*> On exit:
*> A is overwritten by rectangular K-by-N product H*A.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array A. LDA >= max(1,K).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is COMPLEX array, dimension (LDB,N)
*>
*> On entry:
*> a) In the M-by-(N-K) right block: input matrix B.
*> b) In the M-by-N left block: columns of V2.
*>
*> On exit:
*> B is overwritten by rectangular M-by-N product H*B.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is COMPLEX array,
*> dimension (LDWORK,max(K,N-K))
*> \endverbatim
*>
*> \param[in] LDWORK
*> \verbatim
*> LDWORK is INTEGER
*> The leading dimension of the array WORK. LDWORK>=max(1,K).
*>
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexOTHERauxiliary
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> (1) Description of the Algebraic Operation.
*>
*> The matrix A is a K-by-N matrix composed of two column block
*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K):
*> A = ( A1, A2 ).
*> The matrix B is an M-by-N matrix composed of two column block
*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K):
*> B = ( B1, B2 ).
*>
*> Perform the operation:
*>
*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**H ) * ( A_in ) =
*> ( B_out ) ( B_in ) ( B_in )
*> = ( I - ( V1 ) * T * ( V1**H, V2**H ) ) * ( A_in )
*> ( V2 ) ( B_in )
*> On input:
*>
*> a) ( A_in ) consists of two block columns:
*> ( B_in )
*>
*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in ))
*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )),
*>
*> where the column blocks are:
*>
*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the
*> upper triangular part of the array A(1:K,1:K).
*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored.
*>
*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*> b) V = ( V1 )
*> ( V2 )
*>
*> where:
*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored;
*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix,
*> stored in the lower-triangular part of the array
*> A(1:K,1:K) (ones are not stored),
*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K),
*> (because on input B1_in is a rectangular zero
*> matrix that is not stored and the space is
*> used to store V2).
*>
*> c) T is a K-by-K upper-triangular matrix stored
*> in the array T(1:K,1:K).
*>
*> On output:
*>
*> a) ( A_out ) consists of two block columns:
*> ( B_out )
*>
*> ( A_out ) = (( A1_out ) ( A2_out ))
*> ( B_out ) (( B1_out ) ( B2_out )),
*>
*> where the column blocks are:
*>
*> ( A1_out ) is a K-by-K square matrix, or a K-by-K
*> upper-triangular matrix, if V1 is an
*> identity matrix. AiOut is stored in
*> the array A(1:K,1:K).
*> ( B1_out ) is an M-by-K rectangular matrix stored
*> in the array B(1:M,K:N).
*>
*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*>
*> The operation above can be represented as the same operation
*> on each block column:
*>
*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**H ) * ( A1_in )
*> ( B1_out ) ( 0 ) ( 0 )
*>
*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**H ) * ( A2_in )
*> ( B2_out ) ( B2_in ) ( B2_in )
*>
*> If IDENT != 'I':
*>
*> The computation for column block 1:
*>
*> A1_out: = A1_in - V1*T*(V1**H)*A1_in
*>
*> B1_out: = - V2*T*(V1**H)*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - V1*T*( (V1**H)*A2_in + (V2**H)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( (V1**H)*A2_in + (V2**H)*B2_in )
*>
*> If IDENT == 'I':
*>
*> The operation for column block 1:
*>
*> A1_out: = A1_in - V1*T*A1_in
*>
*> B1_out: = - V2*T*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - T*( A2_in + (V2**H)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( A2_in + (V2**H)*B2_in )
*>
*> (2) Description of the Algorithmic Computation.
*>
*> In the first step, we compute column block 2, i.e. A2 and B2.
*> Here, we need to use the K-by-(N-K) rectangular workspace
*> matrix W2 that is of the same size as the matrix A2.
*> W2 is stored in the array WORK(1:K,1:(N-K)).
*>
*> In the second step, we compute column block 1, i.e. A1 and B1.
*> Here, we need to use the K-by-K square workspace matrix W1
*> that is of the same size as the as the matrix A1.
*> W1 is stored in the array WORK(1:K,1:K).
*>
*> NOTE: Hence, in this routine, we need the workspace array WORK
*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from
*> the first step and W1 from the second step.
*>
*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I',
*> more computations than in the Case (B).
*>
*> if( IDENT != 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(2) W2: = (V1**H) * W2 = (unit_lower_tr_of_(A1)**H) * W2
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(2) W1: = (V1**H) * W1 = (unit_lower_tr_of_(A1)**H) * W1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6) square A1: = A1 - W1
*> end if
*> end if
*>
*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I',
*> less computations than in the Case (A)
*>
*> if( IDENT == 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(6) upper-triangular_of_(A1): = A1 - W1
*> end if
*> end if
*>
*> Combine these cases (A) and (B) together, this is the resulting
*> algorithm:
*>
*> if ( N > K ) then
*>
*> (First Step - column block 2)
*>
*> col2_(1) W2: = A2
*> if( IDENT != 'I' ) then
*> col2_(2) W2: = (V1**H) * W2
*> = (unit_lower_tr_of_(A1)**H) * W2
*> end if
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2]
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> if( IDENT != 'I' ) then
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> end if
*> col2_(7) A2: = A2 - W2
*>
*> else
*>
*> (Second Step - column block 1)
*>
*> col1_(1) W1: = A1
*> if( IDENT != 'I' ) then
*> col1_(2) W1: = (V1**H) * W1
*> = (unit_lower_tr_of_(A1)**H) * W1
*> end if
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> if( IDENT != 'I' ) then
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1)
*> end if
*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1)
*>
*> end if
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
$ WORK, LDWORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER IDENT
INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ),
$ WORK( LDWORK, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX CONE, CZERO
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ),
$ CZERO = ( 0.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LNOTIDENT
INTEGER I, J
* ..
* .. EXTERNAL FUNCTIONS ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL CCOPY, CGEMM, CTRMM
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N )
$ RETURN
*
LNOTIDENT = .NOT.LSAME( IDENT, 'I' )
*
* ------------------------------------------------------------------
*
* First Step. Computation of the Column Block 2:
*
* ( A2 ) := H * ( A2 )
* ( B2 ) ( B2 )
*
* ------------------------------------------------------------------
*
IF( N.GT.K ) THEN
*
* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N)
* into W2=WORK(1:K, 1:N-K) column-by-column.
*
DO J = 1, N-K
CALL CCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 )
END DO
IF( LNOTIDENT ) THEN
*
* col2_(2) Compute W2: = (V1**H) * W2 = (A1**H) * W2,
* V1 is not an identy matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored).
*
*
CALL CTRMM( 'L', 'L', 'C', 'U', K, N-K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(3) Compute W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL CGEMM( 'C', 'N', K, N-K, M, CONE, B, LDB,
$ B( 1, K+1 ), LDB, CONE, WORK, LDWORK )
END IF
*
* col2_(4) Compute W2: = T * W2,
* T is upper-triangular.
*
CALL CTRMM( 'L', 'U', 'N', 'N', K, N-K, CONE, T, LDT,
$ WORK, LDWORK )
*
* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2,
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL CGEMM( 'N', 'N', M, N-K, K, -CONE, B, LDB,
$ WORK, LDWORK, CONE, B( 1, K+1 ), LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col2_(6) Compute W2: = V1 * W2 = A1 * W2,
* V1 is not an identity matrix, but unit lower-triangular,
* V1 stored in A1 (diagonal ones are not stored).
*
CALL CTRMM( 'L', 'L', 'N', 'U', K, N-K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(7) Compute A2: = A2 - W2 =
* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K),
* column-by-column.
*
DO J = 1, N-K
DO I = 1, K
A( I, K+J ) = A( I, K+J ) - WORK( I, J )
END DO
END DO
*
END IF
*
* ------------------------------------------------------------------
*
* Second Step. Computation of the Column Block 1:
*
* ( A1 ) := H * ( A1 )
* ( B1 ) ( 0 )
*
* ------------------------------------------------------------------
*
* col1_(1) Compute W1: = A1. Copy the upper-triangular
* A1 = A(1:K, 1:K) into the upper-triangular
* W1 = WORK(1:K, 1:K) column-by-column.
*
DO J = 1, K
CALL CCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 )
END DO
*
* Set the subdiagonal elements of W1 to zero column-by-column.
*
DO J = 1, K - 1
DO I = J + 1, K
WORK( I, J ) = CZERO
END DO
END DO
*
IF( LNOTIDENT ) THEN
*
* col1_(2) Compute W1: = (V1**H) * W1 = (A1**H) * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL CTRMM( 'L', 'L', 'C', 'U', K, K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col1_(3) Compute W1: = T * W1,
* T is upper-triangular,
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL CTRMM( 'L', 'U', 'N', 'N', K, K, CONE, T, LDT,
$ WORK, LDWORK )
*
* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1,
* V2 = B1, W1 is upper-triangular with zeroes below the diagonal.
*
IF( M.GT.0 ) THEN
CALL CTRMM( 'R', 'U', 'N', 'N', M, K, -CONE, WORK, LDWORK,
$ B, LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col1_(5) Compute W1: = V1 * W1 = A1 * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular on input with zeroes below the diagonal,
* and square on output.
*
CALL CTRMM( 'L', 'L', 'N', 'U', K, K, CONE, A, LDA,
$ WORK, LDWORK )
*
* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K)
* column-by-column. A1 is upper-triangular on input.
* If IDENT, A1 is square on output, and W1 is square,
* if NOT IDENT, A1 is upper-triangular on output,
* W1 is upper-triangular.
*
* col1_(6)_a Compute elements of A1 below the diagonal.
*
DO J = 1, K - 1
DO I = J + 1, K
A( I, J ) = - WORK( I, J )
END DO
END DO
*
END IF
*
* col1_(6)_b Compute elements of A1 on and above the diagonal.
*
DO J = 1, K
DO I = 1, J
A( I, J ) = A( I, J ) - WORK( I, J )
END DO
END DO
*
RETURN
*
* End of CLARFB_GETT
*
END

View File

@ -0,0 +1,380 @@
*> \brief \b CUNGTSQR_ROW
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CUNGTSQR_ROW + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cunrgtsqr_row.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cunrgtsqr_row.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cunrgtsqr_row.f">
*> [TXT]</a>
*> \endhtmlonly
*>
* Definition:
* ===========
*
* SUBROUTINE CUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
*> orthonormal columns from the output of CLATSQR. These N orthonormal
*> columns are the first N columns of a product of complex unitary
*> matrices Q(k)_in of order M, which are returned by CLATSQR in
*> a special format.
*>
*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
*>
*> The input matrices Q(k)_in are stored in row and column blocks in A.
*> See the documentation of CLATSQR for more details on the format of
*> Q(k)_in, where each Q(k)_in is represented by block Householder
*> transformations. This routine calls an auxiliary routine CLARFB_GETT,
*> where the computation is performed on each individual block. The
*> algorithm first sweeps NB-sized column blocks from the right to left
*> starting in the bottom row block and continues to the top row block
*> (hence _ROW in the routine name). This sweep is in reverse order of
*> the order in which CLATSQR generates the output blocks.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB
*> \verbatim
*> MB is INTEGER
*> The row block size used by CLATSQR to return
*> arrays A and T. MB > N.
*> (Note that if MB > M, then M is used instead of MB
*> as the row block size).
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The column block size used by CLATSQR to return
*> arrays A and T. NB >= 1.
*> (Note that if NB > N, then N is used instead of NB
*> as the column block size).
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX array, dimension (LDA,N)
*>
*> On entry:
*>
*> The elements on and above the diagonal are not used as
*> input. The elements below the diagonal represent the unit
*> lower-trapezoidal blocked matrix V computed by CLATSQR
*> that defines the input matrices Q_in(k) (ones on the
*> diagonal are not stored). See CLATSQR for more details.
*>
*> On exit:
*>
*> The array A contains an M-by-N orthonormal matrix Q_out,
*> i.e the columns of A are orthogonal unit vectors.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is COMPLEX array,
*> dimension (LDT, N * NIRB)
*> where NIRB = Number_of_input_row_blocks
*> = MAX( 1, CEIL((M-N)/(MB-N)) )
*> Let NICB = Number_of_input_col_blocks
*> = CEIL(N/NB)
*>
*> The upper-triangular block reflectors used to define the
*> input matrices Q_in(k), k=(1:NIRB*NICB). The block
*> reflectors are stored in compact form in NIRB block
*> reflector sequences. Each of the NIRB block reflector
*> sequences is stored in a larger NB-by-N column block of T
*> and consists of NICB smaller NB-by-NB upper-triangular
*> column blocks. See CLATSQR for more details on the format
*> of T.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T.
*> LDT >= max(1,min(NB,N)).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) COMPLEX array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)),
*> where NBLOCAL=MIN(NB,N).
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*>
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
COMPLEX A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX CONE, CZERO
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ),
$ CZERO = ( 0.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM,
$ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB,
$ KB, KB_LAST, KNB, MB1
* ..
* .. Local Arrays ..
COMPLEX DUMMY( 1, 1 )
* ..
* .. External Subroutines ..
EXTERNAL CLARFB_GETT, CLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CMPLX, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB.LE.N ) THEN
INFO = -3
ELSE IF( NB.LT.1 ) THEN
INFO = -4
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -6
ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
INFO = -8
ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
*
NBLOCAL = MIN( NB, N )
*
* Determine the workspace size.
*
IF( INFO.EQ.0 ) THEN
LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) )
END IF
*
* Handle error in the input parameters and handle the workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CUNGTSQR_ROW', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
END IF
*
* (0) Set the upper-triangular part of the matrix A to zero and
* its diagonal elements to one.
*
CALL CLASET('U', M, N, CZERO, CONE, A, LDA )
*
* KB_LAST is the column index of the last column block reflector
* in the matrices T and V.
*
KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1
*
*
* (1) Bottom-up loop over row blocks of A, except the top row block.
* NOTE: If MB>=M, then the loop is never executed.
*
IF ( MB.LT.M ) THEN
*
* MB2 is the row blocking size for the row blocks before the
* first top row block in the matrix A. IB is the row index for
* the row blocks in the matrix A before the first top row block.
* IB_BOTTOM is the row index for the last bottom row block
* in the matrix A. JB_T is the column index of the corresponding
* column block in the matrix T.
*
* Initialize variables.
*
* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A
* including the first row block.
*
MB2 = MB - N
M_PLUS_ONE = M + 1
ITMP = ( M - MB - 1 ) / MB2
IB_BOTTOM = ITMP * MB2 + MB + 1
NUM_ALL_ROW_BLOCKS = ITMP + 2
JB_T = NUM_ALL_ROW_BLOCKS * N + 1
*
DO IB = IB_BOTTOM, MB+1, -MB2
*
* Determine the block size IMB for the current row block
* in the matrix A.
*
IMB = MIN( M_PLUS_ONE - IB, MB2 )
*
* Determine the column index JB_T for the current column block
* in the matrix T.
*
JB_T = JB_T - N
*
* Apply column blocks of H in the row block from right to left.
*
* KB is the column index of the current column block reflector
* in the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
CALL CLARFB_GETT( 'I', IMB, N-KB+1, KNB,
$ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA,
$ A( IB, KB ), LDA, WORK, KNB )
*
END DO
*
END DO
*
END IF
*
* (2) Top row block of A.
* NOTE: If MB>=M, then we have only one row block of A of size M
* and we work on the entire matrix A.
*
MB1 = MIN( MB, M )
*
* Apply column blocks of H in the top row block from right to left.
*
* KB is the column index of the current block reflector in
* the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
IF( MB1-KB-KNB+1.EQ.0 ) THEN
*
* In SLARFB_GETT parameters, when M=0, then the matrix B
* does not exist, hence we need to pass a dummy array
* reference DUMMY(1,1) to B with LDDUMMY=1.
*
CALL CLARFB_GETT( 'N', 0, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ DUMMY( 1, 1 ), 1, WORK, KNB )
ELSE
CALL CLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ A( KB+KNB, KB), LDA, WORK, KNB )
END IF
*
END DO
*
WORK( 1 ) = CMPLX( LWORKOPT )
RETURN
*
* End of CUNGTSQR_ROW
*
END

View File

@ -0,0 +1,349 @@
*> \brief \b DGETSQRHRT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DGETSQRHRT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetsqrhrt.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetsqrhrt.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetsqrhrt.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGETSQRHRT computes a NB2-sized column blocked QR-factorization
*> of a real M-by-N matrix A with M >= N,
*>
*> A = Q * R.
*>
*> The routine uses internally a NB1-sized column blocked and MB1-sized
*> row blocked TSQR-factorization and perfors the reconstruction
*> of the Householder vectors from the TSQR output. The routine also
*> converts the R_tsqr factor from the TSQR-factorization output into
*> the R factor that corresponds to the Householder QR-factorization,
*>
*> A = Q_tsqr * R_tsqr = Q * R.
*>
*> The output Q and R factors are stored in the same format as in DGEQRT
*> (Q is in blocked compact WY-representation). See the documentation
*> of DGEQRT for more details on the format.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> The row block size to be used in the blocked TSQR.
*> MB1 > N.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> The column block size to be used in the blocked TSQR.
*> N >= NB1 >= 1.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> The block size to be used in the blocked QR that is
*> output. NB2 >= 1.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*>
*> On entry: an M-by-N matrix A.
*>
*> On exit:
*> a) the elements on and above the diagonal
*> of the array contain the N-by-N upper-triangular
*> matrix R corresponding to the Householder QR;
*> b) the elements below the diagonal represent Q by
*> the columns of blocked V (compact WY-representation).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] T
*> \verbatim
*> T is DOUBLE PRECISION array, dimension (LDT,N))
*> The upper triangular block reflectors stored in compact form
*> as a sequence of upper triangular blocks.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= NB2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ),
*> where
*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)),
*> NB1LOCAL = MIN(NB1,N).
*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL,
*> LW1 = NB1LOCAL * N,
*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ),
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT,
$ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLATSQR, DORGTSQR_ROW, DORHR_COL,
$ XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, DBLE, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB1.LE.N ) THEN
INFO = -3
ELSE IF( NB1.LT.1 ) THEN
INFO = -4
ELSE IF( NB2.LT.1 ) THEN
INFO = -5
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -7
ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN
INFO = -9
ELSE
*
* Test the input LWORK for the dimension of the array WORK.
* This workspace is used to store array:
* a) Matrix T and WORK for DLATSQR;
* b) N-by-N upper-triangular factor R_tsqr;
* c) Matrix T and array WORK for DORGTSQR_ROW;
* d) Diagonal D for DORHR_COL.
*
IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN
INFO = -11
ELSE
*
* Set block size for column blocks
*
NB1LOCAL = MIN( NB1, N )
*
NUM_ALL_ROW_BLOCKS = MAX( 1,
$ CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) )
*
* Length and leading dimension of WORK array to place
* T array in TSQR.
*
LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL
LDWT = NB1LOCAL
*
* Length of TSQR work array
*
LW1 = NB1LOCAL * N
*
* Length of DORGTSQR_ROW work array.
*
LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) )
*
LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) )
*
IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
INFO = -11
END IF
*
END IF
END IF
*
* Handle error in the input parameters and return workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGETSQRHRT', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
END IF
*
NB2LOCAL = MIN( NB2, N )
*
*
* (1) Perform TSQR-factorization of the M-by-N matrix A.
*
CALL DLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK(LWT+1), LW1, IINFO )
*
* (2) Copy the factor R_tsqr stored in the upper-triangular part
* of A into the square matrix in the work array
* WORK(LWT+1:LWT+N*N) column-by-column.
*
DO J = 1, N
CALL DCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 )
END DO
*
* (3) Generate a M-by-N matrix Q with orthonormal columns from
* the result stored below the diagonal in the array A in place.
*
CALL DORGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK( LWT+N*N+1 ), LW2, IINFO )
*
* (4) Perform the reconstruction of Householder vectors from
* the matrix Q (stored in A) in place.
*
CALL DORHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT,
$ WORK( LWT+N*N+1 ), IINFO )
*
* (5) Copy the factor R_tsqr stored in the square matrix in the
* work array WORK(LWT+1:LWT+N*N) into the upper-triangular
* part of A.
*
* (6) Compute from R_tsqr the factor R_hr corresponding to
* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr.
* This multiplication by the sign matrix S on the left means
* changing the sign of I-th row of the matrix R_tsqr according
* to sign of the I-th diagonal element DIAG(I) of the matrix S.
* DIAG is stored in WORK( LWT+N*N+1 ) from the DORHR_COL output.
*
* (5) and (6) can be combined in a single loop, so the rows in A
* are accessed only once.
*
DO I = 1, N
IF( WORK( LWT+N*N+I ).EQ.-ONE ) THEN
DO J = I, N
A( I, J ) = -ONE * WORK( LWT+N*(J-1)+I )
END DO
ELSE
CALL DCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA )
END IF
END DO
*
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
*
* End of DGETSQRHRT
*
END

View File

@ -0,0 +1,596 @@
*> \brief \b DLARFB_GETT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DLARFB_GETT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarfb_gett.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarfb_gett.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarfb_gett.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
* $ WORK, LDWORK )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER IDENT
* INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ),
* $ WORK( LDWORK, * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLARFB_GETT applies a real Householder block reflector H from the
*> left to a real (K+M)-by-N "triangular-pentagonal" matrix
*> composed of two block matrices: an upper trapezoidal K-by-N matrix A
*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
*> in the array B. The block reflector H is stored in a compact
*> WY-representation, where the elementary reflectors are in the
*> arrays A, B and T. See Further Details section.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] IDENT
*> \verbatim
*> IDENT is CHARACTER*1
*> If IDENT = not 'I', or not 'i', then V1 is unit
*> lower-triangular and stored in the left K-by-K block of
*> the input matrix A,
*> If IDENT = 'I' or 'i', then V1 is an identity matrix and
*> not stored.
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix B.
*> M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrices A and B.
*> N >= 0.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number or rows of the matrix A.
*> K is also order of the matrix T, i.e. the number of
*> elementary reflectors whose product defines the block
*> reflector. 0 <= K <= N.
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is DOUBLE PRECISION array, dimension (LDT,K)
*> The upper-triangular K-by-K matrix T in the representation
*> of the block reflector.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= K.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*>
*> On entry:
*> a) In the K-by-N upper-trapezoidal part A: input matrix A.
*> b) In the columns below the diagonal: columns of V1
*> (ones are not stored on the diagonal).
*>
*> On exit:
*> A is overwritten by rectangular K-by-N product H*A.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array A. LDA >= max(1,K).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension (LDB,N)
*>
*> On entry:
*> a) In the M-by-(N-K) right block: input matrix B.
*> b) In the M-by-N left block: columns of V2.
*>
*> On exit:
*> B is overwritten by rectangular M-by-N product H*B.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array,
*> dimension (LDWORK,max(K,N-K))
*> \endverbatim
*>
*> \param[in] LDWORK
*> \verbatim
*> LDWORK is INTEGER
*> The leading dimension of the array WORK. LDWORK>=max(1,K).
*>
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> (1) Description of the Algebraic Operation.
*>
*> The matrix A is a K-by-N matrix composed of two column block
*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K):
*> A = ( A1, A2 ).
*> The matrix B is an M-by-N matrix composed of two column block
*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K):
*> B = ( B1, B2 ).
*>
*> Perform the operation:
*>
*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**T ) * ( A_in ) =
*> ( B_out ) ( B_in ) ( B_in )
*> = ( I - ( V1 ) * T * ( V1**T, V2**T ) ) * ( A_in )
*> ( V2 ) ( B_in )
*> On input:
*>
*> a) ( A_in ) consists of two block columns:
*> ( B_in )
*>
*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in ))
*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )),
*>
*> where the column blocks are:
*>
*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the
*> upper triangular part of the array A(1:K,1:K).
*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored.
*>
*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*> b) V = ( V1 )
*> ( V2 )
*>
*> where:
*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored;
*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix,
*> stored in the lower-triangular part of the array
*> A(1:K,1:K) (ones are not stored),
*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K),
*> (because on input B1_in is a rectangular zero
*> matrix that is not stored and the space is
*> used to store V2).
*>
*> c) T is a K-by-K upper-triangular matrix stored
*> in the array T(1:K,1:K).
*>
*> On output:
*>
*> a) ( A_out ) consists of two block columns:
*> ( B_out )
*>
*> ( A_out ) = (( A1_out ) ( A2_out ))
*> ( B_out ) (( B1_out ) ( B2_out )),
*>
*> where the column blocks are:
*>
*> ( A1_out ) is a K-by-K square matrix, or a K-by-K
*> upper-triangular matrix, if V1 is an
*> identity matrix. AiOut is stored in
*> the array A(1:K,1:K).
*> ( B1_out ) is an M-by-K rectangular matrix stored
*> in the array B(1:M,K:N).
*>
*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*>
*> The operation above can be represented as the same operation
*> on each block column:
*>
*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**T ) * ( A1_in )
*> ( B1_out ) ( 0 ) ( 0 )
*>
*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**T ) * ( A2_in )
*> ( B2_out ) ( B2_in ) ( B2_in )
*>
*> If IDENT != 'I':
*>
*> The computation for column block 1:
*>
*> A1_out: = A1_in - V1*T*(V1**T)*A1_in
*>
*> B1_out: = - V2*T*(V1**T)*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - V1*T*( (V1**T)*A2_in + (V2**T)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( (V1**T)*A2_in + (V2**T)*B2_in )
*>
*> If IDENT == 'I':
*>
*> The operation for column block 1:
*>
*> A1_out: = A1_in - V1*T**A1_in
*>
*> B1_out: = - V2*T**A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - T*( A2_in + (V2**T)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( A2_in + (V2**T)*B2_in )
*>
*> (2) Description of the Algorithmic Computation.
*>
*> In the first step, we compute column block 2, i.e. A2 and B2.
*> Here, we need to use the K-by-(N-K) rectangular workspace
*> matrix W2 that is of the same size as the matrix A2.
*> W2 is stored in the array WORK(1:K,1:(N-K)).
*>
*> In the second step, we compute column block 1, i.e. A1 and B1.
*> Here, we need to use the K-by-K square workspace matrix W1
*> that is of the same size as the as the matrix A1.
*> W1 is stored in the array WORK(1:K,1:K).
*>
*> NOTE: Hence, in this routine, we need the workspace array WORK
*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from
*> the first step and W1 from the second step.
*>
*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I',
*> more computations than in the Case (B).
*>
*> if( IDENT != 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(2) W2: = (V1**T) * W2 = (unit_lower_tr_of_(A1)**T) * W2
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(2) W1: = (V1**T) * W1 = (unit_lower_tr_of_(A1)**T) * W1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6) square A1: = A1 - W1
*> end if
*> end if
*>
*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I',
*> less computations than in the Case (A)
*>
*> if( IDENT == 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(6) upper-triangular_of_(A1): = A1 - W1
*> end if
*> end if
*>
*> Combine these cases (A) and (B) together, this is the resulting
*> algorithm:
*>
*> if ( N > K ) then
*>
*> (First Step - column block 2)
*>
*> col2_(1) W2: = A2
*> if( IDENT != 'I' ) then
*> col2_(2) W2: = (V1**T) * W2
*> = (unit_lower_tr_of_(A1)**T) * W2
*> end if
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2]
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> if( IDENT != 'I' ) then
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> end if
*> col2_(7) A2: = A2 - W2
*>
*> else
*>
*> (Second Step - column block 1)
*>
*> col1_(1) W1: = A1
*> if( IDENT != 'I' ) then
*> col1_(2) W1: = (V1**T) * W1
*> = (unit_lower_tr_of_(A1)**T) * W1
*> end if
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> if( IDENT != 'I' ) then
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1)
*> end if
*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1)
*>
*> end if
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
$ WORK, LDWORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER IDENT
INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ),
$ WORK( LDWORK, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LNOTIDENT
INTEGER I, J
* ..
* .. EXTERNAL FUNCTIONS ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DTRMM
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N )
$ RETURN
*
LNOTIDENT = .NOT.LSAME( IDENT, 'I' )
*
* ------------------------------------------------------------------
*
* First Step. Computation of the Column Block 2:
*
* ( A2 ) := H * ( A2 )
* ( B2 ) ( B2 )
*
* ------------------------------------------------------------------
*
IF( N.GT.K ) THEN
*
* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N)
* into W2=WORK(1:K, 1:N-K) column-by-column.
*
DO J = 1, N-K
CALL DCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 )
END DO
IF( LNOTIDENT ) THEN
*
* col2_(2) Compute W2: = (V1**T) * W2 = (A1**T) * W2,
* V1 is not an identy matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored).
*
*
CALL DTRMM( 'L', 'L', 'T', 'U', K, N-K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(3) Compute W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL DGEMM( 'T', 'N', K, N-K, M, ONE, B, LDB,
$ B( 1, K+1 ), LDB, ONE, WORK, LDWORK )
END IF
*
* col2_(4) Compute W2: = T * W2,
* T is upper-triangular.
*
CALL DTRMM( 'L', 'U', 'N', 'N', K, N-K, ONE, T, LDT,
$ WORK, LDWORK )
*
* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2,
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL DGEMM( 'N', 'N', M, N-K, K, -ONE, B, LDB,
$ WORK, LDWORK, ONE, B( 1, K+1 ), LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col2_(6) Compute W2: = V1 * W2 = A1 * W2,
* V1 is not an identity matrix, but unit lower-triangular,
* V1 stored in A1 (diagonal ones are not stored).
*
CALL DTRMM( 'L', 'L', 'N', 'U', K, N-K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(7) Compute A2: = A2 - W2 =
* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K),
* column-by-column.
*
DO J = 1, N-K
DO I = 1, K
A( I, K+J ) = A( I, K+J ) - WORK( I, J )
END DO
END DO
*
END IF
*
* ------------------------------------------------------------------
*
* Second Step. Computation of the Column Block 1:
*
* ( A1 ) := H * ( A1 )
* ( B1 ) ( 0 )
*
* ------------------------------------------------------------------
*
* col1_(1) Compute W1: = A1. Copy the upper-triangular
* A1 = A(1:K, 1:K) into the upper-triangular
* W1 = WORK(1:K, 1:K) column-by-column.
*
DO J = 1, K
CALL DCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 )
END DO
*
* Set the subdiagonal elements of W1 to zero column-by-column.
*
DO J = 1, K - 1
DO I = J + 1, K
WORK( I, J ) = ZERO
END DO
END DO
*
IF( LNOTIDENT ) THEN
*
* col1_(2) Compute W1: = (V1**T) * W1 = (A1**T) * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL DTRMM( 'L', 'L', 'T', 'U', K, K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col1_(3) Compute W1: = T * W1,
* T is upper-triangular,
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL DTRMM( 'L', 'U', 'N', 'N', K, K, ONE, T, LDT,
$ WORK, LDWORK )
*
* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1,
* V2 = B1, W1 is upper-triangular with zeroes below the diagonal.
*
IF( M.GT.0 ) THEN
CALL DTRMM( 'R', 'U', 'N', 'N', M, K, -ONE, WORK, LDWORK,
$ B, LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col1_(5) Compute W1: = V1 * W1 = A1 * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular on input with zeroes below the diagonal,
* and square on output.
*
CALL DTRMM( 'L', 'L', 'N', 'U', K, K, ONE, A, LDA,
$ WORK, LDWORK )
*
* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K)
* column-by-column. A1 is upper-triangular on input.
* If IDENT, A1 is square on output, and W1 is square,
* if NOT IDENT, A1 is upper-triangular on output,
* W1 is upper-triangular.
*
* col1_(6)_a Compute elements of A1 below the diagonal.
*
DO J = 1, K - 1
DO I = J + 1, K
A( I, J ) = - WORK( I, J )
END DO
END DO
*
END IF
*
* col1_(6)_b Compute elements of A1 on and above the diagonal.
*
DO J = 1, K
DO I = 1, J
A( I, J ) = A( I, J ) - WORK( I, J )
END DO
END DO
*
RETURN
*
* End of DLARFB_GETT
*
END

View File

@ -0,0 +1,379 @@
*> \brief \b DORGTSQR_ROW
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download DORGTSQR_ROW + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtsqr_row.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtsqr_row.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtsqr_row.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE DORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DORGTSQR_ROW generates an M-by-N real matrix Q_out with
*> orthonormal columns from the output of DLATSQR. These N orthonormal
*> columns are the first N columns of a product of complex unitary
*> matrices Q(k)_in of order M, which are returned by DLATSQR in
*> a special format.
*>
*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
*>
*> The input matrices Q(k)_in are stored in row and column blocks in A.
*> See the documentation of DLATSQR for more details on the format of
*> Q(k)_in, where each Q(k)_in is represented by block Householder
*> transformations. This routine calls an auxiliary routine DLARFB_GETT,
*> where the computation is performed on each individual block. The
*> algorithm first sweeps NB-sized column blocks from the right to left
*> starting in the bottom row block and continues to the top row block
*> (hence _ROW in the routine name). This sweep is in reverse order of
*> the order in which DLATSQR generates the output blocks.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB
*> \verbatim
*> MB is INTEGER
*> The row block size used by DLATSQR to return
*> arrays A and T. MB > N.
*> (Note that if MB > M, then M is used instead of MB
*> as the row block size).
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The column block size used by DLATSQR to return
*> arrays A and T. NB >= 1.
*> (Note that if NB > N, then N is used instead of NB
*> as the column block size).
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*>
*> On entry:
*>
*> The elements on and above the diagonal are not used as
*> input. The elements below the diagonal represent the unit
*> lower-trapezoidal blocked matrix V computed by DLATSQR
*> that defines the input matrices Q_in(k) (ones on the
*> diagonal are not stored). See DLATSQR for more details.
*>
*> On exit:
*>
*> The array A contains an M-by-N orthonormal matrix Q_out,
*> i.e the columns of A are orthogonal unit vectors.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is DOUBLE PRECISION array,
*> dimension (LDT, N * NIRB)
*> where NIRB = Number_of_input_row_blocks
*> = MAX( 1, CEIL((M-N)/(MB-N)) )
*> Let NICB = Number_of_input_col_blocks
*> = CEIL(N/NB)
*>
*> The upper-triangular block reflectors used to define the
*> input matrices Q_in(k), k=(1:NIRB*NICB). The block
*> reflectors are stored in compact form in NIRB block
*> reflector sequences. Each of the NIRB block reflector
*> sequences is stored in a larger NB-by-N column block of T
*> and consists of NICB smaller NB-by-NB upper-triangular
*> column blocks. See DLATSQR for more details on the format
*> of T.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T.
*> LDT >= max(1,min(NB,N)).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)),
*> where NBLOCAL=MIN(NB,N).
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*>
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM,
$ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB,
$ KB, KB_LAST, KNB, MB1
* ..
* .. Local Arrays ..
DOUBLE PRECISION DUMMY( 1, 1 )
* ..
* .. External Subroutines ..
EXTERNAL DLARFB_GETT, DLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB.LE.N ) THEN
INFO = -3
ELSE IF( NB.LT.1 ) THEN
INFO = -4
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -6
ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
INFO = -8
ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
*
NBLOCAL = MIN( NB, N )
*
* Determine the workspace size.
*
IF( INFO.EQ.0 ) THEN
LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) )
END IF
*
* Handle error in the input parameters and handle the workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DORGTSQR_ROW', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
END IF
*
* (0) Set the upper-triangular part of the matrix A to zero and
* its diagonal elements to one.
*
CALL DLASET('U', M, N, ZERO, ONE, A, LDA )
*
* KB_LAST is the column index of the last column block reflector
* in the matrices T and V.
*
KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1
*
*
* (1) Bottom-up loop over row blocks of A, except the top row block.
* NOTE: If MB>=M, then the loop is never executed.
*
IF ( MB.LT.M ) THEN
*
* MB2 is the row blocking size for the row blocks before the
* first top row block in the matrix A. IB is the row index for
* the row blocks in the matrix A before the first top row block.
* IB_BOTTOM is the row index for the last bottom row block
* in the matrix A. JB_T is the column index of the corresponding
* column block in the matrix T.
*
* Initialize variables.
*
* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A
* including the first row block.
*
MB2 = MB - N
M_PLUS_ONE = M + 1
ITMP = ( M - MB - 1 ) / MB2
IB_BOTTOM = ITMP * MB2 + MB + 1
NUM_ALL_ROW_BLOCKS = ITMP + 2
JB_T = NUM_ALL_ROW_BLOCKS * N + 1
*
DO IB = IB_BOTTOM, MB+1, -MB2
*
* Determine the block size IMB for the current row block
* in the matrix A.
*
IMB = MIN( M_PLUS_ONE - IB, MB2 )
*
* Determine the column index JB_T for the current column block
* in the matrix T.
*
JB_T = JB_T - N
*
* Apply column blocks of H in the row block from right to left.
*
* KB is the column index of the current column block reflector
* in the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
CALL DLARFB_GETT( 'I', IMB, N-KB+1, KNB,
$ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA,
$ A( IB, KB ), LDA, WORK, KNB )
*
END DO
*
END DO
*
END IF
*
* (2) Top row block of A.
* NOTE: If MB>=M, then we have only one row block of A of size M
* and we work on the entire matrix A.
*
MB1 = MIN( MB, M )
*
* Apply column blocks of H in the top row block from right to left.
*
* KB is the column index of the current block reflector in
* the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
IF( MB1-KB-KNB+1.EQ.0 ) THEN
*
* In SLARFB_GETT parameters, when M=0, then the matrix B
* does not exist, hence we need to pass a dummy array
* reference DUMMY(1,1) to B with LDDUMMY=1.
*
CALL DLARFB_GETT( 'N', 0, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ DUMMY( 1, 1 ), 1, WORK, KNB )
ELSE
CALL DLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ A( KB+KNB, KB), LDA, WORK, KNB )
END IF
*
END DO
*
WORK( 1 ) = DBLE( LWORKOPT )
RETURN
*
* End of DORGTSQR_ROW
*
END

View File

@ -0,0 +1,349 @@
*> \brief \b SGETSQRHRT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SGETSQRHRT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetsqrhrt.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetsqrhrt.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetsqrhrt.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
* REAL A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SGETSQRHRT computes a NB2-sized column blocked QR-factorization
*> of a complex M-by-N matrix A with M >= N,
*>
*> A = Q * R.
*>
*> The routine uses internally a NB1-sized column blocked and MB1-sized
*> row blocked TSQR-factorization and perfors the reconstruction
*> of the Householder vectors from the TSQR output. The routine also
*> converts the R_tsqr factor from the TSQR-factorization output into
*> the R factor that corresponds to the Householder QR-factorization,
*>
*> A = Q_tsqr * R_tsqr = Q * R.
*>
*> The output Q and R factors are stored in the same format as in SGEQRT
*> (Q is in blocked compact WY-representation). See the documentation
*> of SGEQRT for more details on the format.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> The row block size to be used in the blocked TSQR.
*> MB1 > N.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> The column block size to be used in the blocked TSQR.
*> N >= NB1 >= 1.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> The block size to be used in the blocked QR that is
*> output. NB2 >= 1.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is REAL array, dimension (LDA,N)
*>
*> On entry: an M-by-N matrix A.
*>
*> On exit:
*> a) the elements on and above the diagonal
*> of the array contain the N-by-N upper-triangular
*> matrix R corresponding to the Householder QR;
*> b) the elements below the diagonal represent Q by
*> the columns of blocked V (compact WY-representation).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] T
*> \verbatim
*> T is REAL array, dimension (LDT,N))
*> The upper triangular block reflectors stored in compact form
*> as a sequence of upper triangular blocks.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= NB2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) REAL array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ),
*> where
*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)),
*> NB1LOCAL = MIN(NB1,N).
*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL,
*> LW1 = NB1LOCAL * N,
*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ),
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup singleOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
REAL A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ONE
PARAMETER ( ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT,
$ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS
* ..
* .. External Subroutines ..
EXTERNAL SCOPY, SLATSQR, SORGTSQR_ROW, SORHR_COL,
$ XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB1.LE.N ) THEN
INFO = -3
ELSE IF( NB1.LT.1 ) THEN
INFO = -4
ELSE IF( NB2.LT.1 ) THEN
INFO = -5
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -7
ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN
INFO = -9
ELSE
*
* Test the input LWORK for the dimension of the array WORK.
* This workspace is used to store array:
* a) Matrix T and WORK for SLATSQR;
* b) N-by-N upper-triangular factor R_tsqr;
* c) Matrix T and array WORK for SORGTSQR_ROW;
* d) Diagonal D for SORHR_COL.
*
IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN
INFO = -11
ELSE
*
* Set block size for column blocks
*
NB1LOCAL = MIN( NB1, N )
*
NUM_ALL_ROW_BLOCKS = MAX( 1,
$ CEILING( REAL( M - N ) / REAL( MB1 - N ) ) )
*
* Length and leading dimension of WORK array to place
* T array in TSQR.
*
LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL
LDWT = NB1LOCAL
*
* Length of TSQR work array
*
LW1 = NB1LOCAL * N
*
* Length of SORGTSQR_ROW work array.
*
LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) )
*
LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) )
*
IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
INFO = -11
END IF
*
END IF
END IF
*
* Handle error in the input parameters and return workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SGETSQRHRT', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = REAL( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = REAL( LWORKOPT )
RETURN
END IF
*
NB2LOCAL = MIN( NB2, N )
*
*
* (1) Perform TSQR-factorization of the M-by-N matrix A.
*
CALL SLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK(LWT+1), LW1, IINFO )
*
* (2) Copy the factor R_tsqr stored in the upper-triangular part
* of A into the square matrix in the work array
* WORK(LWT+1:LWT+N*N) column-by-column.
*
DO J = 1, N
CALL SCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 )
END DO
*
* (3) Generate a M-by-N matrix Q with orthonormal columns from
* the result stored below the diagonal in the array A in place.
*
CALL SORGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK( LWT+N*N+1 ), LW2, IINFO )
*
* (4) Perform the reconstruction of Householder vectors from
* the matrix Q (stored in A) in place.
*
CALL SORHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT,
$ WORK( LWT+N*N+1 ), IINFO )
*
* (5) Copy the factor R_tsqr stored in the square matrix in the
* work array WORK(LWT+1:LWT+N*N) into the upper-triangular
* part of A.
*
* (6) Compute from R_tsqr the factor R_hr corresponding to
* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr.
* This multiplication by the sign matrix S on the left means
* changing the sign of I-th row of the matrix R_tsqr according
* to sign of the I-th diagonal element DIAG(I) of the matrix S.
* DIAG is stored in WORK( LWT+N*N+1 ) from the SORHR_COL output.
*
* (5) and (6) can be combined in a single loop, so the rows in A
* are accessed only once.
*
DO I = 1, N
IF( WORK( LWT+N*N+I ).EQ.-ONE ) THEN
DO J = I, N
A( I, J ) = -ONE * WORK( LWT+N*(J-1)+I )
END DO
ELSE
CALL SCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA )
END IF
END DO
*
WORK( 1 ) = REAL( LWORKOPT )
RETURN
*
* End of SGETSQRHRT
*
END

View File

@ -0,0 +1,596 @@
*> \brief \b SLARFB_GETT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SLARFB_GETT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarfb_gett.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarfb_gett.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarfb_gett.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE SLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
* $ WORK, LDWORK )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER IDENT
* INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ),
* $ WORK( LDWORK, * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SLARFB_GETT applies a real Householder block reflector H from the
*> left to a real (K+M)-by-N "triangular-pentagonal" matrix
*> composed of two block matrices: an upper trapezoidal K-by-N matrix A
*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
*> in the array B. The block reflector H is stored in a compact
*> WY-representation, where the elementary reflectors are in the
*> arrays A, B and T. See Further Details section.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] IDENT
*> \verbatim
*> IDENT is CHARACTER*1
*> If IDENT = not 'I', or not 'i', then V1 is unit
*> lower-triangular and stored in the left K-by-K block of
*> the input matrix A,
*> If IDENT = 'I' or 'i', then V1 is an identity matrix and
*> not stored.
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix B.
*> M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrices A and B.
*> N >= 0.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number or rows of the matrix A.
*> K is also order of the matrix T, i.e. the number of
*> elementary reflectors whose product defines the block
*> reflector. 0 <= K <= N.
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is REAL array, dimension (LDT,K)
*> The upper-triangular K-by-K matrix T in the representation
*> of the block reflector.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= K.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is REAL array, dimension (LDA,N)
*>
*> On entry:
*> a) In the K-by-N upper-trapezoidal part A: input matrix A.
*> b) In the columns below the diagonal: columns of V1
*> (ones are not stored on the diagonal).
*>
*> On exit:
*> A is overwritten by rectangular K-by-N product H*A.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array A. LDA >= max(1,K).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is REAL array, dimension (LDB,N)
*>
*> On entry:
*> a) In the M-by-(N-K) right block: input matrix B.
*> b) In the M-by-N left block: columns of V2.
*>
*> On exit:
*> B is overwritten by rectangular M-by-N product H*B.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is REAL array,
*> dimension (LDWORK,max(K,N-K))
*> \endverbatim
*>
*> \param[in] LDWORK
*> \verbatim
*> LDWORK is INTEGER
*> The leading dimension of the array WORK. LDWORK>=max(1,K).
*>
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup singleOTHERauxiliary
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> (1) Description of the Algebraic Operation.
*>
*> The matrix A is a K-by-N matrix composed of two column block
*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K):
*> A = ( A1, A2 ).
*> The matrix B is an M-by-N matrix composed of two column block
*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K):
*> B = ( B1, B2 ).
*>
*> Perform the operation:
*>
*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**T ) * ( A_in ) =
*> ( B_out ) ( B_in ) ( B_in )
*> = ( I - ( V1 ) * T * ( V1**T, V2**T ) ) * ( A_in )
*> ( V2 ) ( B_in )
*> On input:
*>
*> a) ( A_in ) consists of two block columns:
*> ( B_in )
*>
*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in ))
*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )),
*>
*> where the column blocks are:
*>
*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the
*> upper triangular part of the array A(1:K,1:K).
*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored.
*>
*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*> b) V = ( V1 )
*> ( V2 )
*>
*> where:
*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored;
*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix,
*> stored in the lower-triangular part of the array
*> A(1:K,1:K) (ones are not stored),
*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K),
*> (because on input B1_in is a rectangular zero
*> matrix that is not stored and the space is
*> used to store V2).
*>
*> c) T is a K-by-K upper-triangular matrix stored
*> in the array T(1:K,1:K).
*>
*> On output:
*>
*> a) ( A_out ) consists of two block columns:
*> ( B_out )
*>
*> ( A_out ) = (( A1_out ) ( A2_out ))
*> ( B_out ) (( B1_out ) ( B2_out )),
*>
*> where the column blocks are:
*>
*> ( A1_out ) is a K-by-K square matrix, or a K-by-K
*> upper-triangular matrix, if V1 is an
*> identity matrix. AiOut is stored in
*> the array A(1:K,1:K).
*> ( B1_out ) is an M-by-K rectangular matrix stored
*> in the array B(1:M,K:N).
*>
*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*>
*> The operation above can be represented as the same operation
*> on each block column:
*>
*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**T ) * ( A1_in )
*> ( B1_out ) ( 0 ) ( 0 )
*>
*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**T ) * ( A2_in )
*> ( B2_out ) ( B2_in ) ( B2_in )
*>
*> If IDENT != 'I':
*>
*> The computation for column block 1:
*>
*> A1_out: = A1_in - V1*T*(V1**T)*A1_in
*>
*> B1_out: = - V2*T*(V1**T)*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - V1*T*( (V1**T)*A2_in + (V2**T)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( (V1**T)*A2_in + (V2**T)*B2_in )
*>
*> If IDENT == 'I':
*>
*> The operation for column block 1:
*>
*> A1_out: = A1_in - V1*T**A1_in
*>
*> B1_out: = - V2*T**A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - T*( A2_in + (V2**T)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( A2_in + (V2**T)*B2_in )
*>
*> (2) Description of the Algorithmic Computation.
*>
*> In the first step, we compute column block 2, i.e. A2 and B2.
*> Here, we need to use the K-by-(N-K) rectangular workspace
*> matrix W2 that is of the same size as the matrix A2.
*> W2 is stored in the array WORK(1:K,1:(N-K)).
*>
*> In the second step, we compute column block 1, i.e. A1 and B1.
*> Here, we need to use the K-by-K square workspace matrix W1
*> that is of the same size as the as the matrix A1.
*> W1 is stored in the array WORK(1:K,1:K).
*>
*> NOTE: Hence, in this routine, we need the workspace array WORK
*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from
*> the first step and W1 from the second step.
*>
*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I',
*> more computations than in the Case (B).
*>
*> if( IDENT != 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(2) W2: = (V1**T) * W2 = (unit_lower_tr_of_(A1)**T) * W2
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(2) W1: = (V1**T) * W1 = (unit_lower_tr_of_(A1)**T) * W1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6) square A1: = A1 - W1
*> end if
*> end if
*>
*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I',
*> less computations than in the Case (A)
*>
*> if( IDENT == 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(6) upper-triangular_of_(A1): = A1 - W1
*> end if
*> end if
*>
*> Combine these cases (A) and (B) together, this is the resulting
*> algorithm:
*>
*> if ( N > K ) then
*>
*> (First Step - column block 2)
*>
*> col2_(1) W2: = A2
*> if( IDENT != 'I' ) then
*> col2_(2) W2: = (V1**T) * W2
*> = (unit_lower_tr_of_(A1)**T) * W2
*> end if
*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2]
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> if( IDENT != 'I' ) then
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> end if
*> col2_(7) A2: = A2 - W2
*>
*> else
*>
*> (Second Step - column block 1)
*>
*> col1_(1) W1: = A1
*> if( IDENT != 'I' ) then
*> col1_(2) W1: = (V1**T) * W1
*> = (unit_lower_tr_of_(A1)**T) * W1
*> end if
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> if( IDENT != 'I' ) then
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1)
*> end if
*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1)
*>
*> end if
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE SLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
$ WORK, LDWORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER IDENT
INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
REAL A( LDA, * ), B( LDB, * ), T( LDT, * ),
$ WORK( LDWORK, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ONE, ZERO
PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL LNOTIDENT
INTEGER I, J
* ..
* .. EXTERNAL FUNCTIONS ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL SCOPY, SGEMM, STRMM
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N )
$ RETURN
*
LNOTIDENT = .NOT.LSAME( IDENT, 'I' )
*
* ------------------------------------------------------------------
*
* First Step. Computation of the Column Block 2:
*
* ( A2 ) := H * ( A2 )
* ( B2 ) ( B2 )
*
* ------------------------------------------------------------------
*
IF( N.GT.K ) THEN
*
* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N)
* into W2=WORK(1:K, 1:N-K) column-by-column.
*
DO J = 1, N-K
CALL SCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 )
END DO
IF( LNOTIDENT ) THEN
*
* col2_(2) Compute W2: = (V1**T) * W2 = (A1**T) * W2,
* V1 is not an identy matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored).
*
*
CALL STRMM( 'L', 'L', 'T', 'U', K, N-K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(3) Compute W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL SGEMM( 'T', 'N', K, N-K, M, ONE, B, LDB,
$ B( 1, K+1 ), LDB, ONE, WORK, LDWORK )
END IF
*
* col2_(4) Compute W2: = T * W2,
* T is upper-triangular.
*
CALL STRMM( 'L', 'U', 'N', 'N', K, N-K, ONE, T, LDT,
$ WORK, LDWORK )
*
* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2,
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL SGEMM( 'N', 'N', M, N-K, K, -ONE, B, LDB,
$ WORK, LDWORK, ONE, B( 1, K+1 ), LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col2_(6) Compute W2: = V1 * W2 = A1 * W2,
* V1 is not an identity matrix, but unit lower-triangular,
* V1 stored in A1 (diagonal ones are not stored).
*
CALL STRMM( 'L', 'L', 'N', 'U', K, N-K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(7) Compute A2: = A2 - W2 =
* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K),
* column-by-column.
*
DO J = 1, N-K
DO I = 1, K
A( I, K+J ) = A( I, K+J ) - WORK( I, J )
END DO
END DO
*
END IF
*
* ------------------------------------------------------------------
*
* Second Step. Computation of the Column Block 1:
*
* ( A1 ) := H * ( A1 )
* ( B1 ) ( 0 )
*
* ------------------------------------------------------------------
*
* col1_(1) Compute W1: = A1. Copy the upper-triangular
* A1 = A(1:K, 1:K) into the upper-triangular
* W1 = WORK(1:K, 1:K) column-by-column.
*
DO J = 1, K
CALL SCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 )
END DO
*
* Set the subdiagonal elements of W1 to zero column-by-column.
*
DO J = 1, K - 1
DO I = J + 1, K
WORK( I, J ) = ZERO
END DO
END DO
*
IF( LNOTIDENT ) THEN
*
* col1_(2) Compute W1: = (V1**T) * W1 = (A1**T) * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL STRMM( 'L', 'L', 'T', 'U', K, K, ONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col1_(3) Compute W1: = T * W1,
* T is upper-triangular,
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL STRMM( 'L', 'U', 'N', 'N', K, K, ONE, T, LDT,
$ WORK, LDWORK )
*
* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1,
* V2 = B1, W1 is upper-triangular with zeroes below the diagonal.
*
IF( M.GT.0 ) THEN
CALL STRMM( 'R', 'U', 'N', 'N', M, K, -ONE, WORK, LDWORK,
$ B, LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col1_(5) Compute W1: = V1 * W1 = A1 * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular on input with zeroes below the diagonal,
* and square on output.
*
CALL STRMM( 'L', 'L', 'N', 'U', K, K, ONE, A, LDA,
$ WORK, LDWORK )
*
* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K)
* column-by-column. A1 is upper-triangular on input.
* If IDENT, A1 is square on output, and W1 is square,
* if NOT IDENT, A1 is upper-triangular on output,
* W1 is upper-triangular.
*
* col1_(6)_a Compute elements of A1 below the diagonal.
*
DO J = 1, K - 1
DO I = J + 1, K
A( I, J ) = - WORK( I, J )
END DO
END DO
*
END IF
*
* col1_(6)_b Compute elements of A1 on and above the diagonal.
*
DO J = 1, K
DO I = 1, J
A( I, J ) = A( I, J ) - WORK( I, J )
END DO
END DO
*
RETURN
*
* End of SLARFB_GETT
*
END

View File

@ -0,0 +1,379 @@
*> \brief \b SORGTSQR_ROW
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download SORGTSQR_ROW + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sorgtsqr_row.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sorgtsqr_row.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sorgtsqr_row.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE SORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
* REAL A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SORGTSQR_ROW generates an M-by-N real matrix Q_out with
*> orthonormal columns from the output of SLATSQR. These N orthonormal
*> columns are the first N columns of a product of complex unitary
*> matrices Q(k)_in of order M, which are returned by SLATSQR in
*> a special format.
*>
*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
*>
*> The input matrices Q(k)_in are stored in row and column blocks in A.
*> See the documentation of SLATSQR for more details on the format of
*> Q(k)_in, where each Q(k)_in is represented by block Householder
*> transformations. This routine calls an auxiliary routine SLARFB_GETT,
*> where the computation is performed on each individual block. The
*> algorithm first sweeps NB-sized column blocks from the right to left
*> starting in the bottom row block and continues to the top row block
*> (hence _ROW in the routine name). This sweep is in reverse order of
*> the order in which SLATSQR generates the output blocks.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB
*> \verbatim
*> MB is INTEGER
*> The row block size used by SLATSQR to return
*> arrays A and T. MB > N.
*> (Note that if MB > M, then M is used instead of MB
*> as the row block size).
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The column block size used by SLATSQR to return
*> arrays A and T. NB >= 1.
*> (Note that if NB > N, then N is used instead of NB
*> as the column block size).
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is REAL array, dimension (LDA,N)
*>
*> On entry:
*>
*> The elements on and above the diagonal are not used as
*> input. The elements below the diagonal represent the unit
*> lower-trapezoidal blocked matrix V computed by SLATSQR
*> that defines the input matrices Q_in(k) (ones on the
*> diagonal are not stored). See SLATSQR for more details.
*>
*> On exit:
*>
*> The array A contains an M-by-N orthonormal matrix Q_out,
*> i.e the columns of A are orthogonal unit vectors.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is REAL array,
*> dimension (LDT, N * NIRB)
*> where NIRB = Number_of_input_row_blocks
*> = MAX( 1, CEIL((M-N)/(MB-N)) )
*> Let NICB = Number_of_input_col_blocks
*> = CEIL(N/NB)
*>
*> The upper-triangular block reflectors used to define the
*> input matrices Q_in(k), k=(1:NIRB*NICB). The block
*> reflectors are stored in compact form in NIRB block
*> reflector sequences. Each of the NIRB block reflector
*> sequences is stored in a larger NB-by-N column block of T
*> and consists of NICB smaller NB-by-NB upper-triangular
*> column blocks. See SLATSQR for more details on the format
*> of T.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T.
*> LDT >= max(1,min(NB,N)).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) REAL array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)),
*> where NBLOCAL=MIN(NB,N).
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*>
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup sigleOTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE SORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
REAL A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ONE, ZERO
PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM,
$ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB,
$ KB, KB_LAST, KNB, MB1
* ..
* .. Local Arrays ..
REAL DUMMY( 1, 1 )
* ..
* .. External Subroutines ..
EXTERNAL SLARFB_GETT, SLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC REAL, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB.LE.N ) THEN
INFO = -3
ELSE IF( NB.LT.1 ) THEN
INFO = -4
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -6
ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
INFO = -8
ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
*
NBLOCAL = MIN( NB, N )
*
* Determine the workspace size.
*
IF( INFO.EQ.0 ) THEN
LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) )
END IF
*
* Handle error in the input parameters and handle the workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SORGTSQR_ROW', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = REAL( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = REAL( LWORKOPT )
RETURN
END IF
*
* (0) Set the upper-triangular part of the matrix A to zero and
* its diagonal elements to one.
*
CALL SLASET('U', M, N, ZERO, ONE, A, LDA )
*
* KB_LAST is the column index of the last column block reflector
* in the matrices T and V.
*
KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1
*
*
* (1) Bottom-up loop over row blocks of A, except the top row block.
* NOTE: If MB>=M, then the loop is never executed.
*
IF ( MB.LT.M ) THEN
*
* MB2 is the row blocking size for the row blocks before the
* first top row block in the matrix A. IB is the row index for
* the row blocks in the matrix A before the first top row block.
* IB_BOTTOM is the row index for the last bottom row block
* in the matrix A. JB_T is the column index of the corresponding
* column block in the matrix T.
*
* Initialize variables.
*
* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A
* including the first row block.
*
MB2 = MB - N
M_PLUS_ONE = M + 1
ITMP = ( M - MB - 1 ) / MB2
IB_BOTTOM = ITMP * MB2 + MB + 1
NUM_ALL_ROW_BLOCKS = ITMP + 2
JB_T = NUM_ALL_ROW_BLOCKS * N + 1
*
DO IB = IB_BOTTOM, MB+1, -MB2
*
* Determine the block size IMB for the current row block
* in the matrix A.
*
IMB = MIN( M_PLUS_ONE - IB, MB2 )
*
* Determine the column index JB_T for the current column block
* in the matrix T.
*
JB_T = JB_T - N
*
* Apply column blocks of H in the row block from right to left.
*
* KB is the column index of the current column block reflector
* in the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
CALL SLARFB_GETT( 'I', IMB, N-KB+1, KNB,
$ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA,
$ A( IB, KB ), LDA, WORK, KNB )
*
END DO
*
END DO
*
END IF
*
* (2) Top row block of A.
* NOTE: If MB>=M, then we have only one row block of A of size M
* and we work on the entire matrix A.
*
MB1 = MIN( MB, M )
*
* Apply column blocks of H in the top row block from right to left.
*
* KB is the column index of the current block reflector in
* the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
IF( MB1-KB-KNB+1.EQ.0 ) THEN
*
* In SLARFB_GETT parameters, when M=0, then the matrix B
* does not exist, hence we need to pass a dummy array
* reference DUMMY(1,1) to B with LDDUMMY=1.
*
CALL SLARFB_GETT( 'N', 0, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ DUMMY( 1, 1 ), 1, WORK, KNB )
ELSE
CALL SLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ A( KB+KNB, KB), LDA, WORK, KNB )
END IF
*
END DO
*
WORK( 1 ) = REAL( LWORKOPT )
RETURN
*
* End of SORGTSQR_ROW
*
END

View File

@ -0,0 +1,349 @@
*> \brief \b ZGETSQRHRT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download ZGETSQRHRT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetsqrhrt.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetsqrhrt.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetsqrhrt.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZGETSQRHRT computes a NB2-sized column blocked QR-factorization
*> of a complex M-by-N matrix A with M >= N,
*>
*> A = Q * R.
*>
*> The routine uses internally a NB1-sized column blocked and MB1-sized
*> row blocked TSQR-factorization and perfors the reconstruction
*> of the Householder vectors from the TSQR output. The routine also
*> converts the R_tsqr factor from the TSQR-factorization output into
*> the R factor that corresponds to the Householder QR-factorization,
*>
*> A = Q_tsqr * R_tsqr = Q * R.
*>
*> The output Q and R factors are stored in the same format as in ZGEQRT
*> (Q is in blocked compact WY-representation). See the documentation
*> of ZGEQRT for more details on the format.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> The row block size to be used in the blocked TSQR.
*> MB1 > N.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> The column block size to be used in the blocked TSQR.
*> N >= NB1 >= 1.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> The block size to be used in the blocked QR that is
*> output. NB2 >= 1.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX*16 array, dimension (LDA,N)
*>
*> On entry: an M-by-N matrix A.
*>
*> On exit:
*> a) the elements on and above the diagonal
*> of the array contain the N-by-N upper-triangular
*> matrix R corresponding to the Householder QR;
*> b) the elements below the diagonal represent Q by
*> the columns of blocked V (compact WY-representation).
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[out] T
*> \verbatim
*> T is COMPLEX*16 array, dimension (LDT,N))
*> The upper triangular block reflectors stored in compact form
*> as a sequence of upper triangular blocks.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= NB2.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ),
*> where
*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)),
*> NB1LOCAL = MIN(NB1,N).
*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL,
*> LW1 = NB1LOCAL * N,
*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ),
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup comlpex16OTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
* ..
* .. Array Arguments ..
COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX*16 CONE
PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT,
$ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS
* ..
* .. External Subroutines ..
EXTERNAL ZCOPY, ZLATSQR, ZUNGTSQR_ROW, ZUNHR_COL,
$ XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, DBLE, DCMPLX, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB1.LE.N ) THEN
INFO = -3
ELSE IF( NB1.LT.1 ) THEN
INFO = -4
ELSE IF( NB2.LT.1 ) THEN
INFO = -5
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -7
ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN
INFO = -9
ELSE
*
* Test the input LWORK for the dimension of the array WORK.
* This workspace is used to store array:
* a) Matrix T and WORK for ZLATSQR;
* b) N-by-N upper-triangular factor R_tsqr;
* c) Matrix T and array WORK for ZUNGTSQR_ROW;
* d) Diagonal D for ZUNHR_COL.
*
IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN
INFO = -11
ELSE
*
* Set block size for column blocks
*
NB1LOCAL = MIN( NB1, N )
*
NUM_ALL_ROW_BLOCKS = MAX( 1,
$ CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) )
*
* Length and leading dimension of WORK array to place
* T array in TSQR.
*
LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL
LDWT = NB1LOCAL
*
* Length of TSQR work array
*
LW1 = NB1LOCAL * N
*
* Length of ZUNGTSQR_ROW work array.
*
LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) )
*
LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) )
*
IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
INFO = -11
END IF
*
END IF
END IF
*
* Handle error in the input parameters and return workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'ZGETSQRHRT', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
END IF
*
NB2LOCAL = MIN( NB2, N )
*
*
* (1) Perform TSQR-factorization of the M-by-N matrix A.
*
CALL ZLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK(LWT+1), LW1, IINFO )
*
* (2) Copy the factor R_tsqr stored in the upper-triangular part
* of A into the square matrix in the work array
* WORK(LWT+1:LWT+N*N) column-by-column.
*
DO J = 1, N
CALL ZCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 )
END DO
*
* (3) Generate a M-by-N matrix Q with orthonormal columns from
* the result stored below the diagonal in the array A in place.
*
CALL ZUNGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
$ WORK( LWT+N*N+1 ), LW2, IINFO )
*
* (4) Perform the reconstruction of Householder vectors from
* the matrix Q (stored in A) in place.
*
CALL ZUNHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT,
$ WORK( LWT+N*N+1 ), IINFO )
*
* (5) Copy the factor R_tsqr stored in the square matrix in the
* work array WORK(LWT+1:LWT+N*N) into the upper-triangular
* part of A.
*
* (6) Compute from R_tsqr the factor R_hr corresponding to
* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr.
* This multiplication by the sign matrix S on the left means
* changing the sign of I-th row of the matrix R_tsqr according
* to sign of the I-th diagonal element DIAG(I) of the matrix S.
* DIAG is stored in WORK( LWT+N*N+1 ) from the ZUNHR_COL output.
*
* (5) and (6) can be combined in a single loop, so the rows in A
* are accessed only once.
*
DO I = 1, N
IF( WORK( LWT+N*N+I ).EQ.-CONE ) THEN
DO J = I, N
A( I, J ) = -CONE * WORK( LWT+N*(J-1)+I )
END DO
ELSE
CALL ZCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA )
END IF
END DO
*
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
*
* End of ZGETSQRHRT
*
END

View File

@ -0,0 +1,597 @@
*> \brief \b ZLARFB_GETT
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download ZLARFB_GETT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfb_gett.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfb_gett.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfb_gett.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE ZLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
* $ WORK, LDWORK )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* CHARACTER IDENT
* INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ),
* $ WORK( LDWORK, * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZLARFB_GETT applies a complex Householder block reflector H from the
*> left to a complex (K+M)-by-N "triangular-pentagonal" matrix
*> composed of two block matrices: an upper trapezoidal K-by-N matrix A
*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
*> in the array B. The block reflector H is stored in a compact
*> WY-representation, where the elementary reflectors are in the
*> arrays A, B and T. See Further Details section.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] IDENT
*> \verbatim
*> IDENT is CHARACTER*1
*> If IDENT = not 'I', or not 'i', then V1 is unit
*> lower-triangular and stored in the left K-by-K block of
*> the input matrix A,
*> If IDENT = 'I' or 'i', then V1 is an identity matrix and
*> not stored.
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix B.
*> M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrices A and B.
*> N >= 0.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The number or rows of the matrix A.
*> K is also order of the matrix T, i.e. the number of
*> elementary reflectors whose product defines the block
*> reflector. 0 <= K <= N.
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is COMPLEX*16 array, dimension (LDT,K)
*> The upper-triangular K-by-K matrix T in the representation
*> of the block reflector.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= K.
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX*16 array, dimension (LDA,N)
*>
*> On entry:
*> a) In the K-by-N upper-trapezoidal part A: input matrix A.
*> b) In the columns below the diagonal: columns of V1
*> (ones are not stored on the diagonal).
*>
*> On exit:
*> A is overwritten by rectangular K-by-N product H*A.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array A. LDA >= max(1,K).
*> \endverbatim
*>
*> \param[in,out] B
*> \verbatim
*> B is COMPLEX*16 array, dimension (LDB,N)
*>
*> On entry:
*> a) In the M-by-(N-K) right block: input matrix B.
*> b) In the M-by-N left block: columns of V2.
*>
*> On exit:
*> B is overwritten by rectangular M-by-N product H*B.
*>
*> See Further Details section.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> The leading dimension of the array B. LDB >= max(1,M).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is COMPLEX*16 array,
*> dimension (LDWORK,max(K,N-K))
*> \endverbatim
*>
*> \param[in] LDWORK
*> \verbatim
*> LDWORK is INTEGER
*> The leading dimension of the array WORK. LDWORK>=max(1,K).
*>
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16OTHERauxiliary
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> (1) Description of the Algebraic Operation.
*>
*> The matrix A is a K-by-N matrix composed of two column block
*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K):
*> A = ( A1, A2 ).
*> The matrix B is an M-by-N matrix composed of two column block
*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K):
*> B = ( B1, B2 ).
*>
*> Perform the operation:
*>
*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**H ) * ( A_in ) =
*> ( B_out ) ( B_in ) ( B_in )
*> = ( I - ( V1 ) * T * ( V1**H, V2**H ) ) * ( A_in )
*> ( V2 ) ( B_in )
*> On input:
*>
*> a) ( A_in ) consists of two block columns:
*> ( B_in )
*>
*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in ))
*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )),
*>
*> where the column blocks are:
*>
*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the
*> upper triangular part of the array A(1:K,1:K).
*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored.
*>
*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*> b) V = ( V1 )
*> ( V2 )
*>
*> where:
*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored;
*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix,
*> stored in the lower-triangular part of the array
*> A(1:K,1:K) (ones are not stored),
*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K),
*> (because on input B1_in is a rectangular zero
*> matrix that is not stored and the space is
*> used to store V2).
*>
*> c) T is a K-by-K upper-triangular matrix stored
*> in the array T(1:K,1:K).
*>
*> On output:
*>
*> a) ( A_out ) consists of two block columns:
*> ( B_out )
*>
*> ( A_out ) = (( A1_out ) ( A2_out ))
*> ( B_out ) (( B1_out ) ( B2_out )),
*>
*> where the column blocks are:
*>
*> ( A1_out ) is a K-by-K square matrix, or a K-by-K
*> upper-triangular matrix, if V1 is an
*> identity matrix. AiOut is stored in
*> the array A(1:K,1:K).
*> ( B1_out ) is an M-by-K rectangular matrix stored
*> in the array B(1:M,K:N).
*>
*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored
*> in the array A(1:K,K+1:N).
*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored
*> in the array B(1:M,K+1:N).
*>
*>
*> The operation above can be represented as the same operation
*> on each block column:
*>
*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**H ) * ( A1_in )
*> ( B1_out ) ( 0 ) ( 0 )
*>
*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**H ) * ( A2_in )
*> ( B2_out ) ( B2_in ) ( B2_in )
*>
*> If IDENT != 'I':
*>
*> The computation for column block 1:
*>
*> A1_out: = A1_in - V1*T*(V1**H)*A1_in
*>
*> B1_out: = - V2*T*(V1**H)*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - V1*T*( (V1**H)*A2_in + (V2**H)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( (V1**H)*A2_in + (V2**H)*B2_in )
*>
*> If IDENT == 'I':
*>
*> The operation for column block 1:
*>
*> A1_out: = A1_in - V1*T*A1_in
*>
*> B1_out: = - V2*T*A1_in
*>
*> The computation for column block 2, which exists if N > K:
*>
*> A2_out: = A2_in - T*( A2_in + (V2**H)*B2_in )
*>
*> B2_out: = B2_in - V2*T*( A2_in + (V2**H)*B2_in )
*>
*> (2) Description of the Algorithmic Computation.
*>
*> In the first step, we compute column block 2, i.e. A2 and B2.
*> Here, we need to use the K-by-(N-K) rectangular workspace
*> matrix W2 that is of the same size as the matrix A2.
*> W2 is stored in the array WORK(1:K,1:(N-K)).
*>
*> In the second step, we compute column block 1, i.e. A1 and B1.
*> Here, we need to use the K-by-K square workspace matrix W1
*> that is of the same size as the as the matrix A1.
*> W1 is stored in the array WORK(1:K,1:K).
*>
*> NOTE: Hence, in this routine, we need the workspace array WORK
*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from
*> the first step and W1 from the second step.
*>
*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I',
*> more computations than in the Case (B).
*>
*> if( IDENT != 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(2) W2: = (V1**H) * W2 = (unit_lower_tr_of_(A1)**H) * W2
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(2) W1: = (V1**H) * W1 = (unit_lower_tr_of_(A1)**H) * W1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6) square A1: = A1 - W1
*> end if
*> end if
*>
*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I',
*> less computations than in the Case (A)
*>
*> if( IDENT == 'I' ) then
*> if ( N > K ) then
*> (First Step - column block 2)
*> col2_(1) W2: = A2
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> col2_(7) A2: = A2 - W2
*> else
*> (Second Step - column block 1)
*> col1_(1) W1: = A1
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> col1_(6) upper-triangular_of_(A1): = A1 - W1
*> end if
*> end if
*>
*> Combine these cases (A) and (B) together, this is the resulting
*> algorithm:
*>
*> if ( N > K ) then
*>
*> (First Step - column block 2)
*>
*> col2_(1) W2: = A2
*> if( IDENT != 'I' ) then
*> col2_(2) W2: = (V1**H) * W2
*> = (unit_lower_tr_of_(A1)**H) * W2
*> end if
*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2]
*> col2_(4) W2: = T * W2
*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2
*> if( IDENT != 'I' ) then
*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2
*> end if
*> col2_(7) A2: = A2 - W2
*>
*> else
*>
*> (Second Step - column block 1)
*>
*> col1_(1) W1: = A1
*> if( IDENT != 'I' ) then
*> col1_(2) W1: = (V1**H) * W1
*> = (unit_lower_tr_of_(A1)**H) * W1
*> end if
*> col1_(3) W1: = T * W1
*> col1_(4) B1: = - V2 * W1 = - B1 * W1
*> if( IDENT != 'I' ) then
*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1
*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1)
*> end if
*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1)
*>
*> end if
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE ZLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB,
$ WORK, LDWORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER IDENT
INTEGER K, LDA, LDB, LDT, LDWORK, M, N
* ..
* .. Array Arguments ..
COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ),
$ WORK( LDWORK, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX*16 CONE, CZERO
PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
$ CZERO = ( 0.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LNOTIDENT
INTEGER I, J
* ..
* .. EXTERNAL FUNCTIONS ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL ZCOPY, ZGEMM, ZTRMM
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N )
$ RETURN
*
LNOTIDENT = .NOT.LSAME( IDENT, 'I' )
*
* ------------------------------------------------------------------
*
* First Step. Computation of the Column Block 2:
*
* ( A2 ) := H * ( A2 )
* ( B2 ) ( B2 )
*
* ------------------------------------------------------------------
*
IF( N.GT.K ) THEN
*
* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N)
* into W2=WORK(1:K, 1:N-K) column-by-column.
*
DO J = 1, N-K
CALL ZCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 )
END DO
IF( LNOTIDENT ) THEN
*
* col2_(2) Compute W2: = (V1**H) * W2 = (A1**H) * W2,
* V1 is not an identy matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored).
*
*
CALL ZTRMM( 'L', 'L', 'C', 'U', K, N-K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(3) Compute W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL ZGEMM( 'C', 'N', K, N-K, M, CONE, B, LDB,
$ B( 1, K+1 ), LDB, CONE, WORK, LDWORK )
END IF
*
* col2_(4) Compute W2: = T * W2,
* T is upper-triangular.
*
CALL ZTRMM( 'L', 'U', 'N', 'N', K, N-K, CONE, T, LDT,
$ WORK, LDWORK )
*
* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2,
* V2 stored in B1.
*
IF( M.GT.0 ) THEN
CALL ZGEMM( 'N', 'N', M, N-K, K, -CONE, B, LDB,
$ WORK, LDWORK, CONE, B( 1, K+1 ), LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col2_(6) Compute W2: = V1 * W2 = A1 * W2,
* V1 is not an identity matrix, but unit lower-triangular,
* V1 stored in A1 (diagonal ones are not stored).
*
CALL ZTRMM( 'L', 'L', 'N', 'U', K, N-K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col2_(7) Compute A2: = A2 - W2 =
* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K),
* column-by-column.
*
DO J = 1, N-K
DO I = 1, K
A( I, K+J ) = A( I, K+J ) - WORK( I, J )
END DO
END DO
*
END IF
*
* ------------------------------------------------------------------
*
* Second Step. Computation of the Column Block 1:
*
* ( A1 ) := H * ( A1 )
* ( B1 ) ( 0 )
*
* ------------------------------------------------------------------
*
* col1_(1) Compute W1: = A1. Copy the upper-triangular
* A1 = A(1:K, 1:K) into the upper-triangular
* W1 = WORK(1:K, 1:K) column-by-column.
*
DO J = 1, K
CALL ZCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 )
END DO
*
* Set the subdiagonal elements of W1 to zero column-by-column.
*
DO J = 1, K - 1
DO I = J + 1, K
WORK( I, J ) = CZERO
END DO
END DO
*
IF( LNOTIDENT ) THEN
*
* col1_(2) Compute W1: = (V1**H) * W1 = (A1**H) * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL ZTRMM( 'L', 'L', 'C', 'U', K, K, CONE, A, LDA,
$ WORK, LDWORK )
END IF
*
* col1_(3) Compute W1: = T * W1,
* T is upper-triangular,
* W1 is upper-triangular with zeroes below the diagonal.
*
CALL ZTRMM( 'L', 'U', 'N', 'N', K, K, CONE, T, LDT,
$ WORK, LDWORK )
*
* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1,
* V2 = B1, W1 is upper-triangular with zeroes below the diagonal.
*
IF( M.GT.0 ) THEN
CALL ZTRMM( 'R', 'U', 'N', 'N', M, K, -CONE, WORK, LDWORK,
$ B, LDB )
END IF
*
IF( LNOTIDENT ) THEN
*
* col1_(5) Compute W1: = V1 * W1 = A1 * W1,
* V1 is not an identity matrix, but unit lower-triangular
* V1 stored in A1 (diagonal ones are not stored),
* W1 is upper-triangular on input with zeroes below the diagonal,
* and square on output.
*
CALL ZTRMM( 'L', 'L', 'N', 'U', K, K, CONE, A, LDA,
$ WORK, LDWORK )
*
* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K)
* column-by-column. A1 is upper-triangular on input.
* If IDENT, A1 is square on output, and W1 is square,
* if NOT IDENT, A1 is upper-triangular on output,
* W1 is upper-triangular.
*
* col1_(6)_a Compute elements of A1 below the diagonal.
*
DO J = 1, K - 1
DO I = J + 1, K
A( I, J ) = - WORK( I, J )
END DO
END DO
*
END IF
*
* col1_(6)_b Compute elements of A1 on and above the diagonal.
*
DO J = 1, K
DO I = 1, J
A( I, J ) = A( I, J ) - WORK( I, J )
END DO
END DO
*
RETURN
*
* End of ZLARFB_GETT
*
END

View File

@ -0,0 +1,380 @@
*> \brief \b ZUNGTSQR_ROW
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download ZUNGTSQR_ROW + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
* $ LWORK, INFO )
* IMPLICIT NONE
*
* .. Scalar Arguments ..
* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
*> orthonormal columns from the output of ZLATSQR. These N orthonormal
*> columns are the first N columns of a product of complex unitary
*> matrices Q(k)_in of order M, which are returned by ZLATSQR in
*> a special format.
*>
*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
*>
*> The input matrices Q(k)_in are stored in row and column blocks in A.
*> See the documentation of ZLATSQR for more details on the format of
*> Q(k)_in, where each Q(k)_in is represented by block Householder
*> transformations. This routine calls an auxiliary routine ZLARFB_GETT,
*> where the computation is performed on each individual block. The
*> algorithm first sweeps NB-sized column blocks from the right to left
*> starting in the bottom row block and continues to the top row block
*> (hence _ROW in the routine name). This sweep is in reverse order of
*> the order in which ZLATSQR generates the output blocks.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of columns of the matrix A. M >= N >= 0.
*> \endverbatim
*>
*> \param[in] MB
*> \verbatim
*> MB is INTEGER
*> The row block size used by ZLATSQR to return
*> arrays A and T. MB > N.
*> (Note that if MB > M, then M is used instead of MB
*> as the row block size).
*> \endverbatim
*>
*> \param[in] NB
*> \verbatim
*> NB is INTEGER
*> The column block size used by ZLATSQR to return
*> arrays A and T. NB >= 1.
*> (Note that if NB > N, then N is used instead of NB
*> as the column block size).
*> \endverbatim
*>
*> \param[in,out] A
*> \verbatim
*> A is COMPLEX*16 array, dimension (LDA,N)
*>
*> On entry:
*>
*> The elements on and above the diagonal are not used as
*> input. The elements below the diagonal represent the unit
*> lower-trapezoidal blocked matrix V computed by ZLATSQR
*> that defines the input matrices Q_in(k) (ones on the
*> diagonal are not stored). See ZLATSQR for more details.
*>
*> On exit:
*>
*> The array A contains an M-by-N orthonormal matrix Q_out,
*> i.e the columns of A are orthogonal unit vectors.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max(1,M).
*> \endverbatim
*>
*> \param[in] T
*> \verbatim
*> T is COMPLEX*16 array,
*> dimension (LDT, N * NIRB)
*> where NIRB = Number_of_input_row_blocks
*> = MAX( 1, CEIL((M-N)/(MB-N)) )
*> Let NICB = Number_of_input_col_blocks
*> = CEIL(N/NB)
*>
*> The upper-triangular block reflectors used to define the
*> input matrices Q_in(k), k=(1:NIRB*NICB). The block
*> reflectors are stored in compact form in NIRB block
*> reflector sequences. Each of the NIRB block reflector
*> sequences is stored in a larger NB-by-N column block of T
*> and consists of NICB smaller NB-by-NB upper-triangular
*> column blocks. See ZLATSQR for more details on the format
*> of T.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T.
*> LDT >= max(1,min(NB,N)).
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> \verbatim
*> The dimension of the array WORK.
*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)),
*> where NBLOCAL=MIN(NB,N).
*> If LWORK = -1, then a workspace query is assumed.
*> The routine only calculates the optimal size of the WORK
*> array, returns this value as the first entry of the WORK
*> array, and no error message related to LWORK is issued
*> by XERBLA.
*> \endverbatim
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -i, the i-th argument had an illegal value
*> \endverbatim
*>
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16OTHERcomputational
*
*> \par Contributors:
* ==================
*>
*> \verbatim
*>
*> November 2020, Igor Kozachenko,
*> Computer Science Division,
*> University of California, Berkeley
*>
*> \endverbatim
*>
* =====================================================================
SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
$ LWORK, INFO )
IMPLICIT NONE
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB
* ..
* .. Array Arguments ..
COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX*16 CONE, CZERO
PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
$ CZERO = ( 0.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM,
$ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB,
$ KB, KB_LAST, KNB, MB1
* ..
* .. Local Arrays ..
COMPLEX*16 DUMMY( 1, 1 )
* ..
* .. External Subroutines ..
EXTERNAL ZLARFB_GETT, ZLASET, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DCMPLX, MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
LQUERY = LWORK.EQ.-1
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
INFO = -2
ELSE IF( MB.LE.N ) THEN
INFO = -3
ELSE IF( NB.LT.1 ) THEN
INFO = -4
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -6
ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
INFO = -8
ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
*
NBLOCAL = MIN( NB, N )
*
* Determine the workspace size.
*
IF( INFO.EQ.0 ) THEN
LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) )
END IF
*
* Handle error in the input parameters and handle the workspace query.
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'ZUNGTSQR_ROW', -INFO )
RETURN
ELSE IF ( LQUERY ) THEN
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
END IF
*
* Quick return if possible
*
IF( MIN( M, N ).EQ.0 ) THEN
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
END IF
*
* (0) Set the upper-triangular part of the matrix A to zero and
* its diagonal elements to one.
*
CALL ZLASET('U', M, N, CZERO, CONE, A, LDA )
*
* KB_LAST is the column index of the last column block reflector
* in the matrices T and V.
*
KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1
*
*
* (1) Bottom-up loop over row blocks of A, except the top row block.
* NOTE: If MB>=M, then the loop is never executed.
*
IF ( MB.LT.M ) THEN
*
* MB2 is the row blocking size for the row blocks before the
* first top row block in the matrix A. IB is the row index for
* the row blocks in the matrix A before the first top row block.
* IB_BOTTOM is the row index for the last bottom row block
* in the matrix A. JB_T is the column index of the corresponding
* column block in the matrix T.
*
* Initialize variables.
*
* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A
* including the first row block.
*
MB2 = MB - N
M_PLUS_ONE = M + 1
ITMP = ( M - MB - 1 ) / MB2
IB_BOTTOM = ITMP * MB2 + MB + 1
NUM_ALL_ROW_BLOCKS = ITMP + 2
JB_T = NUM_ALL_ROW_BLOCKS * N + 1
*
DO IB = IB_BOTTOM, MB+1, -MB2
*
* Determine the block size IMB for the current row block
* in the matrix A.
*
IMB = MIN( M_PLUS_ONE - IB, MB2 )
*
* Determine the column index JB_T for the current column block
* in the matrix T.
*
JB_T = JB_T - N
*
* Apply column blocks of H in the row block from right to left.
*
* KB is the column index of the current column block reflector
* in the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
CALL ZLARFB_GETT( 'I', IMB, N-KB+1, KNB,
$ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA,
$ A( IB, KB ), LDA, WORK, KNB )
*
END DO
*
END DO
*
END IF
*
* (2) Top row block of A.
* NOTE: If MB>=M, then we have only one row block of A of size M
* and we work on the entire matrix A.
*
MB1 = MIN( MB, M )
*
* Apply column blocks of H in the top row block from right to left.
*
* KB is the column index of the current block reflector in
* the matrices T and V.
*
DO KB = KB_LAST, 1, -NBLOCAL
*
* Determine the size of the current column block KNB in
* the matrices T and V.
*
KNB = MIN( NBLOCAL, N - KB + 1 )
*
IF( MB1-KB-KNB+1.EQ.0 ) THEN
*
* In SLARFB_GETT parameters, when M=0, then the matrix B
* does not exist, hence we need to pass a dummy array
* reference DUMMY(1,1) to B with LDDUMMY=1.
*
CALL ZLARFB_GETT( 'N', 0, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ DUMMY( 1, 1 ), 1, WORK, KNB )
ELSE
CALL ZLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB,
$ T( 1, KB ), LDT, A( KB, KB ), LDA,
$ A( KB+KNB, KB), LDA, WORK, KNB )
END IF
*
END DO
*
WORK( 1 ) = DCMPLX( LWORKOPT )
RETURN
*
* End of ZUNGTSQR_ROW
*
END

View File

@ -40,7 +40,7 @@ set(SLINTST schkaa.f
sgennd.f sqrt04.f sqrt05.f schkqrt.f serrqrt.f schkqrtp.f serrqrtp.f sgennd.f sqrt04.f sqrt05.f schkqrt.f serrqrt.f schkqrtp.f serrqrtp.f
schklqt.f schklqtp.f schktsqr.f schklqt.f schklqtp.f schktsqr.f
serrlqt.f serrlqtp.f serrtsqr.f stsqr01.f slqt04.f slqt05.f serrlqt.f serrlqtp.f serrtsqr.f stsqr01.f slqt04.f slqt05.f
schkorhr_col.f serrorhr_col.f sorhr_col01.f) schkorhr_col.f serrorhr_col.f sorhr_col01.f sorhr_col02.f)
if(USE_XBLAS) if(USE_XBLAS)
list(APPEND SLINTST sdrvgbx.f sdrvgex.f sdrvsyx.f sdrvpox.f list(APPEND SLINTST sdrvgbx.f sdrvgex.f sdrvsyx.f sdrvpox.f
@ -96,7 +96,7 @@ set(CLINTST cchkaa.f
cqrt04.f cqrt05.f cchkqrt.f cerrqrt.f cchkqrtp.f cerrqrtp.f cqrt04.f cqrt05.f cchkqrt.f cerrqrt.f cchkqrtp.f cerrqrtp.f
cchklqt.f cchklqtp.f cchktsqr.f cchklqt.f cchklqtp.f cchktsqr.f
cerrlqt.f cerrlqtp.f cerrtsqr.f ctsqr01.f clqt04.f clqt05.f cerrlqt.f cerrlqtp.f cerrtsqr.f ctsqr01.f clqt04.f clqt05.f
cchkunhr_col.f cerrunhr_col.f cunhr_col01.f) cchkunhr_col.f cerrunhr_col.f cunhr_col01.f cunhr_col02.f)
if(USE_XBLAS) if(USE_XBLAS)
list(APPEND CLINTST cdrvgbx.f cdrvgex.f cdrvhex.f cdrvsyx.f cdrvpox.f list(APPEND CLINTST cdrvgbx.f cdrvgex.f cdrvhex.f cdrvsyx.f cdrvpox.f
@ -142,7 +142,7 @@ set(DLINTST dchkaa.f
dqrt04.f dqrt05.f dchkqrt.f derrqrt.f dchkqrtp.f derrqrtp.f dqrt04.f dqrt05.f dchkqrt.f derrqrt.f dchkqrtp.f derrqrtp.f
dchklq.f dchklqt.f dchklqtp.f dchktsqr.f dchklq.f dchklqt.f dchklqtp.f dchktsqr.f
derrlqt.f derrlqtp.f derrtsqr.f dtsqr01.f dlqt04.f dlqt05.f derrlqt.f derrlqtp.f derrtsqr.f dtsqr01.f dlqt04.f dlqt05.f
dchkorhr_col.f derrorhr_col.f dorhr_col01.f) dchkorhr_col.f derrorhr_col.f dorhr_col01.f dorhr_col02.f)
if(USE_XBLAS) if(USE_XBLAS)
list(APPEND DLINTST ddrvgbx.f ddrvgex.f ddrvsyx.f ddrvpox.f list(APPEND DLINTST ddrvgbx.f ddrvgex.f ddrvsyx.f ddrvpox.f
@ -198,7 +198,7 @@ set(ZLINTST zchkaa.f
zqrt04.f zqrt05.f zchkqrt.f zerrqrt.f zchkqrtp.f zerrqrtp.f zqrt04.f zqrt05.f zchkqrt.f zerrqrt.f zchkqrtp.f zerrqrtp.f
zchklqt.f zchklqtp.f zchktsqr.f zchklqt.f zchklqtp.f zchktsqr.f
zerrlqt.f zerrlqtp.f zerrtsqr.f ztsqr01.f zlqt04.f zlqt05.f zerrlqt.f zerrlqtp.f zerrtsqr.f ztsqr01.f zlqt04.f zlqt05.f
zchkunhr_col.f zerrunhr_col.f zunhr_col01.f) zchkunhr_col.f zerrunhr_col.f zunhr_col01.f zunhr_col02.f)
if(USE_XBLAS) if(USE_XBLAS)
list(APPEND ZLINTST zdrvgbx.f zdrvgex.f zdrvhex.f zdrvsyx.f zdrvpox.f list(APPEND ZLINTST zdrvgbx.f zdrvgex.f zdrvhex.f zdrvsyx.f zdrvpox.f

View File

@ -74,7 +74,7 @@ SLINTST = schkaa.o \
sgennd.o sqrt04.o sqrt05.o schkqrt.o serrqrt.o schkqrtp.o serrqrtp.o \ sgennd.o sqrt04.o sqrt05.o schkqrt.o serrqrt.o schkqrtp.o serrqrtp.o \
schklqt.o schklqtp.o schktsqr.o \ schklqt.o schklqtp.o schktsqr.o \
serrlqt.o serrlqtp.o serrtsqr.o stsqr01.o slqt04.o slqt05.o \ serrlqt.o serrlqtp.o serrtsqr.o stsqr01.o slqt04.o slqt05.o \
schkorhr_col.o serrorhr_col.o sorhr_col01.o schkorhr_col.o serrorhr_col.o sorhr_col01.o sorhr_col02.o
ifdef USEXBLAS ifdef USEXBLAS
SLINTST += sdrvgbx.o sdrvgex.o sdrvsyx.o sdrvpox.o \ SLINTST += sdrvgbx.o sdrvgex.o sdrvsyx.o sdrvpox.o \
@ -123,7 +123,7 @@ CLINTST = cchkaa.o \
cqrt04.o cqrt05.o cchkqrt.o cerrqrt.o cchkqrtp.o cerrqrtp.o \ cqrt04.o cqrt05.o cchkqrt.o cerrqrt.o cchkqrtp.o cerrqrtp.o \
cchklqt.o cchklqtp.o cchktsqr.o \ cchklqt.o cchklqtp.o cchktsqr.o \
cerrlqt.o cerrlqtp.o cerrtsqr.o ctsqr01.o clqt04.o clqt05.o \ cerrlqt.o cerrlqtp.o cerrtsqr.o ctsqr01.o clqt04.o clqt05.o \
cchkunhr_col.o cerrunhr_col.o cunhr_col01.o cchkunhr_col.o cerrunhr_col.o cunhr_col01.o cunhr_col02.o
ifdef USEXBLAS ifdef USEXBLAS
CLINTST += cdrvgbx.o cdrvgex.o cdrvhex.o cdrvsyx.o cdrvpox.o \ CLINTST += cdrvgbx.o cdrvgex.o cdrvhex.o cdrvsyx.o cdrvpox.o \
@ -167,7 +167,7 @@ DLINTST = dchkaa.o \
dqrt04.o dqrt05.o dchkqrt.o derrqrt.o dchkqrtp.o derrqrtp.o \ dqrt04.o dqrt05.o dchkqrt.o derrqrt.o dchkqrtp.o derrqrtp.o \
dchklq.o dchklqt.o dchklqtp.o dchktsqr.o \ dchklq.o dchklqt.o dchklqtp.o dchktsqr.o \
derrlqt.o derrlqtp.o derrtsqr.o dtsqr01.o dlqt04.o dlqt05.o \ derrlqt.o derrlqtp.o derrtsqr.o dtsqr01.o dlqt04.o dlqt05.o \
dchkorhr_col.o derrorhr_col.o dorhr_col01.o dchkorhr_col.o derrorhr_col.o dorhr_col01.o dorhr_col02.o
ifdef USEXBLAS ifdef USEXBLAS
DLINTST += ddrvgbx.o ddrvgex.o ddrvsyx.o ddrvpox.o \ DLINTST += ddrvgbx.o ddrvgex.o ddrvsyx.o ddrvpox.o \
@ -215,7 +215,7 @@ ZLINTST = zchkaa.o \
zqrt04.o zqrt05.o zchkqrt.o zerrqrt.o zchkqrtp.o zerrqrtp.o \ zqrt04.o zqrt05.o zchkqrt.o zerrqrt.o zchkqrtp.o zerrqrtp.o \
zchklqt.o zchklqtp.o zchktsqr.o \ zchklqt.o zchklqtp.o zchktsqr.o \
zerrlqt.o zerrlqtp.o zerrtsqr.o ztsqr01.o zlqt04.o zlqt05.o \ zerrlqt.o zerrlqtp.o zerrtsqr.o ztsqr01.o zlqt04.o zlqt05.o \
zchkunhr_col.o zerrunhr_col.o zunhr_col01.o zchkunhr_col.o zerrunhr_col.o zunhr_col01.o zunhr_col02.o
ifdef USEXBLAS ifdef USEXBLAS
ZLINTST += zdrvgbx.o zdrvgex.o zdrvhex.o zdrvsyx.o zdrvpox.o \ ZLINTST += zdrvgbx.o zdrvgex.o zdrvhex.o zdrvsyx.o zdrvpox.o \

View File

@ -24,9 +24,12 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> CCHKUNHR_COL tests CUNHR_COL using CLATSQR and CGEMQRT. Therefore, CLATSQR *> CCHKUNHR_COL tests:
*> (used in CGEQR) and CGEMQRT (used in CGEMQR) have to be tested *> 1) CUNGTSQR and CUNHR_COL using CLATSQR, CGEMQRT,
*> before this test. *> 2) CUNGTSQR_ROW and CUNHR_COL inside CGETSQRHRT
*> (which calls CLATSQR, CUNGTSQR_ROW and CUNHR_COL) using CGEMQRT.
*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR)
*> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
* *
@ -97,19 +100,16 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019
*
*> \ingroup complex_lin *> \ingroup complex_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE CCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, SUBROUTINE CCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL,
$ NBVAL, NOUT ) $ NNB, NBVAL, NOUT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
LOGICAL TSTERR LOGICAL TSTERR
@ -135,10 +135,11 @@
REAL RESULT( NTESTS ) REAL RESULT( NTESTS )
* .. * ..
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL ALAHD, ALASUM, CERRUNHR_COL, CUNHR_COL01 EXTERNAL ALAHD, ALASUM, CERRUNHR_COL, CUNHR_COL01,
$ CUNHR_COL02
* .. * ..
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX, MIN INTRINSIC MAX, MIN
* .. * ..
* .. Scalars in Common .. * .. Scalars in Common ..
LOGICAL LERR, OK LOGICAL LERR, OK
@ -201,8 +202,8 @@
* *
* Test CUNHR_COL * Test CUNHR_COL
* *
CALL CUNHR_COL01( M, N, MB1, NB1, NB2, CALL CUNHR_COL01( M, N, MB1, NB1,
$ RESULT ) $ NB2, RESULT )
* *
* Print information about the tests that did * Print information about the tests that did
* not pass the threshold. * not pass the threshold.
@ -226,12 +227,78 @@
END DO END DO
END DO END DO
* *
* Do for each value of M in MVAL.
*
DO I = 1, NM
M = MVAL( I )
*
* Do for each value of N in NVAL.
*
DO J = 1, NN
N = NVAL( J )
*
* Only for M >= N
*
IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN
*
* Do for each possible value of MB1
*
DO IMB1 = 1, NNB
MB1 = NBVAL( IMB1 )
*
* Only for MB1 > N
*
IF ( MB1.GT.N ) THEN
*
* Do for each possible value of NB1
*
DO INB1 = 1, NNB
NB1 = NBVAL( INB1 )
*
* Do for each possible value of NB2
*
DO INB2 = 1, NNB
NB2 = NBVAL( INB2 )
*
IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN
*
* Test CUNHR_COL
*
CALL CUNHR_COL02( M, N, MB1, NB1,
$ NB2, RESULT )
*
* Print information about the tests that did
* not pass the threshold.
*
DO T = 1, NTESTS
IF( RESULT( T ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9998 ) M, N, MB1,
$ NB1, NB2, T, RESULT( T )
NFAIL = NFAIL + 1
END IF
END DO
NRUN = NRUN + NTESTS
END IF
END DO
END DO
END IF
END DO
END IF
END DO
END DO
*
* Print a summary of the results. * Print a summary of the results.
* *
CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS )
* *
9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, 9999 FORMAT( 'CUNGTSQR and CUNHR_COL: M=', I5, ', N=', I5,
$ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
9998 FORMAT( 'CUNGTSQR_ROW and CUNHR_COL: M=', I5, ', N=', I5,
$ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
RETURN RETURN
* *
* End of CCHKUNHR_COL * End of CCHKUNHR_COL

View File

@ -13,7 +13,7 @@
* .. Scalar Arguments .. * .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2 * INTEGER M, N, MB1, NB1, NB2
* .. Return values .. * .. Return values ..
* REAL RESULT(6) * DOUBLE PRECISION RESULT(6)
* *
* *
*> \par Purpose: *> \par Purpose:
@ -21,8 +21,8 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> CUNHR_COL01 tests CUNHR_COL using CLATSQR, CGEMQRT and CUNGTSQR. *> CUNHR_COL01 tests CUNGTSQR and CUNHR_COL using CLATSQR, CGEMQRT.
*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part CGEMQR), CUNGTSQR *> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR)
*> have to be tested before this test. *> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
@ -62,14 +62,46 @@
*> \verbatim *> \verbatim
*> RESULT is REAL array, dimension (6) *> RESULT is REAL array, dimension (6)
*> Results of each of the six tests below. *> Results of each of the six tests below.
*> ( C is a M-by-N random matrix, D is a N-by-M random matrix )
*> *>
*> RESULT(1) = | A - Q * R | / (eps * m * |A|) *> A is a m-by-n test input matrix to be factored.
*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) *> so that A = Q_gr * ( R )
*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) *> ( 0 ),
*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) *>
*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) *> Q_qr is an implicit m-by-m unitary Q matrix, the result
*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) *> of factorization in blocked WY-representation,
*> stored in CGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using CGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using CGEMM.
*> \endverbatim *> \endverbatim
* *
* Authors: * Authors:
@ -80,18 +112,15 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019 *> \ingroup complex_lin
*
*> \ingroup complex16_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE CUNHR_COL01( M, N, MB1, NB1, NB2, RESULT ) SUBROUTINE CUNHR_COL01( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.9.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2019
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2 INTEGER M, N, MB1, NB1, NB2
@ -102,10 +131,10 @@
* *
* .. * ..
* .. Local allocatable arrays * .. Local allocatable arrays
COMPLEX, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), COMPLEX , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:) $ C(:,:), CF(:,:), D(:,:), DF(:,:)
REAL, ALLOCATABLE :: RWORK(:) REAL , ALLOCATABLE :: RWORK(:)
* *
* .. Parameters .. * .. Parameters ..
REAL ZERO REAL ZERO
@ -218,7 +247,7 @@
* Copy the factor R into the array R. * Copy the factor R into the array R.
* *
SRNAMT = 'CLACPY' SRNAMT = 'CLACPY'
CALL CLACPY( 'U', M, N, AF, M, R, M ) CALL CLACPY( 'U', N, N, AF, M, R, M )
* *
* Reconstruct the orthogonal matrix Q. * Reconstruct the orthogonal matrix Q.
* *
@ -240,7 +269,7 @@
* matrix S. * matrix S.
* *
SRNAMT = 'CLACPY' SRNAMT = 'CLACPY'
CALL CLACPY( 'U', M, N, R, M, AF, M ) CALL CLACPY( 'U', N, N, R, M, AF, M )
* *
DO I = 1, N DO I = 1, N
IF( DIAG( I ).EQ.-CONE ) THEN IF( DIAG( I ).EQ.-CONE ) THEN

View File

@ -0,0 +1,381 @@
*> \brief \b CUNHR_COL02
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE CUNHR_COL02( M, N, MB1, NB1, NB2, RESULT )
*
* .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
* REAL RESULT(6)
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CUNHR_COL02 tests CUNGTSQR_ROW and CUNHR_COL inside CGETSQRHRT
*> (which calls CLATSQR, CUNGTSQR_ROW and CUNHR_COL) using CGEMQRT.
*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR)
*> have to be tested before this test.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> Number of rows in test matrix.
*> \endverbatim
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> Number of columns in test matrix.
*> \endverbatim
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> Number of row in row block in an input test matrix.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> Number of columns in column block an input test matrix.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> Number of columns in column block in an output test matrix.
*> \endverbatim
*>
*> \param[out] RESULT
*> \verbatim
*> RESULT is REAL array, dimension (6)
*> Results of each of the six tests below.
*>
*> A is a m-by-n test input matrix to be factored.
*> so that A = Q_gr * ( R )
*> ( 0 ),
*>
*> Q_qr is an implicit m-by-m unitary Q matrix, the result
*> of factorization in blocked WY-representation,
*> stored in CGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using CGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using CGEMM.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex_lin
*
* =====================================================================
SUBROUTINE CUNHR_COL02( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
REAL RESULT(6)
*
* =====================================================================
*
* ..
* .. Local allocatable arrays
COMPLEX , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:)
REAL , ALLOCATABLE :: RWORK(:)
*
* .. Parameters ..
REAL ZERO
PARAMETER ( ZERO = 0.0E+0 )
COMPLEX CONE, CZERO
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ),
$ CZERO = ( 0.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
LOGICAL TESTZEROS
INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB
REAL ANORM, EPS, RESID, CNORM, DNORM
* ..
* .. Local Arrays ..
INTEGER ISEED( 4 )
COMPLEX WORKQUERY( 1 )
* ..
* .. External Functions ..
REAL SLAMCH, CLANGE, CLANSY
EXTERNAL SLAMCH, CLANGE, CLANSY
* ..
* .. External Subroutines ..
EXTERNAL CLACPY, CLARNV, CLASET, CGETSQRHRT,
$ CSCAL, CGEMM, CGEMQRT, CHERK
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, REAL, MAX, MIN
* ..
* .. Scalars in Common ..
CHARACTER(LEN=32) SRNAMT
* ..
* .. Common blocks ..
COMMON / SRMNAMC / SRNAMT
* ..
* .. Data statements ..
DATA ISEED / 1988, 1989, 1990, 1991 /
*
* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS
*
TESTZEROS = .FALSE.
*
EPS = SLAMCH( 'Epsilon' )
K = MIN( M, N )
L = MAX( M, N, 1)
*
* Dynamically allocate local arrays
*
ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L),
$ C(M,N), CF(M,N),
$ D(N,M), DF(N,M) )
*
* Put random numbers into A and copy to AF
*
DO J = 1, N
CALL CLARNV( 2, ISEED, M, A( 1, J ) )
END DO
IF( TESTZEROS ) THEN
IF( M.GE.4 ) THEN
DO J = 1, N
CALL CLARNV( 2, ISEED, M/2, A( M/4, J ) )
END DO
END IF
END IF
CALL CLACPY( 'Full', M, N, A, M, AF, M )
*
* Number of row blocks in CLATSQR
*
NRB = MAX( 1, CEILING( REAL( M - N ) / REAL( MB1 - N ) ) )
*
ALLOCATE ( T1( NB1, N * NRB ) )
ALLOCATE ( T2( NB2, N ) )
ALLOCATE ( DIAG( N ) )
*
* Begin determine LWORK for the array WORK and allocate memory.
*
* CGEMQRT requires NB2 to be bounded by N.
*
NB2_UB = MIN( NB2, N)
*
*
CALL CGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORKQUERY, -1, INFO )
*
LWORK = INT( WORKQUERY( 1 ) )
*
* In CGEMQRT, WORK is N*NB2_UB if SIDE = 'L',
* or M*NB2_UB if SIDE = 'R'.
*
LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M )
*
ALLOCATE ( WORK( LWORK ) )
*
* End allocate memory for WORK.
*
*
* Begin Householder reconstruction routines
*
* Factor the matrix A in the array AF.
*
SRNAMT = 'CGETSQRHRT'
CALL CGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORK, LWORK, INFO )
*
* End Householder reconstruction routines.
*
*
* Generate the m-by-m matrix Q
*
CALL CLASET( 'Full', M, M, CZERO, CONE, Q, M )
*
SRNAMT = 'CGEMQRT'
CALL CGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M,
$ WORK, INFO )
*
* Copy R
*
CALL CLASET( 'Full', M, N, CZERO, CZERO, R, M )
*
CALL CLACPY( 'Upper', M, N, AF, M, R, M )
*
* TEST 1
* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1)
*
CALL CGEMM( 'C', 'N', M, N, M, -CONE, Q, M, A, M, CONE, R, M )
*
ANORM = CLANGE( '1', M, N, A, M, RWORK )
RESID = CLANGE( '1', M, N, R, M, RWORK )
IF( ANORM.GT.ZERO ) THEN
RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM )
ELSE
RESULT( 1 ) = ZERO
END IF
*
* TEST 2
* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2)
*
CALL CLASET( 'Full', M, M, CZERO, CONE, R, M )
CALL CHERK( 'U', 'C', M, M, -CONE, Q, M, CONE, R, M )
RESID = CLANSY( '1', 'Upper', M, R, M, RWORK )
RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) )
*
* Generate random m-by-n matrix C
*
DO J = 1, N
CALL CLARNV( 2, ISEED, M, C( 1, J ) )
END DO
CNORM = CLANGE( '1', M, N, C, M, RWORK )
CALL CLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as Q*C = CF
*
SRNAMT = 'CGEMQRT'
CALL CGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 3
* Compute |CF - Q*C| / ( eps * m * |C| )
*
CALL CGEMM( 'N', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M )
RESID = CLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 3 ) = ZERO
END IF
*
* Copy C into CF again
*
CALL CLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as (Q**T)*C = CF
*
SRNAMT = 'CGEMQRT'
CALL CGEMQRT( 'L', 'C', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 4
* Compute |CF - (Q**T)*C| / ( eps * m * |C|)
*
CALL CGEMM( 'C', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M )
RESID = CLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 4 ) = ZERO
END IF
*
* Generate random n-by-m matrix D and a copy DF
*
DO J = 1, M
CALL CLARNV( 2, ISEED, N, D( 1, J ) )
END DO
DNORM = CLANGE( '1', N, M, D, N, RWORK )
CALL CLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*Q = DF
*
SRNAMT = 'CGEMQRT'
CALL CGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 5
* Compute |DF - D*Q| / ( eps * m * |D| )
*
CALL CGEMM( 'N', 'N', N, M, M, -CONE, D, N, Q, M, CONE, DF, N )
RESID = CLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 5 ) = ZERO
END IF
*
* Copy D into DF again
*
CALL CLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*QT = DF
*
SRNAMT = 'CGEMQRT'
CALL CGEMQRT( 'R', 'C', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 6
* Compute |DF - D*(Q**T)| / ( eps * m * |D| )
*
CALL CGEMM( 'N', 'C', N, M, M, -CONE, D, N, Q, M, CONE, DF, N )
RESID = CLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 6 ) = ZERO
END IF
*
* Deallocate all arrays
*
DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG,
$ C, D, CF, DF )
*
RETURN
*
* End of CUNHR_COL02
*
END

View File

@ -24,9 +24,12 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> DCHKORHR_COL tests DORHR_COL using DLATSQR and DGEMQRT. Therefore, DLATSQR *> DCHKORHR_COL tests:
*> (used in DGEQR) and DGEMQRT (used in DGEMQR) have to be tested *> 1) DORGTSQR and DORHR_COL using DLATSQR, DGEMQRT,
*> before this test. *> 2) DORGTSQR_ROW and DORHR_COL inside DGETSQRHRT
*> (which calls DLATSQR, DORGTSQR_ROW and DORHR_COL) using DGEMQRT.
*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR)
*> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
* *
@ -97,19 +100,16 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019
*
*> \ingroup double_lin *> \ingroup double_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE DCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, SUBROUTINE DCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL,
$ NBVAL, NOUT ) $ NNB, NBVAL, NOUT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
LOGICAL TSTERR LOGICAL TSTERR
@ -135,10 +135,11 @@
DOUBLE PRECISION RESULT( NTESTS ) DOUBLE PRECISION RESULT( NTESTS )
* .. * ..
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL ALAHD, ALASUM, DERRORHR_COL, DORHR_COL01 EXTERNAL ALAHD, ALASUM, DERRORHR_COL, DORHR_COL01,
$ DORHR_COL02
* .. * ..
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX, MIN INTRINSIC MAX, MIN
* .. * ..
* .. Scalars in Common .. * .. Scalars in Common ..
LOGICAL LERR, OK LOGICAL LERR, OK
@ -201,8 +202,8 @@
* *
* Test DORHR_COL * Test DORHR_COL
* *
CALL DORHR_COL01( M, N, MB1, NB1, NB2, CALL DORHR_COL01( M, N, MB1, NB1,
$ RESULT ) $ NB2, RESULT )
* *
* Print information about the tests that did * Print information about the tests that did
* not pass the threshold. * not pass the threshold.
@ -226,12 +227,78 @@
END DO END DO
END DO END DO
* *
* Do for each value of M in MVAL.
*
DO I = 1, NM
M = MVAL( I )
*
* Do for each value of N in NVAL.
*
DO J = 1, NN
N = NVAL( J )
*
* Only for M >= N
*
IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN
*
* Do for each possible value of MB1
*
DO IMB1 = 1, NNB
MB1 = NBVAL( IMB1 )
*
* Only for MB1 > N
*
IF ( MB1.GT.N ) THEN
*
* Do for each possible value of NB1
*
DO INB1 = 1, NNB
NB1 = NBVAL( INB1 )
*
* Do for each possible value of NB2
*
DO INB2 = 1, NNB
NB2 = NBVAL( INB2 )
*
IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN
*
* Test DORHR_COL
*
CALL DORHR_COL02( M, N, MB1, NB1,
$ NB2, RESULT )
*
* Print information about the tests that did
* not pass the threshold.
*
DO T = 1, NTESTS
IF( RESULT( T ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9998 ) M, N, MB1,
$ NB1, NB2, T, RESULT( T )
NFAIL = NFAIL + 1
END IF
END DO
NRUN = NRUN + NTESTS
END IF
END DO
END DO
END IF
END DO
END IF
END DO
END DO
*
* Print a summary of the results. * Print a summary of the results.
* *
CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS )
* *
9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, 9999 FORMAT( 'DORGTSQR and DORHR_COL: M=', I5, ', N=', I5,
$ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
9998 FORMAT( 'DORGTSQR_ROW and DORHR_COL: M=', I5, ', N=', I5,
$ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
RETURN RETURN
* *
* End of DCHKORHR_COL * End of DCHKORHR_COL

View File

@ -21,8 +21,8 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> DORHR_COL01 tests DORHR_COL using DLATSQR, DGEMQRT and DORGTSQR. *> DORHR_COL01 tests DORGTSQR and DORHR_COL using DLATSQR, DGEMQRT.
*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part DGEMQR), DORGTSQR *> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR)
*> have to be tested before this test. *> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
@ -62,14 +62,46 @@
*> \verbatim *> \verbatim
*> RESULT is DOUBLE PRECISION array, dimension (6) *> RESULT is DOUBLE PRECISION array, dimension (6)
*> Results of each of the six tests below. *> Results of each of the six tests below.
*> ( C is a M-by-N random matrix, D is a N-by-M random matrix )
*> *>
*> RESULT(1) = | A - Q * R | / (eps * m * |A|) *> A is a m-by-n test input matrix to be factored.
*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) *> so that A = Q_gr * ( R )
*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) *> ( 0 ),
*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) *>
*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) *> Q_qr is an implicit m-by-m orthogonal Q matrix, the result
*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) *> of factorization in blocked WY-representation,
*> stored in ZGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using DGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using DGEMM.
*> \endverbatim *> \endverbatim
* *
* Authors: * Authors:
@ -80,18 +112,15 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019 *> \ingroup double_lin
*
*> \ingroup single_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE DORHR_COL01( M, N, MB1, NB1, NB2, RESULT ) SUBROUTINE DORHR_COL01( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.9.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2019
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2 INTEGER M, N, MB1, NB1, NB2

View File

@ -0,0 +1,377 @@
*> \brief \b DORHR_COL02
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DORHR_COL02( M, N, MB1, NB1, NB2, RESULT )
*
* .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
* DOUBLE PRECISION RESULT(6)
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DORHR_COL02 tests DORGTSQR_ROW and DORHR_COL inside DGETSQRHRT
*> (which calls DLATSQR, DORGTSQR_ROW and DORHR_COL) using DGEMQRT.
*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR)
*> have to be tested before this test.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> Number of rows in test matrix.
*> \endverbatim
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> Number of columns in test matrix.
*> \endverbatim
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> Number of row in row block in an input test matrix.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> Number of columns in column block an input test matrix.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> Number of columns in column block in an output test matrix.
*> \endverbatim
*>
*> \param[out] RESULT
*> \verbatim
*> RESULT is DOUBLE PRECISION array, dimension (6)
*> Results of each of the six tests below.
*>
*> A is a m-by-n test input matrix to be factored.
*> so that A = Q_gr * ( R )
*> ( 0 ),
*>
*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result
*> of factorization in blocked WY-representation,
*> stored in ZGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using DGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using DGEMM.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup double_lin
*
* =====================================================================
SUBROUTINE DORHR_COL02( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
DOUBLE PRECISION RESULT(6)
*
* =====================================================================
*
* ..
* .. Local allocatable arrays
DOUBLE PRECISION, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:)
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL TESTZEROS
INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB
DOUBLE PRECISION ANORM, EPS, RESID, CNORM, DNORM
* ..
* .. Local Arrays ..
INTEGER ISEED( 4 )
DOUBLE PRECISION WORKQUERY( 1 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH, DLANGE, DLANSY
EXTERNAL DLAMCH, DLANGE, DLANSY
* ..
* .. External Subroutines ..
EXTERNAL DLACPY, DLARNV, DLASET, DGETSQRHRT,
$ DSCAL, DGEMM, DGEMQRT, DSYRK
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, DBLE, MAX, MIN
* ..
* .. Scalars in Common ..
CHARACTER(LEN=32) SRNAMT
* ..
* .. Common blocks ..
COMMON / SRMNAMC / SRNAMT
* ..
* .. Data statements ..
DATA ISEED / 1988, 1989, 1990, 1991 /
*
* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS
*
TESTZEROS = .FALSE.
*
EPS = DLAMCH( 'Epsilon' )
K = MIN( M, N )
L = MAX( M, N, 1)
*
* Dynamically allocate local arrays
*
ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L),
$ C(M,N), CF(M,N),
$ D(N,M), DF(N,M) )
*
* Put random numbers into A and copy to AF
*
DO J = 1, N
CALL DLARNV( 2, ISEED, M, A( 1, J ) )
END DO
IF( TESTZEROS ) THEN
IF( M.GE.4 ) THEN
DO J = 1, N
CALL DLARNV( 2, ISEED, M/2, A( M/4, J ) )
END DO
END IF
END IF
CALL DLACPY( 'Full', M, N, A, M, AF, M )
*
* Number of row blocks in DLATSQR
*
NRB = MAX( 1, CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) )
*
ALLOCATE ( T1( NB1, N * NRB ) )
ALLOCATE ( T2( NB2, N ) )
ALLOCATE ( DIAG( N ) )
*
* Begin determine LWORK for the array WORK and allocate memory.
*
* DGEMQRT requires NB2 to be bounded by N.
*
NB2_UB = MIN( NB2, N)
*
*
CALL DGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORKQUERY, -1, INFO )
*
LWORK = INT( WORKQUERY( 1 ) )
*
* In DGEMQRT, WORK is N*NB2_UB if SIDE = 'L',
* or M*NB2_UB if SIDE = 'R'.
*
LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M )
*
ALLOCATE ( WORK( LWORK ) )
*
* End allocate memory for WORK.
*
*
* Begin Householder reconstruction routines
*
* Factor the matrix A in the array AF.
*
SRNAMT = 'DGETSQRHRT'
CALL DGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORK, LWORK, INFO )
*
* End Householder reconstruction routines.
*
*
* Generate the m-by-m matrix Q
*
CALL DLASET( 'Full', M, M, ZERO, ONE, Q, M )
*
SRNAMT = 'DGEMQRT'
CALL DGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M,
$ WORK, INFO )
*
* Copy R
*
CALL DLASET( 'Full', M, N, ZERO, ZERO, R, M )
*
CALL DLACPY( 'Upper', M, N, AF, M, R, M )
*
* TEST 1
* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1)
*
CALL DGEMM( 'T', 'N', M, N, M, -ONE, Q, M, A, M, ONE, R, M )
*
ANORM = DLANGE( '1', M, N, A, M, RWORK )
RESID = DLANGE( '1', M, N, R, M, RWORK )
IF( ANORM.GT.ZERO ) THEN
RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM )
ELSE
RESULT( 1 ) = ZERO
END IF
*
* TEST 2
* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2)
*
CALL DLASET( 'Full', M, M, ZERO, ONE, R, M )
CALL DSYRK( 'U', 'T', M, M, -ONE, Q, M, ONE, R, M )
RESID = DLANSY( '1', 'Upper', M, R, M, RWORK )
RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) )
*
* Generate random m-by-n matrix C
*
DO J = 1, N
CALL DLARNV( 2, ISEED, M, C( 1, J ) )
END DO
CNORM = DLANGE( '1', M, N, C, M, RWORK )
CALL DLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as Q*C = CF
*
SRNAMT = 'DGEMQRT'
CALL DGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 3
* Compute |CF - Q*C| / ( eps * m * |C| )
*
CALL DGEMM( 'N', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M )
RESID = DLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 3 ) = ZERO
END IF
*
* Copy C into CF again
*
CALL DLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as (Q**T)*C = CF
*
SRNAMT = 'DGEMQRT'
CALL DGEMQRT( 'L', 'T', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 4
* Compute |CF - (Q**T)*C| / ( eps * m * |C|)
*
CALL DGEMM( 'T', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M )
RESID = DLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 4 ) = ZERO
END IF
*
* Generate random n-by-m matrix D and a copy DF
*
DO J = 1, M
CALL DLARNV( 2, ISEED, N, D( 1, J ) )
END DO
DNORM = DLANGE( '1', N, M, D, N, RWORK )
CALL DLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*Q = DF
*
SRNAMT = 'DGEMQRT'
CALL DGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 5
* Compute |DF - D*Q| / ( eps * m * |D| )
*
CALL DGEMM( 'N', 'N', N, M, M, -ONE, D, N, Q, M, ONE, DF, N )
RESID = DLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 5 ) = ZERO
END IF
*
* Copy D into DF again
*
CALL DLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*QT = DF
*
SRNAMT = 'DGEMQRT'
CALL DGEMQRT( 'R', 'T', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 6
* Compute |DF - D*(Q**T)| / ( eps * m * |D| )
*
CALL DGEMM( 'N', 'T', N, M, M, -ONE, D, N, Q, M, ONE, DF, N )
RESID = DLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 6 ) = ZERO
END IF
*
* Deallocate all arrays
*
DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG,
$ C, D, CF, DF )
*
RETURN
*
* End of DORHR_COL02
*
END

View File

@ -24,8 +24,11 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> SCHKORHR_COL tests SORHR_COL using SLATSQR, SGEMQRT and SORGTSQR. *> SCHKORHR_COL tests:
*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part SGEMQR), SORGTSQR *> 1) SORGTSQR and SORHR_COL using SLATSQR, SGEMQRT,
*> 2) SORGTSQR_ROW and SORHR_COL inside DGETSQRHRT
*> (which calls SLATSQR, SORGTSQR_ROW and SORHR_COL) using SGEMQRT.
*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR)
*> have to be tested before this test. *> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
@ -97,19 +100,16 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019 *> \ingroup single_lin
*
*> \ingroup sigle_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE SCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, SUBROUTINE SCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL,
$ NBVAL, NOUT ) $ NNB, NBVAL, NOUT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.9.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* June 2019
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
LOGICAL TSTERR LOGICAL TSTERR
@ -135,7 +135,8 @@
REAL RESULT( NTESTS ) REAL RESULT( NTESTS )
* .. * ..
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL ALAHD, ALASUM, SERRORHR_COL, SORHR_COL01 EXTERNAL ALAHD, ALASUM, SERRORHR_COL, SORHR_COL01,
$ SORHR_COL02
* .. * ..
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX, MIN INTRINSIC MAX, MIN
@ -201,8 +202,8 @@
* *
* Test SORHR_COL * Test SORHR_COL
* *
CALL SORHR_COL01( M, N, MB1, NB1, NB2, CALL SORHR_COL01( M, N, MB1, NB1,
$ RESULT ) $ NB2, RESULT )
* *
* Print information about the tests that did * Print information about the tests that did
* not pass the threshold. * not pass the threshold.
@ -226,12 +227,78 @@
END DO END DO
END DO END DO
* *
* Do for each value of M in MVAL.
*
DO I = 1, NM
M = MVAL( I )
*
* Do for each value of N in NVAL.
*
DO J = 1, NN
N = NVAL( J )
*
* Only for M >= N
*
IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN
*
* Do for each possible value of MB1
*
DO IMB1 = 1, NNB
MB1 = NBVAL( IMB1 )
*
* Only for MB1 > N
*
IF ( MB1.GT.N ) THEN
*
* Do for each possible value of NB1
*
DO INB1 = 1, NNB
NB1 = NBVAL( INB1 )
*
* Do for each possible value of NB2
*
DO INB2 = 1, NNB
NB2 = NBVAL( INB2 )
*
IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN
*
* Test SORHR_COL
*
CALL SORHR_COL02( M, N, MB1, NB1,
$ NB2, RESULT )
*
* Print information about the tests that did
* not pass the threshold.
*
DO T = 1, NTESTS
IF( RESULT( T ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9998 ) M, N, MB1,
$ NB1, NB2, T, RESULT( T )
NFAIL = NFAIL + 1
END IF
END DO
NRUN = NRUN + NTESTS
END IF
END DO
END DO
END IF
END DO
END IF
END DO
END DO
*
* Print a summary of the results. * Print a summary of the results.
* *
CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS )
* *
9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, 9999 FORMAT( 'SORGTSQR and SORHR_COL: M=', I5, ', N=', I5,
$ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
9998 FORMAT( 'SORGTSQR_ROW and SORHR_COL: M=', I5, ', N=', I5,
$ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
RETURN RETURN
* *
* End of SCHKORHR_COL * End of SCHKORHR_COL

View File

@ -8,12 +8,12 @@
* Definition: * Definition:
* =========== * ===========
* *
* SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT) * SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT )
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2 * INTEGER M, N, MB1, NB1, NB2
* .. Return values .. * .. Return values ..
* REAL RESULT(6) * REAL RESULT(6)
* *
* *
*> \par Purpose: *> \par Purpose:
@ -21,8 +21,8 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> SORHR_COL01 tests SORHR_COL using SLATSQR, SGEMQRT and SORGTSQR. *> SORHR_COL01 tests SORGTSQR and SORHR_COL using SLATSQR, SGEMQRT.
*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part SGEMQR), SORGTSQR *> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR)
*> have to be tested before this test. *> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
@ -62,14 +62,46 @@
*> \verbatim *> \verbatim
*> RESULT is REAL array, dimension (6) *> RESULT is REAL array, dimension (6)
*> Results of each of the six tests below. *> Results of each of the six tests below.
*> ( C is a M-by-N random matrix, D is a N-by-M random matrix )
*> *>
*> RESULT(1) = | A - Q * R | / (eps * m * |A|) *> A is a m-by-n test input matrix to be factored.
*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) *> so that A = Q_gr * ( R )
*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) *> ( 0 ),
*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) *>
*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) *> Q_qr is an implicit m-by-m orthogonal Q matrix, the result
*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) *> of factorization in blocked WY-representation,
*> stored in SGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using SGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using SGEMM.
*> \endverbatim *> \endverbatim
* *
* Authors: * Authors:
@ -80,18 +112,15 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019
*
*> \ingroup single_lin *> \ingroup single_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT ) SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.9.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2019
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2 INTEGER M, N, MB1, NB1, NB2
@ -102,7 +131,7 @@
* *
* .. * ..
* .. Local allocatable arrays * .. Local allocatable arrays
REAL, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), REAL , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:) $ C(:,:), CF(:,:), D(:,:), DF(:,:)
* *
@ -128,7 +157,7 @@
$ SORGTSQR, SSCAL, SGEMM, SGEMQRT, SSYRK $ SORGTSQR, SSCAL, SGEMM, SGEMQRT, SSYRK
* .. * ..
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC CEILING, MAX, MIN, REAL INTRINSIC CEILING, REAL, MAX, MIN
* .. * ..
* .. Scalars in Common .. * .. Scalars in Common ..
CHARACTER(LEN=32) SRNAMT CHARACTER(LEN=32) SRNAMT
@ -230,7 +259,7 @@
* *
* Compute the factor R_hr corresponding to the Householder * Compute the factor R_hr corresponding to the Householder
* reconstructed Q_hr and place it in the upper triangle of AF to * reconstructed Q_hr and place it in the upper triangle of AF to
* match the Q storage format in DGEQRT. R_hr = R_tsqr * S, * match the Q storage format in SGEQRT. R_hr = R_tsqr * S,
* this means changing the sign of I-th row of the matrix R_tsqr * this means changing the sign of I-th row of the matrix R_tsqr
* according to sign of of I-th diagonal element DIAG(I) of the * according to sign of of I-th diagonal element DIAG(I) of the
* matrix S. * matrix S.

View File

@ -0,0 +1,376 @@
*> \brief \b SORHR_COL02
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE SORHR_COL02( M, N, MB1, NB1, NB2, RESULT )
*
* .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
* REAL RESULT(6)
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SORHR_COL02 tests SORGTSQR_ROW and SORHR_COL inside SGETSQRHRT
*> (which calls SLATSQR, SORGTSQR_ROW and SORHR_COL) using SGEMQRT.
*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR)
*> have to be tested before this test.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> Number of rows in test matrix.
*> \endverbatim
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> Number of columns in test matrix.
*> \endverbatim
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> Number of row in row block in an input test matrix.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> Number of columns in column block an input test matrix.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> Number of columns in column block in an output test matrix.
*> \endverbatim
*>
*> \param[out] RESULT
*> \verbatim
*> RESULT is REAL array, dimension (6)
*> Results of each of the six tests below.
*>
*> A is a m-by-n test input matrix to be factored.
*> so that A = Q_gr * ( R )
*> ( 0 ),
*>
*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result
*> of factorization in blocked WY-representation,
*> stored in SGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using SGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using SGEMM.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup single_lin
*
* =====================================================================
SUBROUTINE SORHR_COL02( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
REAL RESULT(6)
*
* =====================================================================
*
* ..
* .. Local allocatable arrays
REAL , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:)
*
* .. Parameters ..
REAL ONE, ZERO
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
LOGICAL TESTZEROS
INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB
REAL ANORM, EPS, RESID, CNORM, DNORM
* ..
* .. Local Arrays ..
INTEGER ISEED( 4 )
REAL WORKQUERY( 1 )
* ..
* .. External Functions ..
REAL SLAMCH, SLANGE, SLANSY
EXTERNAL SLAMCH, SLANGE, SLANSY
* ..
* .. External Subroutines ..
EXTERNAL SLACPY, SLARNV, SLASET, SGETSQRHRT,
$ SSCAL, SGEMM, SGEMQRT, SSYRK
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, REAL, MAX, MIN
* ..
* .. Scalars in Common ..
CHARACTER(LEN=32) SRNAMT
* ..
* .. Common blocks ..
COMMON / SRMNAMC / SRNAMT
* ..
* .. Data statements ..
DATA ISEED / 1988, 1989, 1990, 1991 /
*
* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS
*
TESTZEROS = .FALSE.
*
EPS = SLAMCH( 'Epsilon' )
K = MIN( M, N )
L = MAX( M, N, 1)
*
* Dynamically allocate local arrays
*
ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L),
$ C(M,N), CF(M,N),
$ D(N,M), DF(N,M) )
*
* Put random numbers into A and copy to AF
*
DO J = 1, N
CALL SLARNV( 2, ISEED, M, A( 1, J ) )
END DO
IF( TESTZEROS ) THEN
IF( M.GE.4 ) THEN
DO J = 1, N
CALL SLARNV( 2, ISEED, M/2, A( M/4, J ) )
END DO
END IF
END IF
CALL SLACPY( 'Full', M, N, A, M, AF, M )
*
* Number of row blocks in SLATSQR
*
NRB = MAX( 1, CEILING( REAL( M - N ) / REAL( MB1 - N ) ) )
*
ALLOCATE ( T1( NB1, N * NRB ) )
ALLOCATE ( T2( NB2, N ) )
ALLOCATE ( DIAG( N ) )
*
* Begin determine LWORK for the array WORK and allocate memory.
*
* SGEMQRT requires NB2 to be bounded by N.
*
NB2_UB = MIN( NB2, N)
*
CALL SGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORKQUERY, -1, INFO )
*
LWORK = INT( WORKQUERY( 1 ) )
*
* In SGEMQRT, WORK is N*NB2_UB if SIDE = 'L',
* or M*NB2_UB if SIDE = 'R'.
*
LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M )
*
ALLOCATE ( WORK( LWORK ) )
*
* End allocate memory for WORK.
*
*
* Begin Householder reconstruction routines
*
* Factor the matrix A in the array AF.
*
SRNAMT = 'SGETSQRHRT'
CALL SGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORK, LWORK, INFO )
*
* End Householder reconstruction routines.
*
*
* Generate the m-by-m matrix Q
*
CALL SLASET( 'Full', M, M, ZERO, ONE, Q, M )
*
SRNAMT = 'SGEMQRT'
CALL SGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M,
$ WORK, INFO )
*
* Copy R
*
CALL SLASET( 'Full', M, N, ZERO, ZERO, R, M )
*
CALL SLACPY( 'Upper', M, N, AF, M, R, M )
*
* TEST 1
* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1)
*
CALL SGEMM( 'T', 'N', M, N, M, -ONE, Q, M, A, M, ONE, R, M )
*
ANORM = SLANGE( '1', M, N, A, M, RWORK )
RESID = SLANGE( '1', M, N, R, M, RWORK )
IF( ANORM.GT.ZERO ) THEN
RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM )
ELSE
RESULT( 1 ) = ZERO
END IF
*
* TEST 2
* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2)
*
CALL SLASET( 'Full', M, M, ZERO, ONE, R, M )
CALL SSYRK( 'U', 'T', M, M, -ONE, Q, M, ONE, R, M )
RESID = SLANSY( '1', 'Upper', M, R, M, RWORK )
RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) )
*
* Generate random m-by-n matrix C
*
DO J = 1, N
CALL SLARNV( 2, ISEED, M, C( 1, J ) )
END DO
CNORM = SLANGE( '1', M, N, C, M, RWORK )
CALL SLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as Q*C = CF
*
SRNAMT = 'SGEMQRT'
CALL SGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 3
* Compute |CF - Q*C| / ( eps * m * |C| )
*
CALL SGEMM( 'N', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M )
RESID = SLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 3 ) = ZERO
END IF
*
* Copy C into CF again
*
CALL SLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as (Q**T)*C = CF
*
SRNAMT = 'SGEMQRT'
CALL SGEMQRT( 'L', 'T', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 4
* Compute |CF - (Q**T)*C| / ( eps * m * |C|)
*
CALL SGEMM( 'T', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M )
RESID = SLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 4 ) = ZERO
END IF
*
* Generate random n-by-m matrix D and a copy DF
*
DO J = 1, M
CALL SLARNV( 2, ISEED, N, D( 1, J ) )
END DO
DNORM = SLANGE( '1', N, M, D, N, RWORK )
CALL SLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*Q = DF
*
SRNAMT = 'SGEMQRT'
CALL SGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 5
* Compute |DF - D*Q| / ( eps * m * |D| )
*
CALL SGEMM( 'N', 'N', N, M, M, -ONE, D, N, Q, M, ONE, DF, N )
RESID = SLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 5 ) = ZERO
END IF
*
* Copy D into DF again
*
CALL SLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*QT = DF
*
SRNAMT = 'SGEMQRT'
CALL SGEMQRT( 'R', 'T', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 6
* Compute |DF - D*(Q**T)| / ( eps * m * |D| )
*
CALL SGEMM( 'N', 'T', N, M, M, -ONE, D, N, Q, M, ONE, DF, N )
RESID = SLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 6 ) = ZERO
END IF
*
* Deallocate all arrays
*
DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG,
$ C, D, CF, DF )
*
RETURN
*
* End of SORHR_COL02
*
END

View File

@ -24,9 +24,12 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> ZCHKUNHR_COL tests ZUNHR_COL using ZLATSQR and ZGEMQRT. Therefore, ZLATSQR *> ZCHKUNHR_COL tests:
*> (used in ZGEQR) and ZGEMQRT (used in ZGEMQR) have to be tested *> 1) ZUNGTSQR and ZUNHR_COL using ZLATSQR, ZGEMQRT,
*> before this test. *> 2) ZUNGTSQR_ROW and ZUNHR_COL inside ZGETSQRHRT
*> (which calls ZLATSQR, ZUNGTSQR_ROW and ZUNHR_COL) using ZGEMQRT.
*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR)
*> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
* *
@ -97,19 +100,16 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019
*
*> \ingroup complex16_lin *> \ingroup complex16_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE ZCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, SUBROUTINE ZCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL,
$ NBVAL, NOUT ) $ NNB, NBVAL, NOUT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.7.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
LOGICAL TSTERR LOGICAL TSTERR
@ -135,10 +135,11 @@
DOUBLE PRECISION RESULT( NTESTS ) DOUBLE PRECISION RESULT( NTESTS )
* .. * ..
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL ALAHD, ALASUM, ZERRUNHR_COL, ZUNHR_COL01 EXTERNAL ALAHD, ALASUM, ZERRUNHR_COL, ZUNHR_COL01,
$ ZUNHR_COL02
* .. * ..
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX, MIN INTRINSIC MAX, MIN
* .. * ..
* .. Scalars in Common .. * .. Scalars in Common ..
LOGICAL LERR, OK LOGICAL LERR, OK
@ -201,8 +202,8 @@
* *
* Test ZUNHR_COL * Test ZUNHR_COL
* *
CALL ZUNHR_COL01( M, N, MB1, NB1, NB2, CALL ZUNHR_COL01( M, N, MB1, NB1,
$ RESULT ) $ NB2, RESULT )
* *
* Print information about the tests that did * Print information about the tests that did
* not pass the threshold. * not pass the threshold.
@ -226,12 +227,78 @@
END DO END DO
END DO END DO
* *
* Do for each value of M in MVAL.
*
DO I = 1, NM
M = MVAL( I )
*
* Do for each value of N in NVAL.
*
DO J = 1, NN
N = NVAL( J )
*
* Only for M >= N
*
IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN
*
* Do for each possible value of MB1
*
DO IMB1 = 1, NNB
MB1 = NBVAL( IMB1 )
*
* Only for MB1 > N
*
IF ( MB1.GT.N ) THEN
*
* Do for each possible value of NB1
*
DO INB1 = 1, NNB
NB1 = NBVAL( INB1 )
*
* Do for each possible value of NB2
*
DO INB2 = 1, NNB
NB2 = NBVAL( INB2 )
*
IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN
*
* Test ZUNHR_COL
*
CALL ZUNHR_COL02( M, N, MB1, NB1,
$ NB2, RESULT )
*
* Print information about the tests that did
* not pass the threshold.
*
DO T = 1, NTESTS
IF( RESULT( T ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9998 ) M, N, MB1,
$ NB1, NB2, T, RESULT( T )
NFAIL = NFAIL + 1
END IF
END DO
NRUN = NRUN + NTESTS
END IF
END DO
END DO
END IF
END DO
END IF
END DO
END DO
*
* Print a summary of the results. * Print a summary of the results.
* *
CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS )
* *
9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, 9999 FORMAT( 'ZUNGTSQR and ZUNHR_COL: M=', I5, ', N=', I5,
$ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
9998 FORMAT( 'ZUNGTSQR_ROW and ZUNHR_COL: M=', I5, ', N=', I5,
$ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5,
$ ' test(', I2, ')=', G12.5 )
RETURN RETURN
* *
* End of ZCHKUNHR_COL * End of ZCHKUNHR_COL

View File

@ -21,8 +21,8 @@
*> *>
*> \verbatim *> \verbatim
*> *>
*> ZUNHR_COL01 tests ZUNHR_COL using ZLATSQR, ZGEMQRT and ZUNGTSQR. *> ZUNHR_COL01 tests ZUNGTSQR and ZUNHR_COL using ZLATSQR, ZGEMQRT.
*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part ZGEMQR), ZUNGTSQR *> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR)
*> have to be tested before this test. *> have to be tested before this test.
*> *>
*> \endverbatim *> \endverbatim
@ -62,14 +62,46 @@
*> \verbatim *> \verbatim
*> RESULT is DOUBLE PRECISION array, dimension (6) *> RESULT is DOUBLE PRECISION array, dimension (6)
*> Results of each of the six tests below. *> Results of each of the six tests below.
*> ( C is a M-by-N random matrix, D is a N-by-M random matrix )
*> *>
*> RESULT(1) = | A - Q * R | / (eps * m * |A|) *> A is a m-by-n test input matrix to be factored.
*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) *> so that A = Q_gr * ( R )
*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) *> ( 0 ),
*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) *>
*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) *> Q_qr is an implicit m-by-m unitary Q matrix, the result
*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) *> of factorization in blocked WY-representation,
*> stored in ZGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using ZGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using ZGEMM.
*> \endverbatim *> \endverbatim
* *
* Authors: * Authors:
@ -80,18 +112,15 @@
*> \author Univ. of Colorado Denver *> \author Univ. of Colorado Denver
*> \author NAG Ltd. *> \author NAG Ltd.
* *
*> \date November 2019
*
*> \ingroup complex16_lin *> \ingroup complex16_lin
* *
* ===================================================================== * =====================================================================
SUBROUTINE ZUNHR_COL01( M, N, MB1, NB1, NB2, RESULT ) SUBROUTINE ZUNHR_COL01( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE IMPLICIT NONE
* *
* -- LAPACK test routine (version 3.9.0) -- * -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2019
* *
* .. Scalar Arguments .. * .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2 INTEGER M, N, MB1, NB1, NB2
@ -102,7 +131,7 @@
* *
* .. * ..
* .. Local allocatable arrays * .. Local allocatable arrays
COMPLEX*16, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), COMPLEX*16 , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:) $ C(:,:), CF(:,:), D(:,:), DF(:,:)
DOUBLE PRECISION, ALLOCATABLE :: RWORK(:) DOUBLE PRECISION, ALLOCATABLE :: RWORK(:)
@ -218,7 +247,7 @@
* Copy the factor R into the array R. * Copy the factor R into the array R.
* *
SRNAMT = 'ZLACPY' SRNAMT = 'ZLACPY'
CALL ZLACPY( 'U', M, N, AF, M, R, M ) CALL ZLACPY( 'U', N, N, AF, M, R, M )
* *
* Reconstruct the orthogonal matrix Q. * Reconstruct the orthogonal matrix Q.
* *
@ -240,7 +269,7 @@
* matrix S. * matrix S.
* *
SRNAMT = 'ZLACPY' SRNAMT = 'ZLACPY'
CALL ZLACPY( 'U', M, N, R, M, AF, M ) CALL ZLACPY( 'U', N, N, R, M, AF, M )
* *
DO I = 1, N DO I = 1, N
IF( DIAG( I ).EQ.-CONE ) THEN IF( DIAG( I ).EQ.-CONE ) THEN

View File

@ -0,0 +1,381 @@
*> \brief \b ZUNHR_COL02
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE ZUNHR_COL02( M, N, MB1, NB1, NB2, RESULT )
*
* .. Scalar Arguments ..
* INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
* DOUBLE PRECISION RESULT(6)
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZUNHR_COL02 tests ZUNGTSQR_ROW and ZUNHR_COL inside ZGETSQRHRT
*> (which calls ZLATSQR, ZUNGTSQR_ROW and ZUNHR_COL) using ZGEMQRT.
*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR)
*> have to be tested before this test.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> Number of rows in test matrix.
*> \endverbatim
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> Number of columns in test matrix.
*> \endverbatim
*> \param[in] MB1
*> \verbatim
*> MB1 is INTEGER
*> Number of row in row block in an input test matrix.
*> \endverbatim
*>
*> \param[in] NB1
*> \verbatim
*> NB1 is INTEGER
*> Number of columns in column block an input test matrix.
*> \endverbatim
*>
*> \param[in] NB2
*> \verbatim
*> NB2 is INTEGER
*> Number of columns in column block in an output test matrix.
*> \endverbatim
*>
*> \param[out] RESULT
*> \verbatim
*> RESULT is DOUBLE PRECISION array, dimension (6)
*> Results of each of the six tests below.
*>
*> A is a m-by-n test input matrix to be factored.
*> so that A = Q_gr * ( R )
*> ( 0 ),
*>
*> Q_qr is an implicit m-by-m unitary Q matrix, the result
*> of factorization in blocked WY-representation,
*> stored in ZGEQRT output format.
*>
*> R is a n-by-n upper-triangular matrix,
*>
*> 0 is a (m-n)-by-n zero matrix,
*>
*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I
*>
*> C is an m-by-n random matrix,
*>
*> D is an n-by-m random matrix.
*>
*> The six tests are:
*>
*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| )
*> is equivalent to test for | A - Q * R | / (eps * m * |A|),
*>
*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ),
*>
*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|),
*>
*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|)
*>
*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|)
*>
*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|),
*>
*> where:
*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are
*> computed using ZGEMQRT,
*>
*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are
*> computed using ZGEMM.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16_lin
*
* =====================================================================
SUBROUTINE ZUNHR_COL02( M, N, MB1, NB1, NB2, RESULT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER M, N, MB1, NB1, NB2
* .. Return values ..
DOUBLE PRECISION RESULT(6)
*
* =====================================================================
*
* ..
* .. Local allocatable arrays
COMPLEX*16 , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:),
$ WORK( : ), T1(:,:), T2(:,:), DIAG(:),
$ C(:,:), CF(:,:), D(:,:), DF(:,:)
DOUBLE PRECISION, ALLOCATABLE :: RWORK(:)
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D+0 )
COMPLEX*16 CONE, CZERO
PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
$ CZERO = ( 0.0D+0, 0.0D+0 ) )
* ..
* .. Local Scalars ..
LOGICAL TESTZEROS
INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB
DOUBLE PRECISION ANORM, EPS, RESID, CNORM, DNORM
* ..
* .. Local Arrays ..
INTEGER ISEED( 4 )
COMPLEX*16 WORKQUERY( 1 )
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH, ZLANGE, ZLANSY
EXTERNAL DLAMCH, ZLANGE, ZLANSY
* ..
* .. External Subroutines ..
EXTERNAL ZLACPY, ZLARNV, ZLASET, ZGETSQRHRT,
$ ZSCAL, ZGEMM, ZGEMQRT, ZHERK
* ..
* .. Intrinsic Functions ..
INTRINSIC CEILING, DBLE, MAX, MIN
* ..
* .. Scalars in Common ..
CHARACTER(LEN=32) SRNAMT
* ..
* .. Common blocks ..
COMMON / SRMNAMC / SRNAMT
* ..
* .. Data statements ..
DATA ISEED / 1988, 1989, 1990, 1991 /
*
* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS
*
TESTZEROS = .FALSE.
*
EPS = DLAMCH( 'Epsilon' )
K = MIN( M, N )
L = MAX( M, N, 1)
*
* Dynamically allocate local arrays
*
ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L),
$ C(M,N), CF(M,N),
$ D(N,M), DF(N,M) )
*
* Put random numbers into A and copy to AF
*
DO J = 1, N
CALL ZLARNV( 2, ISEED, M, A( 1, J ) )
END DO
IF( TESTZEROS ) THEN
IF( M.GE.4 ) THEN
DO J = 1, N
CALL ZLARNV( 2, ISEED, M/2, A( M/4, J ) )
END DO
END IF
END IF
CALL ZLACPY( 'Full', M, N, A, M, AF, M )
*
* Number of row blocks in ZLATSQR
*
NRB = MAX( 1, CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) )
*
ALLOCATE ( T1( NB1, N * NRB ) )
ALLOCATE ( T2( NB2, N ) )
ALLOCATE ( DIAG( N ) )
*
* Begin determine LWORK for the array WORK and allocate memory.
*
* ZGEMQRT requires NB2 to be bounded by N.
*
NB2_UB = MIN( NB2, N)
*
*
CALL ZGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORKQUERY, -1, INFO )
*
LWORK = INT( WORKQUERY( 1 ) )
*
* In ZGEMQRT, WORK is N*NB2_UB if SIDE = 'L',
* or M*NB2_UB if SIDE = 'R'.
*
LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M )
*
ALLOCATE ( WORK( LWORK ) )
*
* End allocate memory for WORK.
*
*
* Begin Householder reconstruction routines
*
* Factor the matrix A in the array AF.
*
SRNAMT = 'ZGETSQRHRT'
CALL ZGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2,
$ WORK, LWORK, INFO )
*
* End Householder reconstruction routines.
*
*
* Generate the m-by-m matrix Q
*
CALL ZLASET( 'Full', M, M, CZERO, CONE, Q, M )
*
SRNAMT = 'ZGEMQRT'
CALL ZGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M,
$ WORK, INFO )
*
* Copy R
*
CALL ZLASET( 'Full', M, N, CZERO, CZERO, R, M )
*
CALL ZLACPY( 'Upper', M, N, AF, M, R, M )
*
* TEST 1
* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1)
*
CALL ZGEMM( 'C', 'N', M, N, M, -CONE, Q, M, A, M, CONE, R, M )
*
ANORM = ZLANGE( '1', M, N, A, M, RWORK )
RESID = ZLANGE( '1', M, N, R, M, RWORK )
IF( ANORM.GT.ZERO ) THEN
RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM )
ELSE
RESULT( 1 ) = ZERO
END IF
*
* TEST 2
* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2)
*
CALL ZLASET( 'Full', M, M, CZERO, CONE, R, M )
CALL ZHERK( 'U', 'C', M, M, -CONE, Q, M, CONE, R, M )
RESID = ZLANSY( '1', 'Upper', M, R, M, RWORK )
RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) )
*
* Generate random m-by-n matrix C
*
DO J = 1, N
CALL ZLARNV( 2, ISEED, M, C( 1, J ) )
END DO
CNORM = ZLANGE( '1', M, N, C, M, RWORK )
CALL ZLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as Q*C = CF
*
SRNAMT = 'ZGEMQRT'
CALL ZGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 3
* Compute |CF - Q*C| / ( eps * m * |C| )
*
CALL ZGEMM( 'N', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M )
RESID = ZLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 3 ) = ZERO
END IF
*
* Copy C into CF again
*
CALL ZLACPY( 'Full', M, N, C, M, CF, M )
*
* Apply Q to C as (Q**T)*C = CF
*
SRNAMT = 'ZGEMQRT'
CALL ZGEMQRT( 'L', 'C', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M,
$ WORK, INFO )
*
* TEST 4
* Compute |CF - (Q**T)*C| / ( eps * m * |C|)
*
CALL ZGEMM( 'C', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M )
RESID = ZLANGE( '1', M, N, CF, M, RWORK )
IF( CNORM.GT.ZERO ) THEN
RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM )
ELSE
RESULT( 4 ) = ZERO
END IF
*
* Generate random n-by-m matrix D and a copy DF
*
DO J = 1, M
CALL ZLARNV( 2, ISEED, N, D( 1, J ) )
END DO
DNORM = ZLANGE( '1', N, M, D, N, RWORK )
CALL ZLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*Q = DF
*
SRNAMT = 'ZGEMQRT'
CALL ZGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 5
* Compute |DF - D*Q| / ( eps * m * |D| )
*
CALL ZGEMM( 'N', 'N', N, M, M, -CONE, D, N, Q, M, CONE, DF, N )
RESID = ZLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 5 ) = ZERO
END IF
*
* Copy D into DF again
*
CALL ZLACPY( 'Full', N, M, D, N, DF, N )
*
* Apply Q to D as D*QT = DF
*
SRNAMT = 'ZGEMQRT'
CALL ZGEMQRT( 'R', 'C', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N,
$ WORK, INFO )
*
* TEST 6
* Compute |DF - D*(Q**T)| / ( eps * m * |D| )
*
CALL ZGEMM( 'N', 'C', N, M, M, -CONE, D, N, Q, M, CONE, DF, N )
RESID = ZLANGE( '1', N, M, DF, N, RWORK )
IF( DNORM.GT.ZERO ) THEN
RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM )
ELSE
RESULT( 6 ) = ZERO
END IF
*
* Deallocate all arrays
*
DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG,
$ C, D, CF, DF )
*
RETURN
*
* End of ZUNHR_COL02
*
END