diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 73f2592ef..0e45d4c63 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -66,7 +66,7 @@ set(SLASRC slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f - slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f + slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f slarrv.f slartv.f slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f @@ -112,14 +112,14 @@ set(SLASRC sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f stpqrt.f stpqrt2.f stpmqrt.f stprfb.f sgelqt.f sgelqt3.f sgemlqt.f - sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f + sgetsls.f sgetsqrhrt.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f sgelq.f slaswlq.f slamswlq.f sgemlq.f stplqt.f stplqt2.f stpmlqt.f ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f sgesvdq.f slaorhr_col_getrfnp.f - slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) + slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f ) set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f @@ -171,7 +171,7 @@ set(CLASRC claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f - clarf.f clarfb.f clarfg.f clarfgp.f clarft.f + clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f @@ -209,14 +209,14 @@ set(CLASRC cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cgelqt.f cgelqt3.f cgemlqt.f - cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f + cgetsls.f cgetsqrhrt.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f cgelq.f claswlq.f clamswlq.f cgemlq.f ctplqt.f ctplqt2.f ctpmlqt.f chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f - cungtsqr.f cunhr_col.f ) + cungtsqr.f cungtsqr_row.f cunhr_col.f ) set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f @@ -253,7 +253,7 @@ set(DLASRC dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f - dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f + dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f dlargv.f dlarrv.f dlartv.f dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f @@ -300,14 +300,14 @@ set(DLASRC dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dgelqt.f dgelqt3.f dgemlqt.f - dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f + dgetsls.f dgetsqrhrt.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f dgelq.f dlaswlq.f dlamswlq.f dgemlq.f dtplqt.f dtplqt2.f dtpmlqt.f dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f - dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f ) + dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f ) set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f @@ -360,7 +360,7 @@ set(ZLASRC zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f - zlarcm.f zlarf.f zlarfb.f + zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f zlarfg.f zlarfgp.f zlarft.f zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f @@ -402,13 +402,13 @@ set(ZLASRC ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f ztplqt.f ztplqt2.f ztpmlqt.f zgelqt.f zgelqt3.f zgemlqt.f - zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f + zgetsls.f zgetsqrhrt.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f zgelq.f zlaswlq.f zlamswlq.f zgemlq.f zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f - zungtsqr.f zunhr_col.f) + zungtsqr.f zungtsqr_row.f zunhr_col.f) set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index 54a583887..340ea6d6c 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -114,6 +114,8 @@ set(CSRC lapacke_cgetrs_work.c lapacke_cgetsls.c lapacke_cgetsls_work.c + lapacke_cgetsqrhrt.c + lapacke_cgetsqrhrt_work.c lapacke_cggbak.c lapacke_cggbak_work.c lapacke_cggbal.c @@ -590,6 +592,8 @@ set(CSRC lapacke_cungrq_work.c lapacke_cungtr.c lapacke_cungtr_work.c + lapacke_cungtsqr_row.c + lapacke_cungtsqr_row_work.c lapacke_cunmbr.c lapacke_cunmbr_work.c lapacke_cunmhr.c @@ -735,6 +739,8 @@ set(DSRC lapacke_dgetrs_work.c lapacke_dgetsls.c lapacke_dgetsls_work.c + lapacke_dgetsqrhrt.c + lapacke_dgetsqrhrt_work.c lapacke_dggbak.c lapacke_dggbak_work.c lapacke_dggbal.c @@ -862,6 +868,8 @@ set(DSRC lapacke_dorgrq_work.c lapacke_dorgtr.c lapacke_dorgtr_work.c + lapacke_dorgtsqr_row.c + lapacke_dorgtsqr_row_work.c lapacke_dormbr.c lapacke_dormbr_work.c lapacke_dormhr.c @@ -1309,6 +1317,8 @@ set(SSRC lapacke_sgetrs_work.c lapacke_sgetsls.c lapacke_sgetsls_work.c + lapacke_sgetsqrhrt.c + lapacke_sgetsqrhrt_work.c lapacke_sggbak.c lapacke_sggbak_work.c lapacke_sggbal.c @@ -1435,6 +1445,8 @@ set(SSRC lapacke_sorgrq_work.c lapacke_sorgtr.c lapacke_sorgtr_work.c + lapacke_sorgtsqr_row.c + lapacke_sorgtsqr_row_work.c lapacke_sormbr.c lapacke_sormbr_work.c lapacke_sormhr.c @@ -1877,6 +1889,8 @@ set(ZSRC lapacke_zgetrs_work.c lapacke_zgetsls.c lapacke_zgetsls_work.c + lapacke_zgetsqrhrt.c + lapacke_zgetsqrhrt_work.c lapacke_zggbak.c lapacke_zggbak_work.c lapacke_zggbal.c @@ -2351,6 +2365,8 @@ set(ZSRC lapacke_zungrq_work.c lapacke_zungtr.c lapacke_zungtr_work.c + lapacke_zungtsqr_row.c + lapacke_zungtsqr_row_work.c lapacke_zunmbr.c lapacke_zunmbr_work.c lapacke_zunmhr.c diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index 341efabda..ada1944b2 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -2941,6 +2941,42 @@ void LAPACK_zgetsls( lapack_complex_double* work, lapack_int const* lwork, lapack_int* info ); +#define LAPACK_cgetsqrhrt LAPACK_GLOBAL(cgetsqrhrt,CGETSQRHRT) +void LAPACK_cgetsqrhrt( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2, + lapack_complex_float* A, lapack_int const* lda, + lapack_complex_float* T, lapack_int const* ldt, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* info ); + +#define LAPACK_dgetsqrhrt LAPACK_GLOBAL(dgetsqrhrt,DGETSQRHRT) +void LAPACK_dgetsqrhrt( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2, + double* A, lapack_int const* lda, + double* T, lapack_int const* ldt, + double* work, lapack_int const* lwork, + lapack_int* info ); + +#define LAPACK_sgetsqrhrt LAPACK_GLOBAL(sgetsqrhrt,SGETSQRHRT) +void LAPACK_sgetsqrhrt( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2, + float* A, lapack_int const* lda, + float* T, lapack_int const* ldt, + float* work, lapack_int const* lwork, + lapack_int* info ); + +#define LAPACK_zgetsqrhrt LAPACK_GLOBAL(zgetsqrhrt,ZGETSQRHRT) +void LAPACK_zgetsqrhrt( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2, + lapack_complex_double* A, lapack_int const* lda, + lapack_complex_double* T, lapack_int const* ldt, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* info ); + #define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK) void LAPACK_cggbak( char const* job, char const* side, @@ -7251,6 +7287,24 @@ void LAPACK_sorgtr( float* work, lapack_int const* lwork, lapack_int* info ); +#define LAPACK_dorgtsqr_row LAPACK_GLOBAL(dorgtsqr_row,DORGTSQR_ROW) +void LAPACK_dorgtsqr_row( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb, lapack_int const* nb, + double* A, lapack_int const* lda, + double const* T, lapack_int const* ldt, + double* work, lapack_int const* lwork, + lapack_int* info ); + +#define LAPACK_sorgtsqr_row LAPACK_GLOBAL(sorgtsqr_row,SORGTSQR_ROW) +void LAPACK_sorgtsqr_row( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb, lapack_int const* nb, + float* A, lapack_int const* lda, + float const* T, lapack_int const* ldt, + float* work, lapack_int const* lwork, + lapack_int* info ); + #define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR) void LAPACK_dormbr( char const* vect, char const* side, char const* trans, @@ -13540,6 +13594,24 @@ void LAPACK_zungtr( lapack_complex_double* work, lapack_int const* lwork, lapack_int* info ); +#define LAPACK_cungtsqr_row LAPACK_GLOBAL(cungtsqr_row,CUNGTSQR_ROW) +void LAPACK_cungtsqr_row( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb, lapack_int const* nb, + lapack_complex_float* A, lapack_int const* lda, + lapack_complex_float const* T, lapack_int const* ldt, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* info ); + +#define LAPACK_zungtsqr_row LAPACK_GLOBAL(zungtsqr_row,ZUNGTSQR_ROW) +void LAPACK_zungtsqr_row( + lapack_int const* m, lapack_int const* n, + lapack_int const* mb, lapack_int const* nb, + lapack_complex_double* A, lapack_int const* lda, + lapack_complex_double const* T, lapack_int const* ldt, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* info ); + #define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR) void LAPACK_cunmbr( char const* vect, char const* side, char const* trans, diff --git a/lapack-netlib/LAPACKE/include/lapacke.h b/lapack-netlib/LAPACKE/include/lapacke.h index b280dde0a..5c129db91 100644 --- a/lapack-netlib/LAPACKE/include/lapacke.h +++ b/lapack-netlib/LAPACKE/include/lapacke.h @@ -2598,6 +2598,15 @@ lapack_int LAPACKE_sorgtr( int matrix_layout, char uplo, lapack_int n, float* a, lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau ); +lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + float* a, lapack_int lda, + const float* t, lapack_int ldt ); +lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + double* a, lapack_int lda, + const double* t, lapack_int ldt ); + lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, @@ -4577,6 +4586,15 @@ lapack_int LAPACKE_zungtr( int matrix_layout, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); +lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* t, lapack_int ldt ); + lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, @@ -7880,6 +7898,19 @@ lapack_int LAPACKE_dorgtr_work( int matrix_layout, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); +lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout, + lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + float* a, lapack_int lda, + const float* t, lapack_int ldt, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout, + lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + double* a, lapack_int lda, + const double* t, lapack_int ldt, + double* work, lapack_int lwork ); + lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, @@ -10281,6 +10312,19 @@ lapack_int LAPACKE_zungtr_work( int matrix_layout, char uplo, lapack_int n, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout, + lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout, + lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* work, lapack_int lwork ); + lapack_int LAPACKE_cunmbr_work( int matrix_layout, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, @@ -12026,6 +12070,44 @@ lapack_int LAPACKE_zgetsls_work( int matrix_layout, char trans, lapack_int m, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); +lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + float* a, lapack_int lda, + float* t, lapack_int ldt ); +lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + double* a, lapack_int lda, + double* t, lapack_int ldt ); +lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ); +lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ); + +lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + float* a, lapack_int lda, + float* t, lapack_int ldt, + float* work, lapack_int lwork ); +lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + double* a, lapack_int lda, + double* t, lapack_int ldt, + double* work, lapack_int lwork ); +lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* work, lapack_int lwork ); +lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* work, lapack_int lwork ); + lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w ); lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n, diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index a602dd7a0..7f827e1c9 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -162,6 +162,8 @@ lapacke_cgetrs.o \ lapacke_cgetrs_work.o \ lapacke_cgetsls.o \ lapacke_cgetsls_work.o \ +lapacke_cgetsqrhrt.o \ +lapacke_cgetsqrhrt_work.o \ lapacke_cggbak.o \ lapacke_cggbak_work.o \ lapacke_cggbal.o \ @@ -634,6 +636,8 @@ lapacke_cungrq.o \ lapacke_cungrq_work.o \ lapacke_cungtr.o \ lapacke_cungtr_work.o \ +lapacke_cungtsqr_row.o \ +lapacke_cungtsqr_row_work.o \ lapacke_cunmbr.o \ lapacke_cunmbr_work.o \ lapacke_cunmhr.o \ @@ -778,6 +782,8 @@ lapacke_dgetrs.o \ lapacke_dgetrs_work.o \ lapacke_dgetsls.o \ lapacke_dgetsls_work.o \ +lapacke_dgetsqrhrt.o \ +lapacke_dgetsqrhrt_work.o \ lapacke_dggbak.o \ lapacke_dggbak_work.o \ lapacke_dggbal.o \ @@ -900,6 +906,8 @@ lapacke_dorgrq.o \ lapacke_dorgrq_work.o \ lapacke_dorgtr.o \ lapacke_dorgtr_work.o \ +lapacke_dorgtsqr_row.o \ +lapacke_dorgtsqr_row_work.o \ lapacke_dormbr.o \ lapacke_dormbr_work.o \ lapacke_dormhr.o \ @@ -1348,6 +1356,8 @@ lapacke_sgetrs.o \ lapacke_sgetrs_work.o \ lapacke_sgetsls.o \ lapacke_sgetsls_work.o \ +lapacke_sgetsqrhrt.o \ +lapacke_sgetsqrhrt_work.o \ lapacke_sggbak.o \ lapacke_sggbak_work.o \ lapacke_sggbal.o \ @@ -1468,6 +1478,8 @@ lapacke_sorgrq.o \ lapacke_sorgrq_work.o \ lapacke_sorgtr.o \ lapacke_sorgtr_work.o \ +lapacke_sorgtsqr_row.o \ +lapacke_sorgtsqr_row_work.o \ lapacke_sormbr.o \ lapacke_sormbr_work.o \ lapacke_sormhr.o \ @@ -1908,6 +1920,8 @@ lapacke_zgetrs.o \ lapacke_zgetrs_work.o \ lapacke_zgetsls.o \ lapacke_zgetsls_work.o \ +lapacke_zgetsqrhrt.o \ +lapacke_zgetsqrhrt_work.o \ lapacke_zggbak.o \ lapacke_zggbak_work.o \ lapacke_zggbal.o \ @@ -2380,6 +2394,8 @@ lapacke_zungrq.o \ lapacke_zungrq_work.o \ lapacke_zungtr.o \ lapacke_zungtr_work.o \ +lapacke_zungtsqr_row.o \ +lapacke_zungtsqr_row_work.o \ lapacke_zunmbr.o \ lapacke_zunmbr_work.o \ lapacke_zunmhr.o \ diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt.c b/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt.c new file mode 100644 index 000000000..0e67e0b83 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt.c @@ -0,0 +1,80 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function cgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt_work.c new file mode 100644 index 000000000..598f193e6 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cgetsqrhrt_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function cgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_float* a, lapack_int lda, + lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_complex_float* a_t = NULL; + lapack_int ldt_t = MAX(1,nb2); + lapack_complex_float* t_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info ); + return info; + } + if( ldt < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row.c b/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row.c new file mode 100644 index 000000000..bb551fcbc --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row.c @@ -0,0 +1,83 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function cungtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_float* work = NULL; + lapack_complex_float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_cungtsqr_row", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_cge_nancheck( matrix_layout, nb, n, t, ldt ) ) { + return -8; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cungtsqr_row", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row_work.c b/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row_work.c new file mode 100644 index 000000000..96b18ab13 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_cungtsqr_row_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function cungtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* t, lapack_int ldt, + lapack_complex_float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function and adjust info */ + LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt, + work, &lwork, &info); + if (info < 0) { + info = info - 1; + } + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_complex_float* a_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info ); + return info; + } + lapack_int ldt_t = MAX(1,nb); + lapack_complex_float* t_t = NULL; + /* Check leading dimension(s) */ + if( ldt < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t ); + /* Call LAPACK function and adjust info */ + LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt.c b/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt.c new file mode 100644 index 000000000..cf0e3200c --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt.c @@ -0,0 +1,79 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + double* a, lapack_int lda, + double* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + double* work = NULL; + double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt_work.c new file mode 100644 index 000000000..f91887ffe --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dgetsqrhrt_work.c @@ -0,0 +1,106 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + double* a, lapack_int lda, + double* t, lapack_int ldt, + double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + double* a_t = NULL; + lapack_int ldt_t = MAX(1,nb2); + double* t_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info ); + return info; + } + if( ldt < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row.c b/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row.c new file mode 100644 index 000000000..1da3405a8 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row.c @@ -0,0 +1,82 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dorgtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + double* a, lapack_int lda, + const double* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + double* work = NULL; + double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_dge_nancheck( matrix_layout, nb, n, t, ldt ) ) { + return -8; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row_work.c b/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row_work.c new file mode 100644 index 000000000..e16467f3a --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dorgtsqr_row_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dorgtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + double* a, lapack_int lda, + const double* t, lapack_int ldt, + double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function and adjust info */ + LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt, + work, &lwork, &info); + if (info < 0) { + info = info - 1; + } + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + double* a_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info ); + return info; + } + lapack_int ldt_t = MAX(1,nb); + double* t_t = NULL; + /* Check leading dimension(s) */ + if( ldt < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt.c b/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt.c new file mode 100644 index 000000000..759afce48 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt.c @@ -0,0 +1,79 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function sgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + float* a, lapack_int lda, + float* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + float* work = NULL; + float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_sgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_sgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt_work.c new file mode 100644 index 000000000..40193008d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sgetsqrhrt_work.c @@ -0,0 +1,106 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function sgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + float* a, lapack_int lda, + float* t, lapack_int ldt, + float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + float* a_t = NULL; + lapack_int ldt_t = MAX(1,nb2); + float* t_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info ); + return info; + } + if( ldt < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_sgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_sgetsqrhrt_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row.c b/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row.c new file mode 100644 index 000000000..350783a78 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row.c @@ -0,0 +1,82 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function sorgtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + float* a, lapack_int lda, + const float* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + float* work = NULL; + float work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_sge_nancheck( matrix_layout, nb, n, t, ldt ) ) { + return -8; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_sorgtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int)work_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_sorgtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row_work.c b/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row_work.c new file mode 100644 index 000000000..a66f70b52 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_sorgtsqr_row_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function sorgtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + float* a, lapack_int lda, + const float* t, lapack_int ldt, + float* work, lapack_int lwork ) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function and adjust info */ + LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt, + work, &lwork, &info); + if (info < 0) { + info = info - 1; + } + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + float* a_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info ); + return info; + } + lapack_int ldt_t = MAX(1,nb); + float* t_t = NULL; + /* Check leading dimension(s) */ + if( ldt < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (float*)LAPACKE_malloc( sizeof(float) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t ); + /* Call LAPACK function and adjust info */ + LAPACK_sorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_sorgtsqr_row_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt.c b/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt.c new file mode 100644 index 000000000..53557c92d --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt.c @@ -0,0 +1,80 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -7; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt_work.c new file mode 100644 index 000000000..a6825df56 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zgetsqrhrt_work.c @@ -0,0 +1,108 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zgetsqrhrt +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb1, lapack_int nb1, lapack_int nb2, + lapack_complex_double* a, lapack_int lda, + lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_complex_double* a_t = NULL; + lapack_int ldt_t = MAX(1,nb2); + lapack_complex_double* t_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info ); + return info; + } + if( ldt < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zgetsqrhrt_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row.c b/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row.c new file mode 100644 index 000000000..71418fb84 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row.c @@ -0,0 +1,83 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zungtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* t, lapack_int ldt ) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_complex_double* work = NULL; + lapack_complex_double work_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zungtsqr_row", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, n, a, lda ) ) { + return -6; + } + if( LAPACKE_zge_nancheck( matrix_layout, nb, n, t, ldt ) ) { + return -8; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zungtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, &work_query, lwork ); + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + /* Allocate memory for work arrays */ + work = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_zungtsqr_row_work( matrix_layout, m, n, mb, nb, + a, lda, t, ldt, work, lwork ); + /* Release memory and exit */ + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zungtsqr_row", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row_work.c b/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row_work.c new file mode 100644 index 000000000..909855864 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_zungtsqr_row_work.c @@ -0,0 +1,109 @@ +/***************************************************************************** + Copyright (c) 2020, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zungtsqr_row +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n, + lapack_int mb, lapack_int nb, + lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* t, lapack_int ldt, + lapack_complex_double* work, lapack_int lwork ) +{ + lapack_int info = 0; + if (matrix_layout == LAPACK_COL_MAJOR) { + /* Call LAPACK function and adjust info */ + LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt, + work, &lwork, &info); + if (info < 0) { + info = info - 1; + } + } else if (matrix_layout == LAPACK_ROW_MAJOR) { + lapack_int lda_t = MAX(1,m); + lapack_complex_double* a_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -7; + LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info ); + return info; + } + lapack_int ldt_t = MAX(1,nb); + lapack_complex_double* t_t = NULL; + /* Check leading dimension(s) */ + if( ldt < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t, + work, &lwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + t_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldt_t * MAX(1,n) ); + if( t_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t, + work, &lwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + /* Release memory and exit */ + LAPACKE_free( t_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zungtsqr_row_work", info ); + } + return info; +} \ No newline at end of file diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 83baac875..d1ee96667 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -135,14 +135,14 @@ SLASRC_O = \ slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \ slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ - slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ + slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ slarrv.o slartv.o \ slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ slasyf_rk.o \ slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \ slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \ sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \ - sorgrq.o sorgtr.o sorgtsqr.o sorm2l.o sorm2r.o sorm22.o \ + sorgrq.o sorgtr.o sorgtsqr.o sorgtsqr_row.o sorm2l.o sorm2r.o sorm22.o \ sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \ sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \ spbstf.o spbsv.o spbsvx.o \ @@ -181,7 +181,7 @@ SLASRC_O = \ sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ stpqrt.o stpqrt2.o stpmqrt.o stprfb.o \ sgelqt.o sgelqt3.o sgemlqt.o \ - sgetsls.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \ + sgetsls.o sgetsqrhrt.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \ sgelq.o slaswlq.o slamswlq.o sgemlq.o \ stplqt.o stplqt2.o stpmlqt.o \ sorhr_col.o slaorhr_col_getrfnp.o slaorhr_col_getrfnp2.o \ @@ -250,7 +250,7 @@ CLASRC_O = \ claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \ claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \ claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ - clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \ + clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \ clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \ @@ -278,7 +278,7 @@ CLASRC_O = \ ctptrs.o ctrcon.o ctrevc.o ctrevc3.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \ ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrzf.o cung2l.o cung2r.o \ cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \ - cungrq.o cungtr.o cungtsqr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o cunm22.o \ + cungrq.o cungtr.o cungtsqr.o cungtsqr_row.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o cunm22.o \ cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \ cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \ chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \ @@ -289,7 +289,7 @@ CLASRC_O = \ cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o \ cgelqt.o cgelqt3.o cgemlqt.o \ - cgetsls.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \ + cgetsls.o cgetsqrhrt.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \ cgelq.o claswlq.o clamswlq.o cgemlq.o \ ctplqt.o ctplqt2.o ctpmlqt.o \ cunhr_col.o claunhr_col_getrfnp.o claunhr_col_getrfnp2.o \ @@ -342,14 +342,14 @@ DLASRC_O = \ dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ - dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ + dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ dlargv.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \ dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ - dorgrq.o dorgtr.o dorgtsqr.o dorm2l.o dorm2r.o dorm22.o \ + dorgrq.o dorgtr.o dorgtsqr.o dorgtsqr_row.o dorm2l.o dorm2r.o dorm22.o \ dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ dpbstf.o dpbsv.o dpbsvx.o \ @@ -389,7 +389,7 @@ DLASRC_O = \ dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o \ dgelqt.o dgelqt3.o dgemlqt.o \ - dgetsls.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \ + dgetsls.o dgetsqrhrt.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \ dgelq.o dlaswlq.o dlamswlq.o dgemlq.o \ dtplqt.o dtplqt2.o dtpmlqt.o \ dorhr_col.o dlaorhr_col_getrfnp.o dlaorhr_col_getrfnp2.o \ @@ -455,7 +455,7 @@ ZLASRC_O = \ zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \ zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \ zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ - zlarcm.o zlarf.o zlarfb.o \ + zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o \ zlarfg.o zlarft.o zlarfgp.o \ zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ @@ -484,7 +484,7 @@ ZLASRC_O = \ ztptrs.o ztrcon.o ztrevc.o ztrevc3.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \ ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrzf.o zung2l.o \ zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \ - zungrq.o zungtr.o zungtsqr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o zunm22.o \ + zungrq.o zungtr.o zungtsqr.o zungtsqr_row.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o zunm22.o \ zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ zunmtr.o zupgtr.o \ zupmtr.o izmax1.o dzsum1.o zstemr.o \ @@ -498,7 +498,7 @@ ZLASRC_O = \ ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o \ ztplqt.o ztplqt2.o ztpmlqt.o \ zgelqt.o zgelqt3.o zgemlqt.o \ - zgetsls.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \ + zgetsls.o zgetsqrhrt.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \ zgelq.o zlaswlq.o zlamswlq.o zgemlq.o \ zunhr_col.o zlaunhr_col_getrfnp.o zlaunhr_col_getrfnp2.o \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ diff --git a/lapack-netlib/SRC/cgetsqrhrt.f b/lapack-netlib/SRC/cgetsqrhrt.f new file mode 100644 index 000000000..4e4dc1d4a --- /dev/null +++ b/lapack-netlib/SRC/cgetsqrhrt.f @@ -0,0 +1,349 @@ +*> \brief \b CGETSQRHRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETSQRHRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETSQRHRT computes a NB2-sized column blocked QR-factorization +*> of a complex M-by-N matrix A with M >= N, +*> +*> A = Q * R. +*> +*> The routine uses internally a NB1-sized column blocked and MB1-sized +*> row blocked TSQR-factorization and perfors the reconstruction +*> of the Householder vectors from the TSQR output. The routine also +*> converts the R_tsqr factor from the TSQR-factorization output into +*> the R factor that corresponds to the Householder QR-factorization, +*> +*> A = Q_tsqr * R_tsqr = Q * R. +*> +*> The output Q and R factors are stored in the same format as in CGEQRT +*> (Q is in blocked compact WY-representation). See the documentation +*> of CGEQRT for more details on the format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> The row block size to be used in the blocked TSQR. +*> MB1 > N. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> The column block size to be used in the blocked TSQR. +*> N >= NB1 >= 1. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> The block size to be used in the blocked QR that is +*> output. NB2 >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> On entry: an M-by-N matrix A. +*> +*> On exit: +*> a) the elements on and above the diagonal +*> of the array contain the N-by-N upper-triangular +*> matrix R corresponding to the Householder QR; +*> b) the elements below the diagonal represent Q by +*> the columns of blocked V (compact WY-representation). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), +*> where +*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), +*> NB1LOCAL = MIN(NB1,N). +*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, +*> LW1 = NB1LOCAL * N, +*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup comlpexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT, + $ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLATSQR, CUNGTSQR_ROW, CUNHR_COL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, REAL, CMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB1.LE.N ) THEN + INFO = -3 + ELSE IF( NB1.LT.1 ) THEN + INFO = -4 + ELSE IF( NB2.LT.1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN + INFO = -9 + ELSE +* +* Test the input LWORK for the dimension of the array WORK. +* This workspace is used to store array: +* a) Matrix T and WORK for CLATSQR; +* b) N-by-N upper-triangular factor R_tsqr; +* c) Matrix T and array WORK for CUNGTSQR_ROW; +* d) Diagonal D for CUNHR_COL. +* + IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE +* +* Set block size for column blocks +* + NB1LOCAL = MIN( NB1, N ) +* + NUM_ALL_ROW_BLOCKS = MAX( 1, + $ CEILING( REAL( M - N ) / REAL( MB1 - N ) ) ) +* +* Length and leading dimension of WORK array to place +* T array in TSQR. +* + LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL + + LDWT = NB1LOCAL +* +* Length of TSQR work array +* + LW1 = NB1LOCAL * N +* +* Length of CUNGTSQR_ROW work array. +* + LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ) +* + LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ) +* + IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN + INFO = -11 + END IF +* + END IF + END IF +* +* Handle error in the input parameters and return workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETSQRHRT', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN + END IF +* + NB2LOCAL = MIN( NB2, N ) +* +* +* (1) Perform TSQR-factorization of the M-by-N matrix A. +* + CALL CLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK(LWT+1), LW1, IINFO ) +* +* (2) Copy the factor R_tsqr stored in the upper-triangular part +* of A into the square matrix in the work array +* WORK(LWT+1:LWT+N*N) column-by-column. +* + DO J = 1, N + CALL CCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 ) + END DO +* +* (3) Generate a M-by-N matrix Q with orthonormal columns from +* the result stored below the diagonal in the array A in place. +* + + CALL CUNGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK( LWT+N*N+1 ), LW2, IINFO ) +* +* (4) Perform the reconstruction of Householder vectors from +* the matrix Q (stored in A) in place. +* + CALL CUNHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT, + $ WORK( LWT+N*N+1 ), IINFO ) +* +* (5) Copy the factor R_tsqr stored in the square matrix in the +* work array WORK(LWT+1:LWT+N*N) into the upper-triangular +* part of A. +* +* (6) Compute from R_tsqr the factor R_hr corresponding to +* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. +* This multiplication by the sign matrix S on the left means +* changing the sign of I-th row of the matrix R_tsqr according +* to sign of the I-th diagonal element DIAG(I) of the matrix S. +* DIAG is stored in WORK( LWT+N*N+1 ) from the CUNHR_COL output. +* +* (5) and (6) can be combined in a single loop, so the rows in A +* are accessed only once. +* + DO I = 1, N + IF( WORK( LWT+N*N+I ).EQ.-CONE ) THEN + DO J = I, N + A( I, J ) = -CONE * WORK( LWT+N*(J-1)+I ) + END DO + ELSE + CALL CCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA ) + END IF + END DO +* + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN +* +* End of CGETSQRHRT +* + END \ No newline at end of file diff --git a/lapack-netlib/SRC/clarfb_gett.f b/lapack-netlib/SRC/clarfb_gett.f new file mode 100644 index 000000000..ee6959ed8 --- /dev/null +++ b/lapack-netlib/SRC/clarfb_gett.f @@ -0,0 +1,597 @@ +*> \brief \b CLARFB_GETT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFB_GETT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +*> +* Definition: +* =========== +* +* SUBROUTINE CLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, +* $ WORK, LDWORK ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER IDENT +* INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( LDWORK, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFB_GETT applies a complex Householder block reflector H from the +*> left to a complex (K+M)-by-N "triangular-pentagonal" matrix +*> composed of two block matrices: an upper trapezoidal K-by-N matrix A +*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored +*> in the array B. The block reflector H is stored in a compact +*> WY-representation, where the elementary reflectors are in the +*> arrays A, B and T. See Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDENT +*> \verbatim +*> IDENT is CHARACTER*1 +*> If IDENT = not 'I', or not 'i', then V1 is unit +*> lower-triangular and stored in the left K-by-K block of +*> the input matrix A, +*> If IDENT = 'I' or 'i', then V1 is an identity matrix and +*> not stored. +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number or rows of the matrix A. +*> K is also order of the matrix T, i.e. the number of +*> elementary reflectors whose product defines the block +*> reflector. 0 <= K <= N. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The upper-triangular K-by-K matrix T in the representation +*> of the block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> +*> On entry: +*> a) In the K-by-N upper-trapezoidal part A: input matrix A. +*> b) In the columns below the diagonal: columns of V1 +*> (ones are not stored on the diagonal). +*> +*> On exit: +*> A is overwritten by rectangular K-by-N product H*A. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> +*> On entry: +*> a) In the M-by-(N-K) right block: input matrix B. +*> b) In the M-by-N left block: columns of V2. +*> +*> On exit: +*> B is overwritten by rectangular M-by-N product H*B. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, +*> dimension (LDWORK,max(K,N-K)) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. LDWORK>=max(1,K). +*> +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> (1) Description of the Algebraic Operation. +*> +*> The matrix A is a K-by-N matrix composed of two column block +*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K): +*> A = ( A1, A2 ). +*> The matrix B is an M-by-N matrix composed of two column block +*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K): +*> B = ( B1, B2 ). +*> +*> Perform the operation: +*> +*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**H ) * ( A_in ) = +*> ( B_out ) ( B_in ) ( B_in ) +*> = ( I - ( V1 ) * T * ( V1**H, V2**H ) ) * ( A_in ) +*> ( V2 ) ( B_in ) +*> On input: +*> +*> a) ( A_in ) consists of two block columns: +*> ( B_in ) +*> +*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in )) +*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )), +*> +*> where the column blocks are: +*> +*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the +*> upper triangular part of the array A(1:K,1:K). +*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored. +*> +*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> b) V = ( V1 ) +*> ( V2 ) +*> +*> where: +*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored; +*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix, +*> stored in the lower-triangular part of the array +*> A(1:K,1:K) (ones are not stored), +*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K), +*> (because on input B1_in is a rectangular zero +*> matrix that is not stored and the space is +*> used to store V2). +*> +*> c) T is a K-by-K upper-triangular matrix stored +*> in the array T(1:K,1:K). +*> +*> On output: +*> +*> a) ( A_out ) consists of two block columns: +*> ( B_out ) +*> +*> ( A_out ) = (( A1_out ) ( A2_out )) +*> ( B_out ) (( B1_out ) ( B2_out )), +*> +*> where the column blocks are: +*> +*> ( A1_out ) is a K-by-K square matrix, or a K-by-K +*> upper-triangular matrix, if V1 is an +*> identity matrix. AiOut is stored in +*> the array A(1:K,1:K). +*> ( B1_out ) is an M-by-K rectangular matrix stored +*> in the array B(1:M,K:N). +*> +*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> +*> The operation above can be represented as the same operation +*> on each block column: +*> +*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**H ) * ( A1_in ) +*> ( B1_out ) ( 0 ) ( 0 ) +*> +*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**H ) * ( A2_in ) +*> ( B2_out ) ( B2_in ) ( B2_in ) +*> +*> If IDENT != 'I': +*> +*> The computation for column block 1: +*> +*> A1_out: = A1_in - V1*T*(V1**H)*A1_in +*> +*> B1_out: = - V2*T*(V1**H)*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - V1*T*( (V1**H)*A2_in + (V2**H)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( (V1**H)*A2_in + (V2**H)*B2_in ) +*> +*> If IDENT == 'I': +*> +*> The operation for column block 1: +*> +*> A1_out: = A1_in - V1*T*A1_in +*> +*> B1_out: = - V2*T*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - T*( A2_in + (V2**H)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( A2_in + (V2**H)*B2_in ) +*> +*> (2) Description of the Algorithmic Computation. +*> +*> In the first step, we compute column block 2, i.e. A2 and B2. +*> Here, we need to use the K-by-(N-K) rectangular workspace +*> matrix W2 that is of the same size as the matrix A2. +*> W2 is stored in the array WORK(1:K,1:(N-K)). +*> +*> In the second step, we compute column block 1, i.e. A1 and B1. +*> Here, we need to use the K-by-K square workspace matrix W1 +*> that is of the same size as the as the matrix A1. +*> W1 is stored in the array WORK(1:K,1:K). +*> +*> NOTE: Hence, in this routine, we need the workspace array WORK +*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from +*> the first step and W1 from the second step. +*> +*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I', +*> more computations than in the Case (B). +*> +*> if( IDENT != 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(2) W2: = (V1**H) * W2 = (unit_lower_tr_of_(A1)**H) * W2 +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(2) W1: = (V1**H) * W1 = (unit_lower_tr_of_(A1)**H) * W1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6) square A1: = A1 - W1 +*> end if +*> end if +*> +*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I', +*> less computations than in the Case (A) +*> +*> if( IDENT == 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(6) upper-triangular_of_(A1): = A1 - W1 +*> end if +*> end if +*> +*> Combine these cases (A) and (B) together, this is the resulting +*> algorithm: +*> +*> if ( N > K ) then +*> +*> (First Step - column block 2) +*> +*> col2_(1) W2: = A2 +*> if( IDENT != 'I' ) then +*> col2_(2) W2: = (V1**H) * W2 +*> = (unit_lower_tr_of_(A1)**H) * W2 +*> end if +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2] +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> if( IDENT != 'I' ) then +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> end if +*> col2_(7) A2: = A2 - W2 +*> +*> else +*> +*> (Second Step - column block 1) +*> +*> col1_(1) W1: = A1 +*> if( IDENT != 'I' ) then +*> col1_(2) W1: = (V1**H) * W1 +*> = (unit_lower_tr_of_(A1)**H) * W1 +*> end if +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> if( IDENT != 'I' ) then +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1) +*> end if +*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1) +*> +*> end if +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, + $ WORK, LDWORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER IDENT + INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTIDENT + INTEGER I, J +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N ) + $ RETURN +* + LNOTIDENT = .NOT.LSAME( IDENT, 'I' ) +* +* ------------------------------------------------------------------ +* +* First Step. Computation of the Column Block 2: +* +* ( A2 ) := H * ( A2 ) +* ( B2 ) ( B2 ) +* +* ------------------------------------------------------------------ +* + IF( N.GT.K ) THEN +* +* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N) +* into W2=WORK(1:K, 1:N-K) column-by-column. +* + DO J = 1, N-K + CALL CCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 ) + END DO + + IF( LNOTIDENT ) THEN +* +* col2_(2) Compute W2: = (V1**H) * W2 = (A1**H) * W2, +* V1 is not an identy matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored). +* +* + CALL CTRMM( 'L', 'L', 'C', 'U', K, N-K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(3) Compute W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL CGEMM( 'C', 'N', K, N-K, M, CONE, B, LDB, + $ B( 1, K+1 ), LDB, CONE, WORK, LDWORK ) + END IF +* +* col2_(4) Compute W2: = T * W2, +* T is upper-triangular. +* + CALL CTRMM( 'L', 'U', 'N', 'N', K, N-K, CONE, T, LDT, + $ WORK, LDWORK ) +* +* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2, +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL CGEMM( 'N', 'N', M, N-K, K, -CONE, B, LDB, + $ WORK, LDWORK, CONE, B( 1, K+1 ), LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col2_(6) Compute W2: = V1 * W2 = A1 * W2, +* V1 is not an identity matrix, but unit lower-triangular, +* V1 stored in A1 (diagonal ones are not stored). +* + CALL CTRMM( 'L', 'L', 'N', 'U', K, N-K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(7) Compute A2: = A2 - W2 = +* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K), +* column-by-column. +* + DO J = 1, N-K + DO I = 1, K + A( I, K+J ) = A( I, K+J ) - WORK( I, J ) + END DO + END DO +* + END IF +* +* ------------------------------------------------------------------ +* +* Second Step. Computation of the Column Block 1: +* +* ( A1 ) := H * ( A1 ) +* ( B1 ) ( 0 ) +* +* ------------------------------------------------------------------ +* +* col1_(1) Compute W1: = A1. Copy the upper-triangular +* A1 = A(1:K, 1:K) into the upper-triangular +* W1 = WORK(1:K, 1:K) column-by-column. +* + DO J = 1, K + CALL CCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 ) + END DO +* +* Set the subdiagonal elements of W1 to zero column-by-column. +* + DO J = 1, K - 1 + DO I = J + 1, K + WORK( I, J ) = CZERO + END DO + END DO +* + IF( LNOTIDENT ) THEN +* +* col1_(2) Compute W1: = (V1**H) * W1 = (A1**H) * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL CTRMM( 'L', 'L', 'C', 'U', K, K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col1_(3) Compute W1: = T * W1, +* T is upper-triangular, +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL CTRMM( 'L', 'U', 'N', 'N', K, K, CONE, T, LDT, + $ WORK, LDWORK ) +* +* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1, +* V2 = B1, W1 is upper-triangular with zeroes below the diagonal. +* + IF( M.GT.0 ) THEN + CALL CTRMM( 'R', 'U', 'N', 'N', M, K, -CONE, WORK, LDWORK, + $ B, LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col1_(5) Compute W1: = V1 * W1 = A1 * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular on input with zeroes below the diagonal, +* and square on output. +* + CALL CTRMM( 'L', 'L', 'N', 'U', K, K, CONE, A, LDA, + $ WORK, LDWORK ) +* +* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K) +* column-by-column. A1 is upper-triangular on input. +* If IDENT, A1 is square on output, and W1 is square, +* if NOT IDENT, A1 is upper-triangular on output, +* W1 is upper-triangular. +* +* col1_(6)_a Compute elements of A1 below the diagonal. +* + DO J = 1, K - 1 + DO I = J + 1, K + A( I, J ) = - WORK( I, J ) + END DO + END DO +* + END IF +* +* col1_(6)_b Compute elements of A1 on and above the diagonal. +* + DO J = 1, K + DO I = 1, J + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + RETURN +* +* End of CLARFB_GETT +* + END diff --git a/lapack-netlib/SRC/cungtsqr_row.f b/lapack-netlib/SRC/cungtsqr_row.f new file mode 100644 index 000000000..e1597c58b --- /dev/null +++ b/lapack-netlib/SRC/cungtsqr_row.f @@ -0,0 +1,380 @@ +*> \brief \b CUNGTSQR_ROW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGTSQR_ROW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +*> +* Definition: +* =========== +* +* SUBROUTINE CUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGTSQR_ROW generates an M-by-N complex matrix Q_out with +*> orthonormal columns from the output of CLATSQR. These N orthonormal +*> columns are the first N columns of a product of complex unitary +*> matrices Q(k)_in of order M, which are returned by CLATSQR in +*> a special format. +*> +*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). +*> +*> The input matrices Q(k)_in are stored in row and column blocks in A. +*> See the documentation of CLATSQR for more details on the format of +*> Q(k)_in, where each Q(k)_in is represented by block Householder +*> transformations. This routine calls an auxiliary routine CLARFB_GETT, +*> where the computation is performed on each individual block. The +*> algorithm first sweeps NB-sized column blocks from the right to left +*> starting in the bottom row block and continues to the top row block +*> (hence _ROW in the routine name). This sweep is in reverse order of +*> the order in which CLATSQR generates the output blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size used by CLATSQR to return +*> arrays A and T. MB > N. +*> (Note that if MB > M, then M is used instead of MB +*> as the row block size). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size used by CLATSQR to return +*> arrays A and T. NB >= 1. +*> (Note that if NB > N, then N is used instead of NB +*> as the column block size). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> +*> On entry: +*> +*> The elements on and above the diagonal are not used as +*> input. The elements below the diagonal represent the unit +*> lower-trapezoidal blocked matrix V computed by CLATSQR +*> that defines the input matrices Q_in(k) (ones on the +*> diagonal are not stored). See CLATSQR for more details. +*> +*> On exit: +*> +*> The array A contains an M-by-N orthonormal matrix Q_out, +*> i.e the columns of A are orthogonal unit vectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, +*> dimension (LDT, N * NIRB) +*> where NIRB = Number_of_input_row_blocks +*> = MAX( 1, CEIL((M-N)/(MB-N)) ) +*> Let NICB = Number_of_input_col_blocks +*> = CEIL(N/NB) +*> +*> The upper-triangular block reflectors used to define the +*> input matrices Q_in(k), k=(1:NIRB*NICB). The block +*> reflectors are stored in compact form in NIRB block +*> reflector sequences. Each of the NIRB block reflector +*> sequences is stored in a larger NB-by-N column block of T +*> and consists of NICB smaller NB-by-NB upper-triangular +*> column blocks. See CLATSQR for more details on the format +*> of T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= max(1,min(NB,N)). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)), +*> where NBLOCAL=MIN(NB,N). +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM, + $ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB, + $ KB, KB_LAST, KNB, MB1 +* .. +* .. Local Arrays .. + COMPLEX DUMMY( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CLARFB_GETT, CLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + NBLOCAL = MIN( NB, N ) +* +* Determine the workspace size. +* + IF( INFO.EQ.0 ) THEN + LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) ) + END IF +* +* Handle error in the input parameters and handle the workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGTSQR_ROW', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN + END IF +* +* (0) Set the upper-triangular part of the matrix A to zero and +* its diagonal elements to one. +* + CALL CLASET('U', M, N, CZERO, CONE, A, LDA ) +* +* KB_LAST is the column index of the last column block reflector +* in the matrices T and V. +* + KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1 +* +* +* (1) Bottom-up loop over row blocks of A, except the top row block. +* NOTE: If MB>=M, then the loop is never executed. +* + IF ( MB.LT.M ) THEN +* +* MB2 is the row blocking size for the row blocks before the +* first top row block in the matrix A. IB is the row index for +* the row blocks in the matrix A before the first top row block. +* IB_BOTTOM is the row index for the last bottom row block +* in the matrix A. JB_T is the column index of the corresponding +* column block in the matrix T. +* +* Initialize variables. +* +* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A +* including the first row block. +* + MB2 = MB - N + M_PLUS_ONE = M + 1 + ITMP = ( M - MB - 1 ) / MB2 + IB_BOTTOM = ITMP * MB2 + MB + 1 + NUM_ALL_ROW_BLOCKS = ITMP + 2 + JB_T = NUM_ALL_ROW_BLOCKS * N + 1 +* + DO IB = IB_BOTTOM, MB+1, -MB2 +* +* Determine the block size IMB for the current row block +* in the matrix A. +* + IMB = MIN( M_PLUS_ONE - IB, MB2 ) +* +* Determine the column index JB_T for the current column block +* in the matrix T. +* + JB_T = JB_T - N +* +* Apply column blocks of H in the row block from right to left. +* +* KB is the column index of the current column block reflector +* in the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + CALL CLARFB_GETT( 'I', IMB, N-KB+1, KNB, + $ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA, + $ A( IB, KB ), LDA, WORK, KNB ) +* + END DO +* + END DO +* + END IF +* +* (2) Top row block of A. +* NOTE: If MB>=M, then we have only one row block of A of size M +* and we work on the entire matrix A. +* + MB1 = MIN( MB, M ) +* +* Apply column blocks of H in the top row block from right to left. +* +* KB is the column index of the current block reflector in +* the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + IF( MB1-KB-KNB+1.EQ.0 ) THEN +* +* In SLARFB_GETT parameters, when M=0, then the matrix B +* does not exist, hence we need to pass a dummy array +* reference DUMMY(1,1) to B with LDDUMMY=1. +* + CALL CLARFB_GETT( 'N', 0, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ DUMMY( 1, 1 ), 1, WORK, KNB ) + ELSE + CALL CLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ A( KB+KNB, KB), LDA, WORK, KNB ) + + END IF +* + END DO +* + WORK( 1 ) = CMPLX( LWORKOPT ) + RETURN +* +* End of CUNGTSQR_ROW +* + END diff --git a/lapack-netlib/SRC/dgetsqrhrt.f b/lapack-netlib/SRC/dgetsqrhrt.f new file mode 100644 index 000000000..668deeba8 --- /dev/null +++ b/lapack-netlib/SRC/dgetsqrhrt.f @@ -0,0 +1,349 @@ +*> \brief \b DGETSQRHRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETSQRHRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETSQRHRT computes a NB2-sized column blocked QR-factorization +*> of a real M-by-N matrix A with M >= N, +*> +*> A = Q * R. +*> +*> The routine uses internally a NB1-sized column blocked and MB1-sized +*> row blocked TSQR-factorization and perfors the reconstruction +*> of the Householder vectors from the TSQR output. The routine also +*> converts the R_tsqr factor from the TSQR-factorization output into +*> the R factor that corresponds to the Householder QR-factorization, +*> +*> A = Q_tsqr * R_tsqr = Q * R. +*> +*> The output Q and R factors are stored in the same format as in DGEQRT +*> (Q is in blocked compact WY-representation). See the documentation +*> of DGEQRT for more details on the format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> The row block size to be used in the blocked TSQR. +*> MB1 > N. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> The column block size to be used in the blocked TSQR. +*> N >= NB1 >= 1. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> The block size to be used in the blocked QR that is +*> output. NB2 >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> +*> On entry: an M-by-N matrix A. +*> +*> On exit: +*> a) the elements on and above the diagonal +*> of the array contain the N-by-N upper-triangular +*> matrix R corresponding to the Householder QR; +*> b) the elements below the diagonal represent Q by +*> the columns of blocked V (compact WY-representation). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), +*> where +*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), +*> NB1LOCAL = MIN(NB1,N). +*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, +*> LW1 = NB1LOCAL * N, +*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT, + $ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLATSQR, DORGTSQR_ROW, DORHR_COL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB1.LE.N ) THEN + INFO = -3 + ELSE IF( NB1.LT.1 ) THEN + INFO = -4 + ELSE IF( NB2.LT.1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN + INFO = -9 + ELSE +* +* Test the input LWORK for the dimension of the array WORK. +* This workspace is used to store array: +* a) Matrix T and WORK for DLATSQR; +* b) N-by-N upper-triangular factor R_tsqr; +* c) Matrix T and array WORK for DORGTSQR_ROW; +* d) Diagonal D for DORHR_COL. +* + IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE +* +* Set block size for column blocks +* + NB1LOCAL = MIN( NB1, N ) +* + NUM_ALL_ROW_BLOCKS = MAX( 1, + $ CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) ) +* +* Length and leading dimension of WORK array to place +* T array in TSQR. +* + LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL + + LDWT = NB1LOCAL +* +* Length of TSQR work array +* + LW1 = NB1LOCAL * N +* +* Length of DORGTSQR_ROW work array. +* + LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ) +* + LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ) +* + IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN + INFO = -11 + END IF +* + END IF + END IF +* +* Handle error in the input parameters and return workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETSQRHRT', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN + END IF +* + NB2LOCAL = MIN( NB2, N ) +* +* +* (1) Perform TSQR-factorization of the M-by-N matrix A. +* + CALL DLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK(LWT+1), LW1, IINFO ) +* +* (2) Copy the factor R_tsqr stored in the upper-triangular part +* of A into the square matrix in the work array +* WORK(LWT+1:LWT+N*N) column-by-column. +* + DO J = 1, N + CALL DCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 ) + END DO +* +* (3) Generate a M-by-N matrix Q with orthonormal columns from +* the result stored below the diagonal in the array A in place. +* + + CALL DORGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK( LWT+N*N+1 ), LW2, IINFO ) +* +* (4) Perform the reconstruction of Householder vectors from +* the matrix Q (stored in A) in place. +* + CALL DORHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT, + $ WORK( LWT+N*N+1 ), IINFO ) +* +* (5) Copy the factor R_tsqr stored in the square matrix in the +* work array WORK(LWT+1:LWT+N*N) into the upper-triangular +* part of A. +* +* (6) Compute from R_tsqr the factor R_hr corresponding to +* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. +* This multiplication by the sign matrix S on the left means +* changing the sign of I-th row of the matrix R_tsqr according +* to sign of the I-th diagonal element DIAG(I) of the matrix S. +* DIAG is stored in WORK( LWT+N*N+1 ) from the DORHR_COL output. +* +* (5) and (6) can be combined in a single loop, so the rows in A +* are accessed only once. +* + DO I = 1, N + IF( WORK( LWT+N*N+I ).EQ.-ONE ) THEN + DO J = I, N + A( I, J ) = -ONE * WORK( LWT+N*(J-1)+I ) + END DO + ELSE + CALL DCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA ) + END IF + END DO +* + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN +* +* End of DGETSQRHRT +* + END \ No newline at end of file diff --git a/lapack-netlib/SRC/dlarfb_gett.f b/lapack-netlib/SRC/dlarfb_gett.f new file mode 100644 index 000000000..10ab6461e --- /dev/null +++ b/lapack-netlib/SRC/dlarfb_gett.f @@ -0,0 +1,596 @@ +*> \brief \b DLARFB_GETT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFB_GETT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, +* $ WORK, LDWORK ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER IDENT +* INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( LDWORK, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFB_GETT applies a real Householder block reflector H from the +*> left to a real (K+M)-by-N "triangular-pentagonal" matrix +*> composed of two block matrices: an upper trapezoidal K-by-N matrix A +*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored +*> in the array B. The block reflector H is stored in a compact +*> WY-representation, where the elementary reflectors are in the +*> arrays A, B and T. See Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDENT +*> \verbatim +*> IDENT is CHARACTER*1 +*> If IDENT = not 'I', or not 'i', then V1 is unit +*> lower-triangular and stored in the left K-by-K block of +*> the input matrix A, +*> If IDENT = 'I' or 'i', then V1 is an identity matrix and +*> not stored. +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number or rows of the matrix A. +*> K is also order of the matrix T, i.e. the number of +*> elementary reflectors whose product defines the block +*> reflector. 0 <= K <= N. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The upper-triangular K-by-K matrix T in the representation +*> of the block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> +*> On entry: +*> a) In the K-by-N upper-trapezoidal part A: input matrix A. +*> b) In the columns below the diagonal: columns of V1 +*> (ones are not stored on the diagonal). +*> +*> On exit: +*> A is overwritten by rectangular K-by-N product H*A. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> +*> On entry: +*> a) In the M-by-(N-K) right block: input matrix B. +*> b) In the M-by-N left block: columns of V2. +*> +*> On exit: +*> B is overwritten by rectangular M-by-N product H*B. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LDWORK,max(K,N-K)) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. LDWORK>=max(1,K). +*> +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> (1) Description of the Algebraic Operation. +*> +*> The matrix A is a K-by-N matrix composed of two column block +*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K): +*> A = ( A1, A2 ). +*> The matrix B is an M-by-N matrix composed of two column block +*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K): +*> B = ( B1, B2 ). +*> +*> Perform the operation: +*> +*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**T ) * ( A_in ) = +*> ( B_out ) ( B_in ) ( B_in ) +*> = ( I - ( V1 ) * T * ( V1**T, V2**T ) ) * ( A_in ) +*> ( V2 ) ( B_in ) +*> On input: +*> +*> a) ( A_in ) consists of two block columns: +*> ( B_in ) +*> +*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in )) +*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )), +*> +*> where the column blocks are: +*> +*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the +*> upper triangular part of the array A(1:K,1:K). +*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored. +*> +*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> b) V = ( V1 ) +*> ( V2 ) +*> +*> where: +*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored; +*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix, +*> stored in the lower-triangular part of the array +*> A(1:K,1:K) (ones are not stored), +*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K), +*> (because on input B1_in is a rectangular zero +*> matrix that is not stored and the space is +*> used to store V2). +*> +*> c) T is a K-by-K upper-triangular matrix stored +*> in the array T(1:K,1:K). +*> +*> On output: +*> +*> a) ( A_out ) consists of two block columns: +*> ( B_out ) +*> +*> ( A_out ) = (( A1_out ) ( A2_out )) +*> ( B_out ) (( B1_out ) ( B2_out )), +*> +*> where the column blocks are: +*> +*> ( A1_out ) is a K-by-K square matrix, or a K-by-K +*> upper-triangular matrix, if V1 is an +*> identity matrix. AiOut is stored in +*> the array A(1:K,1:K). +*> ( B1_out ) is an M-by-K rectangular matrix stored +*> in the array B(1:M,K:N). +*> +*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> +*> The operation above can be represented as the same operation +*> on each block column: +*> +*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**T ) * ( A1_in ) +*> ( B1_out ) ( 0 ) ( 0 ) +*> +*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**T ) * ( A2_in ) +*> ( B2_out ) ( B2_in ) ( B2_in ) +*> +*> If IDENT != 'I': +*> +*> The computation for column block 1: +*> +*> A1_out: = A1_in - V1*T*(V1**T)*A1_in +*> +*> B1_out: = - V2*T*(V1**T)*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - V1*T*( (V1**T)*A2_in + (V2**T)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( (V1**T)*A2_in + (V2**T)*B2_in ) +*> +*> If IDENT == 'I': +*> +*> The operation for column block 1: +*> +*> A1_out: = A1_in - V1*T**A1_in +*> +*> B1_out: = - V2*T**A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - T*( A2_in + (V2**T)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( A2_in + (V2**T)*B2_in ) +*> +*> (2) Description of the Algorithmic Computation. +*> +*> In the first step, we compute column block 2, i.e. A2 and B2. +*> Here, we need to use the K-by-(N-K) rectangular workspace +*> matrix W2 that is of the same size as the matrix A2. +*> W2 is stored in the array WORK(1:K,1:(N-K)). +*> +*> In the second step, we compute column block 1, i.e. A1 and B1. +*> Here, we need to use the K-by-K square workspace matrix W1 +*> that is of the same size as the as the matrix A1. +*> W1 is stored in the array WORK(1:K,1:K). +*> +*> NOTE: Hence, in this routine, we need the workspace array WORK +*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from +*> the first step and W1 from the second step. +*> +*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I', +*> more computations than in the Case (B). +*> +*> if( IDENT != 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(2) W2: = (V1**T) * W2 = (unit_lower_tr_of_(A1)**T) * W2 +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(2) W1: = (V1**T) * W1 = (unit_lower_tr_of_(A1)**T) * W1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6) square A1: = A1 - W1 +*> end if +*> end if +*> +*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I', +*> less computations than in the Case (A) +*> +*> if( IDENT == 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(6) upper-triangular_of_(A1): = A1 - W1 +*> end if +*> end if +*> +*> Combine these cases (A) and (B) together, this is the resulting +*> algorithm: +*> +*> if ( N > K ) then +*> +*> (First Step - column block 2) +*> +*> col2_(1) W2: = A2 +*> if( IDENT != 'I' ) then +*> col2_(2) W2: = (V1**T) * W2 +*> = (unit_lower_tr_of_(A1)**T) * W2 +*> end if +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2] +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> if( IDENT != 'I' ) then +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> end if +*> col2_(7) A2: = A2 - W2 +*> +*> else +*> +*> (Second Step - column block 1) +*> +*> col1_(1) W1: = A1 +*> if( IDENT != 'I' ) then +*> col1_(2) W1: = (V1**T) * W1 +*> = (unit_lower_tr_of_(A1)**T) * W1 +*> end if +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> if( IDENT != 'I' ) then +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1) +*> end if +*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1) +*> +*> end if +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, + $ WORK, LDWORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER IDENT + INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTIDENT + INTEGER I, J +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N ) + $ RETURN +* + LNOTIDENT = .NOT.LSAME( IDENT, 'I' ) +* +* ------------------------------------------------------------------ +* +* First Step. Computation of the Column Block 2: +* +* ( A2 ) := H * ( A2 ) +* ( B2 ) ( B2 ) +* +* ------------------------------------------------------------------ +* + IF( N.GT.K ) THEN +* +* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N) +* into W2=WORK(1:K, 1:N-K) column-by-column. +* + DO J = 1, N-K + CALL DCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 ) + END DO + + IF( LNOTIDENT ) THEN +* +* col2_(2) Compute W2: = (V1**T) * W2 = (A1**T) * W2, +* V1 is not an identy matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored). +* +* + CALL DTRMM( 'L', 'L', 'T', 'U', K, N-K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(3) Compute W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL DGEMM( 'T', 'N', K, N-K, M, ONE, B, LDB, + $ B( 1, K+1 ), LDB, ONE, WORK, LDWORK ) + END IF +* +* col2_(4) Compute W2: = T * W2, +* T is upper-triangular. +* + CALL DTRMM( 'L', 'U', 'N', 'N', K, N-K, ONE, T, LDT, + $ WORK, LDWORK ) +* +* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2, +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL DGEMM( 'N', 'N', M, N-K, K, -ONE, B, LDB, + $ WORK, LDWORK, ONE, B( 1, K+1 ), LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col2_(6) Compute W2: = V1 * W2 = A1 * W2, +* V1 is not an identity matrix, but unit lower-triangular, +* V1 stored in A1 (diagonal ones are not stored). +* + CALL DTRMM( 'L', 'L', 'N', 'U', K, N-K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(7) Compute A2: = A2 - W2 = +* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K), +* column-by-column. +* + DO J = 1, N-K + DO I = 1, K + A( I, K+J ) = A( I, K+J ) - WORK( I, J ) + END DO + END DO +* + END IF +* +* ------------------------------------------------------------------ +* +* Second Step. Computation of the Column Block 1: +* +* ( A1 ) := H * ( A1 ) +* ( B1 ) ( 0 ) +* +* ------------------------------------------------------------------ +* +* col1_(1) Compute W1: = A1. Copy the upper-triangular +* A1 = A(1:K, 1:K) into the upper-triangular +* W1 = WORK(1:K, 1:K) column-by-column. +* + DO J = 1, K + CALL DCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 ) + END DO +* +* Set the subdiagonal elements of W1 to zero column-by-column. +* + DO J = 1, K - 1 + DO I = J + 1, K + WORK( I, J ) = ZERO + END DO + END DO +* + IF( LNOTIDENT ) THEN +* +* col1_(2) Compute W1: = (V1**T) * W1 = (A1**T) * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL DTRMM( 'L', 'L', 'T', 'U', K, K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col1_(3) Compute W1: = T * W1, +* T is upper-triangular, +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL DTRMM( 'L', 'U', 'N', 'N', K, K, ONE, T, LDT, + $ WORK, LDWORK ) +* +* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1, +* V2 = B1, W1 is upper-triangular with zeroes below the diagonal. +* + IF( M.GT.0 ) THEN + CALL DTRMM( 'R', 'U', 'N', 'N', M, K, -ONE, WORK, LDWORK, + $ B, LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col1_(5) Compute W1: = V1 * W1 = A1 * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular on input with zeroes below the diagonal, +* and square on output. +* + CALL DTRMM( 'L', 'L', 'N', 'U', K, K, ONE, A, LDA, + $ WORK, LDWORK ) +* +* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K) +* column-by-column. A1 is upper-triangular on input. +* If IDENT, A1 is square on output, and W1 is square, +* if NOT IDENT, A1 is upper-triangular on output, +* W1 is upper-triangular. +* +* col1_(6)_a Compute elements of A1 below the diagonal. +* + DO J = 1, K - 1 + DO I = J + 1, K + A( I, J ) = - WORK( I, J ) + END DO + END DO +* + END IF +* +* col1_(6)_b Compute elements of A1 on and above the diagonal. +* + DO J = 1, K + DO I = 1, J + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + RETURN +* +* End of DLARFB_GETT +* + END diff --git a/lapack-netlib/SRC/dorgtsqr_row.f b/lapack-netlib/SRC/dorgtsqr_row.f new file mode 100644 index 000000000..94f8b0120 --- /dev/null +++ b/lapack-netlib/SRC/dorgtsqr_row.f @@ -0,0 +1,379 @@ +*> \brief \b DORGTSQR_ROW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGTSQR_ROW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGTSQR_ROW generates an M-by-N real matrix Q_out with +*> orthonormal columns from the output of DLATSQR. These N orthonormal +*> columns are the first N columns of a product of complex unitary +*> matrices Q(k)_in of order M, which are returned by DLATSQR in +*> a special format. +*> +*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). +*> +*> The input matrices Q(k)_in are stored in row and column blocks in A. +*> See the documentation of DLATSQR for more details on the format of +*> Q(k)_in, where each Q(k)_in is represented by block Householder +*> transformations. This routine calls an auxiliary routine DLARFB_GETT, +*> where the computation is performed on each individual block. The +*> algorithm first sweeps NB-sized column blocks from the right to left +*> starting in the bottom row block and continues to the top row block +*> (hence _ROW in the routine name). This sweep is in reverse order of +*> the order in which DLATSQR generates the output blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size used by DLATSQR to return +*> arrays A and T. MB > N. +*> (Note that if MB > M, then M is used instead of MB +*> as the row block size). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size used by DLATSQR to return +*> arrays A and T. NB >= 1. +*> (Note that if NB > N, then N is used instead of NB +*> as the column block size). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> +*> On entry: +*> +*> The elements on and above the diagonal are not used as +*> input. The elements below the diagonal represent the unit +*> lower-trapezoidal blocked matrix V computed by DLATSQR +*> that defines the input matrices Q_in(k) (ones on the +*> diagonal are not stored). See DLATSQR for more details. +*> +*> On exit: +*> +*> The array A contains an M-by-N orthonormal matrix Q_out, +*> i.e the columns of A are orthogonal unit vectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, +*> dimension (LDT, N * NIRB) +*> where NIRB = Number_of_input_row_blocks +*> = MAX( 1, CEIL((M-N)/(MB-N)) ) +*> Let NICB = Number_of_input_col_blocks +*> = CEIL(N/NB) +*> +*> The upper-triangular block reflectors used to define the +*> input matrices Q_in(k), k=(1:NIRB*NICB). The block +*> reflectors are stored in compact form in NIRB block +*> reflector sequences. Each of the NIRB block reflector +*> sequences is stored in a larger NB-by-N column block of T +*> and consists of NICB smaller NB-by-NB upper-triangular +*> column blocks. See DLATSQR for more details on the format +*> of T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= max(1,min(NB,N)). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)), +*> where NBLOCAL=MIN(NB,N). +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM, + $ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB, + $ KB, KB_LAST, KNB, MB1 +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUMMY( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLARFB_GETT, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + NBLOCAL = MIN( NB, N ) +* +* Determine the workspace size. +* + IF( INFO.EQ.0 ) THEN + LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) ) + END IF +* +* Handle error in the input parameters and handle the workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGTSQR_ROW', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN + END IF +* +* (0) Set the upper-triangular part of the matrix A to zero and +* its diagonal elements to one. +* + CALL DLASET('U', M, N, ZERO, ONE, A, LDA ) +* +* KB_LAST is the column index of the last column block reflector +* in the matrices T and V. +* + KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1 +* +* +* (1) Bottom-up loop over row blocks of A, except the top row block. +* NOTE: If MB>=M, then the loop is never executed. +* + IF ( MB.LT.M ) THEN +* +* MB2 is the row blocking size for the row blocks before the +* first top row block in the matrix A. IB is the row index for +* the row blocks in the matrix A before the first top row block. +* IB_BOTTOM is the row index for the last bottom row block +* in the matrix A. JB_T is the column index of the corresponding +* column block in the matrix T. +* +* Initialize variables. +* +* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A +* including the first row block. +* + MB2 = MB - N + M_PLUS_ONE = M + 1 + ITMP = ( M - MB - 1 ) / MB2 + IB_BOTTOM = ITMP * MB2 + MB + 1 + NUM_ALL_ROW_BLOCKS = ITMP + 2 + JB_T = NUM_ALL_ROW_BLOCKS * N + 1 +* + DO IB = IB_BOTTOM, MB+1, -MB2 +* +* Determine the block size IMB for the current row block +* in the matrix A. +* + IMB = MIN( M_PLUS_ONE - IB, MB2 ) +* +* Determine the column index JB_T for the current column block +* in the matrix T. +* + JB_T = JB_T - N +* +* Apply column blocks of H in the row block from right to left. +* +* KB is the column index of the current column block reflector +* in the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + CALL DLARFB_GETT( 'I', IMB, N-KB+1, KNB, + $ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA, + $ A( IB, KB ), LDA, WORK, KNB ) +* + END DO +* + END DO +* + END IF +* +* (2) Top row block of A. +* NOTE: If MB>=M, then we have only one row block of A of size M +* and we work on the entire matrix A. +* + MB1 = MIN( MB, M ) +* +* Apply column blocks of H in the top row block from right to left. +* +* KB is the column index of the current block reflector in +* the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + IF( MB1-KB-KNB+1.EQ.0 ) THEN +* +* In SLARFB_GETT parameters, when M=0, then the matrix B +* does not exist, hence we need to pass a dummy array +* reference DUMMY(1,1) to B with LDDUMMY=1. +* + CALL DLARFB_GETT( 'N', 0, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ DUMMY( 1, 1 ), 1, WORK, KNB ) + ELSE + CALL DLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ A( KB+KNB, KB), LDA, WORK, KNB ) + + END IF +* + END DO +* + WORK( 1 ) = DBLE( LWORKOPT ) + RETURN +* +* End of DORGTSQR_ROW +* + END diff --git a/lapack-netlib/SRC/sgetsqrhrt.f b/lapack-netlib/SRC/sgetsqrhrt.f new file mode 100644 index 000000000..f9580da7b --- /dev/null +++ b/lapack-netlib/SRC/sgetsqrhrt.f @@ -0,0 +1,349 @@ +*> \brief \b SGETSQRHRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETSQRHRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETSQRHRT computes a NB2-sized column blocked QR-factorization +*> of a complex M-by-N matrix A with M >= N, +*> +*> A = Q * R. +*> +*> The routine uses internally a NB1-sized column blocked and MB1-sized +*> row blocked TSQR-factorization and perfors the reconstruction +*> of the Householder vectors from the TSQR output. The routine also +*> converts the R_tsqr factor from the TSQR-factorization output into +*> the R factor that corresponds to the Householder QR-factorization, +*> +*> A = Q_tsqr * R_tsqr = Q * R. +*> +*> The output Q and R factors are stored in the same format as in SGEQRT +*> (Q is in blocked compact WY-representation). See the documentation +*> of SGEQRT for more details on the format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> The row block size to be used in the blocked TSQR. +*> MB1 > N. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> The column block size to be used in the blocked TSQR. +*> N >= NB1 >= 1. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> The block size to be used in the blocked QR that is +*> output. NB2 >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> +*> On entry: an M-by-N matrix A. +*> +*> On exit: +*> a) the elements on and above the diagonal +*> of the array contain the N-by-N upper-triangular +*> matrix R corresponding to the Householder QR; +*> b) the elements below the diagonal represent Q by +*> the columns of blocked V (compact WY-representation). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), +*> where +*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), +*> NB1LOCAL = MIN(NB1,N). +*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, +*> LW1 = NB1LOCAL * N, +*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup singleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT, + $ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLATSQR, SORGTSQR_ROW, SORHR_COL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB1.LE.N ) THEN + INFO = -3 + ELSE IF( NB1.LT.1 ) THEN + INFO = -4 + ELSE IF( NB2.LT.1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN + INFO = -9 + ELSE +* +* Test the input LWORK for the dimension of the array WORK. +* This workspace is used to store array: +* a) Matrix T and WORK for SLATSQR; +* b) N-by-N upper-triangular factor R_tsqr; +* c) Matrix T and array WORK for SORGTSQR_ROW; +* d) Diagonal D for SORHR_COL. +* + IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE +* +* Set block size for column blocks +* + NB1LOCAL = MIN( NB1, N ) +* + NUM_ALL_ROW_BLOCKS = MAX( 1, + $ CEILING( REAL( M - N ) / REAL( MB1 - N ) ) ) +* +* Length and leading dimension of WORK array to place +* T array in TSQR. +* + LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL + + LDWT = NB1LOCAL +* +* Length of TSQR work array +* + LW1 = NB1LOCAL * N +* +* Length of SORGTSQR_ROW work array. +* + LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ) +* + LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ) +* + IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN + INFO = -11 + END IF +* + END IF + END IF +* +* Handle error in the input parameters and return workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETSQRHRT', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = REAL( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = REAL( LWORKOPT ) + RETURN + END IF +* + NB2LOCAL = MIN( NB2, N ) +* +* +* (1) Perform TSQR-factorization of the M-by-N matrix A. +* + CALL SLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK(LWT+1), LW1, IINFO ) +* +* (2) Copy the factor R_tsqr stored in the upper-triangular part +* of A into the square matrix in the work array +* WORK(LWT+1:LWT+N*N) column-by-column. +* + DO J = 1, N + CALL SCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 ) + END DO +* +* (3) Generate a M-by-N matrix Q with orthonormal columns from +* the result stored below the diagonal in the array A in place. +* + + CALL SORGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK( LWT+N*N+1 ), LW2, IINFO ) +* +* (4) Perform the reconstruction of Householder vectors from +* the matrix Q (stored in A) in place. +* + CALL SORHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT, + $ WORK( LWT+N*N+1 ), IINFO ) +* +* (5) Copy the factor R_tsqr stored in the square matrix in the +* work array WORK(LWT+1:LWT+N*N) into the upper-triangular +* part of A. +* +* (6) Compute from R_tsqr the factor R_hr corresponding to +* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. +* This multiplication by the sign matrix S on the left means +* changing the sign of I-th row of the matrix R_tsqr according +* to sign of the I-th diagonal element DIAG(I) of the matrix S. +* DIAG is stored in WORK( LWT+N*N+1 ) from the SORHR_COL output. +* +* (5) and (6) can be combined in a single loop, so the rows in A +* are accessed only once. +* + DO I = 1, N + IF( WORK( LWT+N*N+I ).EQ.-ONE ) THEN + DO J = I, N + A( I, J ) = -ONE * WORK( LWT+N*(J-1)+I ) + END DO + ELSE + CALL SCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA ) + END IF + END DO +* + WORK( 1 ) = REAL( LWORKOPT ) + RETURN +* +* End of SGETSQRHRT +* + END \ No newline at end of file diff --git a/lapack-netlib/SRC/slarfb_gett.f b/lapack-netlib/SRC/slarfb_gett.f new file mode 100644 index 000000000..7719f2965 --- /dev/null +++ b/lapack-netlib/SRC/slarfb_gett.f @@ -0,0 +1,596 @@ +*> \brief \b SLARFB_GETT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFB_GETT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, +* $ WORK, LDWORK ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER IDENT +* INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( LDWORK, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFB_GETT applies a real Householder block reflector H from the +*> left to a real (K+M)-by-N "triangular-pentagonal" matrix +*> composed of two block matrices: an upper trapezoidal K-by-N matrix A +*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored +*> in the array B. The block reflector H is stored in a compact +*> WY-representation, where the elementary reflectors are in the +*> arrays A, B and T. See Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDENT +*> \verbatim +*> IDENT is CHARACTER*1 +*> If IDENT = not 'I', or not 'i', then V1 is unit +*> lower-triangular and stored in the left K-by-K block of +*> the input matrix A, +*> If IDENT = 'I' or 'i', then V1 is an identity matrix and +*> not stored. +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number or rows of the matrix A. +*> K is also order of the matrix T, i.e. the number of +*> elementary reflectors whose product defines the block +*> reflector. 0 <= K <= N. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The upper-triangular K-by-K matrix T in the representation +*> of the block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> +*> On entry: +*> a) In the K-by-N upper-trapezoidal part A: input matrix A. +*> b) In the columns below the diagonal: columns of V1 +*> (ones are not stored on the diagonal). +*> +*> On exit: +*> A is overwritten by rectangular K-by-N product H*A. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> +*> On entry: +*> a) In the M-by-(N-K) right block: input matrix B. +*> b) In the M-by-N left block: columns of V2. +*> +*> On exit: +*> B is overwritten by rectangular M-by-N product H*B. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LDWORK,max(K,N-K)) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. LDWORK>=max(1,K). +*> +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup singleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> (1) Description of the Algebraic Operation. +*> +*> The matrix A is a K-by-N matrix composed of two column block +*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K): +*> A = ( A1, A2 ). +*> The matrix B is an M-by-N matrix composed of two column block +*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K): +*> B = ( B1, B2 ). +*> +*> Perform the operation: +*> +*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**T ) * ( A_in ) = +*> ( B_out ) ( B_in ) ( B_in ) +*> = ( I - ( V1 ) * T * ( V1**T, V2**T ) ) * ( A_in ) +*> ( V2 ) ( B_in ) +*> On input: +*> +*> a) ( A_in ) consists of two block columns: +*> ( B_in ) +*> +*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in )) +*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )), +*> +*> where the column blocks are: +*> +*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the +*> upper triangular part of the array A(1:K,1:K). +*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored. +*> +*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> b) V = ( V1 ) +*> ( V2 ) +*> +*> where: +*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored; +*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix, +*> stored in the lower-triangular part of the array +*> A(1:K,1:K) (ones are not stored), +*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K), +*> (because on input B1_in is a rectangular zero +*> matrix that is not stored and the space is +*> used to store V2). +*> +*> c) T is a K-by-K upper-triangular matrix stored +*> in the array T(1:K,1:K). +*> +*> On output: +*> +*> a) ( A_out ) consists of two block columns: +*> ( B_out ) +*> +*> ( A_out ) = (( A1_out ) ( A2_out )) +*> ( B_out ) (( B1_out ) ( B2_out )), +*> +*> where the column blocks are: +*> +*> ( A1_out ) is a K-by-K square matrix, or a K-by-K +*> upper-triangular matrix, if V1 is an +*> identity matrix. AiOut is stored in +*> the array A(1:K,1:K). +*> ( B1_out ) is an M-by-K rectangular matrix stored +*> in the array B(1:M,K:N). +*> +*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> +*> The operation above can be represented as the same operation +*> on each block column: +*> +*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**T ) * ( A1_in ) +*> ( B1_out ) ( 0 ) ( 0 ) +*> +*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**T ) * ( A2_in ) +*> ( B2_out ) ( B2_in ) ( B2_in ) +*> +*> If IDENT != 'I': +*> +*> The computation for column block 1: +*> +*> A1_out: = A1_in - V1*T*(V1**T)*A1_in +*> +*> B1_out: = - V2*T*(V1**T)*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - V1*T*( (V1**T)*A2_in + (V2**T)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( (V1**T)*A2_in + (V2**T)*B2_in ) +*> +*> If IDENT == 'I': +*> +*> The operation for column block 1: +*> +*> A1_out: = A1_in - V1*T**A1_in +*> +*> B1_out: = - V2*T**A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - T*( A2_in + (V2**T)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( A2_in + (V2**T)*B2_in ) +*> +*> (2) Description of the Algorithmic Computation. +*> +*> In the first step, we compute column block 2, i.e. A2 and B2. +*> Here, we need to use the K-by-(N-K) rectangular workspace +*> matrix W2 that is of the same size as the matrix A2. +*> W2 is stored in the array WORK(1:K,1:(N-K)). +*> +*> In the second step, we compute column block 1, i.e. A1 and B1. +*> Here, we need to use the K-by-K square workspace matrix W1 +*> that is of the same size as the as the matrix A1. +*> W1 is stored in the array WORK(1:K,1:K). +*> +*> NOTE: Hence, in this routine, we need the workspace array WORK +*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from +*> the first step and W1 from the second step. +*> +*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I', +*> more computations than in the Case (B). +*> +*> if( IDENT != 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(2) W2: = (V1**T) * W2 = (unit_lower_tr_of_(A1)**T) * W2 +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(2) W1: = (V1**T) * W1 = (unit_lower_tr_of_(A1)**T) * W1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6) square A1: = A1 - W1 +*> end if +*> end if +*> +*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I', +*> less computations than in the Case (A) +*> +*> if( IDENT == 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(6) upper-triangular_of_(A1): = A1 - W1 +*> end if +*> end if +*> +*> Combine these cases (A) and (B) together, this is the resulting +*> algorithm: +*> +*> if ( N > K ) then +*> +*> (First Step - column block 2) +*> +*> col2_(1) W2: = A2 +*> if( IDENT != 'I' ) then +*> col2_(2) W2: = (V1**T) * W2 +*> = (unit_lower_tr_of_(A1)**T) * W2 +*> end if +*> col2_(3) W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2] +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> if( IDENT != 'I' ) then +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> end if +*> col2_(7) A2: = A2 - W2 +*> +*> else +*> +*> (Second Step - column block 1) +*> +*> col1_(1) W1: = A1 +*> if( IDENT != 'I' ) then +*> col1_(2) W1: = (V1**T) * W1 +*> = (unit_lower_tr_of_(A1)**T) * W1 +*> end if +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> if( IDENT != 'I' ) then +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1) +*> end if +*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1) +*> +*> end if +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, + $ WORK, LDWORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER IDENT + INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTIDENT + INTEGER I, J +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N ) + $ RETURN +* + LNOTIDENT = .NOT.LSAME( IDENT, 'I' ) +* +* ------------------------------------------------------------------ +* +* First Step. Computation of the Column Block 2: +* +* ( A2 ) := H * ( A2 ) +* ( B2 ) ( B2 ) +* +* ------------------------------------------------------------------ +* + IF( N.GT.K ) THEN +* +* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N) +* into W2=WORK(1:K, 1:N-K) column-by-column. +* + DO J = 1, N-K + CALL SCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 ) + END DO + + IF( LNOTIDENT ) THEN +* +* col2_(2) Compute W2: = (V1**T) * W2 = (A1**T) * W2, +* V1 is not an identy matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored). +* +* + CALL STRMM( 'L', 'L', 'T', 'U', K, N-K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(3) Compute W2: = W2 + (V2**T) * B2 = W2 + (B1**T) * B2 +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL SGEMM( 'T', 'N', K, N-K, M, ONE, B, LDB, + $ B( 1, K+1 ), LDB, ONE, WORK, LDWORK ) + END IF +* +* col2_(4) Compute W2: = T * W2, +* T is upper-triangular. +* + CALL STRMM( 'L', 'U', 'N', 'N', K, N-K, ONE, T, LDT, + $ WORK, LDWORK ) +* +* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2, +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL SGEMM( 'N', 'N', M, N-K, K, -ONE, B, LDB, + $ WORK, LDWORK, ONE, B( 1, K+1 ), LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col2_(6) Compute W2: = V1 * W2 = A1 * W2, +* V1 is not an identity matrix, but unit lower-triangular, +* V1 stored in A1 (diagonal ones are not stored). +* + CALL STRMM( 'L', 'L', 'N', 'U', K, N-K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(7) Compute A2: = A2 - W2 = +* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K), +* column-by-column. +* + DO J = 1, N-K + DO I = 1, K + A( I, K+J ) = A( I, K+J ) - WORK( I, J ) + END DO + END DO +* + END IF +* +* ------------------------------------------------------------------ +* +* Second Step. Computation of the Column Block 1: +* +* ( A1 ) := H * ( A1 ) +* ( B1 ) ( 0 ) +* +* ------------------------------------------------------------------ +* +* col1_(1) Compute W1: = A1. Copy the upper-triangular +* A1 = A(1:K, 1:K) into the upper-triangular +* W1 = WORK(1:K, 1:K) column-by-column. +* + DO J = 1, K + CALL SCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 ) + END DO +* +* Set the subdiagonal elements of W1 to zero column-by-column. +* + DO J = 1, K - 1 + DO I = J + 1, K + WORK( I, J ) = ZERO + END DO + END DO +* + IF( LNOTIDENT ) THEN +* +* col1_(2) Compute W1: = (V1**T) * W1 = (A1**T) * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL STRMM( 'L', 'L', 'T', 'U', K, K, ONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col1_(3) Compute W1: = T * W1, +* T is upper-triangular, +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL STRMM( 'L', 'U', 'N', 'N', K, K, ONE, T, LDT, + $ WORK, LDWORK ) +* +* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1, +* V2 = B1, W1 is upper-triangular with zeroes below the diagonal. +* + IF( M.GT.0 ) THEN + CALL STRMM( 'R', 'U', 'N', 'N', M, K, -ONE, WORK, LDWORK, + $ B, LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col1_(5) Compute W1: = V1 * W1 = A1 * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular on input with zeroes below the diagonal, +* and square on output. +* + CALL STRMM( 'L', 'L', 'N', 'U', K, K, ONE, A, LDA, + $ WORK, LDWORK ) +* +* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K) +* column-by-column. A1 is upper-triangular on input. +* If IDENT, A1 is square on output, and W1 is square, +* if NOT IDENT, A1 is upper-triangular on output, +* W1 is upper-triangular. +* +* col1_(6)_a Compute elements of A1 below the diagonal. +* + DO J = 1, K - 1 + DO I = J + 1, K + A( I, J ) = - WORK( I, J ) + END DO + END DO +* + END IF +* +* col1_(6)_b Compute elements of A1 on and above the diagonal. +* + DO J = 1, K + DO I = 1, J + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + RETURN +* +* End of SLARFB_GETT +* + END diff --git a/lapack-netlib/SRC/sorgtsqr_row.f b/lapack-netlib/SRC/sorgtsqr_row.f new file mode 100644 index 000000000..d2a2150cd --- /dev/null +++ b/lapack-netlib/SRC/sorgtsqr_row.f @@ -0,0 +1,379 @@ +*> \brief \b SORGTSQR_ROW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGTSQR_ROW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGTSQR_ROW generates an M-by-N real matrix Q_out with +*> orthonormal columns from the output of SLATSQR. These N orthonormal +*> columns are the first N columns of a product of complex unitary +*> matrices Q(k)_in of order M, which are returned by SLATSQR in +*> a special format. +*> +*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). +*> +*> The input matrices Q(k)_in are stored in row and column blocks in A. +*> See the documentation of SLATSQR for more details on the format of +*> Q(k)_in, where each Q(k)_in is represented by block Householder +*> transformations. This routine calls an auxiliary routine SLARFB_GETT, +*> where the computation is performed on each individual block. The +*> algorithm first sweeps NB-sized column blocks from the right to left +*> starting in the bottom row block and continues to the top row block +*> (hence _ROW in the routine name). This sweep is in reverse order of +*> the order in which SLATSQR generates the output blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size used by SLATSQR to return +*> arrays A and T. MB > N. +*> (Note that if MB > M, then M is used instead of MB +*> as the row block size). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size used by SLATSQR to return +*> arrays A and T. NB >= 1. +*> (Note that if NB > N, then N is used instead of NB +*> as the column block size). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> +*> On entry: +*> +*> The elements on and above the diagonal are not used as +*> input. The elements below the diagonal represent the unit +*> lower-trapezoidal blocked matrix V computed by SLATSQR +*> that defines the input matrices Q_in(k) (ones on the +*> diagonal are not stored). See SLATSQR for more details. +*> +*> On exit: +*> +*> The array A contains an M-by-N orthonormal matrix Q_out, +*> i.e the columns of A are orthogonal unit vectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, +*> dimension (LDT, N * NIRB) +*> where NIRB = Number_of_input_row_blocks +*> = MAX( 1, CEIL((M-N)/(MB-N)) ) +*> Let NICB = Number_of_input_col_blocks +*> = CEIL(N/NB) +*> +*> The upper-triangular block reflectors used to define the +*> input matrices Q_in(k), k=(1:NIRB*NICB). The block +*> reflectors are stored in compact form in NIRB block +*> reflector sequences. Each of the NIRB block reflector +*> sequences is stored in a larger NB-by-N column block of T +*> and consists of NICB smaller NB-by-NB upper-triangular +*> column blocks. See SLATSQR for more details on the format +*> of T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= max(1,min(NB,N)). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)), +*> where NBLOCAL=MIN(NB,N). +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup sigleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SORGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM, + $ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB, + $ KB, KB_LAST, KNB, MB1 +* .. +* .. Local Arrays .. + REAL DUMMY( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLARFB_GETT, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + NBLOCAL = MIN( NB, N ) +* +* Determine the workspace size. +* + IF( INFO.EQ.0 ) THEN + LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) ) + END IF +* +* Handle error in the input parameters and handle the workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGTSQR_ROW', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = REAL( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = REAL( LWORKOPT ) + RETURN + END IF +* +* (0) Set the upper-triangular part of the matrix A to zero and +* its diagonal elements to one. +* + CALL SLASET('U', M, N, ZERO, ONE, A, LDA ) +* +* KB_LAST is the column index of the last column block reflector +* in the matrices T and V. +* + KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1 +* +* +* (1) Bottom-up loop over row blocks of A, except the top row block. +* NOTE: If MB>=M, then the loop is never executed. +* + IF ( MB.LT.M ) THEN +* +* MB2 is the row blocking size for the row blocks before the +* first top row block in the matrix A. IB is the row index for +* the row blocks in the matrix A before the first top row block. +* IB_BOTTOM is the row index for the last bottom row block +* in the matrix A. JB_T is the column index of the corresponding +* column block in the matrix T. +* +* Initialize variables. +* +* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A +* including the first row block. +* + MB2 = MB - N + M_PLUS_ONE = M + 1 + ITMP = ( M - MB - 1 ) / MB2 + IB_BOTTOM = ITMP * MB2 + MB + 1 + NUM_ALL_ROW_BLOCKS = ITMP + 2 + JB_T = NUM_ALL_ROW_BLOCKS * N + 1 +* + DO IB = IB_BOTTOM, MB+1, -MB2 +* +* Determine the block size IMB for the current row block +* in the matrix A. +* + IMB = MIN( M_PLUS_ONE - IB, MB2 ) +* +* Determine the column index JB_T for the current column block +* in the matrix T. +* + JB_T = JB_T - N +* +* Apply column blocks of H in the row block from right to left. +* +* KB is the column index of the current column block reflector +* in the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + CALL SLARFB_GETT( 'I', IMB, N-KB+1, KNB, + $ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA, + $ A( IB, KB ), LDA, WORK, KNB ) +* + END DO +* + END DO +* + END IF +* +* (2) Top row block of A. +* NOTE: If MB>=M, then we have only one row block of A of size M +* and we work on the entire matrix A. +* + MB1 = MIN( MB, M ) +* +* Apply column blocks of H in the top row block from right to left. +* +* KB is the column index of the current block reflector in +* the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + IF( MB1-KB-KNB+1.EQ.0 ) THEN +* +* In SLARFB_GETT parameters, when M=0, then the matrix B +* does not exist, hence we need to pass a dummy array +* reference DUMMY(1,1) to B with LDDUMMY=1. +* + CALL SLARFB_GETT( 'N', 0, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ DUMMY( 1, 1 ), 1, WORK, KNB ) + ELSE + CALL SLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ A( KB+KNB, KB), LDA, WORK, KNB ) + + END IF +* + END DO +* + WORK( 1 ) = REAL( LWORKOPT ) + RETURN +* +* End of SORGTSQR_ROW +* + END diff --git a/lapack-netlib/SRC/zgetsqrhrt.f b/lapack-netlib/SRC/zgetsqrhrt.f new file mode 100644 index 000000000..5f0167937 --- /dev/null +++ b/lapack-netlib/SRC/zgetsqrhrt.f @@ -0,0 +1,349 @@ +*> \brief \b ZGETSQRHRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETSQRHRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETSQRHRT computes a NB2-sized column blocked QR-factorization +*> of a complex M-by-N matrix A with M >= N, +*> +*> A = Q * R. +*> +*> The routine uses internally a NB1-sized column blocked and MB1-sized +*> row blocked TSQR-factorization and perfors the reconstruction +*> of the Householder vectors from the TSQR output. The routine also +*> converts the R_tsqr factor from the TSQR-factorization output into +*> the R factor that corresponds to the Householder QR-factorization, +*> +*> A = Q_tsqr * R_tsqr = Q * R. +*> +*> The output Q and R factors are stored in the same format as in ZGEQRT +*> (Q is in blocked compact WY-representation). See the documentation +*> of ZGEQRT for more details on the format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> The row block size to be used in the blocked TSQR. +*> MB1 > N. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> The column block size to be used in the blocked TSQR. +*> N >= NB1 >= 1. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> The block size to be used in the blocked QR that is +*> output. NB2 >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> On entry: an M-by-N matrix A. +*> +*> On exit: +*> a) the elements on and above the diagonal +*> of the array contain the N-by-N upper-triangular +*> matrix R corresponding to the Householder QR; +*> b) the elements below the diagonal represent Q by +*> the columns of blocked V (compact WY-representation). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), +*> where +*> NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), +*> NB1LOCAL = MIN(NB1,N). +*> LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, +*> LW1 = NB1LOCAL * N, +*> LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup comlpex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT, + $ NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZLATSQR, ZUNGTSQR_ROW, ZUNHR_COL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, DBLE, DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB1.LE.N ) THEN + INFO = -3 + ELSE IF( NB1.LT.1 ) THEN + INFO = -4 + ELSE IF( NB2.LT.1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, MIN( NB2, N ) ) ) THEN + INFO = -9 + ELSE +* +* Test the input LWORK for the dimension of the array WORK. +* This workspace is used to store array: +* a) Matrix T and WORK for ZLATSQR; +* b) N-by-N upper-triangular factor R_tsqr; +* c) Matrix T and array WORK for ZUNGTSQR_ROW; +* d) Diagonal D for ZUNHR_COL. +* + IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE +* +* Set block size for column blocks +* + NB1LOCAL = MIN( NB1, N ) +* + NUM_ALL_ROW_BLOCKS = MAX( 1, + $ CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) ) +* +* Length and leading dimension of WORK array to place +* T array in TSQR. +* + LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL + + LDWT = NB1LOCAL +* +* Length of TSQR work array +* + LW1 = NB1LOCAL * N +* +* Length of ZUNGTSQR_ROW work array. +* + LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ) +* + LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ) +* + IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN + INFO = -11 + END IF +* + END IF + END IF +* +* Handle error in the input parameters and return workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETSQRHRT', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN + END IF +* + NB2LOCAL = MIN( NB2, N ) +* +* +* (1) Perform TSQR-factorization of the M-by-N matrix A. +* + CALL ZLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK(LWT+1), LW1, IINFO ) +* +* (2) Copy the factor R_tsqr stored in the upper-triangular part +* of A into the square matrix in the work array +* WORK(LWT+1:LWT+N*N) column-by-column. +* + DO J = 1, N + CALL ZCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 ) + END DO +* +* (3) Generate a M-by-N matrix Q with orthonormal columns from +* the result stored below the diagonal in the array A in place. +* + + CALL ZUNGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT, + $ WORK( LWT+N*N+1 ), LW2, IINFO ) +* +* (4) Perform the reconstruction of Householder vectors from +* the matrix Q (stored in A) in place. +* + CALL ZUNHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT, + $ WORK( LWT+N*N+1 ), IINFO ) +* +* (5) Copy the factor R_tsqr stored in the square matrix in the +* work array WORK(LWT+1:LWT+N*N) into the upper-triangular +* part of A. +* +* (6) Compute from R_tsqr the factor R_hr corresponding to +* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. +* This multiplication by the sign matrix S on the left means +* changing the sign of I-th row of the matrix R_tsqr according +* to sign of the I-th diagonal element DIAG(I) of the matrix S. +* DIAG is stored in WORK( LWT+N*N+1 ) from the ZUNHR_COL output. +* +* (5) and (6) can be combined in a single loop, so the rows in A +* are accessed only once. +* + DO I = 1, N + IF( WORK( LWT+N*N+I ).EQ.-CONE ) THEN + DO J = I, N + A( I, J ) = -CONE * WORK( LWT+N*(J-1)+I ) + END DO + ELSE + CALL ZCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA ) + END IF + END DO +* + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN +* +* End of ZGETSQRHRT +* + END \ No newline at end of file diff --git a/lapack-netlib/SRC/zlarfb_gett.f b/lapack-netlib/SRC/zlarfb_gett.f new file mode 100644 index 000000000..4a3c4dcf1 --- /dev/null +++ b/lapack-netlib/SRC/zlarfb_gett.f @@ -0,0 +1,597 @@ +*> \brief \b ZLARFB_GETT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFB_GETT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, +* $ WORK, LDWORK ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER IDENT +* INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( LDWORK, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFB_GETT applies a complex Householder block reflector H from the +*> left to a complex (K+M)-by-N "triangular-pentagonal" matrix +*> composed of two block matrices: an upper trapezoidal K-by-N matrix A +*> stored in the array A, and a rectangular M-by-(N-K) matrix B, stored +*> in the array B. The block reflector H is stored in a compact +*> WY-representation, where the elementary reflectors are in the +*> arrays A, B and T. See Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDENT +*> \verbatim +*> IDENT is CHARACTER*1 +*> If IDENT = not 'I', or not 'i', then V1 is unit +*> lower-triangular and stored in the left K-by-K block of +*> the input matrix A, +*> If IDENT = 'I' or 'i', then V1 is an identity matrix and +*> not stored. +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number or rows of the matrix A. +*> K is also order of the matrix T, i.e. the number of +*> elementary reflectors whose product defines the block +*> reflector. 0 <= K <= N. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The upper-triangular K-by-K matrix T in the representation +*> of the block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> On entry: +*> a) In the K-by-N upper-trapezoidal part A: input matrix A. +*> b) In the columns below the diagonal: columns of V1 +*> (ones are not stored on the diagonal). +*> +*> On exit: +*> A is overwritten by rectangular K-by-N product H*A. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> +*> On entry: +*> a) In the M-by-(N-K) right block: input matrix B. +*> b) In the M-by-N left block: columns of V2. +*> +*> On exit: +*> B is overwritten by rectangular M-by-N product H*B. +*> +*> See Further Details section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, +*> dimension (LDWORK,max(K,N-K)) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. LDWORK>=max(1,K). +*> +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> (1) Description of the Algebraic Operation. +*> +*> The matrix A is a K-by-N matrix composed of two column block +*> matrices, A1, which is K-by-K, and A2, which is K-by-(N-K): +*> A = ( A1, A2 ). +*> The matrix B is an M-by-N matrix composed of two column block +*> matrices, B1, which is M-by-K, and B2, which is M-by-(N-K): +*> B = ( B1, B2 ). +*> +*> Perform the operation: +*> +*> ( A_out ) := H * ( A_in ) = ( I - V * T * V**H ) * ( A_in ) = +*> ( B_out ) ( B_in ) ( B_in ) +*> = ( I - ( V1 ) * T * ( V1**H, V2**H ) ) * ( A_in ) +*> ( V2 ) ( B_in ) +*> On input: +*> +*> a) ( A_in ) consists of two block columns: +*> ( B_in ) +*> +*> ( A_in ) = (( A1_in ) ( A2_in )) = (( A1_in ) ( A2_in )) +*> ( B_in ) (( B1_in ) ( B2_in )) (( 0 ) ( B2_in )), +*> +*> where the column blocks are: +*> +*> ( A1_in ) is a K-by-K upper-triangular matrix stored in the +*> upper triangular part of the array A(1:K,1:K). +*> ( B1_in ) is an M-by-K rectangular ZERO matrix and not stored. +*> +*> ( A2_in ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_in ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> b) V = ( V1 ) +*> ( V2 ) +*> +*> where: +*> 1) if IDENT == 'I',V1 is a K-by-K identity matrix, not stored; +*> 2) if IDENT != 'I',V1 is a K-by-K unit lower-triangular matrix, +*> stored in the lower-triangular part of the array +*> A(1:K,1:K) (ones are not stored), +*> and V2 is an M-by-K rectangular stored the array B(1:M,1:K), +*> (because on input B1_in is a rectangular zero +*> matrix that is not stored and the space is +*> used to store V2). +*> +*> c) T is a K-by-K upper-triangular matrix stored +*> in the array T(1:K,1:K). +*> +*> On output: +*> +*> a) ( A_out ) consists of two block columns: +*> ( B_out ) +*> +*> ( A_out ) = (( A1_out ) ( A2_out )) +*> ( B_out ) (( B1_out ) ( B2_out )), +*> +*> where the column blocks are: +*> +*> ( A1_out ) is a K-by-K square matrix, or a K-by-K +*> upper-triangular matrix, if V1 is an +*> identity matrix. AiOut is stored in +*> the array A(1:K,1:K). +*> ( B1_out ) is an M-by-K rectangular matrix stored +*> in the array B(1:M,K:N). +*> +*> ( A2_out ) is a K-by-(N-K) rectangular matrix stored +*> in the array A(1:K,K+1:N). +*> ( B2_out ) is an M-by-(N-K) rectangular matrix stored +*> in the array B(1:M,K+1:N). +*> +*> +*> The operation above can be represented as the same operation +*> on each block column: +*> +*> ( A1_out ) := H * ( A1_in ) = ( I - V * T * V**H ) * ( A1_in ) +*> ( B1_out ) ( 0 ) ( 0 ) +*> +*> ( A2_out ) := H * ( A2_in ) = ( I - V * T * V**H ) * ( A2_in ) +*> ( B2_out ) ( B2_in ) ( B2_in ) +*> +*> If IDENT != 'I': +*> +*> The computation for column block 1: +*> +*> A1_out: = A1_in - V1*T*(V1**H)*A1_in +*> +*> B1_out: = - V2*T*(V1**H)*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - V1*T*( (V1**H)*A2_in + (V2**H)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( (V1**H)*A2_in + (V2**H)*B2_in ) +*> +*> If IDENT == 'I': +*> +*> The operation for column block 1: +*> +*> A1_out: = A1_in - V1*T*A1_in +*> +*> B1_out: = - V2*T*A1_in +*> +*> The computation for column block 2, which exists if N > K: +*> +*> A2_out: = A2_in - T*( A2_in + (V2**H)*B2_in ) +*> +*> B2_out: = B2_in - V2*T*( A2_in + (V2**H)*B2_in ) +*> +*> (2) Description of the Algorithmic Computation. +*> +*> In the first step, we compute column block 2, i.e. A2 and B2. +*> Here, we need to use the K-by-(N-K) rectangular workspace +*> matrix W2 that is of the same size as the matrix A2. +*> W2 is stored in the array WORK(1:K,1:(N-K)). +*> +*> In the second step, we compute column block 1, i.e. A1 and B1. +*> Here, we need to use the K-by-K square workspace matrix W1 +*> that is of the same size as the as the matrix A1. +*> W1 is stored in the array WORK(1:K,1:K). +*> +*> NOTE: Hence, in this routine, we need the workspace array WORK +*> only of size WORK(1:K,1:max(K,N-K)) so it can hold both W2 from +*> the first step and W1 from the second step. +*> +*> Case (A), when V1 is unit lower-triangular, i.e. IDENT != 'I', +*> more computations than in the Case (B). +*> +*> if( IDENT != 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(2) W2: = (V1**H) * W2 = (unit_lower_tr_of_(A1)**H) * W2 +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(2) W1: = (V1**H) * W1 = (unit_lower_tr_of_(A1)**H) * W1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6) square A1: = A1 - W1 +*> end if +*> end if +*> +*> Case (B), when V1 is an identity matrix, i.e. IDENT == 'I', +*> less computations than in the Case (A) +*> +*> if( IDENT == 'I' ) then +*> if ( N > K ) then +*> (First Step - column block 2) +*> col2_(1) W2: = A2 +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> col2_(7) A2: = A2 - W2 +*> else +*> (Second Step - column block 1) +*> col1_(1) W1: = A1 +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> col1_(6) upper-triangular_of_(A1): = A1 - W1 +*> end if +*> end if +*> +*> Combine these cases (A) and (B) together, this is the resulting +*> algorithm: +*> +*> if ( N > K ) then +*> +*> (First Step - column block 2) +*> +*> col2_(1) W2: = A2 +*> if( IDENT != 'I' ) then +*> col2_(2) W2: = (V1**H) * W2 +*> = (unit_lower_tr_of_(A1)**H) * W2 +*> end if +*> col2_(3) W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2] +*> col2_(4) W2: = T * W2 +*> col2_(5) B2: = B2 - V2 * W2 = B2 - B1 * W2 +*> if( IDENT != 'I' ) then +*> col2_(6) W2: = V1 * W2 = unit_lower_tr_of_(A1) * W2 +*> end if +*> col2_(7) A2: = A2 - W2 +*> +*> else +*> +*> (Second Step - column block 1) +*> +*> col1_(1) W1: = A1 +*> if( IDENT != 'I' ) then +*> col1_(2) W1: = (V1**H) * W1 +*> = (unit_lower_tr_of_(A1)**H) * W1 +*> end if +*> col1_(3) W1: = T * W1 +*> col1_(4) B1: = - V2 * W1 = - B1 * W1 +*> if( IDENT != 'I' ) then +*> col1_(5) square W1: = V1 * W1 = unit_lower_tr_of_(A1) * W1 +*> col1_(6_a) below_diag_of_(A1): = - below_diag_of_(W1) +*> end if +*> col1_(6_b) up_tr_of_(A1): = up_tr_of_(A1) - up_tr_of_(W1) +*> +*> end if +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFB_GETT( IDENT, M, N, K, T, LDT, A, LDA, B, LDB, + $ WORK, LDWORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER IDENT + INTEGER K, LDA, LDB, LDT, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTIDENT + INTEGER I, J +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LT.0 .OR. N.LE.0 .OR. K.EQ.0 .OR. K.GT.N ) + $ RETURN +* + LNOTIDENT = .NOT.LSAME( IDENT, 'I' ) +* +* ------------------------------------------------------------------ +* +* First Step. Computation of the Column Block 2: +* +* ( A2 ) := H * ( A2 ) +* ( B2 ) ( B2 ) +* +* ------------------------------------------------------------------ +* + IF( N.GT.K ) THEN +* +* col2_(1) Compute W2: = A2. Therefore, copy A2 = A(1:K, K+1:N) +* into W2=WORK(1:K, 1:N-K) column-by-column. +* + DO J = 1, N-K + CALL ZCOPY( K, A( 1, K+J ), 1, WORK( 1, J ), 1 ) + END DO + + IF( LNOTIDENT ) THEN +* +* col2_(2) Compute W2: = (V1**H) * W2 = (A1**H) * W2, +* V1 is not an identy matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored). +* +* + CALL ZTRMM( 'L', 'L', 'C', 'U', K, N-K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(3) Compute W2: = W2 + (V2**H) * B2 = W2 + (B1**H) * B2 +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL ZGEMM( 'C', 'N', K, N-K, M, CONE, B, LDB, + $ B( 1, K+1 ), LDB, CONE, WORK, LDWORK ) + END IF +* +* col2_(4) Compute W2: = T * W2, +* T is upper-triangular. +* + CALL ZTRMM( 'L', 'U', 'N', 'N', K, N-K, CONE, T, LDT, + $ WORK, LDWORK ) +* +* col2_(5) Compute B2: = B2 - V2 * W2 = B2 - B1 * W2, +* V2 stored in B1. +* + IF( M.GT.0 ) THEN + CALL ZGEMM( 'N', 'N', M, N-K, K, -CONE, B, LDB, + $ WORK, LDWORK, CONE, B( 1, K+1 ), LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col2_(6) Compute W2: = V1 * W2 = A1 * W2, +* V1 is not an identity matrix, but unit lower-triangular, +* V1 stored in A1 (diagonal ones are not stored). +* + CALL ZTRMM( 'L', 'L', 'N', 'U', K, N-K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col2_(7) Compute A2: = A2 - W2 = +* = A(1:K, K+1:N-K) - WORK(1:K, 1:N-K), +* column-by-column. +* + DO J = 1, N-K + DO I = 1, K + A( I, K+J ) = A( I, K+J ) - WORK( I, J ) + END DO + END DO +* + END IF +* +* ------------------------------------------------------------------ +* +* Second Step. Computation of the Column Block 1: +* +* ( A1 ) := H * ( A1 ) +* ( B1 ) ( 0 ) +* +* ------------------------------------------------------------------ +* +* col1_(1) Compute W1: = A1. Copy the upper-triangular +* A1 = A(1:K, 1:K) into the upper-triangular +* W1 = WORK(1:K, 1:K) column-by-column. +* + DO J = 1, K + CALL ZCOPY( J, A( 1, J ), 1, WORK( 1, J ), 1 ) + END DO +* +* Set the subdiagonal elements of W1 to zero column-by-column. +* + DO J = 1, K - 1 + DO I = J + 1, K + WORK( I, J ) = CZERO + END DO + END DO +* + IF( LNOTIDENT ) THEN +* +* col1_(2) Compute W1: = (V1**H) * W1 = (A1**H) * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL ZTRMM( 'L', 'L', 'C', 'U', K, K, CONE, A, LDA, + $ WORK, LDWORK ) + END IF +* +* col1_(3) Compute W1: = T * W1, +* T is upper-triangular, +* W1 is upper-triangular with zeroes below the diagonal. +* + CALL ZTRMM( 'L', 'U', 'N', 'N', K, K, CONE, T, LDT, + $ WORK, LDWORK ) +* +* col1_(4) Compute B1: = - V2 * W1 = - B1 * W1, +* V2 = B1, W1 is upper-triangular with zeroes below the diagonal. +* + IF( M.GT.0 ) THEN + CALL ZTRMM( 'R', 'U', 'N', 'N', M, K, -CONE, WORK, LDWORK, + $ B, LDB ) + END IF +* + IF( LNOTIDENT ) THEN +* +* col1_(5) Compute W1: = V1 * W1 = A1 * W1, +* V1 is not an identity matrix, but unit lower-triangular +* V1 stored in A1 (diagonal ones are not stored), +* W1 is upper-triangular on input with zeroes below the diagonal, +* and square on output. +* + CALL ZTRMM( 'L', 'L', 'N', 'U', K, K, CONE, A, LDA, + $ WORK, LDWORK ) +* +* col1_(6) Compute A1: = A1 - W1 = A(1:K, 1:K) - WORK(1:K, 1:K) +* column-by-column. A1 is upper-triangular on input. +* If IDENT, A1 is square on output, and W1 is square, +* if NOT IDENT, A1 is upper-triangular on output, +* W1 is upper-triangular. +* +* col1_(6)_a Compute elements of A1 below the diagonal. +* + DO J = 1, K - 1 + DO I = J + 1, K + A( I, J ) = - WORK( I, J ) + END DO + END DO +* + END IF +* +* col1_(6)_b Compute elements of A1 on and above the diagonal. +* + DO J = 1, K + DO I = 1, J + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + RETURN +* +* End of ZLARFB_GETT +* + END diff --git a/lapack-netlib/SRC/zungtsqr_row.f b/lapack-netlib/SRC/zungtsqr_row.f new file mode 100644 index 000000000..0d32ad6ce --- /dev/null +++ b/lapack-netlib/SRC/zungtsqr_row.f @@ -0,0 +1,380 @@ +*> \brief \b ZUNGTSQR_ROW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGTSQR_ROW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, +* $ LWORK, INFO ) +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGTSQR_ROW generates an M-by-N complex matrix Q_out with +*> orthonormal columns from the output of ZLATSQR. These N orthonormal +*> columns are the first N columns of a product of complex unitary +*> matrices Q(k)_in of order M, which are returned by ZLATSQR in +*> a special format. +*> +*> Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ). +*> +*> The input matrices Q(k)_in are stored in row and column blocks in A. +*> See the documentation of ZLATSQR for more details on the format of +*> Q(k)_in, where each Q(k)_in is represented by block Householder +*> transformations. This routine calls an auxiliary routine ZLARFB_GETT, +*> where the computation is performed on each individual block. The +*> algorithm first sweeps NB-sized column blocks from the right to left +*> starting in the bottom row block and continues to the top row block +*> (hence _ROW in the routine name). This sweep is in reverse order of +*> the order in which ZLATSQR generates the output blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size used by ZLATSQR to return +*> arrays A and T. MB > N. +*> (Note that if MB > M, then M is used instead of MB +*> as the row block size). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size used by ZLATSQR to return +*> arrays A and T. NB >= 1. +*> (Note that if NB > N, then N is used instead of NB +*> as the column block size). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> On entry: +*> +*> The elements on and above the diagonal are not used as +*> input. The elements below the diagonal represent the unit +*> lower-trapezoidal blocked matrix V computed by ZLATSQR +*> that defines the input matrices Q_in(k) (ones on the +*> diagonal are not stored). See ZLATSQR for more details. +*> +*> On exit: +*> +*> The array A contains an M-by-N orthonormal matrix Q_out, +*> i.e the columns of A are orthogonal unit vectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, +*> dimension (LDT, N * NIRB) +*> where NIRB = Number_of_input_row_blocks +*> = MAX( 1, CEIL((M-N)/(MB-N)) ) +*> Let NICB = Number_of_input_col_blocks +*> = CEIL(N/NB) +*> +*> The upper-triangular block reflectors used to define the +*> input matrices Q_in(k), k=(1:NIRB*NICB). The block +*> reflectors are stored in compact form in NIRB block +*> reflector sequences. Each of the NIRB block reflector +*> sequences is stored in a larger NB-by-N column block of T +*> and consists of NICB smaller NB-by-NB upper-triangular +*> column blocks. See ZLATSQR for more details on the format +*> of T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= max(1,min(NB,N)). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. +*> LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)), +*> where NBLOCAL=MIN(NB,N). +*> If LWORK = -1, then a workspace query is assumed. +*> The routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2020, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, LWORK, M, N, MB, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM, + $ LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB, + $ KB, KB_LAST, KNB, MB1 +* .. +* .. Local Arrays .. + COMPLEX*16 DUMMY( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL ZLARFB_GETT, ZLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = LWORK.EQ.-1 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + NBLOCAL = MIN( NB, N ) +* +* Determine the workspace size. +* + IF( INFO.EQ.0 ) THEN + LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) ) + END IF +* +* Handle error in the input parameters and handle the workspace query. +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGTSQR_ROW', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN + END IF +* +* (0) Set the upper-triangular part of the matrix A to zero and +* its diagonal elements to one. +* + CALL ZLASET('U', M, N, CZERO, CONE, A, LDA ) +* +* KB_LAST is the column index of the last column block reflector +* in the matrices T and V. +* + KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1 +* +* +* (1) Bottom-up loop over row blocks of A, except the top row block. +* NOTE: If MB>=M, then the loop is never executed. +* + IF ( MB.LT.M ) THEN +* +* MB2 is the row blocking size for the row blocks before the +* first top row block in the matrix A. IB is the row index for +* the row blocks in the matrix A before the first top row block. +* IB_BOTTOM is the row index for the last bottom row block +* in the matrix A. JB_T is the column index of the corresponding +* column block in the matrix T. +* +* Initialize variables. +* +* NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A +* including the first row block. +* + MB2 = MB - N + M_PLUS_ONE = M + 1 + ITMP = ( M - MB - 1 ) / MB2 + IB_BOTTOM = ITMP * MB2 + MB + 1 + NUM_ALL_ROW_BLOCKS = ITMP + 2 + JB_T = NUM_ALL_ROW_BLOCKS * N + 1 +* + DO IB = IB_BOTTOM, MB+1, -MB2 +* +* Determine the block size IMB for the current row block +* in the matrix A. +* + IMB = MIN( M_PLUS_ONE - IB, MB2 ) +* +* Determine the column index JB_T for the current column block +* in the matrix T. +* + JB_T = JB_T - N +* +* Apply column blocks of H in the row block from right to left. +* +* KB is the column index of the current column block reflector +* in the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + CALL ZLARFB_GETT( 'I', IMB, N-KB+1, KNB, + $ T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA, + $ A( IB, KB ), LDA, WORK, KNB ) +* + END DO +* + END DO +* + END IF +* +* (2) Top row block of A. +* NOTE: If MB>=M, then we have only one row block of A of size M +* and we work on the entire matrix A. +* + MB1 = MIN( MB, M ) +* +* Apply column blocks of H in the top row block from right to left. +* +* KB is the column index of the current block reflector in +* the matrices T and V. +* + DO KB = KB_LAST, 1, -NBLOCAL +* +* Determine the size of the current column block KNB in +* the matrices T and V. +* + KNB = MIN( NBLOCAL, N - KB + 1 ) +* + IF( MB1-KB-KNB+1.EQ.0 ) THEN +* +* In SLARFB_GETT parameters, when M=0, then the matrix B +* does not exist, hence we need to pass a dummy array +* reference DUMMY(1,1) to B with LDDUMMY=1. +* + CALL ZLARFB_GETT( 'N', 0, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ DUMMY( 1, 1 ), 1, WORK, KNB ) + ELSE + CALL ZLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB, + $ T( 1, KB ), LDT, A( KB, KB ), LDA, + $ A( KB+KNB, KB), LDA, WORK, KNB ) + + END IF +* + END DO +* + WORK( 1 ) = DCMPLX( LWORKOPT ) + RETURN +* +* End of ZUNGTSQR_ROW +* + END diff --git a/lapack-netlib/TESTING/LIN/CMakeLists.txt b/lapack-netlib/TESTING/LIN/CMakeLists.txt index 0d0bb5418..309ed7e77 100644 --- a/lapack-netlib/TESTING/LIN/CMakeLists.txt +++ b/lapack-netlib/TESTING/LIN/CMakeLists.txt @@ -40,7 +40,7 @@ set(SLINTST schkaa.f sgennd.f sqrt04.f sqrt05.f schkqrt.f serrqrt.f schkqrtp.f serrqrtp.f schklqt.f schklqtp.f schktsqr.f serrlqt.f serrlqtp.f serrtsqr.f stsqr01.f slqt04.f slqt05.f - schkorhr_col.f serrorhr_col.f sorhr_col01.f) + schkorhr_col.f serrorhr_col.f sorhr_col01.f sorhr_col02.f) if(USE_XBLAS) list(APPEND SLINTST sdrvgbx.f sdrvgex.f sdrvsyx.f sdrvpox.f @@ -96,7 +96,7 @@ set(CLINTST cchkaa.f cqrt04.f cqrt05.f cchkqrt.f cerrqrt.f cchkqrtp.f cerrqrtp.f cchklqt.f cchklqtp.f cchktsqr.f cerrlqt.f cerrlqtp.f cerrtsqr.f ctsqr01.f clqt04.f clqt05.f - cchkunhr_col.f cerrunhr_col.f cunhr_col01.f) + cchkunhr_col.f cerrunhr_col.f cunhr_col01.f cunhr_col02.f) if(USE_XBLAS) list(APPEND CLINTST cdrvgbx.f cdrvgex.f cdrvhex.f cdrvsyx.f cdrvpox.f @@ -142,7 +142,7 @@ set(DLINTST dchkaa.f dqrt04.f dqrt05.f dchkqrt.f derrqrt.f dchkqrtp.f derrqrtp.f dchklq.f dchklqt.f dchklqtp.f dchktsqr.f derrlqt.f derrlqtp.f derrtsqr.f dtsqr01.f dlqt04.f dlqt05.f - dchkorhr_col.f derrorhr_col.f dorhr_col01.f) + dchkorhr_col.f derrorhr_col.f dorhr_col01.f dorhr_col02.f) if(USE_XBLAS) list(APPEND DLINTST ddrvgbx.f ddrvgex.f ddrvsyx.f ddrvpox.f @@ -198,7 +198,7 @@ set(ZLINTST zchkaa.f zqrt04.f zqrt05.f zchkqrt.f zerrqrt.f zchkqrtp.f zerrqrtp.f zchklqt.f zchklqtp.f zchktsqr.f zerrlqt.f zerrlqtp.f zerrtsqr.f ztsqr01.f zlqt04.f zlqt05.f - zchkunhr_col.f zerrunhr_col.f zunhr_col01.f) + zchkunhr_col.f zerrunhr_col.f zunhr_col01.f zunhr_col02.f) if(USE_XBLAS) list(APPEND ZLINTST zdrvgbx.f zdrvgex.f zdrvhex.f zdrvsyx.f zdrvpox.f diff --git a/lapack-netlib/TESTING/LIN/Makefile b/lapack-netlib/TESTING/LIN/Makefile index 6e790aa93..674265816 100644 --- a/lapack-netlib/TESTING/LIN/Makefile +++ b/lapack-netlib/TESTING/LIN/Makefile @@ -74,7 +74,7 @@ SLINTST = schkaa.o \ sgennd.o sqrt04.o sqrt05.o schkqrt.o serrqrt.o schkqrtp.o serrqrtp.o \ schklqt.o schklqtp.o schktsqr.o \ serrlqt.o serrlqtp.o serrtsqr.o stsqr01.o slqt04.o slqt05.o \ - schkorhr_col.o serrorhr_col.o sorhr_col01.o + schkorhr_col.o serrorhr_col.o sorhr_col01.o sorhr_col02.o ifdef USEXBLAS SLINTST += sdrvgbx.o sdrvgex.o sdrvsyx.o sdrvpox.o \ @@ -123,7 +123,7 @@ CLINTST = cchkaa.o \ cqrt04.o cqrt05.o cchkqrt.o cerrqrt.o cchkqrtp.o cerrqrtp.o \ cchklqt.o cchklqtp.o cchktsqr.o \ cerrlqt.o cerrlqtp.o cerrtsqr.o ctsqr01.o clqt04.o clqt05.o \ - cchkunhr_col.o cerrunhr_col.o cunhr_col01.o + cchkunhr_col.o cerrunhr_col.o cunhr_col01.o cunhr_col02.o ifdef USEXBLAS CLINTST += cdrvgbx.o cdrvgex.o cdrvhex.o cdrvsyx.o cdrvpox.o \ @@ -167,7 +167,7 @@ DLINTST = dchkaa.o \ dqrt04.o dqrt05.o dchkqrt.o derrqrt.o dchkqrtp.o derrqrtp.o \ dchklq.o dchklqt.o dchklqtp.o dchktsqr.o \ derrlqt.o derrlqtp.o derrtsqr.o dtsqr01.o dlqt04.o dlqt05.o \ - dchkorhr_col.o derrorhr_col.o dorhr_col01.o + dchkorhr_col.o derrorhr_col.o dorhr_col01.o dorhr_col02.o ifdef USEXBLAS DLINTST += ddrvgbx.o ddrvgex.o ddrvsyx.o ddrvpox.o \ @@ -215,7 +215,7 @@ ZLINTST = zchkaa.o \ zqrt04.o zqrt05.o zchkqrt.o zerrqrt.o zchkqrtp.o zerrqrtp.o \ zchklqt.o zchklqtp.o zchktsqr.o \ zerrlqt.o zerrlqtp.o zerrtsqr.o ztsqr01.o zlqt04.o zlqt05.o \ - zchkunhr_col.o zerrunhr_col.o zunhr_col01.o + zchkunhr_col.o zerrunhr_col.o zunhr_col01.o zunhr_col02.o ifdef USEXBLAS ZLINTST += zdrvgbx.o zdrvgex.o zdrvhex.o zdrvsyx.o zdrvpox.o \ diff --git a/lapack-netlib/TESTING/LIN/cchkunhr_col.f b/lapack-netlib/TESTING/LIN/cchkunhr_col.f index 00077ddd9..0d6a9063d 100644 --- a/lapack-netlib/TESTING/LIN/cchkunhr_col.f +++ b/lapack-netlib/TESTING/LIN/cchkunhr_col.f @@ -24,9 +24,12 @@ *> *> \verbatim *> -*> CCHKUNHR_COL tests CUNHR_COL using CLATSQR and CGEMQRT. Therefore, CLATSQR -*> (used in CGEQR) and CGEMQRT (used in CGEMQR) have to be tested -*> before this test. +*> CCHKUNHR_COL tests: +*> 1) CUNGTSQR and CUNHR_COL using CLATSQR, CGEMQRT, +*> 2) CUNGTSQR_ROW and CUNHR_COL inside CGETSQRHRT +*> (which calls CLATSQR, CUNGTSQR_ROW and CUNHR_COL) using CGEMQRT. +*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR) +*> have to be tested before this test. *> *> \endverbatim * @@ -97,19 +100,16 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* *> \ingroup complex_lin * * ===================================================================== - SUBROUTINE CCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, - $ NBVAL, NOUT ) + SUBROUTINE CCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, + $ NNB, NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -135,10 +135,11 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAHD, ALASUM, CERRUNHR_COL, CUNHR_COL01 + EXTERNAL ALAHD, ALASUM, CERRUNHR_COL, CUNHR_COL01, + $ CUNHR_COL02 * .. * .. Intrinsic Functions .. - INTRINSIC MAX, MIN + INTRINSIC MAX, MIN * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -201,8 +202,8 @@ * * Test CUNHR_COL * - CALL CUNHR_COL01( M, N, MB1, NB1, NB2, - $ RESULT ) + CALL CUNHR_COL01( M, N, MB1, NB1, + $ NB2, RESULT ) * * Print information about the tests that did * not pass the threshold. @@ -226,12 +227,78 @@ END DO END DO * +* Do for each value of M in MVAL. +* + DO I = 1, NM + M = MVAL( I ) +* +* Do for each value of N in NVAL. +* + DO J = 1, NN + N = NVAL( J ) +* +* Only for M >= N +* + IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN +* +* Do for each possible value of MB1 +* + DO IMB1 = 1, NNB + MB1 = NBVAL( IMB1 ) +* +* Only for MB1 > N +* + IF ( MB1.GT.N ) THEN +* +* Do for each possible value of NB1 +* + DO INB1 = 1, NNB + NB1 = NBVAL( INB1 ) +* +* Do for each possible value of NB2 +* + DO INB2 = 1, NNB + NB2 = NBVAL( INB2 ) +* + IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN +* +* Test CUNHR_COL +* + CALL CUNHR_COL02( M, N, MB1, NB1, + $ NB2, RESULT ) +* +* Print information about the tests that did +* not pass the threshold. +* + DO T = 1, NTESTS + IF( RESULT( T ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 ) M, N, MB1, + $ NB1, NB2, T, RESULT( T ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + NTESTS + END IF + END DO + END DO + END IF + END DO + END IF + END DO + END DO +* * Print a summary of the results. * CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) * - 9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, - $ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) + 9999 FORMAT( 'CUNGTSQR and CUNHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) + 9998 FORMAT( 'CUNGTSQR_ROW and CUNHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) RETURN * * End of CCHKUNHR_COL diff --git a/lapack-netlib/TESTING/LIN/cunhr_col01.f b/lapack-netlib/TESTING/LIN/cunhr_col01.f index d760caba5..d77d60b1a 100644 --- a/lapack-netlib/TESTING/LIN/cunhr_col01.f +++ b/lapack-netlib/TESTING/LIN/cunhr_col01.f @@ -13,7 +13,7 @@ * .. Scalar Arguments .. * INTEGER M, N, MB1, NB1, NB2 * .. Return values .. -* REAL RESULT(6) +* DOUBLE PRECISION RESULT(6) * * *> \par Purpose: @@ -21,8 +21,8 @@ *> *> \verbatim *> -*> CUNHR_COL01 tests CUNHR_COL using CLATSQR, CGEMQRT and CUNGTSQR. -*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part CGEMQR), CUNGTSQR +*> CUNHR_COL01 tests CUNGTSQR and CUNHR_COL using CLATSQR, CGEMQRT. +*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR) *> have to be tested before this test. *> *> \endverbatim @@ -62,14 +62,46 @@ *> \verbatim *> RESULT is REAL array, dimension (6) *> Results of each of the six tests below. -*> ( C is a M-by-N random matrix, D is a N-by-M random matrix ) *> -*> RESULT(1) = | A - Q * R | / (eps * m * |A|) -*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) -*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) -*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) -*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) -*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m unitary Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in CGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using CGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using CGEMM. *> \endverbatim * * Authors: @@ -80,18 +112,15 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* -*> \ingroup complex16_lin +*> \ingroup complex_lin * * ===================================================================== SUBROUTINE CUNHR_COL01( M, N, MB1, NB1, NB2, RESULT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.9.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2019 * * .. Scalar Arguments .. INTEGER M, N, MB1, NB1, NB2 @@ -102,10 +131,10 @@ * * .. * .. Local allocatable arrays - COMPLEX, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + COMPLEX , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), $ WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ C(:,:), CF(:,:), D(:,:), DF(:,:) - REAL, ALLOCATABLE :: RWORK(:) + REAL , ALLOCATABLE :: RWORK(:) * * .. Parameters .. REAL ZERO @@ -218,7 +247,7 @@ * Copy the factor R into the array R. * SRNAMT = 'CLACPY' - CALL CLACPY( 'U', M, N, AF, M, R, M ) + CALL CLACPY( 'U', N, N, AF, M, R, M ) * * Reconstruct the orthogonal matrix Q. * @@ -240,7 +269,7 @@ * matrix S. * SRNAMT = 'CLACPY' - CALL CLACPY( 'U', M, N, R, M, AF, M ) + CALL CLACPY( 'U', N, N, R, M, AF, M ) * DO I = 1, N IF( DIAG( I ).EQ.-CONE ) THEN diff --git a/lapack-netlib/TESTING/LIN/cunhr_col02.f b/lapack-netlib/TESTING/LIN/cunhr_col02.f new file mode 100644 index 000000000..001f291da --- /dev/null +++ b/lapack-netlib/TESTING/LIN/cunhr_col02.f @@ -0,0 +1,381 @@ +*> \brief \b CUNHR_COL02 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CUNHR_COL02( M, N, MB1, NB1, NB2, RESULT ) +* +* .. Scalar Arguments .. +* INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. +* REAL RESULT(6) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNHR_COL02 tests CUNGTSQR_ROW and CUNHR_COL inside CGETSQRHRT +*> (which calls CLATSQR, CUNGTSQR_ROW and CUNHR_COL) using CGEMQRT. +*> Therefore, CLATSQR (part of CGEQR), CGEMQRT (part of CGEMQR) +*> have to be tested before this test. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> Number of rows in test matrix. +*> \endverbatim +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Number of columns in test matrix. +*> \endverbatim +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> Number of row in row block in an input test matrix. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> Number of columns in column block an input test matrix. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> Number of columns in column block in an output test matrix. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (6) +*> Results of each of the six tests below. +*> +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m unitary Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in CGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using CGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using CGEMM. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex_lin +* +* ===================================================================== + SUBROUTINE CUNHR_COL02( M, N, MB1, NB1, NB2, RESULT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. + REAL RESULT(6) +* +* ===================================================================== +* +* .. +* .. Local allocatable arrays + COMPLEX , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + $ WORK( : ), T1(:,:), T2(:,:), DIAG(:), + $ C(:,:), CF(:,:), D(:,:), DF(:,:) + REAL , ALLOCATABLE :: RWORK(:) +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL TESTZEROS + INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB + REAL ANORM, EPS, RESID, CNORM, DNORM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) + COMPLEX WORKQUERY( 1 ) +* .. +* .. External Functions .. + REAL SLAMCH, CLANGE, CLANSY + EXTERNAL SLAMCH, CLANGE, CLANSY +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLARNV, CLASET, CGETSQRHRT, + $ CSCAL, CGEMM, CGEMQRT, CHERK +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, REAL, MAX, MIN +* .. +* .. Scalars in Common .. + CHARACTER(LEN=32) SRNAMT +* .. +* .. Common blocks .. + COMMON / SRMNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEED / 1988, 1989, 1990, 1991 / +* +* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS +* + TESTZEROS = .FALSE. +* + EPS = SLAMCH( 'Epsilon' ) + K = MIN( M, N ) + L = MAX( M, N, 1) +* +* Dynamically allocate local arrays +* + ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L), + $ C(M,N), CF(M,N), + $ D(N,M), DF(N,M) ) +* +* Put random numbers into A and copy to AF +* + DO J = 1, N + CALL CLARNV( 2, ISEED, M, A( 1, J ) ) + END DO + IF( TESTZEROS ) THEN + IF( M.GE.4 ) THEN + DO J = 1, N + CALL CLARNV( 2, ISEED, M/2, A( M/4, J ) ) + END DO + END IF + END IF + CALL CLACPY( 'Full', M, N, A, M, AF, M ) +* +* Number of row blocks in CLATSQR +* + NRB = MAX( 1, CEILING( REAL( M - N ) / REAL( MB1 - N ) ) ) +* + ALLOCATE ( T1( NB1, N * NRB ) ) + ALLOCATE ( T2( NB2, N ) ) + ALLOCATE ( DIAG( N ) ) +* +* Begin determine LWORK for the array WORK and allocate memory. +* +* CGEMQRT requires NB2 to be bounded by N. +* + NB2_UB = MIN( NB2, N) +* +* + CALL CGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORKQUERY, -1, INFO ) +* + LWORK = INT( WORKQUERY( 1 ) ) +* +* In CGEMQRT, WORK is N*NB2_UB if SIDE = 'L', +* or M*NB2_UB if SIDE = 'R'. +* + LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M ) +* + ALLOCATE ( WORK( LWORK ) ) +* +* End allocate memory for WORK. +* +* +* Begin Householder reconstruction routines +* +* Factor the matrix A in the array AF. +* + SRNAMT = 'CGETSQRHRT' + CALL CGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORK, LWORK, INFO ) +* +* End Householder reconstruction routines. +* +* +* Generate the m-by-m matrix Q +* + CALL CLASET( 'Full', M, M, CZERO, CONE, Q, M ) +* + SRNAMT = 'CGEMQRT' + CALL CGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M, + $ WORK, INFO ) +* +* Copy R +* + CALL CLASET( 'Full', M, N, CZERO, CZERO, R, M ) +* + CALL CLACPY( 'Upper', M, N, AF, M, R, M ) +* +* TEST 1 +* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1) +* + CALL CGEMM( 'C', 'N', M, N, M, -CONE, Q, M, A, M, CONE, R, M ) +* + ANORM = CLANGE( '1', M, N, A, M, RWORK ) + RESID = CLANGE( '1', M, N, R, M, RWORK ) + IF( ANORM.GT.ZERO ) THEN + RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM ) + ELSE + RESULT( 1 ) = ZERO + END IF +* +* TEST 2 +* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2) +* + CALL CLASET( 'Full', M, M, CZERO, CONE, R, M ) + CALL CHERK( 'U', 'C', M, M, -CONE, Q, M, CONE, R, M ) + RESID = CLANSY( '1', 'Upper', M, R, M, RWORK ) + RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) ) +* +* Generate random m-by-n matrix C +* + DO J = 1, N + CALL CLARNV( 2, ISEED, M, C( 1, J ) ) + END DO + CNORM = CLANGE( '1', M, N, C, M, RWORK ) + CALL CLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as Q*C = CF +* + SRNAMT = 'CGEMQRT' + CALL CGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 3 +* Compute |CF - Q*C| / ( eps * m * |C| ) +* + CALL CGEMM( 'N', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M ) + RESID = CLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 3 ) = ZERO + END IF +* +* Copy C into CF again +* + CALL CLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as (Q**T)*C = CF +* + SRNAMT = 'CGEMQRT' + CALL CGEMQRT( 'L', 'C', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 4 +* Compute |CF - (Q**T)*C| / ( eps * m * |C|) +* + CALL CGEMM( 'C', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M ) + RESID = CLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 4 ) = ZERO + END IF +* +* Generate random n-by-m matrix D and a copy DF +* + DO J = 1, M + CALL CLARNV( 2, ISEED, N, D( 1, J ) ) + END DO + DNORM = CLANGE( '1', N, M, D, N, RWORK ) + CALL CLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*Q = DF +* + SRNAMT = 'CGEMQRT' + CALL CGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 5 +* Compute |DF - D*Q| / ( eps * m * |D| ) +* + CALL CGEMM( 'N', 'N', N, M, M, -CONE, D, N, Q, M, CONE, DF, N ) + RESID = CLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 5 ) = ZERO + END IF +* +* Copy D into DF again +* + CALL CLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*QT = DF +* + SRNAMT = 'CGEMQRT' + CALL CGEMQRT( 'R', 'C', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 6 +* Compute |DF - D*(Q**T)| / ( eps * m * |D| ) +* + CALL CGEMM( 'N', 'C', N, M, M, -CONE, D, N, Q, M, CONE, DF, N ) + RESID = CLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 6 ) = ZERO + END IF +* +* Deallocate all arrays +* + DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG, + $ C, D, CF, DF ) +* + RETURN +* +* End of CUNHR_COL02 +* + END diff --git a/lapack-netlib/TESTING/LIN/dchkorhr_col.f b/lapack-netlib/TESTING/LIN/dchkorhr_col.f index 3b3e421eb..0e2d44d8d 100644 --- a/lapack-netlib/TESTING/LIN/dchkorhr_col.f +++ b/lapack-netlib/TESTING/LIN/dchkorhr_col.f @@ -24,9 +24,12 @@ *> *> \verbatim *> -*> DCHKORHR_COL tests DORHR_COL using DLATSQR and DGEMQRT. Therefore, DLATSQR -*> (used in DGEQR) and DGEMQRT (used in DGEMQR) have to be tested -*> before this test. +*> DCHKORHR_COL tests: +*> 1) DORGTSQR and DORHR_COL using DLATSQR, DGEMQRT, +*> 2) DORGTSQR_ROW and DORHR_COL inside DGETSQRHRT +*> (which calls DLATSQR, DORGTSQR_ROW and DORHR_COL) using DGEMQRT. +*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR) +*> have to be tested before this test. *> *> \endverbatim * @@ -97,19 +100,16 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* *> \ingroup double_lin * * ===================================================================== - SUBROUTINE DCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, - $ NBVAL, NOUT ) + SUBROUTINE DCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, + $ NNB, NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -135,10 +135,11 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAHD, ALASUM, DERRORHR_COL, DORHR_COL01 + EXTERNAL ALAHD, ALASUM, DERRORHR_COL, DORHR_COL01, + $ DORHR_COL02 * .. * .. Intrinsic Functions .. - INTRINSIC MAX, MIN + INTRINSIC MAX, MIN * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -201,8 +202,8 @@ * * Test DORHR_COL * - CALL DORHR_COL01( M, N, MB1, NB1, NB2, - $ RESULT ) + CALL DORHR_COL01( M, N, MB1, NB1, + $ NB2, RESULT ) * * Print information about the tests that did * not pass the threshold. @@ -226,12 +227,78 @@ END DO END DO * +* Do for each value of M in MVAL. +* + DO I = 1, NM + M = MVAL( I ) +* +* Do for each value of N in NVAL. +* + DO J = 1, NN + N = NVAL( J ) +* +* Only for M >= N +* + IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN +* +* Do for each possible value of MB1 +* + DO IMB1 = 1, NNB + MB1 = NBVAL( IMB1 ) +* +* Only for MB1 > N +* + IF ( MB1.GT.N ) THEN +* +* Do for each possible value of NB1 +* + DO INB1 = 1, NNB + NB1 = NBVAL( INB1 ) +* +* Do for each possible value of NB2 +* + DO INB2 = 1, NNB + NB2 = NBVAL( INB2 ) +* + IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN +* +* Test DORHR_COL +* + CALL DORHR_COL02( M, N, MB1, NB1, + $ NB2, RESULT ) +* +* Print information about the tests that did +* not pass the threshold. +* + DO T = 1, NTESTS + IF( RESULT( T ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 ) M, N, MB1, + $ NB1, NB2, T, RESULT( T ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + NTESTS + END IF + END DO + END DO + END IF + END DO + END IF + END DO + END DO +* * Print a summary of the results. * CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) * - 9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, - $ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) + 9999 FORMAT( 'DORGTSQR and DORHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) + 9998 FORMAT( 'DORGTSQR_ROW and DORHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) RETURN * * End of DCHKORHR_COL diff --git a/lapack-netlib/TESTING/LIN/dorhr_col01.f b/lapack-netlib/TESTING/LIN/dorhr_col01.f index 3e48de37f..979255ca9 100644 --- a/lapack-netlib/TESTING/LIN/dorhr_col01.f +++ b/lapack-netlib/TESTING/LIN/dorhr_col01.f @@ -21,8 +21,8 @@ *> *> \verbatim *> -*> DORHR_COL01 tests DORHR_COL using DLATSQR, DGEMQRT and DORGTSQR. -*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part DGEMQR), DORGTSQR +*> DORHR_COL01 tests DORGTSQR and DORHR_COL using DLATSQR, DGEMQRT. +*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR) *> have to be tested before this test. *> *> \endverbatim @@ -62,14 +62,46 @@ *> \verbatim *> RESULT is DOUBLE PRECISION array, dimension (6) *> Results of each of the six tests below. -*> ( C is a M-by-N random matrix, D is a N-by-M random matrix ) *> -*> RESULT(1) = | A - Q * R | / (eps * m * |A|) -*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) -*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) -*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) -*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) -*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in ZGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using DGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using DGEMM. *> \endverbatim * * Authors: @@ -80,18 +112,15 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* -*> \ingroup single_lin +*> \ingroup double_lin * * ===================================================================== SUBROUTINE DORHR_COL01( M, N, MB1, NB1, NB2, RESULT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.9.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2019 * * .. Scalar Arguments .. INTEGER M, N, MB1, NB1, NB2 diff --git a/lapack-netlib/TESTING/LIN/dorhr_col02.f b/lapack-netlib/TESTING/LIN/dorhr_col02.f new file mode 100644 index 000000000..d4c438edb --- /dev/null +++ b/lapack-netlib/TESTING/LIN/dorhr_col02.f @@ -0,0 +1,377 @@ +*> \brief \b DORHR_COL02 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DORHR_COL02( M, N, MB1, NB1, NB2, RESULT ) +* +* .. Scalar Arguments .. +* INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. +* DOUBLE PRECISION RESULT(6) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORHR_COL02 tests DORGTSQR_ROW and DORHR_COL inside DGETSQRHRT +*> (which calls DLATSQR, DORGTSQR_ROW and DORHR_COL) using DGEMQRT. +*> Therefore, DLATSQR (part of DGEQR), DGEMQRT (part of DGEMQR) +*> have to be tested before this test. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> Number of rows in test matrix. +*> \endverbatim +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Number of columns in test matrix. +*> \endverbatim +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> Number of row in row block in an input test matrix. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> Number of columns in column block an input test matrix. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> Number of columns in column block in an output test matrix. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (6) +*> Results of each of the six tests below. +*> +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in ZGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using DGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using DGEMM. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup double_lin +* +* ===================================================================== + SUBROUTINE DORHR_COL02( M, N, MB1, NB1, NB2, RESULT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. + DOUBLE PRECISION RESULT(6) +* +* ===================================================================== +* +* .. +* .. Local allocatable arrays + DOUBLE PRECISION, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + $ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:), + $ C(:,:), CF(:,:), D(:,:), DF(:,:) +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL TESTZEROS + INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB + DOUBLE PRECISION ANORM, EPS, RESID, CNORM, DNORM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) + DOUBLE PRECISION WORKQUERY( 1 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANGE, DLANSY + EXTERNAL DLAMCH, DLANGE, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLARNV, DLASET, DGETSQRHRT, + $ DSCAL, DGEMM, DGEMQRT, DSYRK +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, DBLE, MAX, MIN +* .. +* .. Scalars in Common .. + CHARACTER(LEN=32) SRNAMT +* .. +* .. Common blocks .. + COMMON / SRMNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEED / 1988, 1989, 1990, 1991 / +* +* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS +* + TESTZEROS = .FALSE. +* + EPS = DLAMCH( 'Epsilon' ) + K = MIN( M, N ) + L = MAX( M, N, 1) +* +* Dynamically allocate local arrays +* + ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L), + $ C(M,N), CF(M,N), + $ D(N,M), DF(N,M) ) +* +* Put random numbers into A and copy to AF +* + DO J = 1, N + CALL DLARNV( 2, ISEED, M, A( 1, J ) ) + END DO + IF( TESTZEROS ) THEN + IF( M.GE.4 ) THEN + DO J = 1, N + CALL DLARNV( 2, ISEED, M/2, A( M/4, J ) ) + END DO + END IF + END IF + CALL DLACPY( 'Full', M, N, A, M, AF, M ) +* +* Number of row blocks in DLATSQR +* + NRB = MAX( 1, CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) ) +* + ALLOCATE ( T1( NB1, N * NRB ) ) + ALLOCATE ( T2( NB2, N ) ) + ALLOCATE ( DIAG( N ) ) +* +* Begin determine LWORK for the array WORK and allocate memory. +* +* DGEMQRT requires NB2 to be bounded by N. +* + NB2_UB = MIN( NB2, N) +* +* + CALL DGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORKQUERY, -1, INFO ) +* + LWORK = INT( WORKQUERY( 1 ) ) +* +* In DGEMQRT, WORK is N*NB2_UB if SIDE = 'L', +* or M*NB2_UB if SIDE = 'R'. +* + LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M ) +* + ALLOCATE ( WORK( LWORK ) ) +* +* End allocate memory for WORK. +* +* +* Begin Householder reconstruction routines +* +* Factor the matrix A in the array AF. +* + SRNAMT = 'DGETSQRHRT' + CALL DGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORK, LWORK, INFO ) +* +* End Householder reconstruction routines. +* +* +* Generate the m-by-m matrix Q +* + CALL DLASET( 'Full', M, M, ZERO, ONE, Q, M ) +* + SRNAMT = 'DGEMQRT' + CALL DGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M, + $ WORK, INFO ) +* +* Copy R +* + CALL DLASET( 'Full', M, N, ZERO, ZERO, R, M ) +* + CALL DLACPY( 'Upper', M, N, AF, M, R, M ) +* +* TEST 1 +* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1) +* + CALL DGEMM( 'T', 'N', M, N, M, -ONE, Q, M, A, M, ONE, R, M ) +* + ANORM = DLANGE( '1', M, N, A, M, RWORK ) + RESID = DLANGE( '1', M, N, R, M, RWORK ) + IF( ANORM.GT.ZERO ) THEN + RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM ) + ELSE + RESULT( 1 ) = ZERO + END IF +* +* TEST 2 +* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2) +* + CALL DLASET( 'Full', M, M, ZERO, ONE, R, M ) + CALL DSYRK( 'U', 'T', M, M, -ONE, Q, M, ONE, R, M ) + RESID = DLANSY( '1', 'Upper', M, R, M, RWORK ) + RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) ) +* +* Generate random m-by-n matrix C +* + DO J = 1, N + CALL DLARNV( 2, ISEED, M, C( 1, J ) ) + END DO + CNORM = DLANGE( '1', M, N, C, M, RWORK ) + CALL DLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as Q*C = CF +* + SRNAMT = 'DGEMQRT' + CALL DGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 3 +* Compute |CF - Q*C| / ( eps * m * |C| ) +* + CALL DGEMM( 'N', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M ) + RESID = DLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 3 ) = ZERO + END IF +* +* Copy C into CF again +* + CALL DLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as (Q**T)*C = CF +* + SRNAMT = 'DGEMQRT' + CALL DGEMQRT( 'L', 'T', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 4 +* Compute |CF - (Q**T)*C| / ( eps * m * |C|) +* + CALL DGEMM( 'T', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M ) + RESID = DLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 4 ) = ZERO + END IF +* +* Generate random n-by-m matrix D and a copy DF +* + DO J = 1, M + CALL DLARNV( 2, ISEED, N, D( 1, J ) ) + END DO + DNORM = DLANGE( '1', N, M, D, N, RWORK ) + CALL DLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*Q = DF +* + SRNAMT = 'DGEMQRT' + CALL DGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 5 +* Compute |DF - D*Q| / ( eps * m * |D| ) +* + CALL DGEMM( 'N', 'N', N, M, M, -ONE, D, N, Q, M, ONE, DF, N ) + RESID = DLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 5 ) = ZERO + END IF +* +* Copy D into DF again +* + CALL DLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*QT = DF +* + SRNAMT = 'DGEMQRT' + CALL DGEMQRT( 'R', 'T', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 6 +* Compute |DF - D*(Q**T)| / ( eps * m * |D| ) +* + CALL DGEMM( 'N', 'T', N, M, M, -ONE, D, N, Q, M, ONE, DF, N ) + RESID = DLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 6 ) = ZERO + END IF +* +* Deallocate all arrays +* + DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG, + $ C, D, CF, DF ) +* + RETURN +* +* End of DORHR_COL02 +* + END diff --git a/lapack-netlib/TESTING/LIN/schkorhr_col.f b/lapack-netlib/TESTING/LIN/schkorhr_col.f index cf6d2d323..f61b74902 100644 --- a/lapack-netlib/TESTING/LIN/schkorhr_col.f +++ b/lapack-netlib/TESTING/LIN/schkorhr_col.f @@ -24,8 +24,11 @@ *> *> \verbatim *> -*> SCHKORHR_COL tests SORHR_COL using SLATSQR, SGEMQRT and SORGTSQR. -*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part SGEMQR), SORGTSQR +*> SCHKORHR_COL tests: +*> 1) SORGTSQR and SORHR_COL using SLATSQR, SGEMQRT, +*> 2) SORGTSQR_ROW and SORHR_COL inside DGETSQRHRT +*> (which calls SLATSQR, SORGTSQR_ROW and SORHR_COL) using SGEMQRT. +*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR) *> have to be tested before this test. *> *> \endverbatim @@ -97,19 +100,16 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* -*> \ingroup sigle_lin +*> \ingroup single_lin * * ===================================================================== - SUBROUTINE SCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, - $ NBVAL, NOUT ) + SUBROUTINE SCHKORHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, + $ NNB, NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.9.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* June 2019 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -135,7 +135,8 @@ REAL RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAHD, ALASUM, SERRORHR_COL, SORHR_COL01 + EXTERNAL ALAHD, ALASUM, SERRORHR_COL, SORHR_COL01, + $ SORHR_COL02 * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -201,8 +202,8 @@ * * Test SORHR_COL * - CALL SORHR_COL01( M, N, MB1, NB1, NB2, - $ RESULT ) + CALL SORHR_COL01( M, N, MB1, NB1, + $ NB2, RESULT ) * * Print information about the tests that did * not pass the threshold. @@ -226,12 +227,78 @@ END DO END DO * +* Do for each value of M in MVAL. +* + DO I = 1, NM + M = MVAL( I ) +* +* Do for each value of N in NVAL. +* + DO J = 1, NN + N = NVAL( J ) +* +* Only for M >= N +* + IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN +* +* Do for each possible value of MB1 +* + DO IMB1 = 1, NNB + MB1 = NBVAL( IMB1 ) +* +* Only for MB1 > N +* + IF ( MB1.GT.N ) THEN +* +* Do for each possible value of NB1 +* + DO INB1 = 1, NNB + NB1 = NBVAL( INB1 ) +* +* Do for each possible value of NB2 +* + DO INB2 = 1, NNB + NB2 = NBVAL( INB2 ) +* + IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN +* +* Test SORHR_COL +* + CALL SORHR_COL02( M, N, MB1, NB1, + $ NB2, RESULT ) +* +* Print information about the tests that did +* not pass the threshold. +* + DO T = 1, NTESTS + IF( RESULT( T ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 ) M, N, MB1, + $ NB1, NB2, T, RESULT( T ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + NTESTS + END IF + END DO + END DO + END IF + END DO + END IF + END DO + END DO +* * Print a summary of the results. * CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) * - 9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, - $ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) + 9999 FORMAT( 'SORGTSQR and SORHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) + 9998 FORMAT( 'SORGTSQR_ROW and SORHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) RETURN * * End of SCHKORHR_COL diff --git a/lapack-netlib/TESTING/LIN/sorhr_col01.f b/lapack-netlib/TESTING/LIN/sorhr_col01.f index 02429041b..dcc2c1cae 100644 --- a/lapack-netlib/TESTING/LIN/sorhr_col01.f +++ b/lapack-netlib/TESTING/LIN/sorhr_col01.f @@ -8,12 +8,12 @@ * Definition: * =========== * -* SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT) +* SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT ) * * .. Scalar Arguments .. * INTEGER M, N, MB1, NB1, NB2 * .. Return values .. -* REAL RESULT(6) +* REAL RESULT(6) * * *> \par Purpose: @@ -21,8 +21,8 @@ *> *> \verbatim *> -*> SORHR_COL01 tests SORHR_COL using SLATSQR, SGEMQRT and SORGTSQR. -*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part SGEMQR), SORGTSQR +*> SORHR_COL01 tests SORGTSQR and SORHR_COL using SLATSQR, SGEMQRT. +*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR) *> have to be tested before this test. *> *> \endverbatim @@ -62,14 +62,46 @@ *> \verbatim *> RESULT is REAL array, dimension (6) *> Results of each of the six tests below. -*> ( C is a M-by-N random matrix, D is a N-by-M random matrix ) *> -*> RESULT(1) = | A - Q * R | / (eps * m * |A|) -*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) -*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) -*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) -*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) -*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in SGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using SGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using SGEMM. *> \endverbatim * * Authors: @@ -80,18 +112,15 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* *> \ingroup single_lin * * ===================================================================== SUBROUTINE SORHR_COL01( M, N, MB1, NB1, NB2, RESULT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.9.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2019 * * .. Scalar Arguments .. INTEGER M, N, MB1, NB1, NB2 @@ -102,7 +131,7 @@ * * .. * .. Local allocatable arrays - REAL, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + REAL , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), $ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ C(:,:), CF(:,:), D(:,:), DF(:,:) * @@ -128,7 +157,7 @@ $ SORGTSQR, SSCAL, SGEMM, SGEMQRT, SSYRK * .. * .. Intrinsic Functions .. - INTRINSIC CEILING, MAX, MIN, REAL + INTRINSIC CEILING, REAL, MAX, MIN * .. * .. Scalars in Common .. CHARACTER(LEN=32) SRNAMT @@ -230,7 +259,7 @@ * * Compute the factor R_hr corresponding to the Householder * reconstructed Q_hr and place it in the upper triangle of AF to -* match the Q storage format in DGEQRT. R_hr = R_tsqr * S, +* match the Q storage format in SGEQRT. R_hr = R_tsqr * S, * this means changing the sign of I-th row of the matrix R_tsqr * according to sign of of I-th diagonal element DIAG(I) of the * matrix S. diff --git a/lapack-netlib/TESTING/LIN/sorhr_col02.f b/lapack-netlib/TESTING/LIN/sorhr_col02.f new file mode 100644 index 000000000..1cbe40577 --- /dev/null +++ b/lapack-netlib/TESTING/LIN/sorhr_col02.f @@ -0,0 +1,376 @@ +*> \brief \b SORHR_COL02 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SORHR_COL02( M, N, MB1, NB1, NB2, RESULT ) +* +* .. Scalar Arguments .. +* INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. +* REAL RESULT(6) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORHR_COL02 tests SORGTSQR_ROW and SORHR_COL inside SGETSQRHRT +*> (which calls SLATSQR, SORGTSQR_ROW and SORHR_COL) using SGEMQRT. +*> Therefore, SLATSQR (part of SGEQR), SGEMQRT (part of SGEMQR) +*> have to be tested before this test. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> Number of rows in test matrix. +*> \endverbatim +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Number of columns in test matrix. +*> \endverbatim +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> Number of row in row block in an input test matrix. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> Number of columns in column block an input test matrix. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> Number of columns in column block in an output test matrix. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (6) +*> Results of each of the six tests below. +*> +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m orthogonal Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in SGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m orthogonal matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using SGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using SGEMM. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup single_lin +* +* ===================================================================== + SUBROUTINE SORHR_COL02( M, N, MB1, NB1, NB2, RESULT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. + REAL RESULT(6) +* +* ===================================================================== +* +* .. +* .. Local allocatable arrays + REAL , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + $ RWORK(:), WORK( : ), T1(:,:), T2(:,:), DIAG(:), + $ C(:,:), CF(:,:), D(:,:), DF(:,:) +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL TESTZEROS + INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB + REAL ANORM, EPS, RESID, CNORM, DNORM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) + REAL WORKQUERY( 1 ) +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE, SLANSY + EXTERNAL SLAMCH, SLANGE, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLARNV, SLASET, SGETSQRHRT, + $ SSCAL, SGEMM, SGEMQRT, SSYRK +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, REAL, MAX, MIN +* .. +* .. Scalars in Common .. + CHARACTER(LEN=32) SRNAMT +* .. +* .. Common blocks .. + COMMON / SRMNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEED / 1988, 1989, 1990, 1991 / +* +* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS +* + TESTZEROS = .FALSE. +* + EPS = SLAMCH( 'Epsilon' ) + K = MIN( M, N ) + L = MAX( M, N, 1) +* +* Dynamically allocate local arrays +* + ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L), + $ C(M,N), CF(M,N), + $ D(N,M), DF(N,M) ) +* +* Put random numbers into A and copy to AF +* + DO J = 1, N + CALL SLARNV( 2, ISEED, M, A( 1, J ) ) + END DO + IF( TESTZEROS ) THEN + IF( M.GE.4 ) THEN + DO J = 1, N + CALL SLARNV( 2, ISEED, M/2, A( M/4, J ) ) + END DO + END IF + END IF + CALL SLACPY( 'Full', M, N, A, M, AF, M ) +* +* Number of row blocks in SLATSQR +* + NRB = MAX( 1, CEILING( REAL( M - N ) / REAL( MB1 - N ) ) ) +* + ALLOCATE ( T1( NB1, N * NRB ) ) + ALLOCATE ( T2( NB2, N ) ) + ALLOCATE ( DIAG( N ) ) +* +* Begin determine LWORK for the array WORK and allocate memory. +* +* SGEMQRT requires NB2 to be bounded by N. +* + NB2_UB = MIN( NB2, N) +* + CALL SGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORKQUERY, -1, INFO ) +* + LWORK = INT( WORKQUERY( 1 ) ) +* +* In SGEMQRT, WORK is N*NB2_UB if SIDE = 'L', +* or M*NB2_UB if SIDE = 'R'. +* + LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M ) +* + ALLOCATE ( WORK( LWORK ) ) +* +* End allocate memory for WORK. +* +* +* Begin Householder reconstruction routines +* +* Factor the matrix A in the array AF. +* + SRNAMT = 'SGETSQRHRT' + CALL SGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORK, LWORK, INFO ) +* +* End Householder reconstruction routines. +* +* +* Generate the m-by-m matrix Q +* + CALL SLASET( 'Full', M, M, ZERO, ONE, Q, M ) +* + SRNAMT = 'SGEMQRT' + CALL SGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M, + $ WORK, INFO ) +* +* Copy R +* + CALL SLASET( 'Full', M, N, ZERO, ZERO, R, M ) +* + CALL SLACPY( 'Upper', M, N, AF, M, R, M ) +* +* TEST 1 +* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1) +* + CALL SGEMM( 'T', 'N', M, N, M, -ONE, Q, M, A, M, ONE, R, M ) +* + ANORM = SLANGE( '1', M, N, A, M, RWORK ) + RESID = SLANGE( '1', M, N, R, M, RWORK ) + IF( ANORM.GT.ZERO ) THEN + RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM ) + ELSE + RESULT( 1 ) = ZERO + END IF +* +* TEST 2 +* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2) +* + CALL SLASET( 'Full', M, M, ZERO, ONE, R, M ) + CALL SSYRK( 'U', 'T', M, M, -ONE, Q, M, ONE, R, M ) + RESID = SLANSY( '1', 'Upper', M, R, M, RWORK ) + RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) ) +* +* Generate random m-by-n matrix C +* + DO J = 1, N + CALL SLARNV( 2, ISEED, M, C( 1, J ) ) + END DO + CNORM = SLANGE( '1', M, N, C, M, RWORK ) + CALL SLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as Q*C = CF +* + SRNAMT = 'SGEMQRT' + CALL SGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 3 +* Compute |CF - Q*C| / ( eps * m * |C| ) +* + CALL SGEMM( 'N', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M ) + RESID = SLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 3 ) = ZERO + END IF +* +* Copy C into CF again +* + CALL SLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as (Q**T)*C = CF +* + SRNAMT = 'SGEMQRT' + CALL SGEMQRT( 'L', 'T', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 4 +* Compute |CF - (Q**T)*C| / ( eps * m * |C|) +* + CALL SGEMM( 'T', 'N', M, N, M, -ONE, Q, M, C, M, ONE, CF, M ) + RESID = SLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 4 ) = ZERO + END IF +* +* Generate random n-by-m matrix D and a copy DF +* + DO J = 1, M + CALL SLARNV( 2, ISEED, N, D( 1, J ) ) + END DO + DNORM = SLANGE( '1', N, M, D, N, RWORK ) + CALL SLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*Q = DF +* + SRNAMT = 'SGEMQRT' + CALL SGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 5 +* Compute |DF - D*Q| / ( eps * m * |D| ) +* + CALL SGEMM( 'N', 'N', N, M, M, -ONE, D, N, Q, M, ONE, DF, N ) + RESID = SLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 5 ) = ZERO + END IF +* +* Copy D into DF again +* + CALL SLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*QT = DF +* + SRNAMT = 'SGEMQRT' + CALL SGEMQRT( 'R', 'T', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 6 +* Compute |DF - D*(Q**T)| / ( eps * m * |D| ) +* + CALL SGEMM( 'N', 'T', N, M, M, -ONE, D, N, Q, M, ONE, DF, N ) + RESID = SLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 6 ) = ZERO + END IF +* +* Deallocate all arrays +* + DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG, + $ C, D, CF, DF ) +* + RETURN +* +* End of SORHR_COL02 +* + END diff --git a/lapack-netlib/TESTING/LIN/zchkunhr_col.f b/lapack-netlib/TESTING/LIN/zchkunhr_col.f index ef8f8bcc4..395ea178a 100644 --- a/lapack-netlib/TESTING/LIN/zchkunhr_col.f +++ b/lapack-netlib/TESTING/LIN/zchkunhr_col.f @@ -24,9 +24,12 @@ *> *> \verbatim *> -*> ZCHKUNHR_COL tests ZUNHR_COL using ZLATSQR and ZGEMQRT. Therefore, ZLATSQR -*> (used in ZGEQR) and ZGEMQRT (used in ZGEMQR) have to be tested -*> before this test. +*> ZCHKUNHR_COL tests: +*> 1) ZUNGTSQR and ZUNHR_COL using ZLATSQR, ZGEMQRT, +*> 2) ZUNGTSQR_ROW and ZUNHR_COL inside ZGETSQRHRT +*> (which calls ZLATSQR, ZUNGTSQR_ROW and ZUNHR_COL) using ZGEMQRT. +*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR) +*> have to be tested before this test. *> *> \endverbatim * @@ -97,19 +100,16 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* *> \ingroup complex16_lin * * ===================================================================== - SUBROUTINE ZCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, NNB, - $ NBVAL, NOUT ) + SUBROUTINE ZCHKUNHR_COL( THRESH, TSTERR, NM, MVAL, NN, NVAL, + $ NNB, NBVAL, NOUT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* December 2016 * * .. Scalar Arguments .. LOGICAL TSTERR @@ -135,10 +135,11 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAHD, ALASUM, ZERRUNHR_COL, ZUNHR_COL01 + EXTERNAL ALAHD, ALASUM, ZERRUNHR_COL, ZUNHR_COL01, + $ ZUNHR_COL02 * .. * .. Intrinsic Functions .. - INTRINSIC MAX, MIN + INTRINSIC MAX, MIN * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -201,8 +202,8 @@ * * Test ZUNHR_COL * - CALL ZUNHR_COL01( M, N, MB1, NB1, NB2, - $ RESULT ) + CALL ZUNHR_COL01( M, N, MB1, NB1, + $ NB2, RESULT ) * * Print information about the tests that did * not pass the threshold. @@ -226,12 +227,78 @@ END DO END DO * +* Do for each value of M in MVAL. +* + DO I = 1, NM + M = MVAL( I ) +* +* Do for each value of N in NVAL. +* + DO J = 1, NN + N = NVAL( J ) +* +* Only for M >= N +* + IF ( MIN( M, N ).GT.0 .AND. M.GE.N ) THEN +* +* Do for each possible value of MB1 +* + DO IMB1 = 1, NNB + MB1 = NBVAL( IMB1 ) +* +* Only for MB1 > N +* + IF ( MB1.GT.N ) THEN +* +* Do for each possible value of NB1 +* + DO INB1 = 1, NNB + NB1 = NBVAL( INB1 ) +* +* Do for each possible value of NB2 +* + DO INB2 = 1, NNB + NB2 = NBVAL( INB2 ) +* + IF( NB1.GT.0 .AND. NB2.GT.0 ) THEN +* +* Test ZUNHR_COL +* + CALL ZUNHR_COL02( M, N, MB1, NB1, + $ NB2, RESULT ) +* +* Print information about the tests that did +* not pass the threshold. +* + DO T = 1, NTESTS + IF( RESULT( T ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9998 ) M, N, MB1, + $ NB1, NB2, T, RESULT( T ) + NFAIL = NFAIL + 1 + END IF + END DO + NRUN = NRUN + NTESTS + END IF + END DO + END DO + END IF + END DO + END IF + END DO + END DO +* * Print a summary of the results. * CALL ALASUM( PATH, NOUT, NFAIL, NRUN, NERRS ) * - 9999 FORMAT( 'M=', I5, ', N=', I5, ', MB1=', I5, - $ ', NB1=', I5, ', NB2=', I5,' test(', I2, ')=', G12.5 ) + 9999 FORMAT( 'ZUNGTSQR and ZUNHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) + 9998 FORMAT( 'ZUNGTSQR_ROW and ZUNHR_COL: M=', I5, ', N=', I5, + $ ', MB1=', I5, ', NB1=', I5, ', NB2=', I5, + $ ' test(', I2, ')=', G12.5 ) RETURN * * End of ZCHKUNHR_COL diff --git a/lapack-netlib/TESTING/LIN/zunhr_col01.f b/lapack-netlib/TESTING/LIN/zunhr_col01.f index 9fb3bf352..b7590a8ea 100644 --- a/lapack-netlib/TESTING/LIN/zunhr_col01.f +++ b/lapack-netlib/TESTING/LIN/zunhr_col01.f @@ -21,8 +21,8 @@ *> *> \verbatim *> -*> ZUNHR_COL01 tests ZUNHR_COL using ZLATSQR, ZGEMQRT and ZUNGTSQR. -*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part ZGEMQR), ZUNGTSQR +*> ZUNHR_COL01 tests ZUNGTSQR and ZUNHR_COL using ZLATSQR, ZGEMQRT. +*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR) *> have to be tested before this test. *> *> \endverbatim @@ -62,14 +62,46 @@ *> \verbatim *> RESULT is DOUBLE PRECISION array, dimension (6) *> Results of each of the six tests below. -*> ( C is a M-by-N random matrix, D is a N-by-M random matrix ) *> -*> RESULT(1) = | A - Q * R | / (eps * m * |A|) -*> RESULT(2) = | I - (Q**H) * Q | / (eps * m ) -*> RESULT(3) = | Q * C - Q * C | / (eps * m * |C|) -*> RESULT(4) = | (Q**H) * C - (Q**H) * C | / (eps * m * |C|) -*> RESULT(5) = | (D * Q) - D * Q | / (eps * m * |D|) -*> RESULT(6) = | D * (Q**H) - D * (Q**H) | / (eps * m * |D|) +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m unitary Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in ZGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using ZGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using ZGEMM. *> \endverbatim * * Authors: @@ -80,18 +112,15 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \date November 2019 -* *> \ingroup complex16_lin * * ===================================================================== SUBROUTINE ZUNHR_COL01( M, N, MB1, NB1, NB2, RESULT ) IMPLICIT NONE * -* -- LAPACK test routine (version 3.9.0) -- +* -- LAPACK test routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- -* November 2019 * * .. Scalar Arguments .. INTEGER M, N, MB1, NB1, NB2 @@ -102,7 +131,7 @@ * * .. * .. Local allocatable arrays - COMPLEX*16, ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + COMPLEX*16 , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), $ WORK( : ), T1(:,:), T2(:,:), DIAG(:), $ C(:,:), CF(:,:), D(:,:), DF(:,:) DOUBLE PRECISION, ALLOCATABLE :: RWORK(:) @@ -218,7 +247,7 @@ * Copy the factor R into the array R. * SRNAMT = 'ZLACPY' - CALL ZLACPY( 'U', M, N, AF, M, R, M ) + CALL ZLACPY( 'U', N, N, AF, M, R, M ) * * Reconstruct the orthogonal matrix Q. * @@ -240,7 +269,7 @@ * matrix S. * SRNAMT = 'ZLACPY' - CALL ZLACPY( 'U', M, N, R, M, AF, M ) + CALL ZLACPY( 'U', N, N, R, M, AF, M ) * DO I = 1, N IF( DIAG( I ).EQ.-CONE ) THEN diff --git a/lapack-netlib/TESTING/LIN/zunhr_col02.f b/lapack-netlib/TESTING/LIN/zunhr_col02.f new file mode 100644 index 000000000..c6e7f80cd --- /dev/null +++ b/lapack-netlib/TESTING/LIN/zunhr_col02.f @@ -0,0 +1,381 @@ +*> \brief \b ZUNHR_COL02 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZUNHR_COL02( M, N, MB1, NB1, NB2, RESULT ) +* +* .. Scalar Arguments .. +* INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. +* DOUBLE PRECISION RESULT(6) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNHR_COL02 tests ZUNGTSQR_ROW and ZUNHR_COL inside ZGETSQRHRT +*> (which calls ZLATSQR, ZUNGTSQR_ROW and ZUNHR_COL) using ZGEMQRT. +*> Therefore, ZLATSQR (part of ZGEQR), ZGEMQRT (part of ZGEMQR) +*> have to be tested before this test. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> Number of rows in test matrix. +*> \endverbatim +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Number of columns in test matrix. +*> \endverbatim +*> \param[in] MB1 +*> \verbatim +*> MB1 is INTEGER +*> Number of row in row block in an input test matrix. +*> \endverbatim +*> +*> \param[in] NB1 +*> \verbatim +*> NB1 is INTEGER +*> Number of columns in column block an input test matrix. +*> \endverbatim +*> +*> \param[in] NB2 +*> \verbatim +*> NB2 is INTEGER +*> Number of columns in column block in an output test matrix. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (6) +*> Results of each of the six tests below. +*> +*> A is a m-by-n test input matrix to be factored. +*> so that A = Q_gr * ( R ) +*> ( 0 ), +*> +*> Q_qr is an implicit m-by-m unitary Q matrix, the result +*> of factorization in blocked WY-representation, +*> stored in ZGEQRT output format. +*> +*> R is a n-by-n upper-triangular matrix, +*> +*> 0 is a (m-n)-by-n zero matrix, +*> +*> Q is an explicit m-by-m unitary matrix Q = Q_gr * I +*> +*> C is an m-by-n random matrix, +*> +*> D is an n-by-m random matrix. +*> +*> The six tests are: +*> +*> RESULT(1) = |R - (Q**H) * A| / ( eps * m * |A| ) +*> is equivalent to test for | A - Q * R | / (eps * m * |A|), +*> +*> RESULT(2) = |I - (Q**H) * Q| / ( eps * m ), +*> +*> RESULT(3) = | Q_qr * C - Q * C | / (eps * m * |C|), +*> +*> RESULT(4) = | (Q_gr**H) * C - (Q**H) * C | / (eps * m * |C|) +*> +*> RESULT(5) = | D * Q_qr - D * Q | / (eps * m * |D|) +*> +*> RESULT(6) = | D * (Q_qr**H) - D * (Q**H) | / (eps * m * |D|), +*> +*> where: +*> Q_qr * C, (Q_gr**H) * C, D * Q_qr, D * (Q_qr**H) are +*> computed using ZGEMQRT, +*> +*> Q * C, (Q**H) * C, D * Q, D * (Q**H) are +*> computed using ZGEMM. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16_lin +* +* ===================================================================== + SUBROUTINE ZUNHR_COL02( M, N, MB1, NB1, NB2, RESULT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER M, N, MB1, NB1, NB2 +* .. Return values .. + DOUBLE PRECISION RESULT(6) +* +* ===================================================================== +* +* .. +* .. Local allocatable arrays + COMPLEX*16 , ALLOCATABLE :: A(:,:), AF(:,:), Q(:,:), R(:,:), + $ WORK( : ), T1(:,:), T2(:,:), DIAG(:), + $ C(:,:), CF(:,:), D(:,:), DF(:,:) + DOUBLE PRECISION, ALLOCATABLE :: RWORK(:) +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL TESTZEROS + INTEGER INFO, J, K, L, LWORK, NB2_UB, NRB + DOUBLE PRECISION ANORM, EPS, RESID, CNORM, DNORM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) + COMPLEX*16 WORKQUERY( 1 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, ZLANGE, ZLANSY + EXTERNAL DLAMCH, ZLANGE, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL ZLACPY, ZLARNV, ZLASET, ZGETSQRHRT, + $ ZSCAL, ZGEMM, ZGEMQRT, ZHERK +* .. +* .. Intrinsic Functions .. + INTRINSIC CEILING, DBLE, MAX, MIN +* .. +* .. Scalars in Common .. + CHARACTER(LEN=32) SRNAMT +* .. +* .. Common blocks .. + COMMON / SRMNAMC / SRNAMT +* .. +* .. Data statements .. + DATA ISEED / 1988, 1989, 1990, 1991 / +* +* TEST MATRICES WITH HALF OF MATRIX BEING ZEROS +* + TESTZEROS = .FALSE. +* + EPS = DLAMCH( 'Epsilon' ) + K = MIN( M, N ) + L = MAX( M, N, 1) +* +* Dynamically allocate local arrays +* + ALLOCATE ( A(M,N), AF(M,N), Q(L,L), R(M,L), RWORK(L), + $ C(M,N), CF(M,N), + $ D(N,M), DF(N,M) ) +* +* Put random numbers into A and copy to AF +* + DO J = 1, N + CALL ZLARNV( 2, ISEED, M, A( 1, J ) ) + END DO + IF( TESTZEROS ) THEN + IF( M.GE.4 ) THEN + DO J = 1, N + CALL ZLARNV( 2, ISEED, M/2, A( M/4, J ) ) + END DO + END IF + END IF + CALL ZLACPY( 'Full', M, N, A, M, AF, M ) +* +* Number of row blocks in ZLATSQR +* + NRB = MAX( 1, CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) ) +* + ALLOCATE ( T1( NB1, N * NRB ) ) + ALLOCATE ( T2( NB2, N ) ) + ALLOCATE ( DIAG( N ) ) +* +* Begin determine LWORK for the array WORK and allocate memory. +* +* ZGEMQRT requires NB2 to be bounded by N. +* + NB2_UB = MIN( NB2, N) +* +* + CALL ZGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORKQUERY, -1, INFO ) +* + LWORK = INT( WORKQUERY( 1 ) ) +* +* In ZGEMQRT, WORK is N*NB2_UB if SIDE = 'L', +* or M*NB2_UB if SIDE = 'R'. +* + LWORK = MAX( LWORK, NB2_UB * N, NB2_UB * M ) +* + ALLOCATE ( WORK( LWORK ) ) +* +* End allocate memory for WORK. +* +* +* Begin Householder reconstruction routines +* +* Factor the matrix A in the array AF. +* + SRNAMT = 'ZGETSQRHRT' + CALL ZGETSQRHRT( M, N, MB1, NB1, NB2, AF, M, T2, NB2, + $ WORK, LWORK, INFO ) +* +* End Householder reconstruction routines. +* +* +* Generate the m-by-m matrix Q +* + CALL ZLASET( 'Full', M, M, CZERO, CONE, Q, M ) +* + SRNAMT = 'ZGEMQRT' + CALL ZGEMQRT( 'L', 'N', M, M, K, NB2_UB, AF, M, T2, NB2, Q, M, + $ WORK, INFO ) +* +* Copy R +* + CALL ZLASET( 'Full', M, N, CZERO, CZERO, R, M ) +* + CALL ZLACPY( 'Upper', M, N, AF, M, R, M ) +* +* TEST 1 +* Compute |R - (Q**T)*A| / ( eps * m * |A| ) and store in RESULT(1) +* + CALL ZGEMM( 'C', 'N', M, N, M, -CONE, Q, M, A, M, CONE, R, M ) +* + ANORM = ZLANGE( '1', M, N, A, M, RWORK ) + RESID = ZLANGE( '1', M, N, R, M, RWORK ) + IF( ANORM.GT.ZERO ) THEN + RESULT( 1 ) = RESID / ( EPS * MAX( 1, M ) * ANORM ) + ELSE + RESULT( 1 ) = ZERO + END IF +* +* TEST 2 +* Compute |I - (Q**T)*Q| / ( eps * m ) and store in RESULT(2) +* + CALL ZLASET( 'Full', M, M, CZERO, CONE, R, M ) + CALL ZHERK( 'U', 'C', M, M, -CONE, Q, M, CONE, R, M ) + RESID = ZLANSY( '1', 'Upper', M, R, M, RWORK ) + RESULT( 2 ) = RESID / ( EPS * MAX( 1, M ) ) +* +* Generate random m-by-n matrix C +* + DO J = 1, N + CALL ZLARNV( 2, ISEED, M, C( 1, J ) ) + END DO + CNORM = ZLANGE( '1', M, N, C, M, RWORK ) + CALL ZLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as Q*C = CF +* + SRNAMT = 'ZGEMQRT' + CALL ZGEMQRT( 'L', 'N', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 3 +* Compute |CF - Q*C| / ( eps * m * |C| ) +* + CALL ZGEMM( 'N', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M ) + RESID = ZLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 3 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 3 ) = ZERO + END IF +* +* Copy C into CF again +* + CALL ZLACPY( 'Full', M, N, C, M, CF, M ) +* +* Apply Q to C as (Q**T)*C = CF +* + SRNAMT = 'ZGEMQRT' + CALL ZGEMQRT( 'L', 'C', M, N, K, NB2_UB, AF, M, T2, NB2, CF, M, + $ WORK, INFO ) +* +* TEST 4 +* Compute |CF - (Q**T)*C| / ( eps * m * |C|) +* + CALL ZGEMM( 'C', 'N', M, N, M, -CONE, Q, M, C, M, CONE, CF, M ) + RESID = ZLANGE( '1', M, N, CF, M, RWORK ) + IF( CNORM.GT.ZERO ) THEN + RESULT( 4 ) = RESID / ( EPS * MAX( 1, M ) * CNORM ) + ELSE + RESULT( 4 ) = ZERO + END IF +* +* Generate random n-by-m matrix D and a copy DF +* + DO J = 1, M + CALL ZLARNV( 2, ISEED, N, D( 1, J ) ) + END DO + DNORM = ZLANGE( '1', N, M, D, N, RWORK ) + CALL ZLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*Q = DF +* + SRNAMT = 'ZGEMQRT' + CALL ZGEMQRT( 'R', 'N', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 5 +* Compute |DF - D*Q| / ( eps * m * |D| ) +* + CALL ZGEMM( 'N', 'N', N, M, M, -CONE, D, N, Q, M, CONE, DF, N ) + RESID = ZLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 5 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 5 ) = ZERO + END IF +* +* Copy D into DF again +* + CALL ZLACPY( 'Full', N, M, D, N, DF, N ) +* +* Apply Q to D as D*QT = DF +* + SRNAMT = 'ZGEMQRT' + CALL ZGEMQRT( 'R', 'C', N, M, K, NB2_UB, AF, M, T2, NB2, DF, N, + $ WORK, INFO ) +* +* TEST 6 +* Compute |DF - D*(Q**T)| / ( eps * m * |D| ) +* + CALL ZGEMM( 'N', 'C', N, M, M, -CONE, D, N, Q, M, CONE, DF, N ) + RESID = ZLANGE( '1', N, M, DF, N, RWORK ) + IF( DNORM.GT.ZERO ) THEN + RESULT( 6 ) = RESID / ( EPS * MAX( 1, M ) * DNORM ) + ELSE + RESULT( 6 ) = ZERO + END IF +* +* Deallocate all arrays +* + DEALLOCATE ( A, AF, Q, R, RWORK, WORK, T1, T2, DIAG, + $ C, D, CF, DF ) +* + RETURN +* +* End of ZUNHR_COL02 +* + END