Merge pull request #3821 from martin-frbg/lapack651

Add a BLAS3-based triangular Sylvester equation solver (Reference-LAPACK PR 651)
This commit is contained in:
Martin Kroeker 2022-11-16 13:59:02 +01:00 committed by GitHub
commit f16aa1ce7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 24707 additions and 139 deletions

View File

@ -123,7 +123,8 @@ set(SLASRC
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f
slarmm.f slatrs3.f strsyl3.f)
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -221,7 +222,8 @@ set(CLASRC
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
cungtsqr.f cungtsqr_row.f cunhr_col.f )
cungtsqr.f cungtsqr_row.f cunhr_col.f
clatrs3.f ctrsyl3.f )
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@ -313,7 +315,8 @@ set(DLASRC
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f
dlarmm.f dlatrs3.f dtrsyl3.f)
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -415,7 +418,8 @@ set(ZLASRC
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
zungtsqr.f zungtsqr_row.f zunhr_col.f)
zungtsqr.f zungtsqr_row.f zunhr_col.f
zlatrs3.f ztrsyl3.f)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
@ -617,7 +621,8 @@ set(SLASRC
ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
sgesvdq.c slaorhr_col_getrfnp.c
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c )
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c
slarmm.c slatrs3.c strsyl3.c)
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@ -714,7 +719,8 @@ set(CLASRC
cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c
chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c
cungtsqr.c cungtsqr_row.c cunhr_col.c )
cungtsqr.c cungtsqr_row.c cunhr_col.c
clatrs3.c ctrsyl3.c)
set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
@ -805,7 +811,8 @@ set(DLASRC
dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c )
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c
dlarmm.c dlatrs3.c dtrsyl3.c)
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
@ -906,7 +913,7 @@ set(ZLASRC
zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
zungtsqr.c zungtsqr_row.c zunhr_col.c)
zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c)
set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c

View File

@ -12,6 +12,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <inttypes.h>
/* It seems all current Fortran compilers put strlen at end.
* Some historical compilers put strlen after the str argument
@ -80,11 +81,26 @@ extern "C" {
/*----------------------------------------------------------------------------*/
#ifndef lapack_int
#define lapack_int int
#if defined(LAPACK_ILP64)
#define lapack_int int64_t
#else
#define lapack_int int32_t
#endif
#endif
/*
* Integer format string
*/
#ifndef LAPACK_IFMT
#if defined(LAPACK_ILP64)
#define LAPACK_IFMT PRId64
#else
#define LAPACK_IFMT PRId32
#endif
#endif
#ifndef lapack_logical
#define lapack_logical lapack_int
#define lapack_logical lapack_int
#endif
/* f2c, hence clapack and MacOS Accelerate, returns double instead of float
@ -115,7 +131,7 @@ typedef lapack_logical (*LAPACK_Z_SELECT2)
( const lapack_complex_double*, const lapack_complex_double* );
#define LAPACK_lsame_base LAPACK_GLOBAL(lsame,LSAME)
lapack_logical LAPACK_lsame_base( const char* ca, const char* cb,
lapack_logical LAPACK_lsame_base( const char* ca, const char* cb,
lapack_int lca, lapack_int lcb
#ifdef LAPACK_FORTRAN_STRLEN_END
, size_t, size_t
@ -21986,6 +22002,84 @@ void LAPACK_ztrsyl_base(
#define LAPACK_ztrsyl(...) LAPACK_ztrsyl_base(__VA_ARGS__)
#endif
#define LAPACK_ctrsyl3_base LAPACK_GLOBAL(ctrsyl3,CTRSYL3)
void LAPACK_ctrsyl3_base(
char const* trana, char const* tranb,
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
lapack_complex_float const* A, lapack_int const* lda,
lapack_complex_float const* B, lapack_int const* ldb,
lapack_complex_float* C, lapack_int const* ldc, float* scale,
float* swork, lapack_int const *ldswork,
lapack_int* info
#ifdef LAPACK_FORTRAN_STRLEN_END
, size_t, size_t
#endif
);
#ifdef LAPACK_FORTRAN_STRLEN_END
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__, 1, 1)
#else
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__)
#endif
#define LAPACK_dtrsyl3_base LAPACK_GLOBAL(dtrsyl3,DTRSYL3)
void LAPACK_dtrsyl3_base(
char const* trana, char const* tranb,
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
double const* A, lapack_int const* lda,
double const* B, lapack_int const* ldb,
double* C, lapack_int const* ldc, double* scale,
lapack_int* iwork, lapack_int const* liwork,
double* swork, lapack_int const *ldswork,
lapack_int* info
#ifdef LAPACK_FORTRAN_STRLEN_END
, size_t, size_t
#endif
);
#ifdef LAPACK_FORTRAN_STRLEN_END
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__, 1, 1)
#else
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__)
#endif
#define LAPACK_strsyl3_base LAPACK_GLOBAL(strsyl3,STRSYL3)
void LAPACK_strsyl3_base(
char const* trana, char const* tranb,
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
float const* A, lapack_int const* lda,
float const* B, lapack_int const* ldb,
float* C, lapack_int const* ldc, float* scale,
lapack_int* iwork, lapack_int const* liwork,
float* swork, lapack_int const *ldswork,
lapack_int* info
#ifdef LAPACK_FORTRAN_STRLEN_END
, size_t, size_t
#endif
);
#ifdef LAPACK_FORTRAN_STRLEN_END
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__, 1, 1)
#else
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__)
#endif
#define LAPACK_ztrsyl3_base LAPACK_GLOBAL(ztrsyl3,ZTRSYL3)
void LAPACK_ztrsyl3_base(
char const* trana, char const* tranb,
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
lapack_complex_double const* A, lapack_int const* lda,
lapack_complex_double const* B, lapack_int const* ldb,
lapack_complex_double* C, lapack_int const* ldc, double* scale,
double* swork, lapack_int const *ldswork,
lapack_int* info
#ifdef LAPACK_FORTRAN_STRLEN_END
, size_t, size_t
#endif
);
#ifdef LAPACK_FORTRAN_STRLEN_END
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__, 1, 1)
#else
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__)
#endif
#define LAPACK_ctrtri_base LAPACK_GLOBAL(ctrtri,CTRTRI)
void LAPACK_ctrtri_base(
char const* uplo, char const* diag,

View File

@ -2313,6 +2313,19 @@ lapack_int LAPACKE_zlagge( int matrix_layout, lapack_int m, lapack_int n,
float LAPACKE_slamch( char cmach );
double LAPACKE_dlamch( char cmach );
float LAPACKE_slangb( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku, const float* ab,
lapack_int ldab );
double LAPACKE_dlangb( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku, const double* ab,
lapack_int ldab );
float LAPACKE_clangb( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku,
const lapack_complex_float* ab, lapack_int ldab );
double LAPACKE_zlangb( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku,
const lapack_complex_double* ab, lapack_int ldab );
float LAPACKE_slange( int matrix_layout, char norm, lapack_int m,
lapack_int n, const float* a, lapack_int lda );
double LAPACKE_dlange( int matrix_layout, char norm, lapack_int m,
@ -4477,6 +4490,23 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb,
lapack_complex_double* c, lapack_int ldc,
double* scale );
lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const float* a, lapack_int lda, const float* b,
lapack_int ldb, float* c, lapack_int ldc,
float* scale );
lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const double* a, lapack_int lda, const double* b,
lapack_int ldb, double* c, lapack_int ldc,
double* scale );
lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* c, lapack_int ldc,
double* scale );
lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n,
float* a, lapack_int lda );
lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n,
@ -7576,6 +7606,21 @@ double LAPACKE_dlapy3_work( double x, double y, double z );
float LAPACKE_slamch_work( char cmach );
double LAPACKE_dlamch_work( char cmach );
float LAPACKE_slangb_work( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku, const float* ab,
lapack_int ldab, float* work );
double LAPACKE_dlangb_work( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku, const double* ab,
lapack_int ldab, double* work );
float LAPACKE_clangb_work( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku,
const lapack_complex_float* ab, lapack_int ldab,
float* work );
double LAPACKE_zlangb_work( int matrix_layout, char norm, lapack_int n,
lapack_int kl, lapack_int ku,
const lapack_complex_double* ab, lapack_int ldab,
double* work );
float LAPACKE_slange_work( int matrix_layout, char norm, lapack_int m,
lapack_int n, const float* a, lapack_int lda,
float* work );
@ -10174,6 +10219,35 @@ lapack_int LAPACKE_ztrsyl_work( int matrix_layout, char trana, char tranb,
lapack_complex_double* c, lapack_int ldc,
double* scale );
lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const float* a, lapack_int lda,
const float* b, lapack_int ldb,
float* c, lapack_int ldc, float* scale,
lapack_int* iwork, lapack_int liwork,
float* swork, lapack_int ldswork );
lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const double* a, lapack_int lda,
const double* b, lapack_int ldb,
double* c, lapack_int ldc, double* scale,
lapack_int* iwork, lapack_int liwork,
double* swork, lapack_int ldswork );
lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* b, lapack_int ldb,
lapack_complex_float* c, lapack_int ldc,
float* scale, float* swork,
lapack_int ldswork );
lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* c, lapack_int ldc,
double* scale, double* swork,
lapack_int ldswork );
lapack_int LAPACKE_strtri_work( int matrix_layout, char uplo, char diag,
lapack_int n, float* a, lapack_int lda );
lapack_int LAPACKE_dtrtri_work( int matrix_layout, char uplo, char diag,

View File

@ -48,7 +48,6 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
lapack_int lrwork = -1;
float* rwork = NULL;
float rwork_query;
lapack_int i;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
return -1;

View File

@ -0,0 +1,56 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_ctrsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* b, lapack_int ldb,
lapack_complex_float* c, lapack_int ldc,
float* scale )
{
lapack_int info = 0;
float swork_query[2];
float* swork = NULL;
lapack_int ldswork = -1;
lapack_int swork_size = -1;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_ctrsyl3", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) {
return -7;
}
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
return -9;
}
if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {
return -11;
}
}
#endif
/* Query optimal working array sizes */
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
b, ldb, c, ldc, scale, swork_query, ldswork );
if( info != 0 ) {
goto exit_level_0;
}
ldswork = swork_query[0];
swork_size = ldswork * swork_query[1];
swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
if( swork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
lda, b, ldb, c, ldc, scale, swork, ldswork );
/* Release memory and exit */
LAPACKE_free( swork );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_ctrsyl3", info );
}
return info;
}

View File

@ -0,0 +1,88 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* b, lapack_int ldb,
lapack_complex_float* c, lapack_int ldc,
float* scale, float* swork,
lapack_int ldswork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
scale, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,n);
lapack_int ldc_t = MAX(1,m);
lapack_complex_float* a_t = NULL;
lapack_complex_float* b_t = NULL;
lapack_complex_float* c_t = NULL;
/* Check leading dimension(s) */
if( lda < m ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
return info;
}
if( ldb < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
return info;
}
if( ldc < n ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
b_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
if( b_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
c_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * ldc_t * MAX(1,n) );
if( c_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_2;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
LAPACKE_cge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
c_t, &ldc_t, scale, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
/* Release memory and exit */
LAPACKE_free( c_t );
exit_level_2:
LAPACKE_free( b_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
}
return info;
}

View File

@ -48,7 +48,6 @@ lapack_int LAPACKE_dgesvdq( int matrix_layout, char joba, char jobp,
lapack_int lrwork = -1;
double* rwork = NULL;
double rwork_query;
lapack_int i;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_dgesvdq", -1 );
return -1;

View File

@ -0,0 +1,68 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const double* a, lapack_int lda, const double* b,
lapack_int ldb, double* c, lapack_int ldc,
double* scale )
{
lapack_int info = 0;
double swork_query[2];
double* swork = NULL;
lapack_int ldswork = -1;
lapack_int swork_size = -1;
lapack_int iwork_query;
lapack_int* iwork = NULL;
lapack_int liwork = -1;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_dtrsyl3", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) {
return -7;
}
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
return -9;
}
if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) {
return -11;
}
}
#endif
/* Query optimal working array sizes */
info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
b, ldb, c, ldc, scale, &iwork_query, liwork,
swork_query, ldswork );
if( info != 0 ) {
goto exit_level_0;
}
ldswork = swork_query[0];
swork_size = ldswork * swork_query[1];
swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size);
if( swork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
liwork = iwork_query;
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
if ( iwork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1;
}
/* Call middle-level interface */
info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
lda, b, ldb, c, ldc, scale, iwork, liwork,
swork, ldswork );
/* Release memory and exit */
LAPACKE_free( iwork );
exit_level_1:
LAPACKE_free( swork );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dtrsyl3", info );
}
return info;
}

View File

@ -0,0 +1,86 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const double* a, lapack_int lda,
const double* b, lapack_int ldb, double* c,
lapack_int ldc, double* scale,
lapack_int* iwork, lapack_int liwork,
double* swork, lapack_int ldswork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
scale, iwork, &liwork, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,n);
lapack_int ldc_t = MAX(1,m);
double* a_t = NULL;
double* b_t = NULL;
double* c_t = NULL;
/* Check leading dimension(s) */
if( lda < m ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
return info;
}
if( ldb < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
return info;
}
if( ldc < n ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,m) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
if( b_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
c_t = (double*)LAPACKE_malloc( sizeof(double) * ldc_t * MAX(1,n) );
if( c_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_2;
}
/* Transpose input matrices */
LAPACKE_dge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork,
&info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
/* Release memory and exit */
LAPACKE_free( c_t );
exit_level_2:
LAPACKE_free( b_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
}
return info;
}

View File

@ -48,7 +48,6 @@ lapack_int LAPACKE_sgesvdq( int matrix_layout, char joba, char jobp,
lapack_int lrwork = -1;
float* rwork = NULL;
float rwork_query;
lapack_int i;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_sgesvdq", -1 );
return -1;

View File

@ -0,0 +1,68 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const float* a, lapack_int lda, const float* b,
lapack_int ldb, float* c, lapack_int ldc,
float* scale )
{
lapack_int info = 0;
float swork_query[2];
float* swork = NULL;
lapack_int ldswork = -1;
lapack_int swork_size = -1;
lapack_int iwork_query;
lapack_int* iwork = NULL;
lapack_int liwork = -1;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_strsyl3", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) {
return -7;
}
if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) {
return -9;
}
if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) {
return -11;
}
}
#endif
/* Query optimal working array sizes */
info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
b, ldb, c, ldc, scale, &iwork_query, liwork,
swork_query, ldswork );
if( info != 0 ) {
goto exit_level_0;
}
ldswork = swork_query[0];
swork_size = ldswork * swork_query[1];
swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
if( swork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
liwork = iwork_query;
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
if ( iwork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1;
}
/* Call middle-level interface */
info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
lda, b, ldb, c, ldc, scale, iwork, liwork,
swork, ldswork );
/* Release memory and exit */
LAPACKE_free( iwork );
exit_level_1:
LAPACKE_free( swork );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_strsyl3", info );
}
return info;
}

View File

@ -0,0 +1,86 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const float* a, lapack_int lda,
const float* b, lapack_int ldb, float* c,
lapack_int ldc, float* scale,
lapack_int* iwork, lapack_int liwork,
float* swork, lapack_int ldswork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
scale, iwork, &liwork, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,n);
lapack_int ldc_t = MAX(1,m);
float* a_t = NULL;
float* b_t = NULL;
float* c_t = NULL;
/* Check leading dimension(s) */
if( lda < m ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
return info;
}
if( ldb < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
return info;
}
if( ldc < n ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
if( b_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
c_t = (float*)LAPACKE_malloc( sizeof(float) * ldc_t * MAX(1,n) );
if( c_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_2;
}
/* Transpose input matrices */
LAPACKE_sge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
LAPACKE_sge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork,
&info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
/* Release memory and exit */
LAPACKE_free( c_t );
exit_level_2:
LAPACKE_free( b_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
}
return info;
}

View File

@ -48,7 +48,6 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
lapack_int lrwork = -1;
double* rwork = NULL;
double rwork_query;
lapack_int i;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_zgesvdq", -1 );
return -1;

View File

@ -0,0 +1,56 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* c, lapack_int ldc,
double* scale )
{
lapack_int info = 0;
double swork_query[2];
double* swork = NULL;
lapack_int ldswork = -1;
lapack_int swork_size = -1;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_ztrsyl3", -1 );
return -1;
}
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) {
return -7;
}
if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) {
return -9;
}
if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) {
return -11;
}
}
#endif
/* Query optimal working array sizes */
info = LAPACKE_ztrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
b, ldb, c, ldc, scale, swork_query, ldswork );
if( info != 0 ) {
goto exit_level_0;
}
ldswork = swork_query[0];
swork_size = ldswork * swork_query[1];
swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size);
if( swork == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_0;
}
/* Call middle-level interface */
info = LAPACKE_ztrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
lda, b, ldb, c, ldc, scale, swork, ldswork );
/* Release memory and exit */
LAPACKE_free( swork );
exit_level_0:
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_ztrsyl3", info );
}
return info;
}

View File

@ -0,0 +1,88 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
lapack_int isgn, lapack_int m, lapack_int n,
const lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* c, lapack_int ldc,
double* scale, double* swork,
lapack_int ldswork )
{
lapack_int info = 0;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
scale, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,n);
lapack_int ldc_t = MAX(1,m);
lapack_complex_double* a_t = NULL;
lapack_complex_double* b_t = NULL;
lapack_complex_double* c_t = NULL;
/* Check leading dimension(s) */
if( lda < m ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
return info;
}
if( ldb < n ) {
info = -10;
LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
return info;
}
if( ldc < n ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,m) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
}
b_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
if( b_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_1;
}
c_t = (lapack_complex_double*)
LAPACKE_malloc( sizeof(lapack_complex_double) * ldc_t * MAX(1,n) );
if( c_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_2;
}
/* Transpose input matrices */
LAPACKE_zge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
LAPACKE_zge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
c_t, &ldc_t, scale, swork, &ldswork, &info );
if( info < 0 ) {
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
/* Release memory and exit */
LAPACKE_free( c_t );
exit_level_2:
LAPACKE_free( b_t );
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
}
} else {
info = -1;
LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
}
return info;
}

View File

@ -207,7 +207,7 @@ SLASRC_O = \
ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
sgesvdq.o
sgesvdq.o slarmm.o slatrs3.o strsyl3.o
endif
@ -316,7 +316,7 @@ CLASRC_O = \
chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \
cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
cgesvdq.o
cgesvdq.o clatrs3.o ctrsyl3.o
endif
ifdef USEXBLAS
@ -417,7 +417,7 @@ DLASRC_O = \
dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
dgesvdq.o
dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o
endif
ifdef USEXBLAS
@ -526,7 +526,7 @@ ZLASRC_O = \
zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \
zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
zgesvdq.o
zgesvdq.o zlatrs3.o ztrsyl3.o
endif
ifdef USEXBLAS

1282
lapack-netlib/SRC/clatrs3.c Normal file

File diff suppressed because it is too large Load Diff

666
lapack-netlib/SRC/clatrs3.f Normal file
View File

@ -0,0 +1,666 @@
*> \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
*
* Definition:
* ===========
*
* SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
* X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER DIAG, NORMIN, TRANS, UPLO
* INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
* REAL CNORM( * ), SCALE( * ), WORK( * )
* COMPLEX A( LDA, * ), X( LDX, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CLATRS3 solves one of the triangular systems
*>
*> A * X = B * diag(scale), A**T * X = B * diag(scale), or
*> A**H * X = B * diag(scale)
*>
*> with scaling to prevent overflow. Here A is an upper or lower
*> triangular matrix, A**T denotes the transpose of A, A**H denotes the
*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
*> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
*> is usually less than or equal to 1, chosen such that X(:,j) is less
*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
*> for some j), then a non-trivial solution to A*X = 0 is returned. If
*> the system is so badly scaled that the solution cannot be represented
*> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
*>
*> This is a BLAS-3 version of LATRS for solving several right
*> hand sides simultaneously.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies whether the matrix A is upper or lower triangular.
*> = 'U': Upper triangular
*> = 'L': Lower triangular
*> \endverbatim
*>
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the operation applied to A.
*> = 'N': Solve A * x = s*b (No transpose)
*> = 'T': Solve A**T* x = s*b (Transpose)
*> = 'C': Solve A**T* x = s*b (Conjugate transpose)
*> \endverbatim
*>
*> \param[in] DIAG
*> \verbatim
*> DIAG is CHARACTER*1
*> Specifies whether or not the matrix A is unit triangular.
*> = 'N': Non-unit triangular
*> = 'U': Unit triangular
*> \endverbatim
*>
*> \param[in] NORMIN
*> \verbatim
*> NORMIN is CHARACTER*1
*> Specifies whether CNORM has been set or not.
*> = 'Y': CNORM contains the column norms on entry
*> = 'N': CNORM is not set on entry. On exit, the norms will
*> be computed and stored in CNORM.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of columns of X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX array, dimension (LDA,N)
*> The triangular matrix A. If UPLO = 'U', the leading n by n
*> upper triangular part of the array A contains the upper
*> triangular matrix, and the strictly lower triangular part of
*> A is not referenced. If UPLO = 'L', the leading n by n lower
*> triangular part of the array A contains the lower triangular
*> matrix, and the strictly upper triangular part of A is not
*> referenced. If DIAG = 'U', the diagonal elements of A are
*> also not referenced and are assumed to be 1.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max (1,N).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is COMPLEX array, dimension (LDX,NRHS)
*> On entry, the right hand side B of the triangular system.
*> On exit, X is overwritten by the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max (1,N).
*> \endverbatim
*>
*> \param[out] SCALE
*> \verbatim
*> SCALE is REAL array, dimension (NRHS)
*> The scaling factor s(k) is for the triangular system
*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
*> If SCALE = 0, the matrix A is singular or badly scaled.
*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
*> that is an exact or approximate solution to A*x(:,k) = 0
*> is returned. If the system so badly scaled that solution
*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
*> is returned.
*> \endverbatim
*>
*> \param[in,out] CNORM
*> \verbatim
*> CNORM is REAL array, dimension (N)
*>
*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
*> contains the norm of the off-diagonal part of the j-th column
*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
*> must be greater than or equal to the 1-norm.
*>
*> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
*> returns the 1-norm of the offdiagonal part of the j-th column
*> of A.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is REAL array, dimension (LWORK).
*> On exit, if INFO = 0, WORK(1) returns the optimal size of
*> WORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> LWORK is INTEGER
*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
*> NBA = (N + NB - 1)/NB and NB is the optimal block size.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal dimensions of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*> \par Further Details:
* =====================
* \verbatim
* The algorithm follows the structure of a block triangular solve.
* The diagonal block is solved with a call to the robust the triangular
* solver LATRS for every right-hand side RHS = 1, ..., NRHS
* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
* where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
* The linear block updates operate on block columns of X,
* B( I, K ) - op(A( I, J )) * X( J, K )
* and use GEMM. To avoid overflow in the linear block update, the worst case
* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
* such that
* || s * B( I, RHS )||_oo
* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
*
* Once all columns of a block column have been rescaled (BLAS-1), the linear
* update is executed with GEMM without overflow.
*
* To limit rescaling, local scale factors track the scaling of column segments.
* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
* per right-hand side column RHS = 1, ..., NRHS. The global scale factor
* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
* I = 1, ..., NBA.
* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
* linear update of potentially inconsistently scaled vector segments
* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
* if necessary, rescales the blocks prior to calling GEMM.
*
* \endverbatim
* =====================================================================
* References:
* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
* Parallel robust solution of triangular linear systems. Concurrency
* and Computation: Practice and Experience, 31(19), e5064.
*
* Contributor:
* Angelika Schwarz, Umea University, Sweden.
*
* =====================================================================
SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
$ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
IMPLICIT NONE
*
* .. Scalar Arguments ..
CHARACTER DIAG, TRANS, NORMIN, UPLO
INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
COMPLEX A( LDA, * ), X( LDX, * )
REAL CNORM( * ), SCALE( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
COMPLEX CZERO, CONE
PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) )
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) )
INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
PARAMETER ( NRHSMIN = 2, NBRHS = 32 )
PARAMETER ( NBMIN = 8, NBMAX = 64 )
* ..
* .. Local Arrays ..
REAL W( NBMAX ), XNRM( NBRHS )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
$ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
$ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
REAL ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
$ SCAMIN, SMLNUM, TMAX
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
REAL SLAMCH, CLANGE, SLARMM
EXTERNAL ILAENV, LSAME, SLAMCH, CLANGE, SLARMM
* ..
* .. External Subroutines ..
EXTERNAL CLATRS, CSSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN
* ..
* .. Executable Statements ..
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
NOTRAN = LSAME( TRANS, 'N' )
NOUNIT = LSAME( DIAG, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
* Partition A and X into blocks.
*
NB = MAX( NBMIN, ILAENV( 1, 'CLATRS', '', N, N, -1, -1 ) )
NB = MIN( NBMAX, NB )
NBA = MAX( 1, (N + NB - 1) / NB )
NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
*
* Compute the workspace
*
* The workspace comprises two parts.
* The first part stores the local scale factors. Each simultaneously
* computed right-hand side requires one local scale factor per block
* row. WORK( I + KK * LDS ) is the scale factor of the vector
* segment associated with the I-th block row and the KK-th vector
* in the block column.
LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
LDS = NBA
* The second part stores upper bounds of the triangular A. There are
* a total of NBA x NBA blocks, of which only the upper triangular
* part or the lower triangular part is referenced. The upper bound of
* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
LANRM = NBA * NBA
AWRK = LSCALE
WORK( 1 ) = LSCALE + LANRM
*
* Test the input parameters.
*
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -2
ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
INFO = -3
ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
$ LSAME( NORMIN, 'N' ) ) THEN
INFO = -4
ELSE IF( N.LT.0 ) THEN
INFO = -5
ELSE IF( NRHS.LT.0 ) THEN
INFO = -6
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -8
ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
INFO = -10
ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
INFO = -14
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'CLATRS3', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Initialize scaling factors
*
DO KK = 1, NRHS
SCALE( KK ) = ONE
END DO
*
* Quick return if possible
*
IF( MIN( N, NRHS ).EQ.0 )
$ RETURN
*
* Determine machine dependent constant to control overflow.
*
BIGNUM = SLAMCH( 'Overflow' )
SMLNUM = SLAMCH( 'Safe Minimum' )
*
* Use unblocked code for small problems
*
IF( NRHS.LT.NRHSMIN ) THEN
CALL CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1 ),
$ SCALE( 1 ), CNORM, INFO )
DO K = 2, NRHS
CALL CLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Compute norms of blocks of A excluding diagonal blocks and find
* the block with the largest norm TMAX.
*
TMAX = ZERO
DO J = 1, NBA
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
IF ( UPPER ) THEN
IFIRST = 1
ILAST = J - 1
ELSE
IFIRST = J + 1
ILAST = NBA
END IF
DO I = IFIRST, ILAST
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Compute upper bound of A( I1:I2-1, J1:J2-1 ).
*
IF( NOTRAN ) THEN
ANRM = CLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + I+(J-1)*NBA ) = ANRM
ELSE
ANRM = CLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + J+(I-1)*NBA ) = ANRM
END IF
TMAX = MAX( TMAX, ANRM )
END DO
END DO
*
IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN
*
* Some matrix entries have huge absolute value. At least one upper
* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
* number, either due to overflow in LANGE or due to Inf in A.
* Fall back to LATRS. Set normin = 'N' for every right-hand side to
* force computation of TSCAL in LATRS to avoid the likely overflow
* in the computation of the column norms CNORM.
*
DO K = 1, NRHS
CALL CLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Every right-hand side requires workspace to store NBA local scale
* factors. To save workspace, X is computed successively in block columns
* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
DO K = 1, NBX
* Loop over block columns (index = K) of X and, for column-wise scalings,
* over individual columns (index = KK).
* K1: column index of the first column in X( J, K )
* K2: column index of the first column in X( J, K+1 )
* so the K2 - K1 is the column count of the block X( J, K )
K1 = (K-1)*NBRHS + 1
K2 = MIN( K*NBRHS, NRHS ) + 1
*
* Initialize local scaling factors of current block column X( J, K )
*
DO KK = 1, K2-K1
DO I = 1, NBA
WORK( I+KK*LDS ) = ONE
END DO
END DO
*
IF( NOTRAN ) THEN
*
* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = NBA
JLAST = 1
JINC = -1
ELSE
JFIRST = 1
JLAST = NBA
JINC = 1
END IF
ELSE
*
* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
* where op(A) = A**T or op(A) = A**H
*
IF( UPPER ) THEN
JFIRST = 1
JLAST = NBA
JINC = 1
ELSE
JFIRST = NBA
JLAST = 1
JINC = -1
END IF
END IF
DO J = JFIRST, JLAST, JINC
* J1: row index of the first row in A( J, J )
* J2: row index of the first row in A( J+1, J+1 )
* so that J2 - J1 is the row count of the block A( J, J )
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
*
* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( KK.EQ.1 ) THEN
CALL CLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
ELSE
CALL CLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
END IF
* Find largest absolute value entry in the vector segment
* X( J1:J2-1, RHS ) as an upper bound for the worst case
* growth in the linear updates.
XNRM( KK ) = CLANGE( 'I', J2-J1, 1, X( J1, RHS ),
$ LDX, W )
*
IF( SCALOC .EQ. ZERO ) THEN
* LATRS found that A is singular through A(j,j) = 0.
* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
* set by LATRS.
SCALE( RHS ) = ZERO
DO II = 1, J1-1
X( II, KK ) = CZERO
END DO
DO II = J2, N
X( II, KK ) = CZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
* LATRS computed a valid scale factor, but combined with
* the current scaling the solution does not have a
* scale factor > 0.
*
* Set WORK( J+KK*LDS ) to smallest valid scale
* factor and increase SCALOC accordingly.
SCAL = WORK( J+KK*LDS ) / SMLNUM
SCALOC = SCALOC * SCAL
WORK( J+KK*LDS ) = SMLNUM
* If LATRS overestimated the growth, x may be
* rescaled to preserve a valid combined scale
* factor WORK( J, KK ) > 0.
RSCAL = ONE / SCALOC
IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
XNRM( KK ) = XNRM( KK ) * RSCAL
CALL CSSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
SCALOC = ONE
ELSE
* The system op(A) * x = b is badly scaled and its
* solution cannot be represented as (1/scale) * x.
* Set x to zero. This approach deviates from LATRS
* where a completely meaningless non-zero vector
* is returned that is not a solution to op(A) * x = b.
SCALE( RHS ) = ZERO
DO II = 1, N
X( II, KK ) = CZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
END IF
END IF
SCALOC = SCALOC * WORK( J+KK*LDS )
WORK( J+KK*LDS ) = SCALOC
END DO
*
* Linear block updates
*
IF( NOTRAN ) THEN
IF( UPPER ) THEN
IFIRST = J - 1
ILAST = 1
IINC = -1
ELSE
IFIRST = J + 1
ILAST = NBA
IINC = 1
END IF
ELSE
IF( UPPER ) THEN
IFIRST = J + 1
ILAST = NBA
IINC = 1
ELSE
IFIRST = J - 1
ILAST = 1
IINC = -1
END IF
END IF
*
DO I = IFIRST, ILAST, IINC
* I1: row index of the first column in X( I, K )
* I2: row index of the first column in X( I+1, K )
* so the I2 - I1 is the row count of the block X( I, K )
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Prepare the linear update to be executed with GEMM.
* For each column, compute a consistent scaling, a
* scaling factor to survive the linear update, and
* rescale the column segments, if necesssary. Then
* the linear update is safely executed.
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
* Compute consistent scaling
SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
*
* Compute scaling factor to survive the linear update
* simulating consistent scaling.
*
BNRM = CLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
ANRM = WORK( AWRK + I+(J-1)*NBA )
SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM )
*
* Simultaneously apply the robust update factor and the
* consistency scaling factor to X( I, KK ) and X( J, KK ).
*
SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
WORK( I+KK*LDS ) = SCAMIN*SCALOC
END IF
*
SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL CSSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
WORK( J+KK*LDS ) = SCAMIN*SCALOC
END IF
END DO
*
IF( NOTRAN ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
*
CALL CGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( I1, J1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
ELSE IF( LSAME( TRANS, 'T' ) ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
*
CALL CGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
ELSE
*
* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
*
CALL CGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
END IF
END DO
END DO
*
* Reduce local scaling factors
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
DO I = 1, NBA
SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
END DO
END DO
*
* Realize consistent scaling
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
DO I = 1, NBA
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
IF( SCAL.NE.ONE )
$ CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
END DO
END IF
END DO
END DO
RETURN
*
* End of CLATRS3
*
END

2022
lapack-netlib/SRC/ctrsyl3.c Normal file

File diff suppressed because it is too large Load Diff

1142
lapack-netlib/SRC/ctrsyl3.f Normal file

File diff suppressed because it is too large Load Diff

605
lapack-netlib/SRC/dlarmm.c Normal file
View File

@ -0,0 +1,605 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle_() continue;
#define myceiling_(w) {ceil(w)}
#define myhuge_(w) {HUGE_VAL}
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
#define myexp_(w) my_expfunc(w)
static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;}
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* > \brief \b DLARMM */
/* Definition: */
/* =========== */
/* DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM ) */
/* DOUBLE PRECISION ANORM, BNORM, CNORM */
/* > \par Purpose: */
/* ======= */
/* > */
/* > \verbatim */
/* > */
/* > DLARMM returns a factor s in (0, 1] such that the linear updates */
/* > */
/* > (s * C) - A * (s * B) and (s * C) - (s * A) * B */
/* > */
/* > cannot overflow, where A, B, and C are matrices of conforming */
/* > dimensions. */
/* > */
/* > This is an auxiliary routine so there is no argument checking. */
/* > \endverbatim */
/* Arguments: */
/* ========= */
/* > \param[in] ANORM */
/* > \verbatim */
/* > ANORM is DOUBLE PRECISION */
/* > The infinity norm of A. ANORM >= 0. */
/* > The number of rows of the matrix A. M >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] BNORM */
/* > \verbatim */
/* > BNORM is DOUBLE PRECISION */
/* > The infinity norm of B. BNORM >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] CNORM */
/* > \verbatim */
/* > CNORM is DOUBLE PRECISION */
/* > The infinity norm of C. CNORM >= 0. */
/* > \endverbatim */
/* > */
/* > */
/* ===================================================================== */
/* > References: */
/* > C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */
/* > Robust Solution of Triangular Linear Systems. In: International */
/* > Conference on Parallel Processing and Applied Mathematics, pages */
/* > 68--78. Springer, 2017. */
/* > */
/* > \ingroup OTHERauxiliary */
/* ===================================================================== */
doublereal dlarmm_(doublereal *anorm, doublereal *bnorm, doublereal *cnorm)
{
/* System generated locals */
doublereal ret_val;
/* Local variables */
extern doublereal dlamch_(char *);
doublereal bignum, smlnum;
/* Determine machine dependent parameters to control overflow. */
smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
bignum = 1. / smlnum / 4.;
/* Compute a scale factor. */
ret_val = 1.;
if (*bnorm <= 1.) {
if (*anorm * *bnorm > bignum - *cnorm) {
ret_val = .5;
}
} else {
if (*anorm > (bignum - *cnorm) / *bnorm) {
ret_val = .5 / *bnorm;
}
}
return ret_val;
/* ==== End of DLARMM ==== */
} /* dlarmm_ */

View File

@ -0,0 +1,99 @@
*> \brief \b DLARMM
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ANORM, BNORM, CNORM
* ..
*
*> \par Purpose:
* =======
*>
*> \verbatim
*>
*> DLARMM returns a factor s in (0, 1] such that the linear updates
*>
*> (s * C) - A * (s * B) and (s * C) - (s * A) * B
*>
*> cannot overflow, where A, B, and C are matrices of conforming
*> dimensions.
*>
*> This is an auxiliary routine so there is no argument checking.
*> \endverbatim
*
* Arguments:
* =========
*
*> \param[in] ANORM
*> \verbatim
*> ANORM is DOUBLE PRECISION
*> The infinity norm of A. ANORM >= 0.
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] BNORM
*> \verbatim
*> BNORM is DOUBLE PRECISION
*> The infinity norm of B. BNORM >= 0.
*> \endverbatim
*>
*> \param[in] CNORM
*> \verbatim
*> CNORM is DOUBLE PRECISION
*> The infinity norm of C. CNORM >= 0.
*> \endverbatim
*>
*>
* =====================================================================
*> References:
*> C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for
*> Robust Solution of Triangular Linear Systems. In: International
*> Conference on Parallel Processing and Applied Mathematics, pages
*> 68--78. Springer, 2017.
*>
*> \ingroup OTHERauxiliary
* =====================================================================
DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
IMPLICIT NONE
* .. Scalar Arguments ..
DOUBLE PRECISION ANORM, BNORM, CNORM
* .. Parameters ..
DOUBLE PRECISION ONE, HALF, FOUR
PARAMETER ( ONE = 1.0D0, HALF = 0.5D+0, FOUR = 4.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION BIGNUM, SMLNUM
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Executable Statements ..
*
*
* Determine machine dependent parameters to control overflow.
*
SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
BIGNUM = ( ONE / SMLNUM ) / FOUR
*
* Compute a scale factor.
*
DLARMM = ONE
IF( BNORM .LE. ONE ) THEN
IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN
DLARMM = HALF
END IF
ELSE
IF( ANORM .GT. (BIGNUM - CNORM) / BNORM ) THEN
DLARMM = HALF / BNORM
END IF
END IF
RETURN
*
* ==== End of DLARMM ====
*
END

1265
lapack-netlib/SRC/dlatrs3.c Normal file

File diff suppressed because it is too large Load Diff

656
lapack-netlib/SRC/dlatrs3.f Normal file
View File

@ -0,0 +1,656 @@
*> \brief \b DLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
*
* Definition:
* ===========
*
* SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
* X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER DIAG, NORMIN, TRANS, UPLO
* INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A( LDA, * ), CNORM( * ), SCALE( * ),
* WORK( * ), X( LDX, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DLATRS3 solves one of the triangular systems
*>
*> A * X = B * diag(scale) or A**T * X = B * diag(scale)
*>
*> with scaling to prevent overflow. Here A is an upper or lower
*> triangular matrix, A**T denotes the transpose of A. X and B are
*> n by nrhs matrices and scale is an nrhs element vector of scaling
*> factors. A scaling factor scale(j) is usually less than or equal
*> to 1, chosen such that X(:,j) is less than the overflow threshold.
*> If the matrix A is singular (A(j,j) = 0 for some j), then
*> a non-trivial solution to A*X = 0 is returned. If the system is
*> so badly scaled that the solution cannot be represented as
*> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
*>
*> This is a BLAS-3 version of LATRS for solving several right
*> hand sides simultaneously.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies whether the matrix A is upper or lower triangular.
*> = 'U': Upper triangular
*> = 'L': Lower triangular
*> \endverbatim
*>
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the operation applied to A.
*> = 'N': Solve A * x = s*b (No transpose)
*> = 'T': Solve A**T* x = s*b (Transpose)
*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose)
*> \endverbatim
*>
*> \param[in] DIAG
*> \verbatim
*> DIAG is CHARACTER*1
*> Specifies whether or not the matrix A is unit triangular.
*> = 'N': Non-unit triangular
*> = 'U': Unit triangular
*> \endverbatim
*>
*> \param[in] NORMIN
*> \verbatim
*> NORMIN is CHARACTER*1
*> Specifies whether CNORM has been set or not.
*> = 'Y': CNORM contains the column norms on entry
*> = 'N': CNORM is not set on entry. On exit, the norms will
*> be computed and stored in CNORM.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of columns of X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension (LDA,N)
*> The triangular matrix A. If UPLO = 'U', the leading n by n
*> upper triangular part of the array A contains the upper
*> triangular matrix, and the strictly lower triangular part of
*> A is not referenced. If UPLO = 'L', the leading n by n lower
*> triangular part of the array A contains the lower triangular
*> matrix, and the strictly upper triangular part of A is not
*> referenced. If DIAG = 'U', the diagonal elements of A are
*> also not referenced and are assumed to be 1.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max (1,N).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is DOUBLE PRECISION array, dimension (LDX,NRHS)
*> On entry, the right hand side B of the triangular system.
*> On exit, X is overwritten by the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max (1,N).
*> \endverbatim
*>
*> \param[out] SCALE
*> \verbatim
*> SCALE is DOUBLE PRECISION array, dimension (NRHS)
*> The scaling factor s(k) is for the triangular system
*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
*> If SCALE = 0, the matrix A is singular or badly scaled.
*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
*> that is an exact or approximate solution to A*x(:,k) = 0
*> is returned. If the system so badly scaled that solution
*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
*> is returned.
*> \endverbatim
*>
*> \param[in,out] CNORM
*> \verbatim
*> CNORM is DOUBLE PRECISION array, dimension (N)
*>
*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
*> contains the norm of the off-diagonal part of the j-th column
*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
*> must be greater than or equal to the 1-norm.
*>
*> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
*> returns the 1-norm of the offdiagonal part of the j-th column
*> of A.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (LWORK).
*> On exit, if INFO = 0, WORK(1) returns the optimal size of
*> WORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> LWORK is INTEGER
*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
*> NBA = (N + NB - 1)/NB and NB is the optimal block size.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal dimensions of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*> \par Further Details:
* =====================
* \verbatim
* The algorithm follows the structure of a block triangular solve.
* The diagonal block is solved with a call to the robust the triangular
* solver LATRS for every right-hand side RHS = 1, ..., NRHS
* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
* where op( A ) = A or op( A ) = A**T.
* The linear block updates operate on block columns of X,
* B( I, K ) - op(A( I, J )) * X( J, K )
* and use GEMM. To avoid overflow in the linear block update, the worst case
* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
* such that
* || s * B( I, RHS )||_oo
* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
*
* Once all columns of a block column have been rescaled (BLAS-1), the linear
* update is executed with GEMM without overflow.
*
* To limit rescaling, local scale factors track the scaling of column segments.
* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
* per right-hand side column RHS = 1, ..., NRHS. The global scale factor
* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
* I = 1, ..., NBA.
* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
* linear update of potentially inconsistently scaled vector segments
* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
* if necessary, rescales the blocks prior to calling GEMM.
*
* \endverbatim
* =====================================================================
* References:
* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
* Parallel robust solution of triangular linear systems. Concurrency
* and Computation: Practice and Experience, 31(19), e5064.
*
* Contributor:
* Angelika Schwarz, Umea University, Sweden.
*
* =====================================================================
SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
$ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
IMPLICIT NONE
*
* .. Scalar Arguments ..
CHARACTER DIAG, TRANS, NORMIN, UPLO
INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( LDX, * ),
$ SCALE( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
PARAMETER ( NRHSMIN = 2, NBRHS = 32 )
PARAMETER ( NBMIN = 8, NBMAX = 64 )
* ..
* .. Local Arrays ..
DOUBLE PRECISION W( NBMAX ), XNRM( NBRHS )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
$ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
$ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
DOUBLE PRECISION ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
$ SCAMIN, SMLNUM, TMAX
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE, DLARMM
EXTERNAL DLAMCH, DLANGE, DLARMM, ILAENV, LSAME
* ..
* .. External Subroutines ..
EXTERNAL DLATRS, DSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN
* ..
* .. Executable Statements ..
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
NOTRAN = LSAME( TRANS, 'N' )
NOUNIT = LSAME( DIAG, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
* Partition A and X into blocks
*
NB = MAX( 8, ILAENV( 1, 'DLATRS', '', N, N, -1, -1 ) )
NB = MIN( NBMAX, NB )
NBA = MAX( 1, (N + NB - 1) / NB )
NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
*
* Compute the workspace
*
* The workspace comprises two parts.
* The first part stores the local scale factors. Each simultaneously
* computed right-hand side requires one local scale factor per block
* row. WORK( I+KK*LDS ) is the scale factor of the vector
* segment associated with the I-th block row and the KK-th vector
* in the block column.
LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
LDS = NBA
* The second part stores upper bounds of the triangular A. There are
* a total of NBA x NBA blocks, of which only the upper triangular
* part or the lower triangular part is referenced. The upper bound of
* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
LANRM = NBA * NBA
AWRK = LSCALE
WORK( 1 ) = LSCALE + LANRM
*
* Test the input parameters
*
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -2
ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
INFO = -3
ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
$ LSAME( NORMIN, 'N' ) ) THEN
INFO = -4
ELSE IF( N.LT.0 ) THEN
INFO = -5
ELSE IF( NRHS.LT.0 ) THEN
INFO = -6
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -8
ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
INFO = -10
ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
INFO = -14
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLATRS3', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Initialize scaling factors
*
DO KK = 1, NRHS
SCALE( KK ) = ONE
END DO
*
* Quick return if possible
*
IF( MIN( N, NRHS ).EQ.0 )
$ RETURN
*
* Determine machine dependent constant to control overflow.
*
BIGNUM = DLAMCH( 'Overflow' )
SMLNUM = DLAMCH( 'Safe Minimum' )
*
* Use unblocked code for small problems
*
IF( NRHS.LT.NRHSMIN ) THEN
CALL DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
$ SCALE( 1 ), CNORM, INFO )
DO K = 2, NRHS
CALL DLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Compute norms of blocks of A excluding diagonal blocks and find
* the block with the largest norm TMAX.
*
TMAX = ZERO
DO J = 1, NBA
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
IF ( UPPER ) THEN
IFIRST = 1
ILAST = J - 1
ELSE
IFIRST = J + 1
ILAST = NBA
END IF
DO I = IFIRST, ILAST
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Compute upper bound of A( I1:I2-1, J1:J2-1 ).
*
IF( NOTRAN ) THEN
ANRM = DLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + I+(J-1)*NBA ) = ANRM
ELSE
ANRM = DLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + J+(I-1)*NBA ) = ANRM
END IF
TMAX = MAX( TMAX, ANRM )
END DO
END DO
*
IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
*
* Some matrix entries have huge absolute value. At least one upper
* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
* number, either due to overflow in LANGE or due to Inf in A.
* Fall back to LATRS. Set normin = 'N' for every right-hand side to
* force computation of TSCAL in LATRS to avoid the likely overflow
* in the computation of the column norms CNORM.
*
DO K = 1, NRHS
CALL DLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Every right-hand side requires workspace to store NBA local scale
* factors. To save workspace, X is computed successively in block columns
* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
DO K = 1, NBX
* Loop over block columns (index = K) of X and, for column-wise scalings,
* over individual columns (index = KK).
* K1: column index of the first column in X( J, K )
* K2: column index of the first column in X( J, K+1 )
* so the K2 - K1 is the column count of the block X( J, K )
K1 = (K-1)*NBRHS + 1
K2 = MIN( K*NBRHS, NRHS ) + 1
*
* Initialize local scaling factors of current block column X( J, K )
*
DO KK = 1, K2-K1
DO I = 1, NBA
WORK( I+KK*LDS ) = ONE
END DO
END DO
*
IF( NOTRAN ) THEN
*
* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = NBA
JLAST = 1
JINC = -1
ELSE
JFIRST = 1
JLAST = NBA
JINC = 1
END IF
ELSE
*
* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = 1
JLAST = NBA
JINC = 1
ELSE
JFIRST = NBA
JLAST = 1
JINC = -1
END IF
END IF
*
DO J = JFIRST, JLAST, JINC
* J1: row index of the first row in A( J, J )
* J2: row index of the first row in A( J+1, J+1 )
* so that J2 - J1 is the row count of the block A( J, J )
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
*
* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
* for all right-hand sides in the current block column,
* one RHS at a time.
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( KK.EQ.1 ) THEN
CALL DLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
ELSE
CALL DLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
END IF
* Find largest absolute value entry in the vector segment
* X( J1:J2-1, RHS ) as an upper bound for the worst case
* growth in the linear updates.
XNRM( KK ) = DLANGE( 'I', J2-J1, 1, X( J1, RHS ),
$ LDX, W )
*
IF( SCALOC .EQ. ZERO ) THEN
* LATRS found that A is singular through A(j,j) = 0.
* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
* and compute A*x = 0 (or A**T*x = 0). Note that
* X(J1:J2-1, KK) is set by LATRS.
SCALE( RHS ) = ZERO
DO II = 1, J1-1
X( II, KK ) = ZERO
END DO
DO II = J2, N
X( II, KK ) = ZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
ELSE IF( SCALOC * WORK( J+KK*LDS ) .EQ. ZERO ) THEN
* LATRS computed a valid scale factor, but combined with
* the current scaling the solution does not have a
* scale factor > 0.
*
* Set WORK( J+KK*LDS ) to smallest valid scale
* factor and increase SCALOC accordingly.
SCAL = WORK( J+KK*LDS ) / SMLNUM
SCALOC = SCALOC * SCAL
WORK( J+KK*LDS ) = SMLNUM
* If LATRS overestimated the growth, x may be
* rescaled to preserve a valid combined scale
* factor WORK( J, KK ) > 0.
RSCAL = ONE / SCALOC
IF( XNRM( KK ) * RSCAL .LE. BIGNUM ) THEN
XNRM( KK ) = XNRM( KK ) * RSCAL
CALL DSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
SCALOC = ONE
ELSE
* The system op(A) * x = b is badly scaled and its
* solution cannot be represented as (1/scale) * x.
* Set x to zero. This approach deviates from LATRS
* where a completely meaningless non-zero vector
* is returned that is not a solution to op(A) * x = b.
SCALE( RHS ) = ZERO
DO II = 1, N
X( II, KK ) = ZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
END IF
END IF
SCALOC = SCALOC * WORK( J+KK*LDS )
WORK( J+KK*LDS ) = SCALOC
END DO
*
* Linear block updates
*
IF( NOTRAN ) THEN
IF( UPPER ) THEN
IFIRST = J - 1
ILAST = 1
IINC = -1
ELSE
IFIRST = J + 1
ILAST = NBA
IINC = 1
END IF
ELSE
IF( UPPER ) THEN
IFIRST = J + 1
ILAST = NBA
IINC = 1
ELSE
IFIRST = J - 1
ILAST = 1
IINC = -1
END IF
END IF
*
DO I = IFIRST, ILAST, IINC
* I1: row index of the first column in X( I, K )
* I2: row index of the first column in X( I+1, K )
* so the I2 - I1 is the row count of the block X( I, K )
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Prepare the linear update to be executed with GEMM.
* For each column, compute a consistent scaling, a
* scaling factor to survive the linear update, and
* rescale the column segments, if necesssary. Then
* the linear update is safely executed.
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
* Compute consistent scaling
SCAMIN = MIN( WORK( I + KK*LDS), WORK( J + KK*LDS ) )
*
* Compute scaling factor to survive the linear update
* simulating consistent scaling.
*
BNRM = DLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS ))
ANRM = WORK( AWRK + I+(J-1)*NBA )
SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
*
* Simultaneously apply the robust update factor and the
* consistency scaling factor to B( I, KK ) and B( J, KK ).
*
SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
WORK( I+KK*LDS ) = SCAMIN*SCALOC
END IF
*
SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL DSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
WORK( J+KK*LDS ) = SCAMIN*SCALOC
END IF
END DO
*
IF( NOTRAN ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
*
CALL DGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE,
$ A( I1, J1 ), LDA, X( J1, K1 ), LDX,
$ ONE, X( I1, K1 ), LDX )
ELSE
*
* B( I, K ) := B( I, K ) - A( J, I )**T * X( J, K )
*
CALL DGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ ONE, X( I1, K1 ), LDX )
END IF
END DO
END DO
*
* Reduce local scaling factors
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
DO I = 1, NBA
SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
END DO
END DO
*
* Realize consistent scaling
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
DO I = 1, NBA
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
IF( SCAL.NE.ONE )
$ CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
END DO
END IF
END DO
END DO
RETURN
*
* End of DLATRS3
*
END

2060
lapack-netlib/SRC/dtrsyl3.c Normal file

File diff suppressed because it is too large Load Diff

1241
lapack-netlib/SRC/dtrsyl3.f Normal file

File diff suppressed because it is too large Load Diff

View File

@ -469,6 +469,15 @@
ELSE
NB = 64
END IF
ELSE IF( C3.EQ.'SYL' ) THEN
* The upper bound is to prevent overly aggressive scaling.
IF( SNAME ) THEN
NB = MIN( MAX( 48, INT( ( MIN( N1, N2 ) * 16 ) / 100) ),
$ 240 )
ELSE
NB = MIN( MAX( 24, INT( ( MIN( N1, N2 ) * 8 ) / 100) ),
$ 80 )
END IF
END IF
ELSE IF( C2.EQ.'LA' ) THEN
IF( C3.EQ.'UUM' ) THEN
@ -477,6 +486,12 @@
ELSE
NB = 64
END IF
ELSE IF( C3.EQ.'TRS' ) THEN
IF( SNAME ) THEN
NB = 32
ELSE
NB = 32
END IF
END IF
ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN
IF( C3.EQ.'EBZ' ) THEN

605
lapack-netlib/SRC/slarmm.c Normal file
View File

@ -0,0 +1,605 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle_() continue;
#define myceiling_(w) {ceil(w)}
#define myhuge_(w) {HUGE_VAL}
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
#define myexp_(w) my_expfunc(w)
static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;}
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* > \brief \b SLARMM */
/* Definition: */
/* =========== */
/* REAL FUNCTION SLARMM( ANORM, BNORM, CNORM ) */
/* REAL ANORM, BNORM, CNORM */
/* > \par Purpose: */
/* ======= */
/* > */
/* > \verbatim */
/* > */
/* > SLARMM returns a factor s in (0, 1] such that the linear updates */
/* > */
/* > (s * C) - A * (s * B) and (s * C) - (s * A) * B */
/* > */
/* > cannot overflow, where A, B, and C are matrices of conforming */
/* > dimensions. */
/* > */
/* > This is an auxiliary routine so there is no argument checking. */
/* > \endverbatim */
/* Arguments: */
/* ========= */
/* > \param[in] ANORM */
/* > \verbatim */
/* > ANORM is REAL */
/* > The infinity norm of A. ANORM >= 0. */
/* > The number of rows of the matrix A. M >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] BNORM */
/* > \verbatim */
/* > BNORM is REAL */
/* > The infinity norm of B. BNORM >= 0. */
/* > \endverbatim */
/* > */
/* > \param[in] CNORM */
/* > \verbatim */
/* > CNORM is REAL */
/* > The infinity norm of C. CNORM >= 0. */
/* > \endverbatim */
/* > */
/* > */
/* ===================================================================== */
/* > References: */
/* > C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */
/* > Robust Solution of Triangular Linear Systems. In: International */
/* > Conference on Parallel Processing and Applied Mathematics, pages */
/* > 68--78. Springer, 2017. */
/* > */
/* > \ingroup OTHERauxiliary */
/* ===================================================================== */
real slarmm_(real *anorm, real *bnorm, real *cnorm)
{
/* System generated locals */
real ret_val;
/* Local variables */
extern real slamch_(char *);
real bignum, smlnum;
/* Determine machine dependent parameters to control overflow. */
smlnum = slamch_("Safe minimum") / slamch_("Precision");
bignum = 1.f / smlnum / 4.f;
/* Compute a scale factor. */
ret_val = 1.f;
if (*bnorm <= 1.f) {
if (*anorm * *bnorm > bignum - *cnorm) {
ret_val = .5f;
}
} else {
if (*anorm > (bignum - *cnorm) / *bnorm) {
ret_val = .5f / *bnorm;
}
}
return ret_val;
/* ==== End of SLARMM ==== */
} /* slarmm_ */

View File

@ -0,0 +1,99 @@
*> \brief \b SLARMM
*
* Definition:
* ===========
*
* REAL FUNCTION SLARMM( ANORM, BNORM, CNORM )
*
* .. Scalar Arguments ..
* REAL ANORM, BNORM, CNORM
* ..
*
*> \par Purpose:
* =======
*>
*> \verbatim
*>
*> SLARMM returns a factor s in (0, 1] such that the linear updates
*>
*> (s * C) - A * (s * B) and (s * C) - (s * A) * B
*>
*> cannot overflow, where A, B, and C are matrices of conforming
*> dimensions.
*>
*> This is an auxiliary routine so there is no argument checking.
*> \endverbatim
*
* Arguments:
* =========
*
*> \param[in] ANORM
*> \verbatim
*> ANORM is REAL
*> The infinity norm of A. ANORM >= 0.
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] BNORM
*> \verbatim
*> BNORM is REAL
*> The infinity norm of B. BNORM >= 0.
*> \endverbatim
*>
*> \param[in] CNORM
*> \verbatim
*> CNORM is REAL
*> The infinity norm of C. CNORM >= 0.
*> \endverbatim
*>
*>
* =====================================================================
*> References:
*> C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for
*> Robust Solution of Triangular Linear Systems. In: International
*> Conference on Parallel Processing and Applied Mathematics, pages
*> 68--78. Springer, 2017.
*>
*> \ingroup OTHERauxiliary
* =====================================================================
REAL FUNCTION SLARMM( ANORM, BNORM, CNORM )
IMPLICIT NONE
* .. Scalar Arguments ..
REAL ANORM, BNORM, CNORM
* .. Parameters ..
REAL ONE, HALF, FOUR
PARAMETER ( ONE = 1.0E0, HALF = 0.5E+0, FOUR = 4.0E+0 )
* ..
* .. Local Scalars ..
REAL BIGNUM, SMLNUM
* ..
* .. External Functions ..
REAL SLAMCH
EXTERNAL SLAMCH
* ..
* .. Executable Statements ..
*
*
* Determine machine dependent parameters to control overflow.
*
SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
BIGNUM = ( ONE / SMLNUM ) / FOUR
*
* Compute a scale factor.
*
SLARMM = ONE
IF( BNORM .LE. ONE ) THEN
IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN
SLARMM = HALF
END IF
ELSE
IF( ANORM .GT. (BIGNUM - CNORM) / BNORM ) THEN
SLARMM = HALF / BNORM
END IF
END IF
RETURN
*
* ==== End of SLARMM ====
*
END

1262
lapack-netlib/SRC/slatrs3.c Normal file

File diff suppressed because it is too large Load Diff

656
lapack-netlib/SRC/slatrs3.f Normal file
View File

@ -0,0 +1,656 @@
*> \brief \b SLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
*
* Definition:
* ===========
*
* SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
* X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER DIAG, NORMIN, TRANS, UPLO
* INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
* REAL A( LDA, * ), CNORM( * ), SCALE( * ),
* WORK( * ), X( LDX, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SLATRS3 solves one of the triangular systems
*>
*> A * X = B * diag(scale) or A**T * X = B * diag(scale)
*>
*> with scaling to prevent overflow. Here A is an upper or lower
*> triangular matrix, A**T denotes the transpose of A. X and B are
*> n by nrhs matrices and scale is an nrhs element vector of scaling
*> factors. A scaling factor scale(j) is usually less than or equal
*> to 1, chosen such that X(:,j) is less than the overflow threshold.
*> If the matrix A is singular (A(j,j) = 0 for some j), then
*> a non-trivial solution to A*X = 0 is returned. If the system is
*> so badly scaled that the solution cannot be represented as
*> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
*>
*> This is a BLAS-3 version of LATRS for solving several right
*> hand sides simultaneously.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies whether the matrix A is upper or lower triangular.
*> = 'U': Upper triangular
*> = 'L': Lower triangular
*> \endverbatim
*>
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the operation applied to A.
*> = 'N': Solve A * x = s*b (No transpose)
*> = 'T': Solve A**T* x = s*b (Transpose)
*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose)
*> \endverbatim
*>
*> \param[in] DIAG
*> \verbatim
*> DIAG is CHARACTER*1
*> Specifies whether or not the matrix A is unit triangular.
*> = 'N': Non-unit triangular
*> = 'U': Unit triangular
*> \endverbatim
*>
*> \param[in] NORMIN
*> \verbatim
*> NORMIN is CHARACTER*1
*> Specifies whether CNORM has been set or not.
*> = 'Y': CNORM contains the column norms on entry
*> = 'N': CNORM is not set on entry. On exit, the norms will
*> be computed and stored in CNORM.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of columns of X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is REAL array, dimension (LDA,N)
*> The triangular matrix A. If UPLO = 'U', the leading n by n
*> upper triangular part of the array A contains the upper
*> triangular matrix, and the strictly lower triangular part of
*> A is not referenced. If UPLO = 'L', the leading n by n lower
*> triangular part of the array A contains the lower triangular
*> matrix, and the strictly upper triangular part of A is not
*> referenced. If DIAG = 'U', the diagonal elements of A are
*> also not referenced and are assumed to be 1.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max (1,N).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is REAL array, dimension (LDX,NRHS)
*> On entry, the right hand side B of the triangular system.
*> On exit, X is overwritten by the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max (1,N).
*> \endverbatim
*>
*> \param[out] SCALE
*> \verbatim
*> SCALE is REAL array, dimension (NRHS)
*> The scaling factor s(k) is for the triangular system
*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
*> If SCALE = 0, the matrix A is singular or badly scaled.
*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
*> that is an exact or approximate solution to A*x(:,k) = 0
*> is returned. If the system so badly scaled that solution
*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
*> is returned.
*> \endverbatim
*>
*> \param[in,out] CNORM
*> \verbatim
*> CNORM is REAL array, dimension (N)
*>
*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
*> contains the norm of the off-diagonal part of the j-th column
*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
*> must be greater than or equal to the 1-norm.
*>
*> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
*> returns the 1-norm of the offdiagonal part of the j-th column
*> of A.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is REAL array, dimension (LWORK).
*> On exit, if INFO = 0, WORK(1) returns the optimal size of
*> WORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> LWORK is INTEGER
*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
*> NBA = (N + NB - 1)/NB and NB is the optimal block size.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal dimensions of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*> \par Further Details:
* =====================
* \verbatim
* The algorithm follows the structure of a block triangular solve.
* The diagonal block is solved with a call to the robust the triangular
* solver LATRS for every right-hand side RHS = 1, ..., NRHS
* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
* where op( A ) = A or op( A ) = A**T.
* The linear block updates operate on block columns of X,
* B( I, K ) - op(A( I, J )) * X( J, K )
* and use GEMM. To avoid overflow in the linear block update, the worst case
* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
* such that
* || s * B( I, RHS )||_oo
* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
*
* Once all columns of a block column have been rescaled (BLAS-1), the linear
* update is executed with GEMM without overflow.
*
* To limit rescaling, local scale factors track the scaling of column segments.
* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
* per right-hand side column RHS = 1, ..., NRHS. The global scale factor
* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
* I = 1, ..., NBA.
* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
* linear update of potentially inconsistently scaled vector segments
* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
* if necessary, rescales the blocks prior to calling GEMM.
*
* \endverbatim
* =====================================================================
* References:
* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
* Parallel robust solution of triangular linear systems. Concurrency
* and Computation: Practice and Experience, 31(19), e5064.
*
* Contributor:
* Angelika Schwarz, Umea University, Sweden.
*
* =====================================================================
SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
$ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
IMPLICIT NONE
*
* .. Scalar Arguments ..
CHARACTER DIAG, TRANS, NORMIN, UPLO
INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
REAL A( LDA, * ), CNORM( * ), X( LDX, * ),
$ SCALE( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
PARAMETER ( NRHSMIN = 2, NBRHS = 32 )
PARAMETER ( NBMIN = 8, NBMAX = 64 )
* ..
* .. Local Arrays ..
REAL W( NBMAX ), XNRM( NBRHS )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
$ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
$ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
REAL ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
$ SCAMIN, SMLNUM, TMAX
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
REAL SLAMCH, SLANGE, SLARMM
EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE, SLARMM
* ..
* .. External Subroutines ..
EXTERNAL SLATRS, SSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN
* ..
* .. Executable Statements ..
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
NOTRAN = LSAME( TRANS, 'N' )
NOUNIT = LSAME( DIAG, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
* Partition A and X into blocks.
*
NB = MAX( 8, ILAENV( 1, 'SLATRS', '', N, N, -1, -1 ) )
NB = MIN( NBMAX, NB )
NBA = MAX( 1, (N + NB - 1) / NB )
NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
*
* Compute the workspace
*
* The workspace comprises two parts.
* The first part stores the local scale factors. Each simultaneously
* computed right-hand side requires one local scale factor per block
* row. WORK( I + KK * LDS ) is the scale factor of the vector
* segment associated with the I-th block row and the KK-th vector
* in the block column.
LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
LDS = NBA
* The second part stores upper bounds of the triangular A. There are
* a total of NBA x NBA blocks, of which only the upper triangular
* part or the lower triangular part is referenced. The upper bound of
* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
LANRM = NBA * NBA
AWRK = LSCALE
WORK( 1 ) = LSCALE + LANRM
*
* Test the input parameters.
*
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -2
ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
INFO = -3
ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
$ LSAME( NORMIN, 'N' ) ) THEN
INFO = -4
ELSE IF( N.LT.0 ) THEN
INFO = -5
ELSE IF( NRHS.LT.0 ) THEN
INFO = -6
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -8
ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
INFO = -10
ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
INFO = -14
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SLATRS3', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Initialize scaling factors
*
DO KK = 1, NRHS
SCALE( KK ) = ONE
END DO
*
* Quick return if possible
*
IF( MIN( N, NRHS ).EQ.0 )
$ RETURN
*
* Determine machine dependent constant to control overflow.
*
BIGNUM = SLAMCH( 'Overflow' )
SMLNUM = SLAMCH( 'Safe Minimum' )
*
* Use unblocked code for small problems
*
IF( NRHS.LT.NRHSMIN ) THEN
CALL SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
$ SCALE( 1 ), CNORM, INFO )
DO K = 2, NRHS
CALL SLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Compute norms of blocks of A excluding diagonal blocks and find
* the block with the largest norm TMAX.
*
TMAX = ZERO
DO J = 1, NBA
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
IF ( UPPER ) THEN
IFIRST = 1
ILAST = J - 1
ELSE
IFIRST = J + 1
ILAST = NBA
END IF
DO I = IFIRST, ILAST
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Compute upper bound of A( I1:I2-1, J1:J2-1 ).
*
IF( NOTRAN ) THEN
ANRM = SLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + I+(J-1)*NBA ) = ANRM
ELSE
ANRM = SLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + J+(I-1)*NBA ) = ANRM
END IF
TMAX = MAX( TMAX, ANRM )
END DO
END DO
*
IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN
*
* Some matrix entries have huge absolute value. At least one upper
* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
* number, either due to overflow in LANGE or due to Inf in A.
* Fall back to LATRS. Set normin = 'N' for every right-hand side to
* force computation of TSCAL in LATRS to avoid the likely overflow
* in the computation of the column norms CNORM.
*
DO K = 1, NRHS
CALL SLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Every right-hand side requires workspace to store NBA local scale
* factors. To save workspace, X is computed successively in block columns
* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
DO K = 1, NBX
* Loop over block columns (index = K) of X and, for column-wise scalings,
* over individual columns (index = KK).
* K1: column index of the first column in X( J, K )
* K2: column index of the first column in X( J, K+1 )
* so the K2 - K1 is the column count of the block X( J, K )
K1 = (K-1)*NBRHS + 1
K2 = MIN( K*NBRHS, NRHS ) + 1
*
* Initialize local scaling factors of current block column X( J, K )
*
DO KK = 1, K2 - K1
DO I = 1, NBA
WORK( I+KK*LDS ) = ONE
END DO
END DO
*
IF( NOTRAN ) THEN
*
* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = NBA
JLAST = 1
JINC = -1
ELSE
JFIRST = 1
JLAST = NBA
JINC = 1
END IF
ELSE
*
* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = 1
JLAST = NBA
JINC = 1
ELSE
JFIRST = NBA
JLAST = 1
JINC = -1
END IF
END IF
*
DO J = JFIRST, JLAST, JINC
* J1: row index of the first row in A( J, J )
* J2: row index of the first row in A( J+1, J+1 )
* so that J2 - J1 is the row count of the block A( J, J )
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
*
* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
* for all right-hand sides in the current block column,
* one RHS at a time.
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( KK.EQ.1 ) THEN
CALL SLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
ELSE
CALL SLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
END IF
* Find largest absolute value entry in the vector segment
* X( J1:J2-1, RHS ) as an upper bound for the worst case
* growth in the linear updates.
XNRM( KK ) = SLANGE( 'I', J2-J1, 1, X( J1, RHS ),
$ LDX, W )
*
IF( SCALOC .EQ. ZERO ) THEN
* LATRS found that A is singular through A(j,j) = 0.
* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
* and compute A*x = 0 (or A**T*x = 0). Note that
* X(J1:J2-1, KK) is set by LATRS.
SCALE( RHS ) = ZERO
DO II = 1, J1-1
X( II, KK ) = ZERO
END DO
DO II = J2, N
X( II, KK ) = ZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
* LATRS computed a valid scale factor, but combined with
* the current scaling the solution does not have a
* scale factor > 0.
*
* Set WORK( J+KK*LDS ) to smallest valid scale
* factor and increase SCALOC accordingly.
SCAL = WORK( J+KK*LDS ) / SMLNUM
SCALOC = SCALOC * SCAL
WORK( J+KK*LDS ) = SMLNUM
* If LATRS overestimated the growth, x may be
* rescaled to preserve a valid combined scale
* factor WORK( J, KK ) > 0.
RSCAL = ONE / SCALOC
IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
XNRM( KK ) = XNRM( KK ) * RSCAL
CALL SSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
SCALOC = ONE
ELSE
* The system op(A) * x = b is badly scaled and its
* solution cannot be represented as (1/scale) * x.
* Set x to zero. This approach deviates from LATRS
* where a completely meaningless non-zero vector
* is returned that is not a solution to op(A) * x = b.
SCALE( RHS ) = ZERO
DO II = 1, N
X( II, KK ) = ZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
END IF
END IF
SCALOC = SCALOC * WORK( J+KK*LDS )
WORK( J+KK*LDS ) = SCALOC
END DO
*
* Linear block updates
*
IF( NOTRAN ) THEN
IF( UPPER ) THEN
IFIRST = J - 1
ILAST = 1
IINC = -1
ELSE
IFIRST = J + 1
ILAST = NBA
IINC = 1
END IF
ELSE
IF( UPPER ) THEN
IFIRST = J + 1
ILAST = NBA
IINC = 1
ELSE
IFIRST = J - 1
ILAST = 1
IINC = -1
END IF
END IF
*
DO I = IFIRST, ILAST, IINC
* I1: row index of the first column in X( I, K )
* I2: row index of the first column in X( I+1, K )
* so the I2 - I1 is the row count of the block X( I, K )
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Prepare the linear update to be executed with GEMM.
* For each column, compute a consistent scaling, a
* scaling factor to survive the linear update, and
* rescale the column segments, if necesssary. Then
* the linear update is safely executed.
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
* Compute consistent scaling
SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
*
* Compute scaling factor to survive the linear update
* simulating consistent scaling.
*
BNRM = SLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS ))
ANRM = WORK( AWRK + I+(J-1)*NBA )
SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM )
*
* Simultaneously apply the robust update factor and the
* consistency scaling factor to B( I, KK ) and B( J, KK ).
*
SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
WORK( I+KK*LDS ) = SCAMIN*SCALOC
END IF
*
SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL SSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
WORK( J+KK*LDS ) = SCAMIN*SCALOC
END IF
END DO
*
IF( NOTRAN ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
*
CALL SGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE,
$ A( I1, J1 ), LDA, X( J1, K1 ), LDX,
$ ONE, X( I1, K1 ), LDX )
ELSE
*
* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
*
CALL SGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ ONE, X( I1, K1 ), LDX )
END IF
END DO
END DO
*
* Reduce local scaling factors
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
DO I = 1, NBA
SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
END DO
END DO
*
* Realize consistent scaling
*
DO KK = 1, K2-K1
RHS = K1 + KK - 1
IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
DO I = 1, NBA
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
IF( SCAL.NE.ONE )
$ CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
END DO
END IF
END DO
END DO
RETURN
*
* End of SLATRS3
*
END

2066
lapack-netlib/SRC/strsyl3.c Normal file

File diff suppressed because it is too large Load Diff

1244
lapack-netlib/SRC/strsyl3.f Normal file

File diff suppressed because it is too large Load Diff

1283
lapack-netlib/SRC/zlatrs3.c Normal file

File diff suppressed because it is too large Load Diff

667
lapack-netlib/SRC/zlatrs3.f Normal file
View File

@ -0,0 +1,667 @@
*> \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
*
* Definition:
* ===========
*
* SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
* X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
*
* .. Scalar Arguments ..
* CHARACTER DIAG, NORMIN, TRANS, UPLO
* INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
* DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * )
* COMPLEX*16 A( LDA, * ), X( LDX, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZLATRS3 solves one of the triangular systems
*>
*> A * X = B * diag(scale), A**T * X = B * diag(scale), or
*> A**H * X = B * diag(scale)
*>
*> with scaling to prevent overflow. Here A is an upper or lower
*> triangular matrix, A**T denotes the transpose of A, A**H denotes the
*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
*> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
*> is usually less than or equal to 1, chosen such that X(:,j) is less
*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
*> for some j), then a non-trivial solution to A*X = 0 is returned. If
*> the system is so badly scaled that the solution cannot be represented
*> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
*>
*> This is a BLAS-3 version of LATRS for solving several right
*> hand sides simultaneously.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] UPLO
*> \verbatim
*> UPLO is CHARACTER*1
*> Specifies whether the matrix A is upper or lower triangular.
*> = 'U': Upper triangular
*> = 'L': Lower triangular
*> \endverbatim
*>
*> \param[in] TRANS
*> \verbatim
*> TRANS is CHARACTER*1
*> Specifies the operation applied to A.
*> = 'N': Solve A * x = s*b (No transpose)
*> = 'T': Solve A**T* x = s*b (Transpose)
*> = 'C': Solve A**T* x = s*b (Conjugate transpose)
*> \endverbatim
*>
*> \param[in] DIAG
*> \verbatim
*> DIAG is CHARACTER*1
*> Specifies whether or not the matrix A is unit triangular.
*> = 'N': Non-unit triangular
*> = 'U': Unit triangular
*> \endverbatim
*>
*> \param[in] NORMIN
*> \verbatim
*> NORMIN is CHARACTER*1
*> Specifies whether CNORM has been set or not.
*> = 'Y': CNORM contains the column norms on entry
*> = 'N': CNORM is not set on entry. On exit, the norms will
*> be computed and stored in CNORM.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the matrix A. N >= 0.
*> \endverbatim
*>
*> \param[in] NRHS
*> \verbatim
*> NRHS is INTEGER
*> The number of columns of X. NRHS >= 0.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX*16 array, dimension (LDA,N)
*> The triangular matrix A. If UPLO = 'U', the leading n by n
*> upper triangular part of the array A contains the upper
*> triangular matrix, and the strictly lower triangular part of
*> A is not referenced. If UPLO = 'L', the leading n by n lower
*> triangular part of the array A contains the lower triangular
*> matrix, and the strictly upper triangular part of A is not
*> referenced. If DIAG = 'U', the diagonal elements of A are
*> also not referenced and are assumed to be 1.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> The leading dimension of the array A. LDA >= max (1,N).
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is COMPLEX*16 array, dimension (LDX,NRHS)
*> On entry, the right hand side B of the triangular system.
*> On exit, X is overwritten by the solution matrix X.
*> \endverbatim
*>
*> \param[in] LDX
*> \verbatim
*> LDX is INTEGER
*> The leading dimension of the array X. LDX >= max (1,N).
*> \endverbatim
*>
*> \param[out] SCALE
*> \verbatim
*> SCALE is DOUBLE PRECISION array, dimension (NRHS)
*> The scaling factor s(k) is for the triangular system
*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
*> If SCALE = 0, the matrix A is singular or badly scaled.
*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
*> that is an exact or approximate solution to A*x(:,k) = 0
*> is returned. If the system so badly scaled that solution
*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
*> is returned.
*> \endverbatim
*>
*> \param[in,out] CNORM
*> \verbatim
*> CNORM is DOUBLE PRECISION array, dimension (N)
*>
*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
*> contains the norm of the off-diagonal part of the j-th column
*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
*> must be greater than or equal to the 1-norm.
*>
*> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
*> returns the 1-norm of the offdiagonal part of the j-th column
*> of A.
*> \endverbatim
*>
*> \param[out] WORK
*> \verbatim
*> WORK is DOUBLE PRECISION array, dimension (LWORK).
*> On exit, if INFO = 0, WORK(1) returns the optimal size of
*> WORK.
*> \endverbatim
*>
*> \param[in] LWORK
*> LWORK is INTEGER
*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
*> NBA = (N + NB - 1)/NB and NB is the optimal block size.
*>
*> If LWORK = -1, then a workspace query is assumed; the routine
*> only calculates the optimal dimensions of the WORK array, returns
*> this value as the first entry of the WORK array, and no error
*> message related to LWORK is issued by XERBLA.
*>
*> \param[out] INFO
*> \verbatim
*> INFO is INTEGER
*> = 0: successful exit
*> < 0: if INFO = -k, the k-th argument had an illegal value
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERauxiliary
*> \par Further Details:
* =====================
* \verbatim
* The algorithm follows the structure of a block triangular solve.
* The diagonal block is solved with a call to the robust the triangular
* solver LATRS for every right-hand side RHS = 1, ..., NRHS
* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
* where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
* The linear block updates operate on block columns of X,
* B( I, K ) - op(A( I, J )) * X( J, K )
* and use GEMM. To avoid overflow in the linear block update, the worst case
* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
* such that
* || s * B( I, RHS )||_oo
* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
*
* Once all columns of a block column have been rescaled (BLAS-1), the linear
* update is executed with GEMM without overflow.
*
* To limit rescaling, local scale factors track the scaling of column segments.
* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
* per right-hand side column RHS = 1, ..., NRHS. The global scale factor
* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
* I = 1, ..., NBA.
* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
* linear update of potentially inconsistently scaled vector segments
* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
* if necessary, rescales the blocks prior to calling GEMM.
*
* \endverbatim
* =====================================================================
* References:
* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
* Parallel robust solution of triangular linear systems. Concurrency
* and Computation: Practice and Experience, 31(19), e5064.
*
* Contributor:
* Angelika Schwarz, Umea University, Sweden.
*
* =====================================================================
SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
$ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
IMPLICIT NONE
*
* .. Scalar Arguments ..
CHARACTER DIAG, TRANS, NORMIN, UPLO
INTEGER INFO, LDA, LWORK, LDX, N, NRHS
* ..
* .. Array Arguments ..
COMPLEX*16 A( LDA, * ), X( LDX, * )
DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
COMPLEX*16 CZERO, CONE
PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) )
PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) )
INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
PARAMETER ( NRHSMIN = 2, NBRHS = 32 )
PARAMETER ( NBMIN = 8, NBMAX = 64 )
* ..
* .. Local Arrays ..
DOUBLE PRECISION W( NBMAX ), XNRM( NBRHS )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
$ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
$ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
DOUBLE PRECISION ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
$ SCAMIN, SMLNUM, TMAX
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, ZLANGE, DLARMM
EXTERNAL ILAENV, LSAME, DLAMCH, ZLANGE, DLARMM
* ..
* .. External Subroutines ..
EXTERNAL ZLATRS, ZDSCAL, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN
* ..
* .. Executable Statements ..
*
INFO = 0
UPPER = LSAME( UPLO, 'U' )
NOTRAN = LSAME( TRANS, 'N' )
NOUNIT = LSAME( DIAG, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
* Partition A and X into blocks.
*
NB = MAX( NBMIN, ILAENV( 1, 'ZLATRS', '', N, N, -1, -1 ) )
NB = MIN( NBMAX, NB )
NBA = MAX( 1, (N + NB - 1) / NB )
NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
*
* Compute the workspace
*
* The workspace comprises two parts.
* The first part stores the local scale factors. Each simultaneously
* computed right-hand side requires one local scale factor per block
* row. WORK( I + KK * LDS ) is the scale factor of the vector
* segment associated with the I-th block row and the KK-th vector
* in the block column.
LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
LDS = NBA
* The second part stores upper bounds of the triangular A. There are
* a total of NBA x NBA blocks, of which only the upper triangular
* part or the lower triangular part is referenced. The upper bound of
* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
LANRM = NBA * NBA
AWRK = LSCALE
WORK( 1 ) = LSCALE + LANRM
*
* Test the input parameters.
*
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
INFO = -1
ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
$ LSAME( TRANS, 'C' ) ) THEN
INFO = -2
ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
INFO = -3
ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
$ LSAME( NORMIN, 'N' ) ) THEN
INFO = -4
ELSE IF( N.LT.0 ) THEN
INFO = -5
ELSE IF( NRHS.LT.0 ) THEN
INFO = -6
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
INFO = -8
ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
INFO = -10
ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
INFO = -14
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'ZLATRS3', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Initialize scaling factors
*
DO KK = 1, NRHS
SCALE( KK ) = ONE
END DO
*
* Quick return if possible
*
IF( MIN( N, NRHS ).EQ.0 )
$ RETURN
*
* Determine machine dependent constant to control overflow.
*
BIGNUM = DLAMCH( 'Overflow' )
SMLNUM = DLAMCH( 'Safe Minimum' )
*
* Use unblocked code for small problems
*
IF( NRHS.LT.NRHSMIN ) THEN
CALL ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
$ SCALE( 1 ), CNORM, INFO )
DO K = 2, NRHS
CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Compute norms of blocks of A excluding diagonal blocks and find
* the block with the largest norm TMAX.
*
TMAX = ZERO
DO J = 1, NBA
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
IF ( UPPER ) THEN
IFIRST = 1
ILAST = J - 1
ELSE
IFIRST = J + 1
ILAST = NBA
END IF
DO I = IFIRST, ILAST
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Compute upper bound of A( I1:I2-1, J1:J2-1 ).
*
IF( NOTRAN ) THEN
ANRM = ZLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + I+(J-1)*NBA ) = ANRM
ELSE
ANRM = ZLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
WORK( AWRK + J+(I-1) * NBA ) = ANRM
END IF
TMAX = MAX( TMAX, ANRM )
END DO
END DO
*
IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
*
* Some matrix entries have huge absolute value. At least one upper
* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
* number, either due to overflow in LANGE or due to Inf in A.
* Fall back to LATRS. Set normin = 'N' for every right-hand side to
* force computation of TSCAL in LATRS to avoid the likely overflow
* in the computation of the column norms CNORM.
*
DO K = 1, NRHS
CALL ZLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
$ SCALE( K ), CNORM, INFO )
END DO
RETURN
END IF
*
* Every right-hand side requires workspace to store NBA local scale
* factors. To save workspace, X is computed successively in block columns
* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
DO K = 1, NBX
* Loop over block columns (index = K) of X and, for column-wise scalings,
* over individual columns (index = KK).
* K1: column index of the first column in X( J, K )
* K2: column index of the first column in X( J, K+1 )
* so the K2 - K1 is the column count of the block X( J, K )
K1 = (K-1)*NBRHS + 1
K2 = MIN( K*NBRHS, NRHS ) + 1
*
* Initialize local scaling factors of current block column X( J, K )
*
DO KK = 1, K2 - K1
DO I = 1, NBA
WORK( I+KK*LDS ) = ONE
END DO
END DO
*
IF( NOTRAN ) THEN
*
* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
*
IF( UPPER ) THEN
JFIRST = NBA
JLAST = 1
JINC = -1
ELSE
JFIRST = 1
JLAST = NBA
JINC = 1
END IF
ELSE
*
* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
* where op(A) = A**T or op(A) = A**H
*
IF( UPPER ) THEN
JFIRST = 1
JLAST = NBA
JINC = 1
ELSE
JFIRST = NBA
JLAST = 1
JINC = -1
END IF
END IF
DO J = JFIRST, JLAST, JINC
* J1: row index of the first row in A( J, J )
* J2: row index of the first row in A( J+1, J+1 )
* so that J2 - J1 is the row count of the block A( J, J )
J1 = (J-1)*NB + 1
J2 = MIN( J*NB, N ) + 1
*
* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
*
DO KK = 1, K2 - K1
RHS = K1 + KK - 1
IF( KK.EQ.1 ) THEN
CALL ZLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
ELSE
CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
$ A( J1, J1 ), LDA, X( J1, RHS ),
$ SCALOC, CNORM, INFO )
END IF
* Find largest absolute value entry in the vector segment
* X( J1:J2-1, RHS ) as an upper bound for the worst case
* growth in the linear updates.
XNRM( KK ) = ZLANGE( 'I', J2-J1, 1, X( J1, RHS ),
$ LDX, W )
*
IF( SCALOC .EQ. ZERO ) THEN
* LATRS found that A is singular through A(j,j) = 0.
* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
* set by LATRS.
SCALE( RHS ) = ZERO
DO II = 1, J1-1
X( II, KK ) = CZERO
END DO
DO II = J2, N
X( II, KK ) = CZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
* LATRS computed a valid scale factor, but combined with
* the current scaling the solution does not have a
* scale factor > 0.
*
* Set WORK( J+KK*LDS ) to smallest valid scale
* factor and increase SCALOC accordingly.
SCAL = WORK( J+KK*LDS ) / SMLNUM
SCALOC = SCALOC * SCAL
WORK( J+KK*LDS ) = SMLNUM
* If LATRS overestimated the growth, x may be
* rescaled to preserve a valid combined scale
* factor WORK( J, KK ) > 0.
RSCAL = ONE / SCALOC
IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
XNRM( KK ) = XNRM( KK ) * RSCAL
CALL ZDSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
SCALOC = ONE
ELSE
* The system op(A) * x = b is badly scaled and its
* solution cannot be represented as (1/scale) * x.
* Set x to zero. This approach deviates from LATRS
* where a completely meaningless non-zero vector
* is returned that is not a solution to op(A) * x = b.
SCALE( RHS ) = ZERO
DO II = 1, N
X( II, KK ) = CZERO
END DO
* Discard the local scale factors.
DO II = 1, NBA
WORK( II+KK*LDS ) = ONE
END DO
SCALOC = ONE
END IF
END IF
SCALOC = SCALOC * WORK( J+KK*LDS )
WORK( J+KK*LDS ) = SCALOC
END DO
*
* Linear block updates
*
IF( NOTRAN ) THEN
IF( UPPER ) THEN
IFIRST = J - 1
ILAST = 1
IINC = -1
ELSE
IFIRST = J + 1
ILAST = NBA
IINC = 1
END IF
ELSE
IF( UPPER ) THEN
IFIRST = J + 1
ILAST = NBA
IINC = 1
ELSE
IFIRST = J - 1
ILAST = 1
IINC = -1
END IF
END IF
*
DO I = IFIRST, ILAST, IINC
* I1: row index of the first column in X( I, K )
* I2: row index of the first column in X( I+1, K )
* so the I2 - I1 is the row count of the block X( I, K )
I1 = (I-1)*NB + 1
I2 = MIN( I*NB, N ) + 1
*
* Prepare the linear update to be executed with GEMM.
* For each column, compute a consistent scaling, a
* scaling factor to survive the linear update, and
* rescale the column segments, if necesssary. Then
* the linear update is safely executed.
*
DO KK = 1, K2 - K1
RHS = K1 + KK - 1
* Compute consistent scaling
SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
*
* Compute scaling factor to survive the linear update
* simulating consistent scaling.
*
BNRM = ZLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
ANRM = WORK( AWRK + I+(J-1)*NBA )
SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
*
* Simultaneously apply the robust update factor and the
* consistency scaling factor to X( I, KK ) and X( J, KK ).
*
SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
WORK( I+KK*LDS ) = SCAMIN*SCALOC
END IF
*
SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
IF( SCAL.NE.ONE ) THEN
CALL ZDSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
WORK( J+KK*LDS ) = SCAMIN*SCALOC
END IF
END DO
*
IF( NOTRAN ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
*
CALL ZGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( I1, J1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
ELSE IF( LSAME( TRANS, 'T' ) ) THEN
*
* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
*
CALL ZGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
ELSE
*
* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
*
CALL ZGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
$ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
$ CONE, X( I1, K1 ), LDX )
END IF
END DO
END DO
*
* Reduce local scaling factors
*
DO KK = 1, K2 - K1
RHS = K1 + KK - 1
DO I = 1, NBA
SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
END DO
END DO
*
* Realize consistent scaling
*
DO KK = 1, K2 - K1
RHS = K1 + KK - 1
IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
DO I = 1, NBA
I1 = (I - 1) * NB + 1
I2 = MIN( I * NB, N ) + 1
SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
IF( SCAL.NE.ONE )
$ CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
END DO
END IF
END DO
END DO
RETURN
*
* End of ZLATRS3
*
END

2027
lapack-netlib/SRC/ztrsyl3.c Normal file

File diff suppressed because it is too large Load Diff

1142
lapack-netlib/SRC/ztrsyl3.f Normal file

File diff suppressed because it is too large Load Diff

View File

@ -40,7 +40,7 @@ set(SEIGTST schkee.F
sget54.f sglmts.f sgqrts.f sgrqts.f sgsvts3.f
shst01.f slarfy.f slarhs.f slatm4.f slctes.f slctsx.f slsets.f sort01.f
sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
sstt22.f ssyt21.f ssyt22.f)
sstt22.f ssyl01.f ssyt21.f ssyt22.f)
set(CEIGTST cchkee.F
cbdt01.f cbdt02.f cbdt03.f cbdt05.f
@ -56,7 +56,7 @@ set(CEIGTST cchkee.F
cget54.f cglmts.f cgqrts.f cgrqts.f cgsvts3.f
chbt21.f chet21.f chet22.f chpt21.f chst01.f
clarfy.f clarhs.f clatm4.f clctes.f clctsx.f clsets.f csbmv.f
csgt01.f cslect.f
csgt01.f cslect.f csyl01.f
cstt21.f cstt22.f cunt01.f cunt03.f)
set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
@ -77,7 +77,7 @@ set(DEIGTST dchkee.F
dget54.f dglmts.f dgqrts.f dgrqts.f dgsvts3.f
dhst01.f dlarfy.f dlarhs.f dlatm4.f dlctes.f dlctsx.f dlsets.f dort01.f
dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
dstt22.f dsyt21.f dsyt22.f)
dstt22.f dsyl01.f dsyt21.f dsyt22.f)
set(ZEIGTST zchkee.F
zbdt01.f zbdt02.f zbdt03.f zbdt05.f
@ -93,7 +93,7 @@ set(ZEIGTST zchkee.F
zget54.f zglmts.f zgqrts.f zgrqts.f zgsvts3.f
zhbt21.f zhet21.f zhet22.f zhpt21.f zhst01.f
zlarfy.f zlarhs.f zlatm4.f zlctes.f zlctsx.f zlsets.f zsbmv.f
zsgt01.f zslect.f
zsgt01.f zslect.f zsyl01.f
zstt21.f zstt22.f zunt01.f zunt03.f)
macro(add_eig_executable name)

View File

@ -62,7 +62,7 @@ SEIGTST = schkee.o \
sget54.o sglmts.o sgqrts.o sgrqts.o sgsvts3.o \
shst01.o slarfy.o slarhs.o slatm4.o slctes.o slctsx.o slsets.o sort01.o \
sort03.o ssbt21.o ssgt01.o sslect.o sspt21.o sstt21.o \
sstt22.o ssyt21.o ssyt22.o
sstt22.o ssyl01.o ssyt21.o ssyt22.o
CEIGTST = cchkee.o \
cbdt01.o cbdt02.o cbdt03.o cbdt05.o \
@ -78,7 +78,7 @@ CEIGTST = cchkee.o \
cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts3.o \
chbt21.o chet21.o chet22.o chpt21.o chst01.o \
clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
csgt01.o cslect.o \
csgt01.o cslect.o csyl01.o\
cstt21.o cstt22.o cunt01.o cunt03.o
DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \
@ -99,7 +99,7 @@ DEIGTST = dchkee.o \
dget54.o dglmts.o dgqrts.o dgrqts.o dgsvts3.o \
dhst01.o dlarfy.o dlarhs.o dlatm4.o dlctes.o dlctsx.o dlsets.o dort01.o \
dort03.o dsbt21.o dsgt01.o dslect.o dspt21.o dstt21.o \
dstt22.o dsyt21.o dsyt22.o
dstt22.o dsyl01.o dsyt21.o dsyt22.o
ZEIGTST = zchkee.o \
zbdt01.o zbdt02.o zbdt03.o zbdt05.o \
@ -115,7 +115,7 @@ ZEIGTST = zchkee.o \
zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts3.o \
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
zsgt01.o zslect.o \
zsgt01.o zslect.o zsyl01.o\
zstt21.o zstt22.o zunt01.o zunt03.o
.PHONY: all

View File

@ -23,7 +23,7 @@
*> \verbatim
*>
*> CCHKEC tests eigen- condition estimation routines
*> CTRSYL, CTREXC, CTRSNA, CTRSEN
*> CTRSYL, CTRSYL3, CTREXC, CTRSNA, CTRSEN
*>
*> In all cases, the routine runs through a fixed set of numerical
*> examples, subjects them to various tests, and compares the test
@ -88,17 +88,17 @@
* .. Local Scalars ..
LOGICAL OK
CHARACTER*3 PATH
INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL,
$ NTESTS, NTREXC, NTRSYL
REAL EPS, RTREXC, RTRSYL, SFMIN
INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3,
$ LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL
REAL EPS, RTREXC, SFMIN
* ..
* .. Local Arrays ..
INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ),
$ NTRSNA( 3 )
REAL RTRSEN( 3 ), RTRSNA( 3 )
INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
$ LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 )
REAL RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
* ..
* .. External Subroutines ..
EXTERNAL CERREC, CGET35, CGET36, CGET37, CGET38
EXTERNAL CERREC, CGET35, CGET36, CGET37, CGET38, CSYL01
* ..
* .. External Functions ..
REAL SLAMCH
@ -120,10 +120,24 @@
$ CALL CERREC( PATH, NOUT )
*
OK = .TRUE.
CALL CGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN )
IF( RTRSYL.GT.THRESH ) THEN
CALL CGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN )
IF( RTRSYL( 1 ).GT.THRESH ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
END IF
*
CALL CSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
IF( FTRSYL( 1 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
END IF
IF( FTRSYL( 2 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
END IF
IF( FTRSYL( 3 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
END IF
*
CALL CGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@ -169,6 +183,12 @@
$ / ' Safe minimum (SFMIN) = ', E16.6, / )
9992 FORMAT( ' Routines pass computational tests if test ratio is ',
$ 'less than', F8.2, / / )
9972 FORMAT( 'CTRSYL and CTRSYL3 compute an inconsistent scale ',
$ 'factor in ', I8, ' tests.')
9971 FORMAT( 'Error in CTRSYL3: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
9970 FORMAT( 'Error in CTRSYL: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
RETURN
*
* End of CCHKEC

View File

@ -23,7 +23,7 @@
*>
*> CERREC tests the error exits for the routines for eigen- condition
*> estimation for REAL matrices:
*> CTRSYL, CTREXC, CTRSNA and CTRSEN.
*> CTRSYL, CTRSYL3, CTREXC, CTRSNA and CTRSEN.
*> \endverbatim
*
* Arguments:
@ -77,12 +77,12 @@
* ..
* .. Local Arrays ..
LOGICAL SEL( NMAX )
REAL RW( LW ), S( NMAX ), SEP( NMAX )
REAL RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX )
COMPLEX A( NMAX, NMAX ), B( NMAX, NMAX ),
$ C( NMAX, NMAX ), WORK( LW ), X( NMAX )
* ..
* .. External Subroutines ..
EXTERNAL CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL
EXTERNAL CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL, CTRSYL3
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -141,6 +141,43 @@
CALL CHKXER( 'CTRSYL', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test CTRSYL3
*
SRNAMT = 'CTRSYL3'
INFOT = 1
CALL CTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL CTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL CTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL CTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL CTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 7
CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 9
CALL CTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 11
CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test CTREXC
*
SRNAMT = 'CTREXC'

View File

@ -0,0 +1,294 @@
*> \brief \b CSYL01
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
*
* .. Scalar Arguments ..
* INTEGER KNT
* REAL THRESH
* ..
* .. Array Arguments ..
* INTEGER NFAIL( 3 ), NINFO( 2 )
* REAL RMAX( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CSYL01 tests CTRSYL and CTRSYL3, routines for solving the Sylvester matrix
*> equation
*>
*> op(A)*X + ISGN*X*op(B) = scale*C,
*>
*> where op(A) and op(B) are both upper triangular form, op() represents an
*> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output
*> less than or equal to 1, chosen to avoid overflow in X.
*>
*> The test code verifies that the following residual does not exceed
*> the provided threshold:
*>
*> norm(op(A)*X + ISGN*X*op(B) - scale*C) /
*> (EPS*max(norm(A),norm(B))*norm(X))
*>
*> This routine complements CGET35 by testing with larger,
*> random matrices, of which some require rescaling of X to avoid overflow.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] THRESH
*> \verbatim
*> THRESH is REAL
*> A test will count as "failed" if the residual, computed as
*> described above, exceeds THRESH.
*> \endverbatim
*>
*> \param[out] NFAIL
*> \verbatim
*> NFAIL is INTEGER array, dimension (3)
*> NFAIL(1) = No. of times residual CTRSYL exceeds threshold THRESH
*> NFAIL(2) = No. of times residual CTRSYL3 exceeds threshold THRESH
*> NFAIL(3) = No. of times CTRSYL3 and CTRSYL deviate
*> \endverbatim
*>
*> \param[out] RMAX
*> \verbatim
*> RMAX is DOUBLE PRECISION array, dimension (2)
*> RMAX(1) = Value of the largest test ratio of CTRSYL
*> RMAX(2) = Value of the largest test ratio of CTRSYL3
*> \endverbatim
*>
*> \param[out] NINFO
*> \verbatim
*> NINFO is INTEGER array, dimension (2)
*> NINFO(1) = No. of times CTRSYL where INFO is nonzero
*> NINFO(2) = No. of times CTRSYL3 where INFO is nonzero
*> \endverbatim
*>
*> \param[out] KNT
*> \verbatim
*> KNT is INTEGER
*> Total number of examples tested.
*> \endverbatim
*
* -- LAPACK test routine --
SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KNT
REAL THRESH
* ..
* .. Array Arguments ..
INTEGER NFAIL( 3 ), NINFO( 2 )
REAL RMAX( 2 )
* ..
*
* =====================================================================
* ..
* .. Parameters ..
COMPLEX CONE
PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) )
REAL ONE, ZERO
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
INTEGER MAXM, MAXN, LDSWORK
PARAMETER ( MAXM = 101, MAXN = 138, LDSWORK = 18 )
* ..
* .. Local Scalars ..
CHARACTER TRANA, TRANB
INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
$ KUA, KLB, KUB, M, N
REAL ANRM, BNRM, BIGNUM, EPS, RES, RES1,
$ SCALE, SCALE3, SMLNUM, TNRM, XNRM
COMPLEX RMUL
* ..
* .. Local Arrays ..
COMPLEX A( MAXM, MAXM ), B( MAXN, MAXN ),
$ C( MAXM, MAXN ), CC( MAXM, MAXN ),
$ X( MAXM, MAXN ),
$ DUML( MAXM ), DUMR( MAXN ),
$ D( MIN( MAXM, MAXN ) )
REAL SWORK( LDSWORK, 54 ), DUM( MAXN ), VM( 2 )
INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 )
* ..
* .. External Functions ..
LOGICAL SISNAN
REAL SLAMCH, CLANGE
EXTERNAL SISNAN, SLAMCH, CLANGE
* ..
* .. External Subroutines ..
EXTERNAL CLATMR, CLACPY, CGEMM, CTRSYL, CTRSYL3
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, REAL, MAX
* ..
* .. Executable Statements ..
*
* Get machine parameters
*
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
*
* Expect INFO = 0
VM( 1 ) = ONE
* Expect INFO = 1
VM( 2 ) = 0.5E+0
*
* Begin test loop
*
NINFO( 1 ) = 0
NINFO( 2 ) = 0
NFAIL( 1 ) = 0
NFAIL( 2 ) = 0
NFAIL( 3 ) = 0
RMAX( 1 ) = ZERO
RMAX( 2 ) = ZERO
KNT = 0
ISEED( 1 ) = 1
ISEED( 2 ) = 1
ISEED( 3 ) = 1
ISEED( 4 ) = 1
SCALE = ONE
SCALE3 = ONE
DO J = 1, 2
DO ISGN = -1, 1, 2
* Reset seed (overwritten by LATMR)
ISEED( 1 ) = 1
ISEED( 2 ) = 1
ISEED( 3 ) = 1
ISEED( 4 ) = 1
DO M = 32, MAXM, 23
KLA = 0
KUA = M - 1
CALL CLATMR( M, M, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLA, KUA, ZERO,
$ ONE, 'NO', A, MAXM, IWORK,
$ IINFO )
DO I = 1, M
A( I, I ) = A( I, I ) * VM( J )
END DO
ANRM = CLANGE( 'M', M, M, A, MAXM, DUM )
DO N = 51, MAXN, 29
KLB = 0
KUB = N - 1
CALL CLATMR( N, N, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLB, KUB, ZERO,
$ ONE, 'NO', B, MAXN, IWORK,
$ IINFO )
DO I = 1, N
B( I, I ) = B( I, I ) * VM ( J )
END DO
BNRM = CLANGE( 'M', N, N, B, MAXN, DUM )
TNRM = MAX( ANRM, BNRM )
CALL CLATMR( M, N, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, M, N, ZERO, ONE,
$ 'NO', C, MAXM, IWORK, IINFO )
DO ITRANA = 1, 2
IF( ITRANA.EQ.1 )
$ TRANA = 'N'
IF( ITRANA.EQ.2 )
$ TRANA = 'C'
DO ITRANB = 1, 2
IF( ITRANB.EQ.1 )
$ TRANB = 'N'
IF( ITRANB.EQ.2 )
$ TRANB = 'C'
KNT = KNT + 1
*
CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM)
CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM)
CALL CTRSYL( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE, IINFO )
IF( IINFO.NE.0 )
$ NINFO( 1 ) = NINFO( 1 ) + 1
XNRM = CLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = CONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = CONE / MAX( XNRM, TNRM )
END IF
END IF
CALL CGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE*RMUL,
$ CC, MAXM )
CALL CGEMM( 'N', TRANB, M, N, N,
$ REAL( ISGN )*RMUL, X, MAXM, B,
$ MAXN, CONE, CC, MAXM )
RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
IF( RES.GT.THRESH )
$ NFAIL( 1 ) = NFAIL( 1 ) + 1
IF( RES.GT.RMAX( 1 ) )
$ RMAX( 1 ) = RES
*
CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM )
CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM )
CALL CTRSYL3( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE3, SWORK, LDSWORK, INFO)
IF( INFO.NE.0 )
$ NINFO( 2 ) = NINFO( 2 ) + 1
XNRM = CLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = CONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = CONE / MAX( XNRM, TNRM )
END IF
END IF
CALL CGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE3*RMUL,
$ CC, MAXM )
CALL CGEMM( 'N', TRANB, M, N, N,
$ REAL( ISGN )*RMUL, X, MAXM, B,
$ MAXN, CONE, CC, MAXM )
RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
* Verify that TRSYL3 only flushes if TRSYL flushes (but
* there may be cases where TRSYL3 avoid flushing).
IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR.
$ IINFO.NE.INFO ) THEN
NFAIL( 3 ) = NFAIL( 3 ) + 1
END IF
IF( RES.GT.THRESH .OR. SISNAN( RES ) )
$ NFAIL( 2 ) = NFAIL( 2 ) + 1
IF( RES.GT.RMAX( 2 ) )
$ RMAX( 2 ) = RES
END DO
END DO
END DO
END DO
END DO
END DO
*
RETURN
*
* End of CSYL01
*
END

View File

@ -90,21 +90,23 @@
LOGICAL OK
CHARACTER*3 PATH
INTEGER KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC,
$ KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2,
$ LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR,
$ NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC
$ KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC,
$ LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL,
$ NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC,
$ LTGEXC
DOUBLE PRECISION EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2,
$ RTREXC, RTRSYL, SFMIN, RTGEXC
$ RTREXC, SFMIN, RTGEXC
* ..
* .. Local Arrays ..
INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ),
$ NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
$ LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ),
$ NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
$ NTRSNA( 3 )
DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 )
DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
* ..
* .. External Subroutines ..
EXTERNAL DERREC, DGET31, DGET32, DGET33, DGET34, DGET35,
$ DGET36, DGET37, DGET38, DGET39, DGET40
$ DGET36, DGET37, DGET38, DGET39, DGET40, DSYL01
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
@ -153,10 +155,24 @@
WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC
END IF
*
CALL DGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL )
IF( RTRSYL.GT.THRESH ) THEN
CALL DGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL )
IF( RTRSYL( 1 ).GT.THRESH ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
END IF
*
CALL DSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
IF( FTRSYL( 1 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
END IF
IF( FTRSYL( 2 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
END IF
IF( FTRSYL( 3 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
END IF
*
CALL DGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@ -227,7 +243,13 @@
9987 FORMAT( ' Routines pass computational tests if test ratio is les',
$ 's than', F8.2, / / )
9986 FORMAT( ' Error in DTGEXC: RMAX =', D12.3, / ' LMAX = ', I8, ' N',
$ 'INFO=', I8, ' KNT=', I8 )
$ 'INFO=', 2I8, ' KNT=', I8 )
9972 FORMAT( 'DTRSYL and DTRSYL3 compute an inconsistent result ',
$ 'factor in ', I8, ' tests.')
9971 FORMAT( 'Error in DTRSYL3: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
9970 FORMAT( 'Error in DTRSYL: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
*
* End of DCHKEC
*

View File

@ -23,7 +23,7 @@
*>
*> DERREC tests the error exits for the routines for eigen- condition
*> estimation for DOUBLE PRECISION matrices:
*> DTRSYL, DTREXC, DTRSNA and DTRSEN.
*> DTRSYL, DTRSYL3, DTREXC, DTRSNA and DTRSEN.
*> \endverbatim
*
* Arguments:
@ -82,7 +82,7 @@
$ WI( NMAX ), WORK( NMAX ), WR( NMAX )
* ..
* .. External Subroutines ..
EXTERNAL CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL
EXTERNAL CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL, DTRSYL3
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -141,6 +141,43 @@
CALL CHKXER( 'DTRSYL', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test DTRSYL3
*
SRNAMT = 'DTRSYL3'
INFOT = 1
CALL DTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL DTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL DTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL DTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL DTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 7
CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 9
CALL DTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 11
CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test DTREXC
*
SRNAMT = 'DTREXC'

View File

@ -0,0 +1,288 @@
*> \brief \b DSYL01
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
*
* .. Scalar Arguments ..
* INTEGER KNT
* DOUBLE PRECISION THRESH
* ..
* .. Array Arguments ..
* INTEGER NFAIL( 3 ), NINFO( 2 )
* DOUBLE PRECISION RMAX( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DSYL01 tests DTRSYL and DTRSYL3, routines for solving the Sylvester matrix
*> equation
*>
*> op(A)*X + ISGN*X*op(B) = scale*C,
*>
*> A and B are assumed to be in Schur canonical form, op() represents an
*> optional transpose, and ISGN can be -1 or +1. Scale is an output
*> less than or equal to 1, chosen to avoid overflow in X.
*>
*> The test code verifies that the following residual does not exceed
*> the provided threshold:
*>
*> norm(op(A)*X + ISGN*X*op(B) - scale*C) /
*> (EPS*max(norm(A),norm(B))*norm(X))
*>
*> This routine complements DGET35 by testing with larger,
*> random matrices, of which some require rescaling of X to avoid overflow.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] THRESH
*> \verbatim
*> THRESH is DOUBLE PRECISION
*> A test will count as "failed" if the residual, computed as
*> described above, exceeds THRESH.
*> \endverbatim
*>
*> \param[out] NFAIL
*> \verbatim
*> NFAIL is INTEGER array, dimension (3)
*> NFAIL(1) = No. of times residual DTRSYL exceeds threshold THRESH
*> NFAIL(2) = No. of times residual DTRSYL3 exceeds threshold THRESH
*> NFAIL(3) = No. of times DTRSYL3 and DTRSYL deviate
*> \endverbatim
*>
*> \param[out] RMAX
*> \verbatim
*> RMAX is DOUBLE PRECISION, dimension (2)
*> RMAX(1) = Value of the largest test ratio of DTRSYL
*> RMAX(2) = Value of the largest test ratio of DTRSYL3
*> \endverbatim
*>
*> \param[out] NINFO
*> \verbatim
*> NINFO is INTEGER array, dimension (2)
*> NINFO(1) = No. of times DTRSYL returns an expected INFO
*> NINFO(2) = No. of times DTRSYL3 returns an expected INFO
*> \endverbatim
*>
*> \param[out] KNT
*> \verbatim
*> KNT is INTEGER
*> Total number of examples tested.
*> \endverbatim
*
* -- LAPACK test routine --
SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KNT
DOUBLE PRECISION THRESH
* ..
* .. Array Arguments ..
INTEGER NFAIL( 3 ), NINFO( 2 )
DOUBLE PRECISION RMAX( 2 )
* ..
*
* =====================================================================
* ..
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER MAXM, MAXN, LDSWORK
PARAMETER ( MAXM = 245, MAXN = 192, LDSWORK = 36 )
* ..
* .. Local Scalars ..
CHARACTER TRANA, TRANB
INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
$ KUA, KLB, KUB, LIWORK, M, N
DOUBLE PRECISION ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL,
$ SCALE, SCALE3, SMLNUM, TNRM, XNRM
* ..
* .. Local Arrays ..
DOUBLE PRECISION A( MAXM, MAXM ), B( MAXN, MAXN ),
$ C( MAXM, MAXN ), CC( MAXM, MAXN ),
$ X( MAXM, MAXN ),
$ DUML( MAXM ), DUMR( MAXN ),
$ D( MAX( MAXM, MAXN ) ), DUM( MAXN ),
$ SWORK( LDSWORK, 126 ), VM( 2 )
INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 )
* ..
* .. External Functions ..
LOGICAL DISNAN
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL DLAMCH, DLANGE
* ..
* .. External Subroutines ..
EXTERNAL DLATMR, DLACPY, DGEMM, DTRSYL, DTRSYL3
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX
* ..
* .. Executable Statements ..
*
* Get machine parameters
*
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
*
VM( 1 ) = ONE
VM( 2 ) = 0.000001D+0
*
* Begin test loop
*
NINFO( 1 ) = 0
NINFO( 2 ) = 0
NFAIL( 1 ) = 0
NFAIL( 2 ) = 0
NFAIL( 3 ) = 0
RMAX( 1 ) = ZERO
RMAX( 2 ) = ZERO
KNT = 0
DO I = 1, 4
ISEED( I ) = 1
END DO
SCALE = ONE
SCALE3 = ONE
LIWORK = MAXM + MAXN + 2
DO J = 1, 2
DO ISGN = -1, 1, 2
* Reset seed (overwritten by LATMR)
DO I = 1, 4
ISEED( I ) = 1
END DO
DO M = 32, MAXM, 71
KLA = 0
KUA = M - 1
CALL DLATMR( M, M, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLA, KUA, ZERO,
$ ONE, 'NO', A, MAXM, IWORK, IINFO )
DO I = 1, M
A( I, I ) = A( I, I ) * VM( J )
END DO
ANRM = DLANGE( 'M', M, M, A, MAXM, DUM )
DO N = 51, MAXN, 47
KLB = 0
KUB = N - 1
CALL DLATMR( N, N, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLB, KUB, ZERO,
$ ONE, 'NO', B, MAXN, IWORK, IINFO )
BNRM = DLANGE( 'M', N, N, B, MAXN, DUM )
TNRM = MAX( ANRM, BNRM )
CALL DLATMR( M, N, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, M, N, ZERO, ONE,
$ 'NO', C, MAXM, IWORK, IINFO )
DO ITRANA = 1, 2
IF( ITRANA.EQ.1 ) THEN
TRANA = 'N'
END IF
IF( ITRANA.EQ.2 ) THEN
TRANA = 'T'
END IF
DO ITRANB = 1, 2
IF( ITRANB.EQ.1 ) THEN
TRANB = 'N'
END IF
IF( ITRANB.EQ.2 ) THEN
TRANB = 'T'
END IF
KNT = KNT + 1
*
CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM)
CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM)
CALL DTRSYL( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE, IINFO )
IF( IINFO.NE.0 )
$ NINFO( 1 ) = NINFO( 1 ) + 1
XNRM = DLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = ONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = ONE / MAX( XNRM, TNRM )
END IF
END IF
CALL DGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE*RMUL,
$ CC, MAXM )
CALL DGEMM( 'N', TRANB, M, N, N,
$ DBLE( ISGN )*RMUL, X, MAXM, B,
$ MAXN, ONE, CC, MAXM )
RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( RMUL*TNRM )*EPS )*XNRM )
IF( RES.GT.THRESH )
$ NFAIL( 1 ) = NFAIL( 1 ) + 1
IF( RES.GT.RMAX( 1 ) )
$ RMAX( 1 ) = RES
*
CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM )
CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM )
CALL DTRSYL3( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE3, IWORK, LIWORK,
$ SWORK, LDSWORK, INFO)
IF( INFO.NE.0 )
$ NINFO( 2 ) = NINFO( 2 ) + 1
XNRM = DLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = ONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = ONE / MAX( XNRM, TNRM )
END IF
END IF
CALL DGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE3*RMUL,
$ CC, MAXM )
CALL DGEMM( 'N', TRANB, M, N, N,
$ DBLE( ISGN )*RMUL, X, MAXM, B,
$ MAXN, ONE, CC, MAXM )
RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( RMUL*TNRM )*EPS )*XNRM )
* Verify that TRSYL3 only flushes if TRSYL flushes (but
* there may be cases where TRSYL3 avoid flushing).
IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR.
$ IINFO.NE.INFO ) THEN
NFAIL( 3 ) = NFAIL( 3 ) + 1
END IF
IF( RES.GT.THRESH .OR. DISNAN( RES ) )
$ NFAIL( 2 ) = NFAIL( 2 ) + 1
IF( RES.GT.RMAX( 2 ) )
$ RMAX( 2 ) = RES
END DO
END DO
END DO
END DO
END DO
END DO
*
RETURN
*
* End of DSYL01
*
END

View File

@ -90,21 +90,23 @@
LOGICAL OK
CHARACTER*3 PATH
INTEGER KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC,
$ KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2,
$ LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR,
$ NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC
$ KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC,
$ LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL,
$ NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC,
$ LTGEXC
REAL EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2,
$ RTREXC, RTRSYL, SFMIN, RTGEXC
$ RTREXC, SFMIN, RTGEXC
* ..
* .. Local Arrays ..
INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ),
$ NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
$ LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ),
$ NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
$ NTRSNA( 3 )
REAL RTRSEN( 3 ), RTRSNA( 3 )
REAL RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
* ..
* .. External Subroutines ..
EXTERNAL SERREC, SGET31, SGET32, SGET33, SGET34, SGET35,
$ SGET36, SGET37, SGET38, SGET39, SGET40
$ SGET36, SGET37, SGET38, SGET39, SGET40, SSYL01
* ..
* .. External Functions ..
REAL SLAMCH
@ -153,10 +155,24 @@
WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC
END IF
*
CALL SGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL )
IF( RTRSYL.GT.THRESH ) THEN
CALL SGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL )
IF( RTRSYL( 1 ).GT.THRESH ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
END IF
*
CALL SSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
IF( FTRSYL( 1 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
END IF
IF( FTRSYL( 2 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
END IF
IF( FTRSYL( 3 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
END IF
*
CALL SGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@ -227,7 +243,13 @@
9987 FORMAT( ' Routines pass computational tests if test ratio is les',
$ 's than', F8.2, / / )
9986 FORMAT( ' Error in STGEXC: RMAX =', E12.3, / ' LMAX = ', I8, ' N',
$ 'INFO=', I8, ' KNT=', I8 )
$ 'INFO=', 2I8, ' KNT=', I8 )
9972 FORMAT( 'STRSYL and STRSYL3 compute an inconsistent result ',
$ 'factor in ', I8, ' tests.')
9971 FORMAT( 'Error in STRSYL3: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
9970 FORMAT( 'Error in STRSYL: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
*
* End of SCHKEC
*

View File

@ -23,7 +23,7 @@
*>
*> SERREC tests the error exits for the routines for eigen- condition
*> estimation for REAL matrices:
*> STRSYL, STREXC, STRSNA and STRSEN.
*> STRSYL, STRSYL3, STREXC, STRSNA and STRSEN.
*> \endverbatim
*
* Arguments:
@ -82,7 +82,7 @@
$ WI( NMAX ), WORK( NMAX ), WR( NMAX )
* ..
* .. External Subroutines ..
EXTERNAL CHKXER, STREXC, STRSEN, STRSNA, STRSYL
EXTERNAL CHKXER, STREXC, STRSEN, STRSNA, STRSYL, STRSYL3
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -141,6 +141,43 @@
CALL CHKXER( 'STRSYL', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test STRSYL3
*
SRNAMT = 'STRSYL3'
INFOT = 1
CALL STRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL STRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL STRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL STRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL STRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 7
CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 9
CALL STRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 11
CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
$ IWORK, NMAX, WORK, NMAX, INFO )
CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test STREXC
*
SRNAMT = 'STREXC'

View File

@ -0,0 +1,288 @@
*> \brief \b SSYL01
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
*
* .. Scalar Arguments ..
* INTEGER KNT
* REAL THRESH
* ..
* .. Array Arguments ..
* INTEGER NFAIL( 3 ), NINFO( 2 )
* REAL RMAX( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SSYL01 tests STRSYL and STRSYL3, routines for solving the Sylvester matrix
*> equation
*>
*> op(A)*X + ISGN*X*op(B) = scale*C,
*>
*> A and B are assumed to be in Schur canonical form, op() represents an
*> optional transpose, and ISGN can be -1 or +1. Scale is an output
*> less than or equal to 1, chosen to avoid overflow in X.
*>
*> The test code verifies that the following residual does not exceed
*> the provided threshold:
*>
*> norm(op(A)*X + ISGN*X*op(B) - scale*C) /
*> (EPS*max(norm(A),norm(B))*norm(X))
*>
*> This routine complements SGET35 by testing with larger,
*> random matrices, of which some require rescaling of X to avoid overflow.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] THRESH
*> \verbatim
*> THRESH is REAL
*> A test will count as "failed" if the residual, computed as
*> described above, exceeds THRESH.
*> \endverbatim
*>
*> \param[out] NFAIL
*> \verbatim
*> NFAIL is INTEGER array, dimension (3)
*> NFAIL(1) = No. of times residual STRSYL exceeds threshold THRESH
*> NFAIL(2) = No. of times residual STRSYL3 exceeds threshold THRESH
*> NFAIL(3) = No. of times STRSYL3 and STRSYL deviate
*> \endverbatim
*>
*> \param[out] RMAX
*> \verbatim
*> RMAX is REAL, dimension (2)
*> RMAX(1) = Value of the largest test ratio of STRSYL
*> RMAX(2) = Value of the largest test ratio of STRSYL3
*> \endverbatim
*>
*> \param[out] NINFO
*> \verbatim
*> NINFO is INTEGER array, dimension (2)
*> NINFO(1) = No. of times STRSYL returns an expected INFO
*> NINFO(2) = No. of times STRSYL3 returns an expected INFO
*> \endverbatim
*>
*> \param[out] KNT
*> \verbatim
*> KNT is INTEGER
*> Total number of examples tested.
*> \endverbatim
*
* -- LAPACK test routine --
SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KNT
REAL THRESH
* ..
* .. Array Arguments ..
INTEGER NFAIL( 3 ), NINFO( 2 )
REAL RMAX( 2 )
* ..
*
* =====================================================================
* ..
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
INTEGER MAXM, MAXN, LDSWORK
PARAMETER ( MAXM = 101, MAXN = 138, LDSWORK = 18 )
* ..
* .. Local Scalars ..
CHARACTER TRANA, TRANB
INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
$ KUA, KLB, KUB, LIWORK, M, N
REAL ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL,
$ SCALE, SCALE3, SMLNUM, TNRM, XNRM
* ..
* .. Local Arrays ..
REAL A( MAXM, MAXM ), B( MAXN, MAXN ),
$ C( MAXM, MAXN ), CC( MAXM, MAXN ),
$ X( MAXM, MAXN ),
$ DUML( MAXM ), DUMR( MAXN ),
$ D( MAX( MAXM, MAXN ) ), DUM( MAXN ),
$ SWORK( LDSWORK, 54 ), VM( 2 )
INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 )
* ..
* .. External Functions ..
LOGICAL SISNAN
REAL SLAMCH, SLANGE
EXTERNAL SISNAN, SLAMCH, SLANGE
* ..
* .. External Subroutines ..
EXTERNAL SLATMR, SLACPY, SGEMM, STRSYL, STRSYL3
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, REAL, MAX
* ..
* .. Executable Statements ..
*
* Get machine parameters
*
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
*
VM( 1 ) = ONE
VM( 2 ) = 0.05E+0
*
* Begin test loop
*
NINFO( 1 ) = 0
NINFO( 2 ) = 0
NFAIL( 1 ) = 0
NFAIL( 2 ) = 0
NFAIL( 3 ) = 0
RMAX( 1 ) = ZERO
RMAX( 2 ) = ZERO
KNT = 0
DO I = 1, 4
ISEED( I ) = 1
END DO
SCALE = ONE
SCALE3 = ONE
LIWORK = MAXM + MAXN + 2
DO J = 1, 2
DO ISGN = -1, 1, 2
* Reset seed (overwritten by LATMR)
DO I = 1, 4
ISEED( I ) = 1
END DO
DO M = 32, MAXM, 71
KLA = 0
KUA = M - 1
CALL SLATMR( M, M, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLA, KUA, ZERO,
$ ONE, 'NO', A, MAXM, IWORK, IINFO )
DO I = 1, M
A( I, I ) = A( I, I ) * VM( J )
END DO
ANRM = SLANGE( 'M', M, M, A, MAXM, DUM )
DO N = 51, MAXN, 47
KLB = 0
KUB = N - 1
CALL SLATMR( N, N, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLB, KUB, ZERO,
$ ONE, 'NO', B, MAXN, IWORK, IINFO )
BNRM = SLANGE( 'M', N, N, B, MAXN, DUM )
TNRM = MAX( ANRM, BNRM )
CALL SLATMR( M, N, 'S', ISEED, 'N', D,
$ 6, ONE, ONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, M, N, ZERO, ONE,
$ 'NO', C, MAXM, IWORK, IINFO )
DO ITRANA = 1, 2
IF( ITRANA.EQ.1 ) THEN
TRANA = 'N'
END IF
IF( ITRANA.EQ.2 ) THEN
TRANA = 'T'
END IF
DO ITRANB = 1, 2
IF( ITRANB.EQ.1 ) THEN
TRANB = 'N'
END IF
IF( ITRANB.EQ.2 ) THEN
TRANB = 'T'
END IF
KNT = KNT + 1
*
CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM)
CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM)
CALL STRSYL( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE, IINFO )
IF( IINFO.NE.0 )
$ NINFO( 1 ) = NINFO( 1 ) + 1
XNRM = SLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = ONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = ONE / MAX( XNRM, TNRM )
END IF
END IF
CALL SGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE*RMUL,
$ C, MAXM )
CALL SGEMM( 'N', TRANB, M, N, N,
$ REAL( ISGN )*RMUL, X, MAXM, B,
$ MAXN, ONE, C, MAXM )
RES1 = SLANGE( 'M', M, N, C, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( RMUL*TNRM )*EPS )*XNRM )
IF( RES.GT.THRESH )
$ NFAIL( 1 ) = NFAIL( 1 ) + 1
IF( RES.GT.RMAX( 1 ) )
$ RMAX( 1 ) = RES
*
CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM )
CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM )
CALL STRSYL3( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE3, IWORK, LIWORK,
$ SWORK, LDSWORK, INFO)
IF( INFO.NE.0 )
$ NINFO( 2 ) = NINFO( 2 ) + 1
XNRM = SLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = ONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = ONE / MAX( XNRM, TNRM )
END IF
END IF
CALL SGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE3*RMUL,
$ CC, MAXM )
CALL SGEMM( 'N', TRANB, M, N, N,
$ REAL( ISGN )*RMUL, X, MAXM, B,
$ MAXN, ONE, CC, MAXM )
RES1 = SLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( RMUL*TNRM )*EPS )*XNRM )
* Verify that TRSYL3 only flushes if TRSYL flushes (but
* there may be cases where TRSYL3 avoid flushing).
IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR.
$ IINFO.NE.INFO ) THEN
NFAIL( 3 ) = NFAIL( 3 ) + 1
END IF
IF( RES.GT.THRESH .OR. SISNAN( RES ) )
$ NFAIL( 2 ) = NFAIL( 2 ) + 1
IF( RES.GT.RMAX( 2 ) )
$ RMAX( 2 ) = RES
END DO
END DO
END DO
END DO
END DO
END DO
*
RETURN
*
* End of SSYL01
*
END

View File

@ -88,17 +88,17 @@
* .. Local Scalars ..
LOGICAL OK
CHARACTER*3 PATH
INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL,
$ NTESTS, NTREXC, NTRSYL
DOUBLE PRECISION EPS, RTREXC, RTRSYL, SFMIN
INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3,
$ LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL
DOUBLE PRECISION EPS, RTREXC, SFMIN
* ..
* .. Local Arrays ..
INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ),
$ NTRSNA( 3 )
DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 )
INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
$ LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 )
DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
* ..
* .. External Subroutines ..
EXTERNAL ZERREC, ZGET35, ZGET36, ZGET37, ZGET38
EXTERNAL ZERREC, ZGET35, ZGET36, ZGET37, ZGET38, ZSYL01
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
@ -120,10 +120,24 @@
$ CALL ZERREC( PATH, NOUT )
*
OK = .TRUE.
CALL ZGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN )
IF( RTRSYL.GT.THRESH ) THEN
CALL ZGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN )
IF( RTRSYL( 1 ).GT.THRESH ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
END IF
*
CALL ZSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
IF( FTRSYL( 1 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
END IF
IF( FTRSYL( 2 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
END IF
IF( FTRSYL( 3 ).GT.0 ) THEN
OK = .FALSE.
WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
END IF
*
CALL ZGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@ -148,7 +162,7 @@
WRITE( NOUT, FMT = 9996 )RTRSEN, LTRSEN, NTRSEN, KTRSEN
END IF
*
NTESTS = KTRSYL + KTREXC + KTRSNA + KTRSEN
NTESTS = KTRSYL + KTRSYL3 + KTREXC + KTRSNA + KTRSEN
IF( OK )
$ WRITE( NOUT, FMT = 9995 )PATH, NTESTS
*
@ -169,6 +183,12 @@
$ / ' Safe minimum (SFMIN) = ', D16.6, / )
9992 FORMAT( ' Routines pass computational tests if test ratio is ',
$ 'less than', F8.2, / / )
9970 FORMAT( 'Error in ZTRSYL: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
9971 FORMAT( 'Error in ZTRSYL3: ', I8, ' tests fail the threshold.', /
$ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
9972 FORMAT( 'ZTRSYL and ZTRSYL3 compute an inconsistent scale ',
$ 'factor in ', I8, ' tests.')
RETURN
*
* End of ZCHKEC

View File

@ -23,7 +23,7 @@
*>
*> ZERREC tests the error exits for the routines for eigen- condition
*> estimation for DOUBLE PRECISION matrices:
*> ZTRSYL, ZTREXC, ZTRSNA and ZTRSEN.
*> ZTRSYL, ZTRSYL3, ZTREXC, ZTRSNA and ZTRSEN.
*> \endverbatim
*
* Arguments:
@ -77,7 +77,7 @@
* ..
* .. Local Arrays ..
LOGICAL SEL( NMAX )
DOUBLE PRECISION RW( LW ), S( NMAX ), SEP( NMAX )
DOUBLE PRECISION RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX )
COMPLEX*16 A( NMAX, NMAX ), B( NMAX, NMAX ),
$ C( NMAX, NMAX ), WORK( LW ), X( NMAX )
* ..
@ -141,6 +141,43 @@
CALL CHKXER( 'ZTRSYL', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test ZTRSYL3
*
SRNAMT = 'ZTRSYL3'
INFOT = 1
CALL ZTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL ZTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL ZTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL ZTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL ZTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 7
CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 9
CALL ZTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
INFOT = 11
CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
$ SWORK, NMAX, INFO )
CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
NT = NT + 8
*
* Test ZTREXC
*
SRNAMT = 'ZTREXC'

View File

@ -0,0 +1,294 @@
*> \brief \b ZSYL01
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
*
* .. Scalar Arguments ..
* INTEGER KNT
* DOUBLE PRECISION THRESH
* ..
* .. Array Arguments ..
* INTEGER NFAIL( 3 ), NINFO( 2 )
* DOUBLE PRECISION RMAX( 2 )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZSYL01 tests ZTRSYL and ZTRSYL3, routines for solving the Sylvester matrix
*> equation
*>
*> op(A)*X + ISGN*X*op(B) = scale*C,
*>
*> where op(A) and op(B) are both upper triangular form, op() represents an
*> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output
*> less than or equal to 1, chosen to avoid overflow in X.
*>
*> The test code verifies that the following residual does not exceed
*> the provided threshold:
*>
*> norm(op(A)*X + ISGN*X*op(B) - scale*C) /
*> (EPS*max(norm(A),norm(B))*norm(X))
*>
*> This routine complements ZGET35 by testing with larger,
*> random matrices, of which some require rescaling of X to avoid overflow.
*>
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] THRESH
*> \verbatim
*> THRESH is DOUBLE PRECISION
*> A test will count as "failed" if the residual, computed as
*> described above, exceeds THRESH.
*> \endverbatim
*>
*> \param[out] NFAIL
*> \verbatim
*> NFAIL is INTEGER array, dimension (3)
*> NFAIL(1) = No. of times residual ZTRSYL exceeds threshold THRESH
*> NFAIL(2) = No. of times residual ZTRSYL3 exceeds threshold THRESH
*> NFAIL(3) = No. of times ZTRSYL3 and ZTRSYL deviate
*> \endverbatim
*>
*> \param[out] RMAX
*> \verbatim
*> RMAX is DOUBLE PRECISION array, dimension (2)
*> RMAX(1) = Value of the largest test ratio of ZTRSYL
*> RMAX(2) = Value of the largest test ratio of ZTRSYL3
*> \endverbatim
*>
*> \param[out] NINFO
*> \verbatim
*> NINFO is INTEGER array, dimension (2)
*> NINFO(1) = No. of times ZTRSYL returns an expected INFO
*> NINFO(2) = No. of times ZTRSYL3 returns an expected INFO
*> \endverbatim
*>
*> \param[out] KNT
*> \verbatim
*> KNT is INTEGER
*> Total number of examples tested.
*> \endverbatim
*
* -- LAPACK test routine --
SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
IMPLICIT NONE
*
* -- LAPACK test routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER KNT
DOUBLE PRECISION THRESH
* ..
* .. Array Arguments ..
INTEGER NFAIL( 3 ), NINFO( 2 )
DOUBLE PRECISION RMAX( 2 )
* ..
*
* =====================================================================
* ..
* .. Parameters ..
COMPLEX*16 CONE
PARAMETER ( CONE = ( 1.0D0, 0.0D+0 ) )
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
INTEGER MAXM, MAXN, LDSWORK
PARAMETER ( MAXM = 185, MAXN = 192, LDSWORK = 36 )
* ..
* .. Local Scalars ..
CHARACTER TRANA, TRANB
INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
$ KUA, KLB, KUB, M, N
DOUBLE PRECISION ANRM, BNRM, BIGNUM, EPS, RES, RES1,
$ SCALE, SCALE3, SMLNUM, TNRM, XNRM
COMPLEX*16 RMUL
* ..
* .. Local Arrays ..
COMPLEX*16 A( MAXM, MAXM ), B( MAXN, MAXN ),
$ C( MAXM, MAXN ), CC( MAXM, MAXN ),
$ X( MAXM, MAXN ),
$ DUML( MAXM ), DUMR( MAXN ),
$ D( MIN( MAXM, MAXN ) )
DOUBLE PRECISION SWORK( LDSWORK, 103 ), DUM( MAXN ), VM( 2 )
INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 )
* ..
* .. External Functions ..
LOGICAL DISNAN
DOUBLE PRECISION DLAMCH, ZLANGE
EXTERNAL DISNAN, DLAMCH, ZLANGE
* ..
* .. External Subroutines ..
EXTERNAL ZLATMR, ZLACPY, ZGEMM, ZTRSYL, ZTRSYL3
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX, SQRT
* ..
* .. Executable Statements ..
*
* Get machine parameters
*
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
*
* Expect INFO = 0
VM( 1 ) = ONE
* Expect INFO = 1
VM( 2 ) = 0.05D+0
*
* Begin test loop
*
NINFO( 1 ) = 0
NINFO( 2 ) = 0
NFAIL( 1 ) = 0
NFAIL( 2 ) = 0
NFAIL( 3 ) = 0
RMAX( 1 ) = ZERO
RMAX( 2 ) = ZERO
KNT = 0
ISEED( 1 ) = 1
ISEED( 2 ) = 1
ISEED( 3 ) = 1
ISEED( 4 ) = 1
SCALE = ONE
SCALE3 = ONE
DO J = 1, 2
DO ISGN = -1, 1, 2
* Reset seed (overwritten by LATMR)
ISEED( 1 ) = 1
ISEED( 2 ) = 1
ISEED( 3 ) = 1
ISEED( 4 ) = 1
DO M = 32, MAXM, 51
KLA = 0
KUA = M - 1
CALL ZLATMR( M, M, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLA, KUA, ZERO,
$ ONE, 'NO', A, MAXM, IWORK,
$ IINFO )
DO I = 1, M
A( I, I ) = A( I, I ) * VM( J )
END DO
ANRM = ZLANGE( 'M', M, M, A, MAXM, DUM )
DO N = 51, MAXN, 47
KLB = 0
KUB = N - 1
CALL ZLATMR( N, N, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, KLB, KUB, ZERO,
$ ONE, 'NO', B, MAXN, IWORK,
$ IINFO )
DO I = 1, N
B( I, I ) = B( I, I ) * VM ( J )
END DO
BNRM = ZLANGE( 'M', N, N, B, MAXN, DUM )
TNRM = MAX( ANRM, BNRM )
CALL ZLATMR( M, N, 'S', ISEED, 'N', D,
$ 6, ONE, CONE, 'T', 'N',
$ DUML, 1, ONE, DUMR, 1, ONE,
$ 'N', IWORK, M, N, ZERO, ONE,
$ 'NO', C, MAXM, IWORK, IINFO )
DO ITRANA = 1, 2
IF( ITRANA.EQ.1 )
$ TRANA = 'N'
IF( ITRANA.EQ.2 )
$ TRANA = 'C'
DO ITRANB = 1, 2
IF( ITRANB.EQ.1 )
$ TRANB = 'N'
IF( ITRANB.EQ.2 )
$ TRANB = 'C'
KNT = KNT + 1
*
CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM)
CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM)
CALL ZTRSYL( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE, IINFO )
IF( IINFO.NE.0 )
$ NINFO( 1 ) = NINFO( 1 ) + 1
XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = CONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = CONE / MAX( XNRM, TNRM )
END IF
END IF
CALL ZGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE*RMUL,
$ CC, MAXM )
CALL ZGEMM( 'N', TRANB, M, N, N,
$ DBLE( ISGN )*RMUL, X, MAXM, B,
$ MAXN, CONE, CC, MAXM )
RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
IF( RES.GT.THRESH )
$ NFAIL( 1 ) = NFAIL( 1 ) + 1
IF( RES.GT.RMAX( 1 ) )
$ RMAX( 1 ) = RES
*
CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM )
CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM )
CALL ZTRSYL3( TRANA, TRANB, ISGN, M, N,
$ A, MAXM, B, MAXN, X, MAXM,
$ SCALE3, SWORK, LDSWORK, INFO)
IF( INFO.NE.0 )
$ NINFO( 2 ) = NINFO( 2 ) + 1
XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM )
RMUL = CONE
IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
IF( XNRM.GT.BIGNUM / TNRM ) THEN
RMUL = CONE / MAX( XNRM, TNRM )
END IF
END IF
CALL ZGEMM( TRANA, 'N', M, N, M, RMUL,
$ A, MAXM, X, MAXM, -SCALE3*RMUL,
$ CC, MAXM )
CALL ZGEMM( 'N', TRANB, M, N, N,
$ DBLE( ISGN )*RMUL, X, MAXM, B,
$ MAXN, CONE, CC, MAXM )
RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM )
RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
$ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
* Verify that TRSYL3 only flushes if TRSYL flushes (but
* there may be cases where TRSYL3 avoid flushing).
IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR.
$ IINFO.NE.INFO ) THEN
NFAIL( 3 ) = NFAIL( 3 ) + 1
END IF
IF( RES.GT.THRESH .OR. DISNAN( RES ) )
$ NFAIL( 2 ) = NFAIL( 2 ) + 1
IF( RES.GT.RMAX( 2 ) )
$ RMAX( 2 ) = RES
END DO
END DO
END DO
END DO
END DO
END DO
*
RETURN
*
* End of ZSYL01
*
END

View File

@ -31,7 +31,7 @@
*>
*> \verbatim
*>
*> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS
*> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS(3)
*> \endverbatim
*
* Arguments:
@ -184,7 +184,7 @@
INTEGER NTYPE1, NTYPES
PARAMETER ( NTYPE1 = 10, NTYPES = 18 )
INTEGER NTESTS
PARAMETER ( NTESTS = 9 )
PARAMETER ( NTESTS = 10 )
INTEGER NTRAN
PARAMETER ( NTRAN = 3 )
REAL ONE, ZERO
@ -195,13 +195,13 @@
CHARACTER*3 PATH
INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
$ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
REAL AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
$ RCONDO, SCALE
REAL AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
$ RCONDI, RCONDO, RES, SCALE, SLAMCH
* ..
* .. Local Arrays ..
CHARACTER TRANSS( NTRAN ), UPLOS( 2 )
INTEGER ISEED( 4 ), ISEEDY( 4 )
REAL RESULT( NTESTS )
REAL RESULT( NTESTS ), SCALE3( 2 )
* ..
* .. External Functions ..
LOGICAL LSAME
@ -210,9 +210,9 @@
* ..
* .. External Subroutines ..
EXTERNAL ALAERH, ALAHD, ALASUM, CCOPY, CERRTR, CGET04,
$ CLACPY, CLARHS, CLATRS, CLATTR, CTRCON, CTRRFS,
$ CTRT01, CTRT02, CTRT03, CTRT05, CTRT06, CTRTRI,
$ CTRTRS, XLAENV
$ CLACPY, CLARHS, CLATRS, CLATRS3, CLATTR,
$ CSSCAL, CTRCON, CTRRFS, CTRT01, CTRT02, CTRT03,
$ CTRT05, CTRT06, CTRTRI, CTRTRS, XLAENV, SLAMCH
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -236,6 +236,7 @@
*
PATH( 1: 1 ) = 'Complex precision'
PATH( 2: 3 ) = 'TR'
BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision')
NRUN = 0
NFAIL = 0
NERRS = 0
@ -380,7 +381,7 @@
* This line is needed on a Sun SPARCstation.
*
IF( N.GT.0 )
$ DUMMY = A( 1 )
$ DUMMY = REAL( A( 1 ) )
*
CALL CTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA,
$ X, LDA, B, LDA, WORK, RWORK,
@ -535,6 +536,32 @@
$ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
$ RESULT( 9 ) )
*
*+ TEST 10
* Solve op(A)*X = B.
*
SRNAMT = 'CLATRS3'
CALL CCOPY( N, X, 1, B, 1 )
CALL CCOPY( N, X, 1, B, 1 )
CALL CSCAL( N, BIGNUM, B( N+1 ), 1 )
CALL CLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
$ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
$ INFO )
*
* Check error code from CLATRS3.
*
IF( INFO.NE.0 )
$ CALL ALAERH( PATH, 'CLATRS3', INFO, 0,
$ UPLO // TRANS // DIAG // 'Y', N, N,
$ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
$ X, LDA, WORK, RESULT( 10 ) )
CALL CSSCAL( N, BIGNUM, X, 1 )
CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
$ X, LDA, WORK, RESULT( 10 ) )
RESULT( 10 ) = MAX( RESULT( 10 ), RES )
*
* Print information about the tests that did not pass
* the threshold.
*
@ -552,7 +579,14 @@
$ DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 2
IF( RESULT( 10 ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9996 )'CLATRS3', UPLO, TRANS,
$ DIAG, 'N', N, IMAT, 10, RESULT( 10 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 3
90 CONTINUE
100 CONTINUE
110 CONTINUE

View File

@ -82,9 +82,10 @@
EXTERNAL LSAMEN
* ..
* .. External Subroutines ..
EXTERNAL ALAESM, CHKXER, CLATBS, CLATPS, CLATRS, CTBCON,
$ CTBRFS, CTBTRS, CTPCON, CTPRFS, CTPTRI, CTPTRS,
$ CTRCON, CTRRFS, CTRTI2, CTRTRI, CTRTRS
EXTERNAL ALAESM, CHKXER, CLATBS, CLATPS, CLATRS,
$ CLATRS3, CTBCON, CTBRFS, CTBTRS, CTPCON,
$ CTPRFS, CTPTRI, CTPTRS, CTRCON, CTRRFS, CTRTI2,
$ CTRTRI, CTRTRS
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -240,6 +241,46 @@
CALL CLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO )
CALL CHKXER( 'CLATRS', INFOT, NOUT, LERR, OK )
*
* CLATRS3
*
SRNAMT = 'CLATRS3'
INFOT = 1
CALL CLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL CLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL CLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL CLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL CLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 6
CALL CLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 8
CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 10
CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 14
CALL CLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 0, INFO )
CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
*
* Test error exits for the packed triangular routines.
*
ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN

View File

@ -30,7 +30,7 @@
*>
*> \verbatim
*>
*> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS
*> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS(3)
*> \endverbatim
*
* Arguments:
@ -187,7 +187,7 @@
INTEGER NTYPE1, NTYPES
PARAMETER ( NTYPE1 = 10, NTYPES = 18 )
INTEGER NTESTS
PARAMETER ( NTESTS = 9 )
PARAMETER ( NTESTS = 10 )
INTEGER NTRAN
PARAMETER ( NTRAN = 3 )
DOUBLE PRECISION ONE, ZERO
@ -198,13 +198,13 @@
CHARACTER*3 PATH
INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
$ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
DOUBLE PRECISION AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
$ RCONDO, SCALE
DOUBLE PRECISION AINVNM, ANORM, BIGNUM, DLAMCH, DUMMY, RCOND,
$ RCONDC, RCONDI, RCONDO, RES, SCALE
* ..
* .. Local Arrays ..
CHARACTER TRANSS( NTRAN ), UPLOS( 2 )
INTEGER ISEED( 4 ), ISEEDY( 4 )
DOUBLE PRECISION RESULT( NTESTS )
DOUBLE PRECISION RESULT( NTESTS ), SCALE3( 2 )
* ..
* .. External Functions ..
LOGICAL LSAME
@ -213,9 +213,9 @@
* ..
* .. External Subroutines ..
EXTERNAL ALAERH, ALAHD, ALASUM, DCOPY, DERRTR, DGET04,
$ DLACPY, DLARHS, DLATRS, DLATTR, DTRCON, DTRRFS,
$ DTRT01, DTRT02, DTRT03, DTRT05, DTRT06, DTRTRI,
$ DTRTRS, XLAENV
$ DLACPY, DLAMCH, DSCAL, DLARHS, DLATRS, DLATRS3,
$ DLATTR, DTRCON, DTRRFS, DTRT01, DTRT02, DTRT03,
$ DTRT05, DTRT06, DTRTRI, DTRTRS, XLAENV
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -239,6 +239,7 @@
*
PATH( 1: 1 ) = 'Double precision'
PATH( 2: 3 ) = 'TR'
BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision')
NRUN = 0
NFAIL = 0
NERRS = 0
@ -539,6 +540,32 @@
$ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
$ RESULT( 9 ) )
*
*+ TEST 10
* Solve op(A)*X = B
*
SRNAMT = 'DLATRS3'
CALL DCOPY( N, X, 1, B, 1 )
CALL DCOPY( N, X, 1, B( N+1 ), 1 )
CALL DSCAL( N, BIGNUM, B( N+1 ), 1 )
CALL DLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
$ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
$ INFO )
*
* Check error code from DLATRS3.
*
IF( INFO.NE.0 )
$ CALL ALAERH( PATH, 'DLATRS3', INFO, 0,
$ UPLO // TRANS // DIAG // 'N', N, N,
$ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
$ X, LDA, WORK, RESULT( 10 ) )
CALL DSCAL( N, BIGNUM, X, 1 )
CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
$ X, LDA, WORK, RES )
RESULT( 10 ) = MAX( RESULT( 10 ), RES )
*
* Print information about the tests that did not pass
* the threshold.
*
@ -556,7 +583,14 @@
$ DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 2
IF( RESULT( 10 ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9996 )'DLATRS3', UPLO, TRANS,
$ DIAG, 'N', N, IMAT, 10, RESULT( 10 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 3
90 CONTINUE
100 CONTINUE
110 CONTINUE
@ -569,8 +603,8 @@
9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
$ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ',
$ test(', I2, ')= ', G12.5 )
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
$ I2, ')= ', G12.5 )
9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
$ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',

View File

@ -83,9 +83,10 @@
EXTERNAL LSAMEN
* ..
* .. External Subroutines ..
EXTERNAL ALAESM, CHKXER, DLATBS, DLATPS, DLATRS, DTBCON,
$ DTBRFS, DTBTRS, DTPCON, DTPRFS, DTPTRI, DTPTRS,
$ DTRCON, DTRRFS, DTRTI2, DTRTRI, DTRTRS
EXTERNAL ALAESM, CHKXER, DLATBS, DLATPS, DLATRS,
$ DLATRS3, DTBCON, DTBRFS, DTBTRS, DTPCON,
$ DTPRFS, DTPTRI, DTPTRS, DTRCON, DTRRFS,
$ DTRTI2, DTRTRI, DTRTRS
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -244,6 +245,46 @@
INFOT = 7
CALL DLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO )
CALL CHKXER( 'DLATRS', INFOT, NOUT, LERR, OK )
*
* DLATRS3
*
SRNAMT = 'DLATRS3'
INFOT = 1
CALL DLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL DLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL DLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL DLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL DLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 6
CALL DLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 8
CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 10
CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 14
CALL DLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 0, INFO )
CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
*
ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
*

View File

@ -30,7 +30,7 @@
*>
*> \verbatim
*>
*> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS
*> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS(3)
*> \endverbatim
*
* Arguments:
@ -187,7 +187,7 @@
INTEGER NTYPE1, NTYPES
PARAMETER ( NTYPE1 = 10, NTYPES = 18 )
INTEGER NTESTS
PARAMETER ( NTESTS = 9 )
PARAMETER ( NTESTS = 10 )
INTEGER NTRAN
PARAMETER ( NTRAN = 3 )
REAL ONE, ZERO
@ -198,13 +198,13 @@
CHARACTER*3 PATH
INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
$ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
REAL AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
$ RCONDO, SCALE
REAL AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
$ RCONDI, RCONDO, RES, SCALE, SLAMCH
* ..
* .. Local Arrays ..
CHARACTER TRANSS( NTRAN ), UPLOS( 2 )
INTEGER ISEED( 4 ), ISEEDY( 4 )
REAL RESULT( NTESTS )
REAL RESULT( NTESTS ), SCALE3( 2 )
* ..
* .. External Functions ..
LOGICAL LSAME
@ -213,9 +213,9 @@
* ..
* .. External Subroutines ..
EXTERNAL ALAERH, ALAHD, ALASUM, SCOPY, SERRTR, SGET04,
$ SLACPY, SLARHS, SLATRS, SLATTR, STRCON, STRRFS,
$ STRT01, STRT02, STRT03, STRT05, STRT06, STRTRI,
$ STRTRS, XLAENV
$ SLACPY, SLARHS, SLATRS, SLATRS3, SLATTR, SSCAL,
$ STRCON, STRRFS, STRT01, STRT02, STRT03, STRT05,
$ STRT06, STRTRI, STRTRS, XLAENV, SLAMCH
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -239,6 +239,7 @@
*
PATH( 1: 1 ) = 'Single precision'
PATH( 2: 3 ) = 'TR'
BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision')
NRUN = 0
NFAIL = 0
NERRS = 0
@ -539,6 +540,33 @@
$ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
$ RESULT( 9 ) )
*
*+ TEST 10
* Solve op(A)*X = B
*
SRNAMT = 'SLATRS3'
CALL SCOPY( N, X, 1, B, 1 )
CALL SCOPY( N, X, 1, B( N+1 ), 1 )
CALL SSCAL( N, BIGNUM, B( N+1 ), 1 )
CALL SLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
$ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
$ INFO )
*
* Check error code from SLATRS3.
*
IF( INFO.NE.0 )
$ CALL ALAERH( PATH, 'SLATRS3', INFO, 0,
$ UPLO // TRANS // DIAG // 'Y', N, N,
$ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
*
CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA,
$ X, LDA, WORK, RESULT( 10 ) )
CALL SSCAL( N, BIGNUM, X, 1 )
CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
$ X, LDA, WORK, RES )
RESULT( 10 ) = MAX( RESULT( 10 ), RES )
*
* Print information about the tests that did not pass
* the threshold.
*
@ -556,7 +584,14 @@
$ DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 2
IF( RESULT( 10 ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9996 )'SLATRS3', UPLO, TRANS,
$ DIAG, 'N', N, IMAT, 10, RESULT( 10 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 3
90 CONTINUE
100 CONTINUE
110 CONTINUE
@ -569,8 +604,8 @@
9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
$ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ',
$ test(', I2, ')= ', G12.5 )
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
$ I2, ')= ', G12.5 )
9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
$ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',

View File

@ -83,9 +83,10 @@
EXTERNAL LSAMEN
* ..
* .. External Subroutines ..
EXTERNAL ALAESM, CHKXER, SLATBS, SLATPS, SLATRS, STBCON,
$ STBRFS, STBTRS, STPCON, STPRFS, STPTRI, STPTRS,
$ STRCON, STRRFS, STRTI2, STRTRI, STRTRS
EXTERNAL ALAESM, CHKXER, SLATBS, SLATPS, SLATRS,
$ SLATRS3, STBCON, STBRFS, STBTRS, STPCON,
$ STPRFS, STPTRI, STPTRS, STRCON, STRRFS, STRTI2,
$ STRTRI, STRTRS
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -244,6 +245,46 @@
INFOT = 7
CALL SLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO )
CALL CHKXER( 'SLATRS', INFOT, NOUT, LERR, OK )
*
* SLATRS3
*
SRNAMT = 'SLATRS3'
INFOT = 1
CALL SLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL SLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL SLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL SLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL SLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 6
CALL SLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 8
CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 10
CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W,
$ W( 2 ), 1, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 14
CALL SLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W,
$ W( 2 ), 0, INFO )
CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
*
ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
*

View File

@ -31,7 +31,7 @@
*>
*> \verbatim
*>
*> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS
*> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS(3)
*> \endverbatim
*
* Arguments:
@ -184,7 +184,7 @@
INTEGER NTYPE1, NTYPES
PARAMETER ( NTYPE1 = 10, NTYPES = 18 )
INTEGER NTESTS
PARAMETER ( NTESTS = 9 )
PARAMETER ( NTESTS = 10 )
INTEGER NTRAN
PARAMETER ( NTRAN = 3 )
DOUBLE PRECISION ONE, ZERO
@ -195,13 +195,13 @@
CHARACTER*3 PATH
INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
$ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
DOUBLE PRECISION AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
$ RCONDO, SCALE
DOUBLE PRECISION AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
$ RCONDI, RCONDO, RES, SCALE, DLAMCH
* ..
* .. Local Arrays ..
CHARACTER TRANSS( NTRAN ), UPLOS( 2 )
INTEGER ISEED( 4 ), ISEEDY( 4 )
DOUBLE PRECISION RESULT( NTESTS )
DOUBLE PRECISION RESULT( NTESTS ), SCALE3( 2 )
* ..
* .. External Functions ..
LOGICAL LSAME
@ -209,10 +209,10 @@
EXTERNAL LSAME, ZLANTR
* ..
* .. External Subroutines ..
EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZCOPY, ZERRTR,
$ ZGET04, ZLACPY, ZLARHS, ZLATRS, ZLATTR, ZTRCON,
$ ZTRRFS, ZTRT01, ZTRT02, ZTRT03, ZTRT05, ZTRT06,
$ ZTRTRI, ZTRTRS
EXTERNAL ALAERH, ALAHD, ALASUM, DLAMCH, XLAENV, ZCOPY,
$ ZDSCAL, ZERRTR, ZGET04, ZLACPY, ZLARHS, ZLATRS,
$ ZLATRS3, ZLATTR, ZTRCON, ZTRRFS, ZTRT01,
$ ZTRT02, ZTRT03, ZTRT05, ZTRT06, ZTRTRI, ZTRTRS
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -236,6 +236,7 @@
*
PATH( 1: 1 ) = 'Zomplex precision'
PATH( 2: 3 ) = 'TR'
BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision')
NRUN = 0
NFAIL = 0
NERRS = 0
@ -380,7 +381,7 @@
* This line is needed on a Sun SPARCstation.
*
IF( N.GT.0 )
$ DUMMY = A( 1 )
$ DUMMY = DBLE( A( 1 ) )
*
CALL ZTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA,
$ X, LDA, B, LDA, WORK, RWORK,
@ -535,6 +536,32 @@
$ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
$ RESULT( 9 ) )
*
*+ TEST 10
* Solve op(A)*X = B
*
SRNAMT = 'ZLATRS3'
CALL ZCOPY( N, X, 1, B, 1 )
CALL ZCOPY( N, X, 1, B( N+1 ), 1 )
CALL ZDSCAL( N, BIGNUM, B( N+1 ), 1 )
CALL ZLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
$ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
$ INFO )
*
* Check error code from ZLATRS3.
*
IF( INFO.NE.0 )
$ CALL ALAERH( PATH, 'ZLATRS3', INFO, 0,
$ UPLO // TRANS // DIAG // 'N', N, N,
$ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
$ X, LDA, WORK, RESULT( 10 ) )
CALL ZDSCAL( N, BIGNUM, X, 1 )
CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
$ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
$ X, LDA, WORK, RES )
RESULT( 10 ) = MAX( RESULT( 10 ), RES )
*
* Print information about the tests that did not pass
* the threshold.
*
@ -552,7 +579,14 @@
$ DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 2
IF( RESULT( 10 ).GE.THRESH ) THEN
IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
$ CALL ALAHD( NOUT, PATH )
WRITE( NOUT, FMT = 9996 )'ZLATRS3', UPLO, TRANS,
$ DIAG, 'N', N, IMAT, 10, RESULT( 10 )
NFAIL = NFAIL + 1
END IF
NRUN = NRUN + 3
90 CONTINUE
100 CONTINUE
110 CONTINUE
@ -565,8 +599,8 @@
9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
$ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ',
$ test(', I2, ')= ', G12.5 )
$ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
$ I2, ')= ', G12.5 )
9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
$ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',

View File

@ -82,9 +82,10 @@
EXTERNAL LSAMEN
* ..
* .. External Subroutines ..
EXTERNAL ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS, ZTBCON,
$ ZTBRFS, ZTBTRS, ZTPCON, ZTPRFS, ZTPTRI, ZTPTRS,
$ ZTRCON, ZTRRFS, ZTRTI2, ZTRTRI, ZTRTRS
EXTERNAL ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS,
$ ZLATRS3, ZTBCON, ZTBRFS, ZTBTRS, ZTPCON,
$ ZTPRFS, ZTPTRI, ZTPTRS, ZTRCON, ZTRRFS, ZTRTI2,
$ ZTRTRI, ZTRTRS
* ..
* .. Scalars in Common ..
LOGICAL LERR, OK
@ -240,6 +241,46 @@
CALL ZLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO )
CALL CHKXER( 'ZLATRS', INFOT, NOUT, LERR, OK )
*
* ZLATRS3
*
SRNAMT = 'ZLATRS3'
INFOT = 1
CALL ZLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 2
CALL ZLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 3
CALL ZLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 4
CALL ZLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 5
CALL ZLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 6
CALL ZLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 8
CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 10
CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW,
$ RW( 2 ), 1, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
INFOT = 14
CALL ZLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW,
$ RW( 2 ), 0, INFO )
CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
*
* Test error exits for the packed triangular routines.
*
ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN