diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 3b221d420..82511d41b 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -123,7 +123,8 @@ set(SLASRC ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f sgesvdq.f slaorhr_col_getrfnp.f - slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f ) + slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f + slarmm.f slatrs3.f strsyl3.f) set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f @@ -221,7 +222,8 @@ set(CLASRC cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f - cungtsqr.f cungtsqr_row.f cunhr_col.f ) + cungtsqr.f cungtsqr_row.f cunhr_col.f + clatrs3.f ctrsyl3.f ) set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f @@ -313,7 +315,8 @@ set(DLASRC dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f - dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f ) + dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f + dlarmm.f dlatrs3.f dtrsyl3.f) set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f @@ -415,7 +418,8 @@ set(ZLASRC zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f - zungtsqr.f zungtsqr_row.f zunhr_col.f) + zungtsqr.f zungtsqr_row.f zunhr_col.f + zlatrs3.f ztrsyl3.f) set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f @@ -617,7 +621,8 @@ set(SLASRC ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c sgesvdq.c slaorhr_col_getrfnp.c - slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c ) + slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c + slarmm.c slatrs3.c strsyl3.c) set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c @@ -714,7 +719,8 @@ set(CLASRC cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c - cungtsqr.c cungtsqr_row.c cunhr_col.c ) + cungtsqr.c cungtsqr_row.c cunhr_col.c + clatrs3.c ctrsyl3.c) set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c @@ -805,7 +811,8 @@ set(DLASRC dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c - dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c ) + dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c + dlarmm.c dlatrs3.c dtrsyl3.c) set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c @@ -906,7 +913,7 @@ set(ZLASRC zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c - zungtsqr.c 
zungtsqr_row.c zunhr_col.c) + zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c) set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index 14695fdc8..b5a276f5a 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -12,6 +12,7 @@ #include #include +#include /* It seems all current Fortran compilers put strlen at end. * Some historical compilers put strlen after the str argument @@ -80,11 +81,26 @@ extern "C" { /*----------------------------------------------------------------------------*/ #ifndef lapack_int -#define lapack_int int +#if defined(LAPACK_ILP64) +#define lapack_int int64_t +#else +#define lapack_int int32_t +#endif +#endif + +/* + * Integer format string + */ +#ifndef LAPACK_IFMT +#if defined(LAPACK_ILP64) +#define LAPACK_IFMT PRId64 +#else +#define LAPACK_IFMT PRId32 +#endif #endif #ifndef lapack_logical -#define lapack_logical lapack_int +#define lapack_logical lapack_int #endif /* f2c, hence clapack and MacOS Accelerate, returns double instead of float @@ -115,7 +131,7 @@ typedef lapack_logical (*LAPACK_Z_SELECT2) ( const lapack_complex_double*, const lapack_complex_double* ); #define LAPACK_lsame_base LAPACK_GLOBAL(lsame,LSAME) -lapack_logical LAPACK_lsame_base( const char* ca, const char* cb, +lapack_logical LAPACK_lsame_base( const char* ca, const char* cb, lapack_int lca, lapack_int lcb #ifdef LAPACK_FORTRAN_STRLEN_END , size_t, size_t @@ -21986,6 +22002,84 @@ void LAPACK_ztrsyl_base( #define LAPACK_ztrsyl(...) LAPACK_ztrsyl_base(__VA_ARGS__) #endif +#define LAPACK_ctrsyl3_base LAPACK_GLOBAL(ctrsyl3,CTRSYL3) +void LAPACK_ctrsyl3_base( + char const* trana, char const* tranb, + lapack_int const* isgn, lapack_int const* m, lapack_int const* n, + lapack_complex_float const* A, lapack_int const* lda, + lapack_complex_float const* B, lapack_int const* ldb, + lapack_complex_float* C, lapack_int const* ldc, float* scale, + float* swork, lapack_int const *ldswork, + lapack_int* info +#ifdef LAPACK_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +#ifdef LAPACK_FORTRAN_STRLEN_END + #define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__, 1, 1) +#else + #define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__) +#endif + +#define LAPACK_dtrsyl3_base LAPACK_GLOBAL(dtrsyl3,DTRSYL3) +void LAPACK_dtrsyl3_base( + char const* trana, char const* tranb, + lapack_int const* isgn, lapack_int const* m, lapack_int const* n, + double const* A, lapack_int const* lda, + double const* B, lapack_int const* ldb, + double* C, lapack_int const* ldc, double* scale, + lapack_int* iwork, lapack_int const* liwork, + double* swork, lapack_int const *ldswork, + lapack_int* info +#ifdef LAPACK_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +#ifdef LAPACK_FORTRAN_STRLEN_END + #define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__, 1, 1) +#else + #define LAPACK_dtrsyl3(...) 
LAPACK_dtrsyl3_base(__VA_ARGS__) +#endif + +#define LAPACK_strsyl3_base LAPACK_GLOBAL(strsyl3,STRSYL3) +void LAPACK_strsyl3_base( + char const* trana, char const* tranb, + lapack_int const* isgn, lapack_int const* m, lapack_int const* n, + float const* A, lapack_int const* lda, + float const* B, lapack_int const* ldb, + float* C, lapack_int const* ldc, float* scale, + lapack_int* iwork, lapack_int const* liwork, + float* swork, lapack_int const *ldswork, + lapack_int* info +#ifdef LAPACK_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +#ifdef LAPACK_FORTRAN_STRLEN_END + #define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__, 1, 1) +#else + #define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__) +#endif + +#define LAPACK_ztrsyl3_base LAPACK_GLOBAL(ztrsyl3,ZTRSYL3) +void LAPACK_ztrsyl3_base( + char const* trana, char const* tranb, + lapack_int const* isgn, lapack_int const* m, lapack_int const* n, + lapack_complex_double const* A, lapack_int const* lda, + lapack_complex_double const* B, lapack_int const* ldb, + lapack_complex_double* C, lapack_int const* ldc, double* scale, + double* swork, lapack_int const *ldswork, + lapack_int* info +#ifdef LAPACK_FORTRAN_STRLEN_END + , size_t, size_t +#endif +); +#ifdef LAPACK_FORTRAN_STRLEN_END + #define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__, 1, 1) +#else + #define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__) +#endif + #define LAPACK_ctrtri_base LAPACK_GLOBAL(ctrtri,CTRTRI) void LAPACK_ctrtri_base( char const* uplo, char const* diag, diff --git a/lapack-netlib/LAPACKE/include/lapacke.h b/lapack-netlib/LAPACKE/include/lapacke.h index f6fbfcc33..9998b1504 100644 --- a/lapack-netlib/LAPACKE/include/lapacke.h +++ b/lapack-netlib/LAPACKE/include/lapacke.h @@ -2313,6 +2313,19 @@ lapack_int LAPACKE_zlagge( int matrix_layout, lapack_int m, lapack_int n, float LAPACKE_slamch( char cmach ); double LAPACKE_dlamch( char cmach ); +float LAPACKE_slangb( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab ); +double LAPACKE_dlangb( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab ); +float LAPACKE_clangb( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab ); +double LAPACKE_zlangb( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, lapack_int ldab ); + float LAPACKE_slange( int matrix_layout, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlange( int matrix_layout, char norm, lapack_int m, @@ -4477,6 +4490,23 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb, lapack_complex_double* c, lapack_int ldc, double* scale ); +lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, float* c, lapack_int ldc, + float* scale ); +lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, const double* b, + lapack_int ldb, double* c, lapack_int ldc, + double* scale ); +lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, 
lapack_int ldc, + double* scale ); + lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n, @@ -7576,6 +7606,21 @@ double LAPACKE_dlapy3_work( double x, double y, double z ); float LAPACKE_slamch_work( char cmach ); double LAPACKE_dlamch_work( char cmach ); +float LAPACKE_slangb_work( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const float* ab, + lapack_int ldab, float* work ); +double LAPACKE_dlangb_work( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, const double* ab, + lapack_int ldab, double* work ); +float LAPACKE_clangb_work( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_float* ab, lapack_int ldab, + float* work ); +double LAPACKE_zlangb_work( int matrix_layout, char norm, lapack_int n, + lapack_int kl, lapack_int ku, + const lapack_complex_double* ab, lapack_int ldab, + double* work ); + float LAPACKE_slange_work( int matrix_layout, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* work ); @@ -10174,6 +10219,35 @@ lapack_int LAPACKE_ztrsyl_work( int matrix_layout, char trana, char tranb, lapack_complex_double* c, lapack_int ldc, double* scale ); +lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const float* b, lapack_int ldb, + float* c, lapack_int ldc, float* scale, + lapack_int* iwork, lapack_int liwork, + float* swork, lapack_int ldswork ); +lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const double* b, lapack_int ldb, + double* c, lapack_int ldc, double* scale, + lapack_int* iwork, lapack_int liwork, + double* swork, lapack_int ldswork ); +lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale, float* swork, + lapack_int ldswork ); +lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* scale, double* swork, + lapack_int ldswork ); + lapack_int LAPACKE_strtri_work( int matrix_layout, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri_work( int matrix_layout, char uplo, char diag, diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c index 8406635e9..05ff8d57f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c @@ -48,7 +48,6 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp, lapack_int lrwork = -1; float* rwork = NULL; float rwork_query; - lapack_int i; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 ); return -1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3.c b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3.c new file mode 100644 index 000000000..c931aac48 --- /dev/null +++ 
b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3.c @@ -0,0 +1,56 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_ctrsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale ) +{ + lapack_int info = 0; + float swork_query[2]; + float* swork = NULL; + lapack_int ldswork = -1; + lapack_int swork_size = -1; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ctrsyl3", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + } +#endif + /* Query optimal working array sizes */ + info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, + b, ldb, c, ldc, scale, swork_query, ldswork ); + if( info != 0 ) { + goto exit_level_0; + } + ldswork = swork_query[0]; + swork_size = ldswork * swork_query[1]; + swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size); + if( swork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, + lda, b, ldb, c, ldc, scale, swork, ldswork ); + /* Release memory and exit */ + LAPACKE_free( swork ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ctrsyl3", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3_work.c b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3_work.c new file mode 100644 index 000000000..09c08d92a --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3_work.c @@ -0,0 +1,88 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_float* a, lapack_int lda, + const lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* c, lapack_int ldc, + float* scale, float* swork, + lapack_int ldswork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc, + scale, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + lapack_complex_float* a_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < m ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info ); + return info; + } + if( ldb < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info ); + return info; + } + if( ldc < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + b_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t 
* MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + c_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldc_t * MAX(1,n) ); + if( c_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, m, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t ); + LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t, + c_t, &ldc_t, scale, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc ); + /* Release memory and exit */ + LAPACKE_free( c_t ); +exit_level_2: + LAPACKE_free( b_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c index 4e1b87681..4a0d427b3 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c @@ -48,7 +48,6 @@ lapack_int LAPACKE_dgesvdq( int matrix_layout, char joba, char jobp, lapack_int lrwork = -1; double* rwork = NULL; double rwork_query; - lapack_int i; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dgesvdq", -1 ); return -1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3.c b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3.c new file mode 100644 index 000000000..c95a772de --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3.c @@ -0,0 +1,68 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, const double* b, + lapack_int ldb, double* c, lapack_int ldc, + double* scale ) +{ + lapack_int info = 0; + double swork_query[2]; + double* swork = NULL; + lapack_int ldswork = -1; + lapack_int swork_size = -1; + lapack_int iwork_query; + lapack_int* iwork = NULL; + lapack_int liwork = -1; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dtrsyl3", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + } +#endif + /* Query optimal working array sizes */ + info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, + b, ldb, c, ldc, scale, &iwork_query, liwork, + swork_query, ldswork ); + if( info != 0 ) { + goto exit_level_0; + } + ldswork = swork_query[0]; + swork_size = ldswork * swork_query[1]; + swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size); + if( swork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + liwork = iwork_query; + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if ( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; 
+ } + /* Call middle-level interface */ + info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, + lda, b, ldb, c, ldc, scale, iwork, liwork, + swork, ldswork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( swork ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dtrsyl3", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3_work.c b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3_work.c new file mode 100644 index 000000000..272c35b38 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3_work.c @@ -0,0 +1,86 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const double* a, lapack_int lda, + const double* b, lapack_int ldb, double* c, + lapack_int ldc, double* scale, + lapack_int* iwork, lapack_int liwork, + double* swork, lapack_int ldswork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc, + scale, iwork, &liwork, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + double* a_t = NULL; + double* b_t = NULL; + double* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < m ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info ); + return info; + } + if( ldb < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info ); + return info; + } + if( ldc < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,m) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + c_t = (double*)LAPACKE_malloc( sizeof(double) * ldc_t * MAX(1,n) ); + if( c_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, m, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t ); + LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t, + c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork, + &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc ); + /* Release memory and exit */ + LAPACKE_free( c_t ); +exit_level_2: + LAPACKE_free( b_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c index 0b6406dec..627d2406c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c @@ -48,7 +48,6 @@ lapack_int LAPACKE_sgesvdq( int matrix_layout, char joba, 
char jobp, lapack_int lrwork = -1; float* rwork = NULL; float rwork_query; - lapack_int i; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_sgesvdq", -1 ); return -1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_strsyl3.c b/lapack-netlib/LAPACKE/src/lapacke_strsyl3.c new file mode 100644 index 000000000..1cfc626c2 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_strsyl3.c @@ -0,0 +1,68 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, const float* b, + lapack_int ldb, float* c, lapack_int ldc, + float* scale ) +{ + lapack_int info = 0; + float swork_query[2]; + float* swork = NULL; + lapack_int ldswork = -1; + lapack_int swork_size = -1; + lapack_int iwork_query; + lapack_int* iwork = NULL; + lapack_int liwork = -1; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_strsyl3", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + } +#endif + /* Query optimal working array sizes */ + info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda, + b, ldb, c, ldc, scale, &iwork_query, liwork, + swork_query, ldswork ); + if( info != 0 ) { + goto exit_level_0; + } + ldswork = swork_query[0]; + swork_size = ldswork * swork_query[1]; + swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size); + if( swork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + liwork = iwork_query; + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if ( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, + lda, b, ldb, c, ldc, scale, iwork, liwork, + swork, ldswork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( swork ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_strsyl3", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_strsyl3_work.c b/lapack-netlib/LAPACKE/src/lapacke_strsyl3_work.c new file mode 100644 index 000000000..3c50e4a45 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_strsyl3_work.c @@ -0,0 +1,86 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const float* a, lapack_int lda, + const float* b, lapack_int ldb, float* c, + lapack_int ldc, float* scale, + lapack_int* iwork, lapack_int liwork, + float* swork, lapack_int ldswork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc, + scale, iwork, &liwork, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + float* a_t = NULL; + float* b_t = NULL; + float* c_t = NULL; + /* Check 
leading dimension(s) */ + if( lda < m ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_strsyl3_work", info ); + return info; + } + if( ldb < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_strsyl3_work", info ); + return info; + } + if( ldc < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_strsyl3_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + c_t = (float*)LAPACKE_malloc( sizeof(float) * ldc_t * MAX(1,n) ); + if( c_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, m, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t ); + LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); + /* Call LAPACK function and adjust info */ + LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t, + c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork, + &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc ); + /* Release memory and exit */ + LAPACKE_free( c_t ); +exit_level_2: + LAPACKE_free( b_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_strsyl3_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_strsyl3_work", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c index 528b94a47..1d318e571 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c @@ -48,7 +48,6 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp, lapack_int lrwork = -1; double* rwork = NULL; double rwork_query; - lapack_int i; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zgesvdq", -1 ); return -1; diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3.c b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3.c new file mode 100644 index 000000000..dbc9bcf9f --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3.c @@ -0,0 +1,56 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* scale ) +{ + lapack_int info = 0; + double swork_query[2]; + double* swork = NULL; + lapack_int ldswork = -1; + lapack_int swork_size = -1; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ztrsyl3", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) { + return -7; + } + if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) { + return -9; + } + if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) { + return -11; + } + } +#endif + /* Query optimal working array sizes */ + info = LAPACKE_ztrsyl3_work( matrix_layout, trana, 
tranb, isgn, m, n, a, lda, + b, ldb, c, ldc, scale, swork_query, ldswork ); + if( info != 0 ) { + goto exit_level_0; + } + ldswork = swork_query[0]; + swork_size = ldswork * swork_query[1]; + swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size); + if( swork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + /* Call middle-level interface */ + info = LAPACKE_ztrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, + lda, b, ldb, c, ldc, scale, swork, ldswork ); + /* Release memory and exit */ + LAPACKE_free( swork ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ztrsyl3", info ); + } + return info; +} diff --git a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3_work.c b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3_work.c new file mode 100644 index 000000000..a7ebd5da6 --- /dev/null +++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3_work.c @@ -0,0 +1,88 @@ +#include "lapacke_utils.h" + +lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb, + lapack_int isgn, lapack_int m, lapack_int n, + const lapack_complex_double* a, lapack_int lda, + const lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* c, lapack_int ldc, + double* scale, double* swork, + lapack_int ldswork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc, + scale, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int lda_t = MAX(1,m); + lapack_int ldb_t = MAX(1,n); + lapack_int ldc_t = MAX(1,m); + lapack_complex_double* a_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* c_t = NULL; + /* Check leading dimension(s) */ + if( lda < m ) { + info = -8; + LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info ); + return info; + } + if( ldb < n ) { + info = -10; + LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info ); + return info; + } + if( ldc < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info ); + return info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,m) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + b_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + c_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldc_t * MAX(1,n) ); + if( c_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, m, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t ); + LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t, + c_t, &ldc_t, scale, swork, &ldswork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc ); + /* Release memory and exit */ + LAPACKE_free( c_t ); +exit_level_2: + LAPACKE_free( b_t ); +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info ); + } + } else { + 
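/* matrix_layout was neither LAPACK_COL_MAJOR nor LAPACK_ROW_MAJOR */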
info = -1; + LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info ); + } + return info; +} diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 03d15c23c..49eb69cfe 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -207,7 +207,7 @@ SLASRC_O = \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ - sgesvdq.o + sgesvdq.o slarmm.o slatrs3.o strsyl3.o endif @@ -316,7 +316,7 @@ CLASRC_O = \ chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ - cgesvdq.o + cgesvdq.o clatrs3.o ctrsyl3.o endif ifdef USEXBLAS @@ -417,7 +417,7 @@ DLASRC_O = \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ - dgesvdq.o + dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o endif ifdef USEXBLAS @@ -526,7 +526,7 @@ ZLASRC_O = \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ - zgesvdq.o + zgesvdq.o zlatrs3.o ztrsyl3.o endif ifdef USEXBLAS diff --git a/lapack-netlib/SRC/clatrs3.c b/lapack-netlib/SRC/clatrs3.c new file mode 100644 index 000000000..f6d76cf49 --- /dev/null +++ b/lapack-netlib/SRC/clatrs3.c @@ -0,0 +1,1282 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef 
struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? 
(__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + 
else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. + */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, */ +/* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) */ + +/* CHARACTER DIAG, NORMIN, TRANS, UPLO */ +/* INTEGER INFO, LDA, LWORK, LDX, N, NRHS */ +/* REAL CNORM( * ), SCALE( * ), WORK( * ) */ +/* COMPLEX A( LDA, * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CLATRS3 solves one of the triangular systems */ +/* > */ +/* > A * X = B * diag(scale), A**T * X = B * diag(scale), or */ +/* > A**H * X = B * diag(scale) */ +/* > */ +/* > with scaling to prevent overflow. Here A is an upper or lower */ +/* > triangular matrix, A**T denotes the transpose of A, A**H denotes the */ +/* > conjugate transpose of A. X and B are n-by-nrhs matrices and scale */ +/* > is an nrhs-element vector of scaling factors. A scaling factor scale(j) */ +/* > is usually less than or equal to 1, chosen such that X(:,j) is less */ +/* > than the overflow threshold. If the matrix A is singular (A(j,j) = 0 */ +/* > for some j), then a non-trivial solution to A*X = 0 is returned. If */ +/* > the system is so badly scaled that the solution cannot be represented */ +/* > as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. */ +/* > */ +/* > This is a BLAS-3 version of LATRS for solving several right */ +/* > hand sides simultaneously. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the operation applied to A. 
*/ +/* > = 'N': Solve A * x = s*b (No transpose) */ +/* > = 'T': Solve A**T* x = s*b (Transpose) */ +/* > = 'C': Solve A**T* x = s*b (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. */ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NORMIN */ +/* > \verbatim */ +/* > NORMIN is CHARACTER*1 */ +/* > Specifies whether CNORM has been set or not. */ +/* > = 'Y': CNORM contains the column norms on entry */ +/* > = 'N': CNORM is not set on entry. On exit, the norms will */ +/* > be computed and stored in CNORM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of the array A contains the upper */ +/* > triangular matrix, and the strictly lower triangular part of */ +/* > A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of the array A contains the lower triangular */ +/* > matrix, and the strictly upper triangular part of A is not */ +/* > referenced. If DIAG = 'U', the diagonal elements of A are */ +/* > also not referenced and are assumed to be 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > On entry, the right hand side B of the triangular system. */ +/* > On exit, X is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL array, dimension (NRHS) */ +/* > The scaling factor s(k) is for the triangular system */ +/* > A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). */ +/* > If SCALE = 0, the matrix A is singular or badly scaled. */ +/* > If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) */ +/* > that is an exact or approximate solution to A*x(:,k) = 0 */ +/* > is returned. If the system so badly scaled that solution */ +/* > cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 */ +/* > is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CNORM */ +/* > \verbatim */ +/* > CNORM is REAL array, dimension (N) */ +/* > */ +/* > If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* > contains the norm of the off-diagonal part of the j-th column */ +/* > of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* > to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* > must be greater than or equal to the 1-norm. */ +/* > */ +/* > If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* > returns the 1-norm of the offdiagonal part of the j-th column */ +/* > of A. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK). */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal size of */ +/* > WORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > LWORK is INTEGER */ +/* > LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where */ +/* > NBA = (N + NB - 1)/NB and NB is the optimal block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ +/* > \par Further Details: */ +/* ===================== */ +/* \verbatim */ +/* The algorithm follows the structure of a block triangular solve. */ +/* The diagonal block is solved with a call to the robust the triangular */ +/* solver LATRS for every right-hand side RHS = 1, ..., NRHS */ +/* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), */ +/* where op( A ) = A or op( A ) = A**T or op( A ) = A**H. */ +/* The linear block updates operate on block columns of X, */ +/* B( I, K ) - op(A( I, J )) * X( J, K ) */ +/* and use GEMM. To avoid overflow in the linear block update, the worst case */ +/* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed */ +/* such that */ +/* || s * B( I, RHS )||_oo */ +/* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold */ + +/* Once all columns of a block column have been rescaled (BLAS-1), the linear */ +/* update is executed with GEMM without overflow. */ + +/* To limit rescaling, local scale factors track the scaling of column segments. */ +/* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA */ +/* per right-hand side column RHS = 1, ..., NRHS. The global scale factor */ +/* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) */ +/* I = 1, ..., NBA. */ +/* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) */ +/* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The */ +/* linear update of potentially inconsistently scaled vector segments */ +/* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) */ +/* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, */ +/* if necessary, rescales the blocks prior to calling GEMM. */ + +/* \endverbatim */ +/* ===================================================================== */ +/* References: */ +/* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). */ +/* Parallel robust solution of triangular linear systems. Concurrency */ +/* and Computation: Practice and Experience, 31(19), e5064. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
*/ + +/* ===================================================================== */ +/* Subroutine */ int clatrs3_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *nrhs, complex *a, integer *lda, complex * + x, integer *ldx, real *scale, real *cnorm, real *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8; + real r__1, r__2; + complex q__1; + + /* Local variables */ + integer iinc, jinc; + real scal, anrm, bnrm; + integer awrk; + real tmax, xnrm[32]; + integer i__, j, k; + real w[64]; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + real rscal; + integer lanrm, ilast, jlast, i1; + logical upper; + integer i2, j1, j2, k1, k2, nb, ii, kk; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer lscale; + real scaloc; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int clatrs_(char *, char *, char *, char *, + integer *, complex *, integer *, complex *, real *, real *, + integer *); + extern real slarmm_(real *, real *, real *); + integer ifirst; + logical notran; + integer jfirst; + real smlnum; + logical nounit, lquery; + integer nba, lds, nbx, rhs; + + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --scale; + --cnorm; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + lquery = *lwork == -1; + +/* Partition A and X into blocks. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "CLATRS", "", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + nb = f2cmin(64,nb); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*nrhs + 31) / 32; + nbx = f2cmax(i__1,i__2); + +/* Compute the workspace */ + +/* The workspace comprises two parts. */ +/* The first part stores the local scale factors. Each simultaneously */ +/* computed right-hand side requires one local scale factor per block */ +/* row. WORK( I + KK * LDS ) is the scale factor of the vector */ +/* segment associated with the I-th block row and the KK-th vector */ +/* in the block column. */ +/* Computing MAX */ + i__1 = nba, i__2 = f2cmin(*nrhs,32); + lscale = nba * f2cmax(i__1,i__2); + lds = nba; +/* The second part stores upper bounds of the triangular A. There are */ +/* a total of NBA x NBA blocks, of which only the upper triangular */ +/* part or the lower triangular part is referenced. The upper bound of */ +/* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). */ + lanrm = nba * nba; + awrk = lscale; + work[1] = (real) (lscale + lanrm); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! 
+ lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldx < f2cmax(1,*n)) { + *info = -10; + } else if (! lquery && (real) (*lwork) < work[1]) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLATRS3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize scaling factors */ + + i__1 = *nrhs; + for (kk = 1; kk <= i__1; ++kk) { + scale[kk] = 1.f; + } + +/* Quick return if possible */ + + if (f2cmin(*n,*nrhs) == 0) { + return 0; + } + +/* Determine machine dependent constant to control overflow. */ + + bignum = slamch_("Overflow"); + smlnum = slamch_("Safe Minimum"); + +/* Use unblocked code for small problems */ + + if (*nrhs < 2) { + clatrs_(uplo, trans, diag, normin, n, &a[a_offset], lda, &x[x_dim1 + + 1], &scale[1], &cnorm[1], info); + i__1 = *nrhs; + for (k = 2; k <= i__1; ++k) { + clatrs_(uplo, trans, diag, "Y", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Compute norms of blocks of A excluding diagonal blocks and find */ +/* the block with the largest norm TMAX. */ + + tmax = 0.f; + i__1 = nba; + for (j = 1; j <= i__1; ++j) { + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + if (upper) { + ifirst = 1; + ilast = j - 1; + } else { + ifirst = j + 1; + ilast = nba; + } + i__2 = ilast; + for (i__ = ifirst; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*n) + 1; + +/* Compute upper bound of A( I1:I2-1, J1:J2-1 ). */ + + if (notran) { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = clange_("I", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + i__ + (j - 1) * nba] = anrm; + } else { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = clange_("1", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + j + (i__ - 1) * nba] = anrm; + } + tmax = f2cmax(tmax,anrm); + } + } + + if (! (tmax <= slamch_("Overflow"))) { + +/* Some matrix entries have huge absolute value. At least one upper */ +/* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point */ +/* number, either due to overflow in LANGE or due to Inf in A. */ +/* Fall back to LATRS. Set normin = 'N' for every right-hand side to */ +/* force computation of TSCAL in LATRS to avoid the likely overflow */ +/* in the computation of the column norms CNORM. */ + + i__1 = *nrhs; + for (k = 1; k <= i__1; ++k) { + clatrs_(uplo, trans, diag, "N", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Every right-hand side requires workspace to store NBA local scale */ +/* factors. To save workspace, X is computed successively in block columns */ +/* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient */ +/* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. */ + i__1 = nbx; + for (k = 1; k <= i__1; ++k) { +/* Loop over block columns (index = K) of X and, for column-wise scalings, */ +/* over individual columns (index = KK). 
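The shift expressions in the next chunk encode the fixed right-hand-side block width of 32 used by this translation: because additive operators bind tighter than shifts, k - 1 << 5 parses as (k - 1) << 5, i.e. (k - 1) * 32. A small illustrative sketch of the resulting 1-based, half-open column ranges; the helper name print_rhs_blocks is hypothetical:

#include <stdio.h>

#define NBRHS 32   /* right-hand-side block width used by the routine */

/* List the column ranges K1 <= col < K2 covered by each block column of X. */
static void print_rhs_blocks(int nrhs)
{
    int nbx = (nrhs + NBRHS - 1) / NBRHS;
    if (nbx < 1) nbx = 1;                      /* NBX = MAX(1, (NRHS+31)/32) as computed earlier */
    for (int k = 1; k <= nbx; ++k) {
        int k1 = (k - 1) * NBRHS + 1;          /* same as ((k - 1) << 5) + 1 */
        int k2 = (k * NBRHS < nrhs ? k * NBRHS : nrhs) + 1;
        printf("block %d: columns %d..%d (width %d)\n", k, k1, k2 - 1, k2 - k1);
    }
}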
*/ +/* K1: column index of the first column in X( J, K ) */ +/* K2: column index of the first column in X( J, K+1 ) */ +/* so the K2 - K1 is the column count of the block X( J, K ) */ + k1 = (k - 1 << 5) + 1; +/* Computing MIN */ + i__2 = k << 5; + k2 = f2cmin(i__2,*nrhs) + 1; + +/* Initialize local scaling factors of current block column X( J, K ) */ + + i__2 = k2 - k1; + for (kk = 1; kk <= i__2; ++kk) { + i__3 = nba; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__ + kk * lds] = 1.f; + } + } + + if (notran) { + +/* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = nba; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = nba; + jinc = 1; + } + } else { + +/* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ +/* where op(A) = A**T or op(A) = A**H */ + + if (upper) { + jfirst = 1; + jlast = nba; + jinc = 1; + } else { + jfirst = nba; + jlast = 1; + jinc = -1; + } + } + i__2 = jlast; + i__3 = jinc; + for (j = jfirst; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { +/* J1: row index of the first row in A( J, J ) */ +/* J2: row index of the first row in A( J+1, J+1 ) */ +/* so that J2 - J1 is the row count of the block A( J, J ) */ + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) */ + + i__4 = k2 - k1; + for (kk = 1; kk <= i__4; ++kk) { + rhs = k1 + kk - 1; + if (kk == 1) { + i__5 = j2 - j1; + clatrs_(uplo, trans, diag, "N", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } else { + i__5 = j2 - j1; + clatrs_(uplo, trans, diag, "Y", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } +/* Find largest absolute value entry in the vector segment */ +/* X( J1:J2-1, RHS ) as an upper bound for the worst case */ +/* growth in the linear updates. */ + i__5 = j2 - j1; + xnrm[kk - 1] = clange_("I", &i__5, &c__1, &x[j1 + rhs * + x_dim1], ldx, w); + + if (scaloc == 0.f) { +/* LATRS found that A is singular through A(j,j) = 0. */ +/* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 */ +/* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is */ +/* set by LATRS. */ + scale[rhs] = 0.f; + i__5 = j1 - 1; + for (ii = 1; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0.f, x[i__6].i = 0.f; + } + i__5 = *n; + for (ii = j2; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0.f, x[i__6].i = 0.f; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.f; + } + scaloc = 1.f; + } else if (scaloc * work[j + kk * lds] == 0.f) { +/* LATRS computed a valid scale factor, but combined with */ +/* the current scaling the solution does not have a */ +/* scale factor > 0. */ + +/* Set WORK( J+KK*LDS ) to smallest valid scale */ +/* factor and increase SCALOC accordingly. */ + scal = work[j + kk * lds] / smlnum; + scaloc *= scal; + work[j + kk * lds] = smlnum; +/* If LATRS overestimated the growth, x may be */ +/* rescaled to preserve a valid combined scale */ +/* factor WORK( J, KK ) > 0. */ + rscal = 1.f / scaloc; + if (xnrm[kk - 1] * rscal <= bignum) { + xnrm[kk - 1] *= rscal; + i__5 = j2 - j1; + csscal_(&i__5, &rscal, &x[j1 + rhs * x_dim1], &c__1); + scaloc = 1.f; + } else { +/* The system op(A) * x = b is badly scaled and its */ +/* solution cannot be represented as (1/scale) * x. */ +/* Set x to zero. 
This approach deviates from LATRS */ +/* where a completely meaningless non-zero vector */ +/* is returned that is not a solution to op(A) * x = b. */ + scale[rhs] = 0.f; + i__5 = *n; + for (ii = 1; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0.f, x[i__6].i = 0.f; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.f; + } + scaloc = 1.f; + } + } + scaloc *= work[j + kk * lds]; + work[j + kk * lds] = scaloc; + } + +/* Linear block updates */ + + if (notran) { + if (upper) { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } else { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } + } else { + if (upper) { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } else { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } + } + + i__4 = ilast; + i__5 = iinc; + for (i__ = ifirst; i__5 < 0 ? i__ >= i__4 : i__ <= i__4; i__ += + i__5) { +/* I1: row index of the first column in X( I, K ) */ +/* I2: row index of the first column in X( I+1, K ) */ +/* so the I2 - I1 is the row count of the block X( I, K ) */ + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__6 = i__ * nb; + i2 = f2cmin(i__6,*n) + 1; + +/* Prepare the linear update to be executed with GEMM. */ +/* For each column, compute a consistent scaling, a */ +/* scaling factor to survive the linear update, and */ +/* rescale the column segments, if necesssary. Then */ +/* the linear update is safely executed. */ + + i__6 = k2 - k1; + for (kk = 1; kk <= i__6; ++kk) { + rhs = k1 + kk - 1; +/* Compute consistent scaling */ +/* Computing MIN */ + r__1 = work[i__ + kk * lds], r__2 = work[j + kk * lds]; + scamin = f2cmin(r__1,r__2); + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__7 = i2 - i1; + bnrm = clange_("I", &i__7, &c__1, &x[i1 + rhs * x_dim1], + ldx, w); + bnrm *= scamin / work[i__ + kk * lds]; + xnrm[kk - 1] *= scamin / work[j + kk * lds]; + anrm = work[awrk + i__ + (j - 1) * nba]; + scaloc = slarmm_(&anrm, &xnrm[kk - 1], &bnrm); + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to X( I, KK ) and X( J, KK ). 
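The two rescaling steps that follow can be read in isolation: the segments X( I1:I2-1, RHS ) and X( J1:J2-1, RHS ) carry local scale factors s_i and s_j, both are brought to SCAMIN = MIN( s_i, s_j ), and the extra factor SCALOC returned by SLARMM guards the GEMM against overflow. A minimal real-valued sketch with SCALOC passed in rather than computed; the function and variable names are illustrative:

/* Bring two inconsistently scaled segments to the common scale
   scamin * scaloc before the update b_i := b_i - A_ij * x_j is applied
   with GEMM.  si and sj are the local scale factors of xi and xj. */
static void make_consistent(float *xi, int ni, float *si,
                            float *xj, int nj, float *sj, float scaloc)
{
    float scamin = (*si < *sj) ? *si : *sj;

    float scal_i = (scamin / *si) * scaloc;
    if (scal_i != 1.0f)
        for (int k = 0; k < ni; ++k) xi[k] *= scal_i;

    float scal_j = (scamin / *sj) * scaloc;
    if (scal_j != 1.0f)
        for (int k = 0; k < nj; ++k) xj[k] *= scal_j;

    /* Both segments now share the same, possibly smaller, local factor. */
    *si = scamin * scaloc;
    *sj = scamin * scaloc;
}

The smallest of these local factors over all block rows later becomes SCALE( RHS ), and the final loops of the routine rescale every segment to that common value.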
*/ + + scal = scamin / work[i__ + kk * lds] * scaloc; + if (scal != 1.f) { + i__7 = i2 - i1; + csscal_(&i__7, &scal, &x[i1 + rhs * x_dim1], &c__1); + work[i__ + kk * lds] = scamin * scaloc; + } + + scal = scamin / work[j + kk * lds] * scaloc; + if (scal != 1.f) { + i__7 = j2 - j1; + csscal_(&i__7, &scal, &x[j1 + rhs * x_dim1], &c__1); + work[j + kk * lds] = scamin * scaloc; + } + } + + if (notran) { + +/* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("N", "N", &i__6, &i__7, &i__8, &q__1, &a[i1 + j1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b2, & + x[i1 + k1 * x_dim1], ldx); + } else if (lsame_(trans, "T")) { + +/* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("T", "N", &i__6, &i__7, &i__8, &q__1, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b2, & + x[i1 + k1 * x_dim1], ldx); + } else { + +/* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("C", "N", &i__6, &i__7, &i__8, &q__1, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b2, & + x[i1 + k1 * x_dim1], ldx); + } + } + } + +/* Reduce local scaling factors */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MIN */ + r__1 = scale[rhs], r__2 = work[i__ + kk * lds]; + scale[rhs] = f2cmin(r__1,r__2); + } + } + +/* Realize consistent scaling */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + if (scale[rhs] != 1.f && scale[rhs] != 0.f) { + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__5 = i__ * nb; + i2 = f2cmin(i__5,*n) + 1; + scal = scale[rhs] / work[i__ + kk * lds]; + if (scal != 1.f) { + i__5 = i2 - i1; + csscal_(&i__5, &scal, &x[i1 + rhs * x_dim1], &c__1); + } + } + } + } + } + return 0; + +/* End of CLATRS3 */ + +} /* clatrs3_ */ + diff --git a/lapack-netlib/SRC/clatrs3.f b/lapack-netlib/SRC/clatrs3.f new file mode 100644 index 000000000..a902f1ed0 --- /dev/null +++ b/lapack-netlib/SRC/clatrs3.f @@ -0,0 +1,666 @@ +*> \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. +* +* Definition: +* =========== +* +* SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, +* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL CNORM( * ), SCALE( * ), WORK( * ) +* COMPLEX A( LDA, * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATRS3 solves one of the triangular systems +*> +*> A * X = B * diag(scale), A**T * X = B * diag(scale), or +*> A**H * X = B * diag(scale) +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, A**H denotes the +*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale +*> is an nrhs-element vector of scaling factors. A scaling factor scale(j) +*> is usually less than or equal to 1, chosen such that X(:,j) is less +*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0 +*> for some j), then a non-trivial solution to A*X = 0 is returned. 
If +*> the system is so badly scaled that the solution cannot be represented +*> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. +*> +*> This is a BLAS-3 version of LATRS for solving several right +*> hand sides simultaneously. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**H* x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the right hand side B of the triangular system. +*> On exit, X is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max (1,N). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (NRHS) +*> The scaling factor s(k) for the triangular system +*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). +*> If SCALE = 0, the matrix A is singular or badly scaled. +*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) +*> that is an exact or approximate solution to A*x(:,k) = 0 +*> is returned. If the system is so badly scaled that the solution +*> cannot be represented as (1/s(k))*x(:,k), then x(:,k) = 0 +*> is returned. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm.
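When the same triangular matrix is reused for several calls, CNORM can be computed once and passed with NORMIN = 'Y'. A hedged sketch for a column-major upper triangular A: it fills CNORM(j) with the 1-norm of the strictly upper part of column j, which also bounds the infinity-norm and therefore satisfies both requirements quoted above. The helper name column_norms_upper is illustrative.

#include <complex.h>

/* cnorm[j-1] := 1-norm of the off-diagonal part of column j of an upper
   triangular matrix A (column-major, leading dimension lda >= n). */
static void column_norms_upper(int n, const float _Complex *a, int lda,
                               float *cnorm)
{
    for (int j = 0; j < n; ++j) {
        float s = 0.0f;
        for (int i = 0; i < j; ++i)        /* strictly above the diagonal */
            s += cabsf(a[i + j * lda]);
        cnorm[j] = s;
    }
}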
+*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK). +*> On exit, if INFO = 0, WORK(1) returns the optimal size of +*> WORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> LWORK is INTEGER +*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where +*> NBA = (N + NB - 1)/NB and NB is the optimal block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +*> \par Further Details: +* ===================== +* \verbatim +* The algorithm follows the structure of a block triangular solve. +* The diagonal block is solved with a call to the robust the triangular +* solver LATRS for every right-hand side RHS = 1, ..., NRHS +* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), +* where op( A ) = A or op( A ) = A**T or op( A ) = A**H. +* The linear block updates operate on block columns of X, +* B( I, K ) - op(A( I, J )) * X( J, K ) +* and use GEMM. To avoid overflow in the linear block update, the worst case +* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed +* such that +* || s * B( I, RHS )||_oo +* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold +* +* Once all columns of a block column have been rescaled (BLAS-1), the linear +* update is executed with GEMM without overflow. +* +* To limit rescaling, local scale factors track the scaling of column segments. +* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA +* per right-hand side column RHS = 1, ..., NRHS. The global scale factor +* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) +* I = 1, ..., NBA. +* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) +* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The +* linear update of potentially inconsistently scaled vector segments +* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) +* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, +* if necessary, rescales the blocks prior to calling GEMM. +* +* \endverbatim +* ===================================================================== +* References: +* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). +* Parallel robust solution of triangular linear systems. Concurrency +* and Computation: Practice and Experience, 31(19), e5064. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, + $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, NORMIN, UPLO + INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( LDX, * ) + REAL CNORM( * ), SCALE( * ), WORK( * ) +* .. 
+* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN + PARAMETER ( NRHSMIN = 2, NBRHS = 32 ) + PARAMETER ( NBMIN = 8, NBMAX = 64 ) +* .. +* .. Local Arrays .. + REAL W( NBMAX ), XNRM( NBRHS ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER + INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J, + $ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2, + $ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS + REAL ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC, + $ SCAMIN, SMLNUM, TMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE, SLARMM + EXTERNAL ILAENV, LSAME, SLAMCH, CLANGE, SLARMM +* .. +* .. External Subroutines .. + EXTERNAL CLATRS, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Partition A and X into blocks. +* + NB = MAX( NBMIN, ILAENV( 1, 'CLATRS', '', N, N, -1, -1 ) ) + NB = MIN( NBMAX, NB ) + NBA = MAX( 1, (N + NB - 1) / NB ) + NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS ) +* +* Compute the workspace +* +* The workspace comprises two parts. +* The first part stores the local scale factors. Each simultaneously +* computed right-hand side requires one local scale factor per block +* row. WORK( I + KK * LDS ) is the scale factor of the vector +* segment associated with the I-th block row and the KK-th vector +* in the block column. + LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) ) + LDS = NBA +* The second part stores upper bounds of the triangular A. There are +* a total of NBA x NBA blocks, of which only the upper triangular +* part or the lower triangular part is referenced. The upper bound of +* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). + LANRM = NBA * NBA + AWRK = LSCALE + WORK( 1 ) = LSCALE + LANRM +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATRS3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize scaling factors +* + DO KK = 1, NRHS + SCALE( KK ) = ONE + END DO +* +* Quick return if possible +* + IF( MIN( N, NRHS ).EQ.0 ) + $ RETURN +* +* Determine machine dependent constant to control overflow. 
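For callers that size WORK statically instead of using the LWORK = -1 query, the minimum computed above (WORK(1) = LSCALE + LANRM) can be reproduced as sketched below. The block size chosen by the routine is clamped to the range 8..64, and the requirement grows as NB shrinks, so evaluating the sketch at nb = 8 gives an upper bound. The function name is illustrative:

/* Minimum WORK length for CLATRS3 given the block size nb the routine
   will use: NBA*MAX(NBA, MIN(NRHS, 32)) local scale factors plus
   NBA*NBA block norms, with NBA = ceil(N / NB). */
static int clatrs3_min_lwork(int n, int nrhs, int nb)
{
    if (nb < 8)  nb = 8;                    /* NB = MIN(64, MAX(8, ILAENV(...))) */
    if (nb > 64) nb = 64;
    int nba = (n + nb - 1) / nb;
    if (nba < 1) nba = 1;
    int nrhs_blk = nrhs < 32 ? nrhs : 32;
    int lscale = nba * (nba > nrhs_blk ? nba : nrhs_blk);
    int lanrm = nba * nba;
    return lscale + lanrm;
}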
+* + BIGNUM = SLAMCH( 'Overflow' ) + SMLNUM = SLAMCH( 'Safe Minimum' ) +* +* Use unblocked code for small problems +* + IF( NRHS.LT.NRHSMIN ) THEN + CALL CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1 ), + $ SCALE( 1 ), CNORM, INFO ) + DO K = 2, NRHS + CALL CLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Compute norms of blocks of A excluding diagonal blocks and find +* the block with the largest norm TMAX. +* + TMAX = ZERO + DO J = 1, NBA + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 + IF ( UPPER ) THEN + IFIRST = 1 + ILAST = J - 1 + ELSE + IFIRST = J + 1 + ILAST = NBA + END IF + DO I = IFIRST, ILAST + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Compute upper bound of A( I1:I2-1, J1:J2-1 ). +* + IF( NOTRAN ) THEN + ANRM = CLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + I+(J-1)*NBA ) = ANRM + ELSE + ANRM = CLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + J+(I-1)*NBA ) = ANRM + END IF + TMAX = MAX( TMAX, ANRM ) + END DO + END DO +* + IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN +* +* Some matrix entries have huge absolute value. At least one upper +* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point +* number, either due to overflow in LANGE or due to Inf in A. +* Fall back to LATRS. Set normin = 'N' for every right-hand side to +* force computation of TSCAL in LATRS to avoid the likely overflow +* in the computation of the column norms CNORM. +* + DO K = 1, NRHS + CALL CLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Every right-hand side requires workspace to store NBA local scale +* factors. To save workspace, X is computed successively in block columns +* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient +* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. + DO K = 1, NBX +* Loop over block columns (index = K) of X and, for column-wise scalings, +* over individual columns (index = KK). 
+* K1: column index of the first column in X( J, K ) +* K2: column index of the first column in X( J, K+1 ) +* so the K2 - K1 is the column count of the block X( J, K ) + K1 = (K-1)*NBRHS + 1 + K2 = MIN( K*NBRHS, NRHS ) + 1 +* +* Initialize local scaling factors of current block column X( J, K ) +* + DO KK = 1, K2-K1 + DO I = 1, NBA + WORK( I+KK*LDS ) = ONE + END DO + END DO +* + IF( NOTRAN ) THEN +* +* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = NBA + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = NBA + JINC = 1 + END IF + ELSE +* +* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* where op(A) = A**T or op(A) = A**H +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = NBA + JINC = 1 + ELSE + JFIRST = NBA + JLAST = 1 + JINC = -1 + END IF + END IF + + DO J = JFIRST, JLAST, JINC +* J1: row index of the first row in A( J, J ) +* J2: row index of the first row in A( J+1, J+1 ) +* so that J2 - J1 is the row count of the block A( J, J ) + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 +* +* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( KK.EQ.1 ) THEN + CALL CLATRS( UPLO, TRANS, DIAG, 'N', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + ELSE + CALL CLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + END IF +* Find largest absolute value entry in the vector segment +* X( J1:J2-1, RHS ) as an upper bound for the worst case +* growth in the linear updates. + XNRM( KK ) = CLANGE( 'I', J2-J1, 1, X( J1, RHS ), + $ LDX, W ) +* + IF( SCALOC .EQ. ZERO ) THEN +* LATRS found that A is singular through A(j,j) = 0. +* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 +* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is +* set by LATRS. + SCALE( RHS ) = ZERO + DO II = 1, J1-1 + X( II, KK ) = CZERO + END DO + DO II = J2, N + X( II, KK ) = CZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN +* LATRS computed a valid scale factor, but combined with +* the current scaling the solution does not have a +* scale factor > 0. +* +* Set WORK( J+KK*LDS ) to smallest valid scale +* factor and increase SCALOC accordingly. + SCAL = WORK( J+KK*LDS ) / SMLNUM + SCALOC = SCALOC * SCAL + WORK( J+KK*LDS ) = SMLNUM +* If LATRS overestimated the growth, x may be +* rescaled to preserve a valid combined scale +* factor WORK( J, KK ) > 0. + RSCAL = ONE / SCALOC + IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN + XNRM( KK ) = XNRM( KK ) * RSCAL + CALL CSSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 ) + SCALOC = ONE + ELSE +* The system op(A) * x = b is badly scaled and its +* solution cannot be represented as (1/scale) * x. +* Set x to zero. This approach deviates from LATRS +* where a completely meaningless non-zero vector +* is returned that is not a solution to op(A) * x = b. + SCALE( RHS ) = ZERO + DO II = 1, N + X( II, KK ) = CZERO + END DO +* Discard the local scale factors. 
+ DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + END IF + END IF + SCALOC = SCALOC * WORK( J+KK*LDS ) + WORK( J+KK*LDS ) = SCALOC + END DO +* +* Linear block updates +* + IF( NOTRAN ) THEN + IF( UPPER ) THEN + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + ELSE + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + END IF + ELSE + IF( UPPER ) THEN + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + ELSE + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + END IF + END IF +* + DO I = IFIRST, ILAST, IINC +* I1: row index of the first column in X( I, K ) +* I2: row index of the first column in X( I+1, K ) +* so the I2 - I1 is the row count of the block X( I, K ) + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Prepare the linear update to be executed with GEMM. +* For each column, compute a consistent scaling, a +* scaling factor to survive the linear update, and +* rescale the column segments, if necesssary. Then +* the linear update is safely executed. +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 +* Compute consistent scaling + SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + BNRM = CLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W ) + BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) ) + XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) ) + ANRM = WORK( AWRK + I+(J-1)*NBA ) + SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM ) +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to X( I, KK ) and X( J, KK ). +* + SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + WORK( I+KK*LDS ) = SCAMIN*SCALOC + END IF +* + SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL CSSCAL( J2-J1, SCAL, X( J1, RHS ), 1 ) + WORK( J+KK*LDS ) = SCAMIN*SCALOC + END IF + END DO +* + IF( NOTRAN ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) +* + CALL CGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( I1, J1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) +* + CALL CGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + ELSE +* +* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K ) +* + CALL CGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + END IF + END DO + END DO +* +* Reduce local scaling factors +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + DO I = 1, NBA + SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) ) + END DO + END DO +* +* Realize consistent scaling +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. 
ZERO ) THEN + DO I = 1, NBA + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 + SCAL = SCALE( RHS ) / WORK( I+KK*LDS ) + IF( SCAL.NE.ONE ) + $ CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + END DO + END IF + END DO + END DO + RETURN +* +* End of CLATRS3 +* + END diff --git a/lapack-netlib/SRC/ctrsyl3.c b/lapack-netlib/SRC/ctrsyl3.c new file mode 100644 index 000000000..3c119157c --- /dev/null +++ b/lapack-netlib/SRC/ctrsyl3.c @@ -0,0 +1,2022 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; 
+ doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CTRSYL3 solves the complex Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**H, and A and B are both upper triangular. A is */ +/* > M-by-M and B is N-by-N; the right hand side C and the solution X are */ +/* > M-by-N; and scale is an output scale factor, set <= 1 to avoid */ +/* > overflow in X. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,M) */ +/* > The upper triangular matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > The upper triangular matrix B. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is REAL array, dimension (MAX(2, ROWS), MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* > \ingroup complexSYcomputational */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
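The LDSWORK = -1 query documented above returns the SWORK dimensions in SWORK(1) and SWORK(2). A minimal usage sketch against the f2c-style prototype that follows, under the same assumptions as the earlier CLATRS3 example (LP64 integers, interleaved complex storage, no hidden string lengths); the wrapper name sylvester_blocked and the c_complex typedef are illustrative:

#include <stdlib.h>

typedef struct { float r, i; } c_complex;   /* mirrors the f2c "complex" struct */

extern int ctrsyl3_(char *trana, char *tranb, int *isgn, int *m, int *n,
                    c_complex *a, int *lda, c_complex *b, int *ldb,
                    c_complex *c, int *ldc, float *scale,
                    float *swork, int *ldswork, int *info);

/* Solve A*X + X*B = scale*C with A (m-by-m) and B (n-by-n) upper
   triangular; C is overwritten with X. */
static int sylvester_blocked(int m, int n, c_complex *a, int lda,
                             c_complex *b, int ldb, c_complex *c, int ldc,
                             float *scale)
{
    int info, isgn = 1, ldswork = -1;
    float query[2];

    /* Dimension query: SWORK(1) = ROWS, SWORK(2) = COLS on return. */
    ctrsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
             scale, query, &ldswork, &info);
    if (info != 0)
        return info;

    int rows = (int) query[0], cols = (int) query[1];
    ldswork = rows < 2 ? 2 : rows;            /* LDSWORK >= MAX(2, ROWS) */
    float *swork = (float *) malloc((size_t) ldswork * cols * sizeof(float));
    ctrsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
             scale, swork, &ldswork, &info);
    free(swork);
    return info;
}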
*/ + +/* ===================================================================== */ +/* Subroutine */ int ctrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, complex *a, integer *lda, complex *b, integer + *ldb, complex *c__, integer *ldc, real *scale, real *swork, integer * + ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + real scal; + complex csgn; + real anrm, bnrm, cnrm; + integer awrk, bwrk; + real *wnrm, xnrm; + integer i__, j, k, l; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer iinfo, i1, i2, j1, j2, k1, k2, l1, l2; +// extern integer myexp_(real *); + integer nb, jj, ll; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + real scaloc; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern real slarmm_(real *, real *, real *); + logical notrna, notrnb; + real smlnum; + extern /* Subroutine */ int ctrsyl_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, real *, integer *); + logical lquery; + integer nba, nbb; + real buf, sgn; + + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "CTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *ldswork == -1; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "C")) { + *info = -1; + } else if (! notrnb && ! 
lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (real*)malloc(f2cmax(*m,*n)*sizeof(real)); +/* Use unblocked code for small problems or if insufficient */ +/* workspace is provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb)) { + ctrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + +/* Set local scaling factors. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.f; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.f; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*m) + 1; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = clange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = clange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*n) + 1; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = clange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = clange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (real) (*isgn); + q__1.r = sgn, q__1.i = 0.f; + csgn.r = q__1.r, csgn.i = q__1.i; + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. 
*/ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = clange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = clange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + csscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("N", "N", &i__2, &i__3, &i__4, &q__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = clange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "N", &i__3, &i__4, &i__5, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**H *X + ISGN*X*B = scale*C. 
*/ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + + i__3 = k2 - k1; + i__4 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = clange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__4 = i__ * nb; + i2 = f2cmin(i__4,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = clange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
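The second scaling factor trades a power of two between BUF and the SWORK entries: the binary exponent e of SCALOC is extracted (myexp_ in the translated C, a frexp wrapper, mirroring Fortran's EXPONENT), BUF absorbs 2**e, and every SWORK entry is divided by 2**e, so BUF*SWORK is unchanged while SWORK itself stays representable. A minimal sketch of the effect, with illustrative numbers and a local exponent_of helper standing in for my_expfunc:

    #include <math.h>
    #include <stdio.h>

    /* Same exponent extraction as my_expfunc()/EXPONENT(): x = f * 2**e, 0.5 <= f < 1. */
    static int exponent_of(float x) { int e; (void)frexpf(x, &e); return e; }

    int main(void)
    {
        float swork  = 1e-20f;     /* an already small local scale factor      */
        float scaloc = 1e-26f;     /* tiny factor returned by the block solve  */
        float buf    = 1.0f;       /* global second scaling factor             */

        float naive = swork * scaloc;                  /* flushes to zero      */
        printf("naive swork*scaloc = %g\n", naive);

        /* Transfer a power of two from SWORK into BUF before applying SCALOC. */
        int   e = exponent_of(scaloc);
        float t = ldexpf(1.0f, e);         /* t = 2**EXPONENT(SCALOC)          */
        buf   *= t;
        swork /= t;
        swork *= scaloc;                   /* scaloc/t is in [0.5, 1): no flush */

        printf("buf = %g, swork = %g, buf*swork = %g\n",
               buf, swork, (double) buf * (double) swork);
        return 0;
    }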
*/ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + csscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("C", "N", &i__4, &i__5, &i__6, &q__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = clange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + csscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "N", &i__4, &i__5, &i__6, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**H *X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = clange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
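SLARMM is what produces that factor: given norm estimates of the A block, the solved block, and the block being updated, it returns SCALOC <= 1 under which the update can be formed without overflow. A simplified power-of-two stand-in for that contract is sketched below; it is not the actual SLARMM algorithm (which is more careful about edge cases), and the safety bound is an illustrative choice:

    #include <float.h>
    #include <stdio.h>

    /* Return a power-of-two factor s <= 1 so that the scaled update
     * s*CNRM + ANRM*(s*XNRM) stays below an overflow bound. */
    static float update_scale(float anrm, float xnrm, float cnrm)
    {
        const float bound = FLT_MAX / 4.0f;   /* illustrative safety margin */
        float s = 1.0f;
        while (s > 0.0f && s * cnrm + anrm * (s * xnrm) > bound)
            s *= 0.5f;
        return s;
    }

    int main(void)
    {
        float s = update_scale(1e20f, 1e20f, 1e30f);
        printf("s = %g, guarded update norm = %g\n",
               s, s * 1e30f + 1e20f * (s * 1e20f));
        return 0;
    }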
*/ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = clange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + csscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("C", "N", &i__3, &i__4, &i__5, &q__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = clange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). 
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "C", &i__3, &i__4, &i__5, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__1 = l * nb; + l2 = f2cmin(i__1,*n) + 1; + + i__1 = k2 - k1; + i__2 = l2 - l1; + ctrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b18, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = clange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = clange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + csscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + q__1.r = -1.f, q__1.i = 0.f; + cgemm_("N", "N", &i__2, &i__3, &i__4, &q__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = clange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). 
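Each of these rescalings touches only the C( K1:K2-1, L1:L2-1 ) sub-block of the full array with leading dimension LDC, which is why the loops below walk the block column by column and call CSSCAL on K2-K1 contiguous entries at a time. A small stand-alone illustration of that column-major sub-block scaling follows; scale_block and the real-valued entries are illustrative (the routine scales complex entries by a real factor via CSSCAL):

    #include <stdio.h>

    /* Scale rows k1..k2-1, columns l1..l2-1 (1-based) of a column-major
     * array with leading dimension ldc, one column segment at a time. */
    static void scale_block(float *c, int ldc, int k1, int k2, int l1, int l2,
                            float scal)
    {
        for (int ll = l1; ll <= l2 - 1; ++ll)          /* block columns        */
            for (int kk = k1; kk <= k2 - 1; ++kk)      /* contiguous in memory */
                c[(kk - 1) + (ll - 1) * ldc] *= scal;
    }

    int main(void)
    {
        enum { LDC = 4, N = 3 };
        float c[LDC * N];
        for (int j = 0; j < N; ++j)
            for (int i = 0; i < LDC; ++i)
                c[i + j * LDC] = 1.0f;

        scale_block(c, LDC, 2, 4, 2, 3, 0.5f);   /* scales rows 2..3 of column 2 */
        for (int i = 0; i < LDC; ++i)
            printf("%g %g %g\n", c[i], c[i + LDC], c[i + 2 * LDC]);
        return 0;
    }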
*/ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + csscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + q__1.r = -csgn.r, q__1.i = -csgn.i; + cgemm_("N", "C", &i__2, &i__3, &i__4, &q__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + + } + + free(wnrm); + +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + r__1 = *scale, r__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(r__1,r__2); + } + } + if (*scale == 0.f) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is REAL. Set SCALE to */ +/* zero and give up. */ + + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + csscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1.f && buf > 0.f) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + r__1 = *scale / smlnum, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + *scale /= scaloc; + } + + if (buf != 1.f && buf > 0.f) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + +/* Computing MAX */ + i__1 = c_dim1 + 1; + r__3 = (r__1 = c__[i__1].r, abs(r__1)), r__4 = (r__2 = r_imag(&c__[ + c_dim1 + 1]), abs(r__2)); + scal = f2cmax(r__3,r__4); + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + i__3 = k + l * c_dim1; + r__3 = scal, r__4 = (r__1 = c__[i__3].r, abs(r__1)), r__3 = + f2cmax(r__3,r__4), r__4 = (r__2 = r_imag(&c__[k + l * + c_dim1]), abs(r__2)); + scal = f2cmax(r__3,r__4); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + r__1 = bignum / scal, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + clascl_("G", &c_n1, &c_n1, &c_b106, &scaloc, m, n, &c__[c_offset], + ldc, &iinfo); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. 
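A compact restatement of the clean-up phase above and the assignment that follows, in the notation of the source comments (the SCALE/BUF trading and the optional upscaling of C rescale both sides together and so preserve the final relation):

\[
  C_{k\ell} \;\leftarrow\; \frac{\min_{k',\ell'} \mathrm{SWORK}(k',\ell')}{\mathrm{SWORK}(k,\ell)}\, C_{k\ell},
  \qquad
  \mathrm{SCALE} \;=\; \mathrm{BUF}\cdot \min_{k',\ell'} \mathrm{SWORK}(k',\ell'),
\]

so that every block of the returned solution X (overwriting C) carries one common factor and

\[
  \mathrm{op}(A)\,X \;+\; \mathrm{ISGN}\cdot X\,\mathrm{op}(B) \;=\; \mathrm{SCALE}\cdot C_{\mathrm{in}} .
\]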
*/ + + *scale *= buf; + +/* Restore workspace dimensions */ + + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + + return 0; + +/* End of CTRSYL3 */ + +} /* ctrsyl3_ */ + diff --git a/lapack-netlib/SRC/ctrsyl3.f b/lapack-netlib/SRC/ctrsyl3.f new file mode 100644 index 000000000..586dc0207 --- /dev/null +++ b/lapack-netlib/SRC/ctrsyl3.f @@ -0,0 +1,1142 @@ +*> \brief \b CTRSYL3 +* +* Definition: +* =========== +* +* +*> \par Purpose +* ============= +*> +*> \verbatim +*> +*> CTRSYL3 solves the complex Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**H, and A and B are both upper triangular. A is +*> M-by-M and B is N-by-N; the right hand side C and the solution X are +*> M-by-N; and scale is an output scale factor, set <= 1 to avoid +*> overflow in X. +*> +*> This is the block version of the algorithm. +*> \endverbatim +* +* Arguments +* ========= +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'C': op(A) = A**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'C': op(B) = B**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is REAL array, dimension (MAX(2, ROWS), MAX(1,COLS)). +*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS +*> and SWORK(2) returns the optimal COLS. +*> \endverbatim +*> +*> \param[in] LDSWORK +*> \verbatim +*> LDSWORK is INTEGER +*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) +*> and NB is the optimal block size. 
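For concreteness, the sizing requested here and returned by the workspace query documented just below (MAX(NBA,NBB) rows and 2*NBB+NBA columns) works out as in the following sketch. NB is a fixed illustrative value, since the routine itself takes it from ILAENV, and imax/imin are hypothetical helpers:

    #include <stdio.h>

    static int imax(int a, int b) { return a > b ? a : b; }
    static int imin(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
        int m = 1000, n = 400, nb = 96;            /* illustrative problem sizes */
        int nba = imax(1, (m + nb - 1) / nb);      /* number of row blocks of A  */
        int nbb = imax(1, (n + nb - 1) / nb);      /* column blocks of B         */

        /* SWORK layout: NBB columns of local scale factors, then NBA columns
         * holding block norms of A, then NBB columns of block norms of B.     */
        int rows = imax(2, imax(nba, nbb));
        int cols = 2 * nbb + nba;
        printf("SWORK needs %d rows (LDSWORK >= %d) and %d columns\n",
               rows, rows, cols);

        /* Block k of A covers rows K1 .. K2-1 of the matrix. */
        for (int k = 1; k <= nba; ++k) {
            int k1 = (k - 1) * nb + 1;
            int k2 = imin(k * nb, m) + 1;
            printf("A block %2d: rows %4d..%4d (%d rows)\n",
                   k, k1, k2 - 1, k2 - k1);
        }
        return 0;
    }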
+*> +*> If LDSWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the SWORK matrix, +*> returns these values as the first and second entry of the SWORK +*> matrix, and no error message related LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +*> \ingroup complexSYcomputational +* +* ===================================================================== +* References: +* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of +* algorithms: The triangular Sylvester equation, ACM Transactions +* on Mathematical Software (TOMS), volume 29, pages 218--243. +* +* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel +* Solution of the Triangular Sylvester Equation. Lecture Notes in +* Computer Science, vol 12043, pages 82--92, Springer. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, SWORK, LDSWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N + REAL SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) + REAL SWORK( LDSWORK, * ) +* .. +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB, LQUERY + INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ, + $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB + REAL ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC, + $ SCAMIN, SGN, XNRM, BUF, SMLNUM + COMPLEX CSGN +* .. +* .. Local Arrays .. + REAL WNRM( MAX( M, N ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH, SLARMM + EXTERNAL CLANGE, ILAENV, LSAME, SLAMCH, SLARMM +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CGEMM, CLASCL, CTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, EXPONENT, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* +* Use the same block size for all matrices. +* + NB = MAX( 8, ILAENV( 1, 'CTRSYL', '', M, N, -1, -1) ) +* +* Compute number of blocks in A and B +* + NBA = MAX( 1, (M + NB - 1) / NB ) + NBB = MAX( 1, (N + NB - 1) / NB ) +* +* Compute workspace +* + INFO = 0 + LQUERY = ( LDSWORK.EQ.-1 ) + IF( LQUERY ) THEN + LDSWORK = 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + END IF +* +* Test the input arguments +* + IF( .NOT.NOTRNA .AND. .NOT. LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT. LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. 
ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSYL3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Use unblocked code for small problems or if insufficient +* workspace is provided +* + IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) ) THEN + CALL CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, INFO ) + RETURN + END IF +* +* Set constants to control overflow +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Set local scaling factors. +* + DO L = 1, NBB + DO K = 1, NBA + SWORK( K, L ) = ONE + END DO + END DO +* +* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. +* This scaling is to ensure compatibility with TRSYL and may get flushed. +* + BUF = ONE +* +* Compute upper bounds of blocks of A and B +* + AWRK = NBB + DO K = 1, NBA + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = K, NBA + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, M ) + 1 + IF( NOTRNA ) THEN + SWORK( K, AWRK + L ) = CLANGE( 'I', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + ELSE + SWORK( L, AWRK + K ) = CLANGE( '1', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + END IF + END DO + END DO + BWRK = NBB + NBA + DO K = 1, NBB + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, N ) + 1 + DO L = K, NBB + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 + IF( NOTRNB ) THEN + SWORK( K, BWRK + L ) = CLANGE( 'I', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + ELSE + SWORK( L, BWRK + K ) = CLANGE( '1', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + END IF + END DO + END DO +* + SGN = REAL( ISGN ) + CSGN = CMPLX( SGN, ZERO ) +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL CTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. 
The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = CLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K - 1, 1, -1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL.NE.ONE ) THEN + DO JJ = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL.NE.ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( I2-I1, SCAL, C( I1, LL ), 1) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK(L, BWRK + J) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. 
NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL CTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = CLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. 
ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL CGEMM( 'C', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B**H = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. +* I=1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL CTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. 
The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = CLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL CGEMM( 'C', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'C', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( NOTRNA .AND. 
.NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**H = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. +* I=K+1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL CTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = CLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = 1, K - 1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. 
ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = CLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL CGEMM( 'N', 'C', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO +* + END IF +* +* Reduce local scaling factors +* + SCALE = SWORK( 1, 1 ) + DO K = 1, NBA + DO L = 1, NBB + SCALE = MIN( SCALE, SWORK( K, L ) ) + END DO + END DO + IF( SCALE .EQ. ZERO ) THEN +* +* The magnitude of the largest entry of the solution is larger +* than the product of BIGNUM**2 and cannot be represented in the +* form (1/SCALE)*X if SCALE is REAL. Set SCALE to +* zero and give up. +* + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + RETURN + END IF +* +* Realize consistent scaling +* + DO K = 1, NBA + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 + SCAL = SCALE / SWORK( K, L ) + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL CSSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF + END DO + END DO +* + IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN +* +* Decrease SCALE as much as possible. +* + SCALOC = MIN( SCALE / SMLNUM, ONE / BUF ) + BUF = BUF * SCALOC + SCALE = SCALE / SCALOC + END IF +* + IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN +* +* In case of overly aggressive scaling during the computation, +* flushing of the global scale factor may be prevented by +* undoing some of the scaling. This step is to ensure that +* this routine flushes only scale factors that TRSYL also +* flushes and be usable as a drop-in replacement. +* +* How much can the normwise largest entry be upscaled? 
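The "normwise largest entry" computed next is the maximum of |Re| and |Im| over all entries of C (not the complex modulus); BUF is then pushed back toward one by the largest factor that neither overflows that entry nor exceeds 1/BUF, and C is upscaled accordingly. A small stand-alone sketch of the same arithmetic, where cplx, max_abs_entry and the FLT_MIN-based bignum are illustrative stand-ins for the f2c types and SLAMCH('S'):

    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    typedef struct { float r, i; } cplx;   /* stand-in for the f2c complex type */

    /* Maximum of |Re| and |Im| over all entries, as in the loop below. */
    static float max_abs_entry(const cplx *c, int nelem)
    {
        float scal = 0.0f;
        for (int k = 0; k < nelem; ++k) {
            scal = fmaxf(scal, fabsf(c[k].r));
            scal = fmaxf(scal, fabsf(c[k].i));
        }
        return scal;
    }

    int main(void)
    {
        cplx  c[2]   = { { 1.0e30f, -2.0e30f }, { -3.0e28f, 4.0e25f } };
        float buf    = 0.25f;                 /* leftover second scale factor  */
        float bignum = 1.0f / FLT_MIN;        /* roughly ONE/SMLNUM            */

        float scal   = max_abs_entry(c, 2);
        float scaloc = fminf(bignum / scal, 1.0f / buf);   /* safe upscaling   */
        printf("scal = %g, upscale C by %g, buf becomes %g\n",
               scal, scaloc, buf * scaloc);
        return 0;
    }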
+* + SCAL = MAX( ABS( REAL( C( 1, 1 ) ) ), + $ ABS( AIMAG( C ( 1, 1 ) ) ) ) + DO K = 1, M + DO L = 1, N + SCAL = MAX( SCAL, ABS( REAL ( C( K, L ) ) ), + $ ABS( AIMAG ( C( K, L ) ) ) ) + END DO + END DO +* +* Increase BUF as close to 1 as possible and apply scaling. +* + SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) + BUF = BUF * SCALOC + CALL CLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IINFO ) + END IF +* +* Combine with buffer scaling factor. SCALE will be flushed if +* BUF is less than one here. +* + SCALE = SCALE * BUF +* +* Restore workspace dimensions +* + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA +* + RETURN +* +* End of CTRSYL3 +* + END diff --git a/lapack-netlib/SRC/dlarmm.c b/lapack-netlib/SRC/dlarmm.c new file mode 100644 index 000000000..eec5d143a --- /dev/null +++ b/lapack-netlib/SRC/dlarmm.c @@ -0,0 +1,605 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen 
inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DLARMM */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM ) */ + +/* DOUBLE PRECISION ANORM, BNORM, CNORM */ + +/* > \par Purpose: */ +/* ======= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARMM returns a factor s in (0, 1] such that the linear updates */ +/* > */ +/* > (s * C) - A * (s * B) and (s * C) - (s * A) * B */ +/* > */ +/* > cannot overflow, where A, B, and C are matrices of conforming */ +/* > dimensions. */ +/* > */ +/* > This is an auxiliary routine so there is no argument checking. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========= */ + +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > The infinity norm of A. ANORM >= 0. */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BNORM */ +/* > \verbatim */ +/* > BNORM is DOUBLE PRECISION */ +/* > The infinity norm of B. BNORM >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CNORM */ +/* > \verbatim */ +/* > CNORM is DOUBLE PRECISION */ +/* > The infinity norm of C. CNORM >= 0. */ +/* > \endverbatim */ +/* > */ +/* > */ +/* ===================================================================== */ +/* > References: */ +/* > C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */ +/* > Robust Solution of Triangular Linear Systems. In: International */ +/* > Conference on Parallel Processing and Applied Mathematics, pages */ +/* > 68--78. Springer, 2017. */ +/* > */ +/* > \ingroup OTHERauxiliary */ +/* ===================================================================== */ +doublereal dlarmm_(doublereal *anorm, doublereal *bnorm, doublereal *cnorm) +{ + /* System generated locals */ + doublereal ret_val; + + /* Local variables */ + extern doublereal dlamch_(char *); + doublereal bignum, smlnum; + + + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = dlamch_("Safe minimum") / dlamch_("Precision"); + bignum = 1. / smlnum / 4.; + +/* Compute a scale factor. */ + + ret_val = 1.; + if (*bnorm <= 1.) 
{ + if (*anorm * *bnorm > bignum - *cnorm) { + ret_val = .5; + } + } else { + if (*anorm > (bignum - *cnorm) / *bnorm) { + ret_val = .5 / *bnorm; + } + } + return ret_val; + +/* ==== End of DLARMM ==== */ + +} /* dlarmm_ */ + diff --git a/lapack-netlib/SRC/dlarmm.f b/lapack-netlib/SRC/dlarmm.f new file mode 100644 index 000000000..c36042009 --- /dev/null +++ b/lapack-netlib/SRC/dlarmm.f @@ -0,0 +1,99 @@ +*> \brief \b DLARMM +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ANORM, BNORM, CNORM +* .. +* +*> \par Purpose: +* ======= +*> +*> \verbatim +*> +*> DLARMM returns a factor s in (0, 1] such that the linear updates +*> +*> (s * C) - A * (s * B) and (s * C) - (s * A) * B +*> +*> cannot overflow, where A, B, and C are matrices of conforming +*> dimensions. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========= +* +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The infinity norm of A. ANORM >= 0. +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] BNORM +*> \verbatim +*> BNORM is DOUBLE PRECISION +*> The infinity norm of B. BNORM >= 0. +*> \endverbatim +*> +*> \param[in] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION +*> The infinity norm of C. CNORM >= 0. +*> \endverbatim +*> +*> +* ===================================================================== +*> References: +*> C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for +*> Robust Solution of Triangular Linear Systems. In: International +*> Conference on Parallel Processing and Applied Mathematics, pages +*> 68--78. Springer, 2017. +*> +*> \ingroup OTHERauxiliary +* ===================================================================== + + DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM ) + IMPLICIT NONE +* .. Scalar Arguments .. + DOUBLE PRECISION ANORM, BNORM, CNORM +* .. Parameters .. + DOUBLE PRECISION ONE, HALF, FOUR + PARAMETER ( ONE = 1.0D0, HALF = 0.5D+0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION BIGNUM, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ( ONE / SMLNUM ) / FOUR +* +* Compute a scale factor. +* + DLARMM = ONE + IF( BNORM .LE. ONE ) THEN + IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN + DLARMM = HALF + END IF + ELSE + IF( ANORM .GT. 
(BIGNUM - CNORM) / BNORM ) THEN + DLARMM = HALF / BNORM + END IF + END IF + RETURN +* +* ==== End of DLARMM ==== +* + END diff --git a/lapack-netlib/SRC/dlatrs3.c b/lapack-netlib/SRC/dlatrs3.c new file mode 100644 index 000000000..46eca6379 --- /dev/null +++ b/lapack-netlib/SRC/dlatrs3.c @@ -0,0 +1,1265 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + 
ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. + */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, */ +/* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) */ + +/* CHARACTER DIAG, NORMIN, TRANS, UPLO */ +/* INTEGER INFO, LDA, LWORK, LDX, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), CNORM( * ), SCALE( * ), */ +/* WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLATRS3 solves one of the triangular systems */ +/* > */ +/* > A * X = B * diag(scale) or A**T * X = B * diag(scale) */ +/* > */ +/* > with scaling to prevent overflow. Here A is an upper or lower */ +/* > triangular matrix, A**T denotes the transpose of A. X and B are */ +/* > n by nrhs matrices and scale is an nrhs element vector of scaling */ +/* > factors. A scaling factor scale(j) is usually less than or equal */ +/* > to 1, chosen such that X(:,j) is less than the overflow threshold. */ +/* > If the matrix A is singular (A(j,j) = 0 for some j), then */ +/* > a non-trivial solution to A*X = 0 is returned. If the system is */ +/* > so badly scaled that the solution cannot be represented as */ +/* > (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. */ +/* > */ +/* > This is a BLAS-3 version of LATRS for solving several right */ +/* > hand sides simultaneously. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the operation applied to A. */ +/* > = 'N': Solve A * x = s*b (No transpose) */ +/* > = 'T': Solve A**T* x = s*b (Transpose) */ +/* > = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. 
*/ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NORMIN */ +/* > \verbatim */ +/* > NORMIN is CHARACTER*1 */ +/* > Specifies whether CNORM has been set or not. */ +/* > = 'Y': CNORM contains the column norms on entry */ +/* > = 'N': CNORM is not set on entry. On exit, the norms will */ +/* > be computed and stored in CNORM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of the array A contains the upper */ +/* > triangular matrix, and the strictly lower triangular part of */ +/* > A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of the array A contains the lower triangular */ +/* > matrix, and the strictly upper triangular part of A is not */ +/* > referenced. If DIAG = 'U', the diagonal elements of A are */ +/* > also not referenced and are assumed to be 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > On entry, the right hand side B of the triangular system. */ +/* > On exit, X is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The scaling factor s(k) is for the triangular system */ +/* > A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). */ +/* > If SCALE = 0, the matrix A is singular or badly scaled. */ +/* > If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) */ +/* > that is an exact or approximate solution to A*x(:,k) = 0 */ +/* > is returned. If the system so badly scaled that solution */ +/* > cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 */ +/* > is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CNORM */ +/* > \verbatim */ +/* > CNORM is DOUBLE PRECISION array, dimension (N) */ +/* > */ +/* > If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* > contains the norm of the off-diagonal part of the j-th column */ +/* > of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* > to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* > must be greater than or equal to the 1-norm. */ +/* > */ +/* > If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* > returns the 1-norm of the offdiagonal part of the j-th column */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK). */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal size of */ +/* > WORK. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > LWORK is INTEGER */ +/* > LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where */ +/* > NBA = (N + NB - 1)/NB and NB is the optimal block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ +/* > \par Further Details: */ +/* ===================== */ +/* \verbatim */ +/* The algorithm follows the structure of a block triangular solve. */ +/* The diagonal block is solved with a call to the robust the triangular */ +/* solver LATRS for every right-hand side RHS = 1, ..., NRHS */ +/* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), */ +/* where op( A ) = A or op( A ) = A**T. */ +/* The linear block updates operate on block columns of X, */ +/* B( I, K ) - op(A( I, J )) * X( J, K ) */ +/* and use GEMM. To avoid overflow in the linear block update, the worst case */ +/* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed */ +/* such that */ +/* || s * B( I, RHS )||_oo */ +/* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold */ + +/* Once all columns of a block column have been rescaled (BLAS-1), the linear */ +/* update is executed with GEMM without overflow. */ + +/* To limit rescaling, local scale factors track the scaling of column segments. */ +/* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA */ +/* per right-hand side column RHS = 1, ..., NRHS. The global scale factor */ +/* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) */ +/* I = 1, ..., NBA. */ +/* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) */ +/* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The */ +/* linear update of potentially inconsistently scaled vector segments */ +/* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) */ +/* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, */ +/* if necessary, rescales the blocks prior to calling GEMM. */ + +/* \endverbatim */ +/* ===================================================================== */ +/* References: */ +/* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). */ +/* Parallel robust solution of triangular linear systems. Concurrency */ +/* and Computation: Practice and Experience, 31(19), e5064. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
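The calling sequence documented above is easiest to see with a small driver. The following is a minimal sketch, not part of the changeset itself: it assumes a non-ILP64 build in which the f2c "integer" type maps to a plain C int, Fortran-style column-major storage, and linking against a library that exports the new dlatrs3_ and dlarmm_ symbols; the matrix values and the helper main are purely illustrative.

#include <stdio.h>
#include <stdlib.h>

/* Prototypes matching the f2c-translated definitions in this patch
   (non-ILP64 assumed, so integer == int, doublereal == double). */
extern int dlatrs3_(char *uplo, char *trans, char *diag, char *normin,
                    int *n, int *nrhs, double *a, int *lda,
                    double *x, int *ldx, double *scale, double *cnorm,
                    double *work, int *lwork, int *info);
extern double dlarmm_(double *anorm, double *bnorm, double *cnorm);

int main(void) {
    int n = 3, nrhs = 2, lda = 3, ldx = 3, info = 0, lwork = -1;
    /* Upper triangular A and two right-hand sides B, stored column-major. */
    double a[9] = { 2.0, 0.0, 0.0,   1.0, 3.0, 0.0,   4.0, 5.0, 6.0 };
    double x[6] = { 1.0, 1.0, 1.0,   2.0, 0.0, 1.0 };
    double scale[2], cnorm[3], wkopt;

    /* Workspace query: with LWORK = -1 the routine only reports the
       required workspace size in WORK(1). */
    dlatrs3_("U", "N", "N", "N", &n, &nrhs, a, &lda, x, &ldx,
             scale, cnorm, &wkopt, &lwork, &info);
    lwork = (int)wkopt;
    double *work = malloc((size_t)lwork * sizeof(double));
    if (work == NULL) return 1;

    /* Solve A * X = B * diag(scale) with overflow protection. */
    dlatrs3_("U", "N", "N", "N", &n, &nrhs, a, &lda, x, &ldx,
             scale, cnorm, work, &lwork, &info);
    printf("info = %d, scale = (%g, %g)\n", info, scale[0], scale[1]);

    /* The auxiliary DLARMM can also be called directly; it returns a
       factor s in (0, 1] such that s*C - A*(s*B) cannot overflow. */
    double an = 1.0e300, bn = 2.0, cn = 1.0e300;
    printf("dlarmm scale factor: %g\n", dlarmm_(&an, &bn, &cn));

    free(work);
    return 0;
}

On exit X is overwritten by the solution and SCALE(k) holds the per-column scale factor, which stays at 1.0 for a well-scaled system like this one.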
*/ + +/* ===================================================================== */ +/* Subroutine */ int dlatrs3_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *nrhs, doublereal *a, integer *lda, + doublereal *x, integer *ldx, doublereal *scale, doublereal *cnorm, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8; + doublereal d__1, d__2; + + /* Local variables */ + integer iinc, jinc; + doublereal scal, anrm, bnrm; + integer awrk; + doublereal tmax, xnrm[32]; + integer i__, j, k; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal w[64]; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + doublereal rscal; + integer lanrm, ilast, jlast, i1; + logical upper; + integer i2, j1, j2, k1, k2, nb, ii, kk; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer lscale; + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + integer ifirst; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit, lquery; + integer nba, lds, nbx, rhs; + + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --scale; + --cnorm; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + lquery = *lwork == -1; + +/* Partition A and X into blocks */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "DLATRS", "", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + nb = f2cmin(64,nb); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*nrhs + 31) / 32; + nbx = f2cmax(i__1,i__2); + +/* Compute the workspace */ + +/* The workspace comprises two parts. */ +/* The first part stores the local scale factors. Each simultaneously */ +/* computed right-hand side requires one local scale factor per block */ +/* row. WORK( I+KK*LDS ) is the scale factor of the vector */ +/* segment associated with the I-th block row and the KK-th vector */ +/* in the block column. */ +/* Computing MAX */ + i__1 = nba, i__2 = f2cmin(*nrhs,32); + lscale = nba * f2cmax(i__1,i__2); + lds = nba; +/* The second part stores upper bounds of the triangular A. There are */ +/* a total of NBA x NBA blocks, of which only the upper triangular */ +/* part or the lower triangular part is referenced. The upper bound of */ +/* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). */ + lanrm = nba * nba; + awrk = lscale; + work[1] = (doublereal) (lscale + lanrm); + +/* Test the input parameters */ + + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldx < f2cmax(1,*n)) { + *info = -10; + } else if (! lquery && (doublereal) (*lwork) < work[1]) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLATRS3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize scaling factors */ + + i__1 = *nrhs; + for (kk = 1; kk <= i__1; ++kk) { + scale[kk] = 1.; + } + +/* Quick return if possible */ + + if (f2cmin(*n,*nrhs) == 0) { + return 0; + } + +/* Determine machine dependent constant to control overflow. */ + + bignum = dlamch_("Overflow"); + smlnum = dlamch_("Safe Minimum"); + +/* Use unblocked code for small problems */ + + if (*nrhs < 2) { + dlatrs_(uplo, trans, diag, normin, n, &a[a_offset], lda, &x[x_dim1 + + 1], &scale[1], &cnorm[1], info); + i__1 = *nrhs; + for (k = 2; k <= i__1; ++k) { + dlatrs_(uplo, trans, diag, "Y", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Compute norms of blocks of A excluding diagonal blocks and find */ +/* the block with the largest norm TMAX. */ + + tmax = 0.; + i__1 = nba; + for (j = 1; j <= i__1; ++j) { + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + if (upper) { + ifirst = 1; + ilast = j - 1; + } else { + ifirst = j + 1; + ilast = nba; + } + i__2 = ilast; + for (i__ = ifirst; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*n) + 1; + +/* Compute upper bound of A( I1:I2-1, J1:J2-1 ). */ + + if (notran) { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = dlange_("I", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + i__ + (j - 1) * nba] = anrm; + } else { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = dlange_("1", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + j + (i__ - 1) * nba] = anrm; + } + tmax = f2cmax(tmax,anrm); + } + } + + if (! (tmax <= dlamch_("Overflow"))) { + +/* Some matrix entries have huge absolute value. At least one upper */ +/* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point */ +/* number, either due to overflow in LANGE or due to Inf in A. */ +/* Fall back to LATRS. Set normin = 'N' for every right-hand side to */ +/* force computation of TSCAL in LATRS to avoid the likely overflow */ +/* in the computation of the column norms CNORM. */ + + i__1 = *nrhs; + for (k = 1; k <= i__1; ++k) { + dlatrs_(uplo, trans, diag, "N", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Every right-hand side requires workspace to store NBA local scale */ +/* factors. To save workspace, X is computed successively in block columns */ +/* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient */ +/* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. */ + i__1 = nbx; + for (k = 1; k <= i__1; ++k) { +/* Loop over block columns (index = K) of X and, for column-wise scalings, */ +/* over individual columns (index = KK). 
*/ +/* K1: column index of the first column in X( J, K ) */ +/* K2: column index of the first column in X( J, K+1 ) */ +/* so the K2 - K1 is the column count of the block X( J, K ) */ + k1 = (k - 1 << 5) + 1; +/* Computing MIN */ + i__2 = k << 5; + k2 = f2cmin(i__2,*nrhs) + 1; + +/* Initialize local scaling factors of current block column X( J, K ) */ + + i__2 = k2 - k1; + for (kk = 1; kk <= i__2; ++kk) { + i__3 = nba; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__ + kk * lds] = 1.; + } + } + + if (notran) { + +/* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = nba; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = nba; + jinc = 1; + } + } else { + +/* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = 1; + jlast = nba; + jinc = 1; + } else { + jfirst = nba; + jlast = 1; + jinc = -1; + } + } + + i__2 = jlast; + i__3 = jinc; + for (j = jfirst; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { +/* J1: row index of the first row in A( J, J ) */ +/* J2: row index of the first row in A( J+1, J+1 ) */ +/* so that J2 - J1 is the row count of the block A( J, J ) */ + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) */ +/* for all right-hand sides in the current block column, */ +/* one RHS at a time. */ + + i__4 = k2 - k1; + for (kk = 1; kk <= i__4; ++kk) { + rhs = k1 + kk - 1; + if (kk == 1) { + i__5 = j2 - j1; + dlatrs_(uplo, trans, diag, "N", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } else { + i__5 = j2 - j1; + dlatrs_(uplo, trans, diag, "Y", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } +/* Find largest absolute value entry in the vector segment */ +/* X( J1:J2-1, RHS ) as an upper bound for the worst case */ +/* growth in the linear updates. */ + i__5 = j2 - j1; + xnrm[kk - 1] = dlange_("I", &i__5, &c__1, &x[j1 + rhs * + x_dim1], ldx, w); + + if (scaloc == 0.) { +/* LATRS found that A is singular through A(j,j) = 0. */ +/* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 */ +/* and compute A*x = 0 (or A**T*x = 0). Note that */ +/* X(J1:J2-1, KK) is set by LATRS. */ + scale[rhs] = 0.; + i__5 = j1 - 1; + for (ii = 1; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.; + } + i__5 = *n; + for (ii = j2; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.; + } + scaloc = 1.; + } else if (scaloc * work[j + kk * lds] == 0.) { +/* LATRS computed a valid scale factor, but combined with */ +/* the current scaling the solution does not have a */ +/* scale factor > 0. */ + +/* Set WORK( J+KK*LDS ) to smallest valid scale */ +/* factor and increase SCALOC accordingly. */ + scal = work[j + kk * lds] / smlnum; + scaloc *= scal; + work[j + kk * lds] = smlnum; +/* If LATRS overestimated the growth, x may be */ +/* rescaled to preserve a valid combined scale */ +/* factor WORK( J, KK ) > 0. */ + rscal = 1. / scaloc; + if (xnrm[kk - 1] * rscal <= bignum) { + xnrm[kk - 1] *= rscal; + i__5 = j2 - j1; + dscal_(&i__5, &rscal, &x[j1 + rhs * x_dim1], &c__1); + scaloc = 1.; + } else { +/* The system op(A) * x = b is badly scaled and its */ +/* solution cannot be represented as (1/scale) * x. */ +/* Set x to zero. 
This approach deviates from LATRS */ +/* where a completely meaningless non-zero vector */ +/* is returned that is not a solution to op(A) * x = b. */ + scale[rhs] = 0.; + i__5 = *n; + for (ii = 1; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.; + } + scaloc = 1.; + } + } + scaloc *= work[j + kk * lds]; + work[j + kk * lds] = scaloc; + } + +/* Linear block updates */ + + if (notran) { + if (upper) { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } else { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } + } else { + if (upper) { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } else { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } + } + + i__4 = ilast; + i__5 = iinc; + for (i__ = ifirst; i__5 < 0 ? i__ >= i__4 : i__ <= i__4; i__ += + i__5) { +/* I1: row index of the first column in X( I, K ) */ +/* I2: row index of the first column in X( I+1, K ) */ +/* so the I2 - I1 is the row count of the block X( I, K ) */ + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__6 = i__ * nb; + i2 = f2cmin(i__6,*n) + 1; + +/* Prepare the linear update to be executed with GEMM. */ +/* For each column, compute a consistent scaling, a */ +/* scaling factor to survive the linear update, and */ +/* rescale the column segments, if necesssary. Then */ +/* the linear update is safely executed. */ + + i__6 = k2 - k1; + for (kk = 1; kk <= i__6; ++kk) { + rhs = k1 + kk - 1; +/* Compute consistent scaling */ +/* Computing MIN */ + d__1 = work[i__ + kk * lds], d__2 = work[j + kk * lds]; + scamin = f2cmin(d__1,d__2); + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__7 = i2 - i1; + bnrm = dlange_("I", &i__7, &c__1, &x[i1 + rhs * x_dim1], + ldx, w); + bnrm *= scamin / work[i__ + kk * lds]; + xnrm[kk - 1] *= scamin / work[j + kk * lds]; + anrm = work[awrk + i__ + (j - 1) * nba]; + scaloc = dlarmm_(&anrm, &xnrm[kk - 1], &bnrm); + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to B( I, KK ) and B( J, KK ). */ + + scal = scamin / work[i__ + kk * lds] * scaloc; + if (scal != 1.) { + i__7 = i2 - i1; + dscal_(&i__7, &scal, &x[i1 + rhs * x_dim1], &c__1); + work[i__ + kk * lds] = scamin * scaloc; + } + + scal = scamin / work[j + kk * lds] * scaloc; + if (scal != 1.) { + i__7 = j2 - j1; + dscal_(&i__7, &scal, &x[j1 + rhs * x_dim1], &c__1); + work[j + kk * lds] = scamin * scaloc; + } + } + + if (notran) { + +/* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + dgemm_("N", "N", &i__6, &i__7, &i__8, &c_b35, &a[i1 + j1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b36, + &x[i1 + k1 * x_dim1], ldx); + } else { + +/* B( I, K ) := B( I, K ) - A( J, I )**T * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + dgemm_("T", "N", &i__6, &i__7, &i__8, &c_b35, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b36, + &x[i1 + k1 * x_dim1], ldx); + } + } + } + +/* Reduce local scaling factors */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MIN */ + d__1 = scale[rhs], d__2 = work[i__ + kk * lds]; + scale[rhs] = f2cmin(d__1,d__2); + } + } + +/* Realize consistent scaling */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + if (scale[rhs] != 1. && scale[rhs] != 0.) 
{ + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__5 = i__ * nb; + i2 = f2cmin(i__5,*n) + 1; + scal = scale[rhs] / work[i__ + kk * lds]; + if (scal != 1.) { + i__5 = i2 - i1; + dscal_(&i__5, &scal, &x[i1 + rhs * x_dim1], &c__1); + } + } + } + } + } + return 0; + +/* End of DLATRS3 */ + +} /* dlatrs3_ */ + diff --git a/lapack-netlib/SRC/dlatrs3.f b/lapack-netlib/SRC/dlatrs3.f new file mode 100644 index 000000000..b4a98bc78 --- /dev/null +++ b/lapack-netlib/SRC/dlatrs3.f @@ -0,0 +1,656 @@ +*> \brief \b DLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. +* +* Definition: +* =========== +* +* SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, +* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), CNORM( * ), SCALE( * ), +* WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRS3 solves one of the triangular systems +*> +*> A * X = B * diag(scale) or A**T * X = B * diag(scale) +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A. X and B are +*> n by nrhs matrices and scale is an nrhs element vector of scaling +*> factors. A scaling factor scale(j) is usually less than or equal +*> to 1, chosen such that X(:,j) is less than the overflow threshold. +*> If the matrix A is singular (A(j,j) = 0 for some j), then +*> a non-trivial solution to A*X = 0 is returned. If the system is +*> so badly scaled that the solution cannot be represented as +*> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. +*> +*> This is a BLAS-3 version of LATRS for solving several right +*> hand sides simultaneously. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. 
If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the right hand side B of the triangular system. +*> On exit, X is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max (1,N). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (NRHS) +*> The scaling factor s(k) is for the triangular system +*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). +*> If SCALE = 0, the matrix A is singular or badly scaled. +*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) +*> that is an exact or approximate solution to A*x(:,k) = 0 +*> is returned. If the system so badly scaled that solution +*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 +*> is returned. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK). +*> On exit, if INFO = 0, WORK(1) returns the optimal size of +*> WORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> LWORK is INTEGER +*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where +*> NBA = (N + NB - 1)/NB and NB is the optimal block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +*> \par Further Details: +* ===================== +* \verbatim +* The algorithm follows the structure of a block triangular solve. +* The diagonal block is solved with a call to the robust the triangular +* solver LATRS for every right-hand side RHS = 1, ..., NRHS +* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), +* where op( A ) = A or op( A ) = A**T. +* The linear block updates operate on block columns of X, +* B( I, K ) - op(A( I, J )) * X( J, K ) +* and use GEMM. To avoid overflow in the linear block update, the worst case +* growth is estimated. 
For every RHS, a scale factor s <= 1.0 is computed +* such that +* || s * B( I, RHS )||_oo +* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold +* +* Once all columns of a block column have been rescaled (BLAS-1), the linear +* update is executed with GEMM without overflow. +* +* To limit rescaling, local scale factors track the scaling of column segments. +* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA +* per right-hand side column RHS = 1, ..., NRHS. The global scale factor +* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) +* I = 1, ..., NBA. +* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) +* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The +* linear update of potentially inconsistently scaled vector segments +* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) +* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, +* if necessary, rescales the blocks prior to calling GEMM. +* +* \endverbatim +* ===================================================================== +* References: +* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). +* Parallel robust solution of triangular linear systems. Concurrency +* and Computation: Practice and Experience, 31(19), e5064. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, + $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, NORMIN, UPLO + INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( LDX, * ), + $ SCALE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN + PARAMETER ( NRHSMIN = 2, NBRHS = 32 ) + PARAMETER ( NBMIN = 8, NBMAX = 64 ) +* .. +* .. Local Arrays .. + DOUBLE PRECISION W( NBMAX ), XNRM( NBRHS ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER + INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J, + $ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2, + $ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS + DOUBLE PRECISION ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC, + $ SCAMIN, SMLNUM, TMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE, DLARMM + EXTERNAL DLAMCH, DLANGE, DLARMM, ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLATRS, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Partition A and X into blocks +* + NB = MAX( 8, ILAENV( 1, 'DLATRS', '', N, N, -1, -1 ) ) + NB = MIN( NBMAX, NB ) + NBA = MAX( 1, (N + NB - 1) / NB ) + NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS ) +* +* Compute the workspace +* +* The workspace comprises two parts. +* The first part stores the local scale factors. Each simultaneously +* computed right-hand side requires one local scale factor per block +* row. WORK( I+KK*LDS ) is the scale factor of the vector +* segment associated with the I-th block row and the KK-th vector +* in the block column. 
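*     Illustrative sizing (an editorial note, not part of this patch):
*     assuming the block size works out to NB = 64 and, say, N = 1000
*     with NRHS = 100, one gets NBA = (1000 + 64 - 1) / 64 = 16 and
*     MAX( NBA, MIN( NRHS, NBRHS ) ) = 32, so the local scale factors
*     occupy NBA * 32 = 512 workspace entries, the block norms a
*     further NBA * NBA = 256, and WORK( 1 ) = 768. The documented
*     bound LWORK >= 2 * NBA * MAX( NBA, MIN( NRHS, 32 ) ) = 1024 is
*     therefore sufficient.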
+ LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) ) + LDS = NBA +* The second part stores upper bounds of the triangular A. There are +* a total of NBA x NBA blocks, of which only the upper triangular +* part or the lower triangular part is referenced. The upper bound of +* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). + LANRM = NBA * NBA + AWRK = LSCALE + WORK( 1 ) = LSCALE + LANRM +* +* Test the input parameters +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATRS3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize scaling factors +* + DO KK = 1, NRHS + SCALE( KK ) = ONE + END DO +* +* Quick return if possible +* + IF( MIN( N, NRHS ).EQ.0 ) + $ RETURN +* +* Determine machine dependent constant to control overflow. +* + BIGNUM = DLAMCH( 'Overflow' ) + SMLNUM = DLAMCH( 'Safe Minimum' ) +* +* Use unblocked code for small problems +* + IF( NRHS.LT.NRHSMIN ) THEN + CALL DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1), + $ SCALE( 1 ), CNORM, INFO ) + DO K = 2, NRHS + CALL DLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Compute norms of blocks of A excluding diagonal blocks and find +* the block with the largest norm TMAX. +* + TMAX = ZERO + DO J = 1, NBA + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 + IF ( UPPER ) THEN + IFIRST = 1 + ILAST = J - 1 + ELSE + IFIRST = J + 1 + ILAST = NBA + END IF + DO I = IFIRST, ILAST + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Compute upper bound of A( I1:I2-1, J1:J2-1 ). +* + IF( NOTRAN ) THEN + ANRM = DLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + I+(J-1)*NBA ) = ANRM + ELSE + ANRM = DLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + J+(I-1)*NBA ) = ANRM + END IF + TMAX = MAX( TMAX, ANRM ) + END DO + END DO +* + IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN +* +* Some matrix entries have huge absolute value. At least one upper +* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point +* number, either due to overflow in LANGE or due to Inf in A. +* Fall back to LATRS. Set normin = 'N' for every right-hand side to +* force computation of TSCAL in LATRS to avoid the likely overflow +* in the computation of the column norms CNORM. +* + DO K = 1, NRHS + CALL DLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Every right-hand side requires workspace to store NBA local scale +* factors. To save workspace, X is computed successively in block columns +* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient +* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. + DO K = 1, NBX +* Loop over block columns (index = K) of X and, for column-wise scalings, +* over individual columns (index = KK). 
+* K1: column index of the first column in X( J, K ) +* K2: column index of the first column in X( J, K+1 ) +* so the K2 - K1 is the column count of the block X( J, K ) + K1 = (K-1)*NBRHS + 1 + K2 = MIN( K*NBRHS, NRHS ) + 1 +* +* Initialize local scaling factors of current block column X( J, K ) +* + DO KK = 1, K2-K1 + DO I = 1, NBA + WORK( I+KK*LDS ) = ONE + END DO + END DO +* + IF( NOTRAN ) THEN +* +* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = NBA + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = NBA + JINC = 1 + END IF + ELSE +* +* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = NBA + JINC = 1 + ELSE + JFIRST = NBA + JLAST = 1 + JINC = -1 + END IF + END IF +* + DO J = JFIRST, JLAST, JINC +* J1: row index of the first row in A( J, J ) +* J2: row index of the first row in A( J+1, J+1 ) +* so that J2 - J1 is the row count of the block A( J, J ) + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 +* +* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) +* for all right-hand sides in the current block column, +* one RHS at a time. +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( KK.EQ.1 ) THEN + CALL DLATRS( UPLO, TRANS, DIAG, 'N', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + ELSE + CALL DLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + END IF +* Find largest absolute value entry in the vector segment +* X( J1:J2-1, RHS ) as an upper bound for the worst case +* growth in the linear updates. + XNRM( KK ) = DLANGE( 'I', J2-J1, 1, X( J1, RHS ), + $ LDX, W ) +* + IF( SCALOC .EQ. ZERO ) THEN +* LATRS found that A is singular through A(j,j) = 0. +* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 +* and compute A*x = 0 (or A**T*x = 0). Note that +* X(J1:J2-1, KK) is set by LATRS. + SCALE( RHS ) = ZERO + DO II = 1, J1-1 + X( II, KK ) = ZERO + END DO + DO II = J2, N + X( II, KK ) = ZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + ELSE IF( SCALOC * WORK( J+KK*LDS ) .EQ. ZERO ) THEN +* LATRS computed a valid scale factor, but combined with +* the current scaling the solution does not have a +* scale factor > 0. +* +* Set WORK( J+KK*LDS ) to smallest valid scale +* factor and increase SCALOC accordingly. + SCAL = WORK( J+KK*LDS ) / SMLNUM + SCALOC = SCALOC * SCAL + WORK( J+KK*LDS ) = SMLNUM +* If LATRS overestimated the growth, x may be +* rescaled to preserve a valid combined scale +* factor WORK( J, KK ) > 0. + RSCAL = ONE / SCALOC + IF( XNRM( KK ) * RSCAL .LE. BIGNUM ) THEN + XNRM( KK ) = XNRM( KK ) * RSCAL + CALL DSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 ) + SCALOC = ONE + ELSE +* The system op(A) * x = b is badly scaled and its +* solution cannot be represented as (1/scale) * x. +* Set x to zero. This approach deviates from LATRS +* where a completely meaningless non-zero vector +* is returned that is not a solution to op(A) * x = b. + SCALE( RHS ) = ZERO + DO II = 1, N + X( II, KK ) = ZERO + END DO +* Discard the local scale factors. 
+ DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + END IF + END IF + SCALOC = SCALOC * WORK( J+KK*LDS ) + WORK( J+KK*LDS ) = SCALOC + END DO +* +* Linear block updates +* + IF( NOTRAN ) THEN + IF( UPPER ) THEN + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + ELSE + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + END IF + ELSE + IF( UPPER ) THEN + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + ELSE + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + END IF + END IF +* + DO I = IFIRST, ILAST, IINC +* I1: row index of the first column in X( I, K ) +* I2: row index of the first column in X( I+1, K ) +* so the I2 - I1 is the row count of the block X( I, K ) + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Prepare the linear update to be executed with GEMM. +* For each column, compute a consistent scaling, a +* scaling factor to survive the linear update, and +* rescale the column segments, if necesssary. Then +* the linear update is safely executed. +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 +* Compute consistent scaling + SCAMIN = MIN( WORK( I + KK*LDS), WORK( J + KK*LDS ) ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + BNRM = DLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W ) + BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) ) + XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS )) + ANRM = WORK( AWRK + I+(J-1)*NBA ) + SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM ) +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to B( I, KK ) and B( J, KK ). +* + SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + WORK( I+KK*LDS ) = SCAMIN*SCALOC + END IF +* + SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL DSCAL( J2-J1, SCAL, X( J1, RHS ), 1 ) + WORK( J+KK*LDS ) = SCAMIN*SCALOC + END IF + END DO +* + IF( NOTRAN ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) +* + CALL DGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE, + $ A( I1, J1 ), LDA, X( J1, K1 ), LDX, + $ ONE, X( I1, K1 ), LDX ) + ELSE +* +* B( I, K ) := B( I, K ) - A( J, I )**T * X( J, K ) +* + CALL DGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ ONE, X( I1, K1 ), LDX ) + END IF + END DO + END DO +* +* Reduce local scaling factors +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + DO I = 1, NBA + SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) ) + END DO + END DO +* +* Realize consistent scaling +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. 
ZERO ) THEN + DO I = 1, NBA + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 + SCAL = SCALE( RHS ) / WORK( I+KK*LDS ) + IF( SCAL.NE.ONE ) + $ CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + END DO + END IF + END DO + END DO + RETURN +* +* End of DLATRS3 +* + END diff --git a/lapack-netlib/SRC/dtrsyl3.c b/lapack-netlib/SRC/dtrsyl3.c new file mode 100644 index 000000000..9cfbe3dab --- /dev/null +++ b/lapack-netlib/SRC/dtrsyl3.c @@ -0,0 +1,2060 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + 
doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRSYL3 solves the real Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**T, and A and B are both upper quasi- */ +/* > triangular. A is M-by-M and B is N-by-N; the right hand side C and */ +/* > the solution X are M-by-N; and scale is an output scale factor, set */ +/* > <= 1 to avoid overflow in X. */ +/* > */ +/* > A and B must be in Schur canonical form (as returned by DHSEQR), that */ +/* > is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ +/* > each 2-by-2 diagonal block has its diagonal elements equal and its */ +/* > off-diagonal elements of opposite sign. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'T': op(A) = A**T (Transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'T': op(B) = B**T (Transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,M) */ +/* > The upper quasi-triangular matrix A, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,N) */ +/* > The upper quasi-triangular matrix B, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) */ +/* > + ((N + NB - 1) / NB + 1), where NB is the optimal block size. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimension of the IWORK array, */ +/* > returns this value as the first entry of the IWORK array, and */ +/* > no error message related to LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). 
Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int dtrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *c__, integer *ldc, doublereal *scale, + integer *iwork, integer *liwork, doublereal *swork, integer *ldswork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3; + + /* Local variables */ + doublereal scal, anrm, bnrm, cnrm; + integer awrk, bwrk; + logical skip; + doublereal *wnrm, xnrm; + integer i__, j, k, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer iinfo, i1, i2, j1, j2, k1, k2, l1; +// extern integer myexp_(doublereal *); + integer l2, nb, pc, jj, ll; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + logical notrna, notrnb; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int dtrsyl_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *); + integer nba, nbb; + doublereal buf, sgn; + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --iwork; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. 
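+   NB is taken from ILAENV for DTRSYL and bounded below by 8.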
*/ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "DTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *liwork == -1 || *ldswork == -1; + iwork[1] = nba + nbb + 2; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "T") && ! lsame_( + trana, "C")) { + *info = -1; + } else if (! notrnb && ! lsame_(tranb, "T") && ! + lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (doublereal*)malloc(f2cmax(*m,*n)*sizeof(doublereal)); +/* Use unblocked code for small problems or if insufficient */ +/* workspaces are provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb) || *liwork < iwork[1]) { + dtrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Partition A such that 2-by-2 blocks on the diagonal are not split */ + + skip = FALSE_; + i__1 = nba; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = (i__ - 1) * nb + 1; + } + iwork[nba + 1] = *m + 1; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[k]; + l2 = iwork[k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *m) { +/* A( M, M ) is a 1-by-1 block */ + mycycle_(); + } + if (a[l + (l + 1) * a_dim1] != 0. && a[l + 1 + l * a_dim1] != 0.) + { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[k + 1]) { + ++iwork[k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[nba + 1] = *m + 1; + if (iwork[nba] >= iwork[nba + 1]) { + iwork[nba] = iwork[nba + 1]; + --nba; + } + +/* Partition B such that 2-by-2 blocks on the diagonal are not split */ + + pc = nba + 1; + skip = FALSE_; + i__1 = nbb; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[pc + i__] = (i__ - 1) * nb + 1; + } + iwork[pc + nbb + 1] = *n + 1; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[pc + k]; + l2 = iwork[pc + k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *n) { +/* B( N, N ) is a 1-by-1 block */ + mycycle_(); + } + if (b[l + (l + 1) * b_dim1] != 0. && b[l + 1 + l * b_dim1] != 0.) + { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[pc + k + 1]) { + ++iwork[pc + k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[pc + nbb + 1] = *n + 1; + if (iwork[pc + nbb] >= iwork[pc + nbb + 1]) { + iwork[pc + nbb] = iwork[pc + nbb + 1]; + --nbb; + } + +/* Set local scaling factors - must never attain zero. 
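+   Each SWORK( K, L ) records the combined scaling applied so far to the
+   solution block C( K, L ); the smallest of them, combined with BUF, gives
+   the returned SCALE.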
*/ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = iwork[l]; + l2 = iwork[l + 1]; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = dlange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = dlange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[pc + k]; + k2 = iwork[pc + k + 1]; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = dlange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = dlange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (doublereal) (*isgn); + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = dlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = dlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + dscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + dgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = dlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
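+   Only the power-of-two part of SCALOC is absorbed into BUF; SWORK, SCAMIN
+   and SCALOC are adjusted by the same power of two (bounded by BIGNUM) so
+   that the combined scaling stays consistent.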
*/ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "N", &i__3, &i__4, &i__5, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**T*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__3 = k2 - k1; + i__4 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = dlange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = dlange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + dscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + dgemm_("T", "N", &i__4, &i__5, &i__6, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = dlange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + dscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "N", &i__4, &i__5, &i__6, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**T*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = dlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = dlange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + dscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + dgemm_("T", "N", &i__3, &i__4, &i__5, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = dlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "T", &i__3, &i__4, &i__5, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__1 = k2 - k1; + i__2 = l2 - l1; + dtrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b19, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = dlange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = dlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + dscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + dgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = dlange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + dscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + d__1 = -sgn; + dgemm_("N", "T", &i__2, &i__3, &i__4, &d__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + + } + free(wnrm); +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + d__1 = *scale, d__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(d__1,d__2); + } + } + + if (*scale == 0.) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to */ +/* zero and give up. */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + dscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1. && buf > 0.) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + d__1 = *scale / smlnum, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + *scale /= scaloc; + } + if (buf != 1. && buf > 0.) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + + scal = c__[c_dim1 + 1]; + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + d__2 = scal, d__3 = (d__1 = c__[k + l * c_dim1], abs(d__1)); + scal = f2cmax(d__2,d__3); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + d__1 = bignum / scal, d__2 = 1. 
/ buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + dlascl_("G", &c_n1, &c_n1, &c_b32, &scaloc, m, n, &c__[c_offset], ldc, + &iwork[1]); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. */ + + *scale *= buf; + +/* Restore workspace dimensions */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + + return 0; + +/* End of DTRSYL3 */ + +} /* dtrsyl3_ */ + diff --git a/lapack-netlib/SRC/dtrsyl3.f b/lapack-netlib/SRC/dtrsyl3.f new file mode 100644 index 000000000..c44ec3808 --- /dev/null +++ b/lapack-netlib/SRC/dtrsyl3.f @@ -0,0 +1,1241 @@ +*> \brief \b DTRSYL3 +* +* Definition: +* =========== +* +* +*> \par Purpose +* ============= +*> +*> \verbatim +*> +*> DTRSYL3 solves the real Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**T, and A and B are both upper quasi- +*> triangular. A is M-by-M and B is N-by-N; the right hand side C and +*> the solution X are M-by-N; and scale is an output scale factor, set +*> <= 1 to avoid overflow in X. +*> +*> A and B must be in Schur canonical form (as returned by DHSEQR), that +*> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +*> each 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> +*> This is the block version of the algorithm. +*> \endverbatim +* +* Arguments +* ========= +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'T': op(A) = A**T (Transpose) +*> = 'C': op(A) = A**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'T': op(B) = B**T (Transpose) +*> = 'C': op(B) = B**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> The upper quasi-triangular matrix A, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The upper quasi-triangular matrix B, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. 
LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> IWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) +*> + ((N + NB - 1) / NB + 1), where NB is the optimal block size. +*> +*> If LIWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimension of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), +*> MAX(1,COLS)). +*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS +*> and SWORK(2) returns the optimal COLS. +*> \endverbatim +*> +*> \param[in] LDSWORK +*> \verbatim +*> LDSWORK is INTEGER +*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) +*> and NB is the optimal block size. +*> +*> If LDSWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the SWORK matrix, +*> returns these values as the first and second entry of the SWORK +*> matrix, and no error message related LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* ===================================================================== +* References: +* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of +* algorithms: The triangular Sylvester equation, ACM Transactions +* on Mathematical Software (TOMS), volume 29, pages 218--243. +* +* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel +* Solution of the Triangular Sylvester Equation. Lecture Notes in +* Computer Science, vol 12043, pages 82--92, Springer. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK, + $ INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N, + $ LIWORK, LDSWORK + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ SWORK( LDSWORK, * ) +* .. +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB, LQUERY, SKIP + INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ, + $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB, PC + DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC, + $ SCAMIN, SGN, XNRM, BUF, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION WNRM( MAX( M, N ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLANGE, DLAMCH, DLARMM + EXTERNAL DLANGE, DLAMCH, DLARMM, ILAENV, LSAME +* .. +* .. External Subroutines .. 
+ EXTERNAL DGEMM, DLASCL, DSCAL, DTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, EXPONENT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* +* Use the same block size for all matrices. +* + NB = MAX(8, ILAENV( 1, 'DTRSYL', '', M, N, -1, -1) ) +* +* Compute number of blocks in A and B +* + NBA = MAX( 1, (M + NB - 1) / NB ) + NBB = MAX( 1, (N + NB - 1) / NB ) +* +* Compute workspace +* + INFO = 0 + LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 ) + IWORK( 1 ) = NBA + NBB + 2 + IF( LQUERY ) THEN + LDSWORK = 2 + SWORK( 1, 1 ) = MAX( NBA, NBB ) + SWORK( 2, 1 ) = 2 * NBB + NBA + END IF +* +* Test the input arguments +* + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSYL3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Use unblocked code for small problems or if insufficient +* workspaces are provided +* + IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) .OR. + $ LIWORK.LT.IWORK(1) ) THEN + CALL DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, INFO ) + RETURN + END IF +* +* Set constants to control overflow +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Partition A such that 2-by-2 blocks on the diagonal are not split +* + SKIP = .FALSE. + DO I = 1, NBA + IWORK( I ) = ( I - 1 ) * NB + 1 + END DO + IWORK( NBA + 1 ) = M + 1 + DO K = 1, NBA + L1 = IWORK( K ) + L2 = IWORK( K + 1 ) - 1 + DO L = L1, L2 + IF( SKIP ) THEN + SKIP = .FALSE. + CYCLE + END IF + IF( L.GE.M ) THEN +* A( M, M ) is a 1-by-1 block + CYCLE + END IF + IF( A( L, L+1 ).NE.ZERO .AND. A( L+1, L ).NE.ZERO ) THEN +* Check if 2-by-2 block is split + IF( L + 1 .EQ. IWORK( K + 1 ) ) THEN + IWORK( K + 1 ) = IWORK( K + 1 ) + 1 + CYCLE + END IF + SKIP = .TRUE. + END IF + END DO + END DO + IWORK( NBA + 1 ) = M + 1 + IF( IWORK( NBA ).GE.IWORK( NBA + 1 ) ) THEN + IWORK( NBA ) = IWORK( NBA + 1 ) + NBA = NBA - 1 + END IF +* +* Partition B such that 2-by-2 blocks on the diagonal are not split +* + PC = NBA + 1 + SKIP = .FALSE. + DO I = 1, NBB + IWORK( PC + I ) = ( I - 1 ) * NB + 1 + END DO + IWORK( PC + NBB + 1 ) = N + 1 + DO K = 1, NBB + L1 = IWORK( PC + K ) + L2 = IWORK( PC + K + 1 ) - 1 + DO L = L1, L2 + IF( SKIP ) THEN + SKIP = .FALSE. + CYCLE + END IF + IF( L.GE.N ) THEN +* B( N, N ) is a 1-by-1 block + CYCLE + END IF + IF( B( L, L+1 ).NE.ZERO .AND. B( L+1, L ).NE.ZERO ) THEN +* Check if 2-by-2 block is split + IF( L + 1 .EQ. IWORK( PC + K + 1 ) ) THEN + IWORK( PC + K + 1 ) = IWORK( PC + K + 1 ) + 1 + CYCLE + END IF + SKIP = .TRUE. + END IF + END DO + END DO + IWORK( PC + NBB + 1 ) = N + 1 + IF( IWORK( PC + NBB ).GE.IWORK( PC + NBB + 1 ) ) THEN + IWORK( PC + NBB ) = IWORK( PC + NBB + 1 ) + NBB = NBB - 1 + END IF +* +* Set local scaling factors - must never attain zero. 
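+*     Each entry SWORK( K, L ) records the scaling applied so far to
+*     the solution block X( K, L ). All entries start at ONE, and the
+*     returned SCALE is their minimum. As an illustration (with the
+*     fallback factor BUF staying at ONE): if one block finishes with
+*     SWORK( K, L ) = 2**(-10) while all other entries remain ONE,
+*     the consistency scaling at the end multiplies every other block
+*     by 2**(-10) and the routine returns SCALE = 2**(-10).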
+* + DO L = 1, NBB + DO K = 1, NBA + SWORK( K, L ) = ONE + END DO + END DO +* +* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. +* This scaling is to ensure compatibility with TRSYL and may get flushed. +* + BUF = ONE +* +* Compute upper bounds of blocks of A and B +* + AWRK = NBB + DO K = 1, NBA + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = K, NBA + L1 = IWORK( L ) + L2 = IWORK( L + 1 ) + IF( NOTRNA ) THEN + SWORK( K, AWRK + L ) = DLANGE( 'I', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + ELSE + SWORK( L, AWRK + K ) = DLANGE( '1', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + END IF + END DO + END DO + BWRK = NBB + NBA + DO K = 1, NBB + K1 = IWORK( PC + K ) + K2 = IWORK( PC + K + 1 ) + DO L = K, NBB + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) + IF( NOTRNB ) THEN + SWORK( K, BWRK + L ) = DLANGE( 'I', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + ELSE + SWORK( L, BWRK + K ) = DLANGE( '1', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + END IF + END DO + END DO +* + SGN = DBLE( ISGN ) +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF ( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K - 1, 1, -1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. 
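+*                 For the nonzero SCALOC used here, the intrinsic
+*                 EXPONENT( SCALOC ) returns the binary exponent E with
+*                 SCALOC = F * 2**E and 0.5 <= F < 1. The power 2**E is
+*                 factored out of SCALOC and out of every local scale
+*                 factor into BUF, so SCALOC itself stays safely
+*                 representable while the overall scaling is carried
+*                 along in BUF.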
+ BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO JJ = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK(L, BWRK + J) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B = scale*C. 
+* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. 
ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + SWORK( K, L ) = SCALOC * SWORK( K, L ) + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. 
The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**T = scale*C. 
+* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=K+1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = 1, K - 1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. 
ONE) THEN + DO LL = L1, L2-1 + CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO +* + END IF +* +* Reduce local scaling factors +* + SCALE = SWORK( 1, 1 ) + DO K = 1, NBA + DO L = 1, NBB + SCALE = MIN( SCALE, SWORK( K, L ) ) + END DO + END DO +* + IF( SCALE .EQ. ZERO ) THEN +* +* The magnitude of the largest entry of the solution is larger +* than the product of BIGNUM**2 and cannot be represented in the +* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to +* zero and give up. +* + IWORK(1) = NBA + NBB + 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + RETURN + END IF +* +* Realize consistent scaling +* + DO K = 1, NBA + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) + SCAL = SCALE / SWORK( K, L ) + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF + END DO + END DO +* + IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN +* +* Decrease SCALE as much as possible. +* + SCALOC = MIN( SCALE / SMLNUM, ONE / BUF ) + BUF = BUF * SCALOC + SCALE = SCALE / SCALOC + END IF + + IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN +* +* In case of overly aggressive scaling during the computation, +* flushing of the global scale factor may be prevented by +* undoing some of the scaling. This step is to ensure that +* this routine flushes only scale factors that TRSYL also +* flushes and be usable as a drop-in replacement. +* +* How much can the normwise largest entry be upscaled? +* + SCAL = C( 1, 1 ) + DO K = 1, M + DO L = 1, N + SCAL = MAX( SCAL, ABS( C( K, L ) ) ) + END DO + END DO +* +* Increase BUF as close to 1 as possible and apply scaling. 
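+*        The solution can be multiplied by at most BIGNUM / SCAL
+*        without overflowing, and multiplying by ONE / BUF would
+*        restore BUF to exactly ONE. The smaller of the two factors is
+*        applied; any remaining part of BUF is folded into SCALE below.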
+* + SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) + BUF = BUF * SCALOC + CALL DLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK ) + END IF +* +* Combine with buffer scaling factor. SCALE will be flushed if +* BUF is less than one here. +* + SCALE = SCALE * BUF +* +* Restore workspace dimensions +* + IWORK(1) = NBA + NBB + 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA +* + RETURN +* +* End of DTRSYL3 +* + END diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f index af2850398..a639e0375 100644 --- a/lapack-netlib/SRC/ilaenv.f +++ b/lapack-netlib/SRC/ilaenv.f @@ -469,6 +469,15 @@ ELSE NB = 64 END IF + ELSE IF( C3.EQ.'SYL' ) THEN +* The upper bound is to prevent overly aggressive scaling. + IF( SNAME ) THEN + NB = MIN( MAX( 48, INT( ( MIN( N1, N2 ) * 16 ) / 100) ), + $ 240 ) + ELSE + NB = MIN( MAX( 24, INT( ( MIN( N1, N2 ) * 8 ) / 100) ), + $ 80 ) + END IF END IF ELSE IF( C2.EQ.'LA' ) THEN IF( C3.EQ.'UUM' ) THEN @@ -477,6 +486,12 @@ ELSE NB = 64 END IF + ELSE IF( C3.EQ.'TRS' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF END IF ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN IF( C3.EQ.'EBZ' ) THEN diff --git a/lapack-netlib/SRC/slarmm.c b/lapack-netlib/SRC/slarmm.c new file mode 100644 index 000000000..95114e2f1 --- /dev/null +++ b/lapack-netlib/SRC/slarmm.c @@ -0,0 +1,605 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char 
*oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLARMM */ + +/* Definition: */ +/* =========== */ + +/* REAL FUNCTION SLARMM( ANORM, BNORM, CNORM ) */ + +/* REAL ANORM, BNORM, CNORM */ + +/* > \par Purpose: */ +/* ======= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLARMM returns a factor s in (0, 1] such that the linear updates */ +/* > */ +/* > (s * C) - A * (s * B) and (s * C) - (s * A) * B */ +/* > */ +/* > cannot overflow, where A, B, and C are matrices of conforming */ +/* > dimensions. */ +/* > */ +/* > This is an auxiliary routine so there is no argument checking. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========= */ + +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > The infinity norm of A. ANORM >= 0. */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BNORM */ +/* > \verbatim */ +/* > BNORM is REAL */ +/* > The infinity norm of B. BNORM >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CNORM */ +/* > \verbatim */ +/* > CNORM is REAL */ +/* > The infinity norm of C. CNORM >= 0. */ +/* > \endverbatim */ +/* > */ +/* > */ +/* ===================================================================== */ +/* > References: */ +/* > C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */ +/* > Robust Solution of Triangular Linear Systems. In: International */ +/* > Conference on Parallel Processing and Applied Mathematics, pages */ +/* > 68--78. Springer, 2017. */ +/* > */ +/* > \ingroup OTHERauxiliary */ +/* ===================================================================== */ +real slarmm_(real *anorm, real *bnorm, real *cnorm) +{ + /* System generated locals */ + real ret_val; + + /* Local variables */ + extern real slamch_(char *); + real bignum, smlnum; + + + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = slamch_("Safe minimum") / slamch_("Precision"); + bignum = 1.f / smlnum / 4.f; + +/* Compute a scale factor. 
*/ + + ret_val = 1.f; + if (*bnorm <= 1.f) { + if (*anorm * *bnorm > bignum - *cnorm) { + ret_val = .5f; + } + } else { + if (*anorm > (bignum - *cnorm) / *bnorm) { + ret_val = .5f / *bnorm; + } + } + return ret_val; + +/* ==== End of SLARMM ==== */ + +} /* slarmm_ */ + diff --git a/lapack-netlib/SRC/slarmm.f b/lapack-netlib/SRC/slarmm.f new file mode 100644 index 000000000..643dd6748 --- /dev/null +++ b/lapack-netlib/SRC/slarmm.f @@ -0,0 +1,99 @@ +*> \brief \b SLARMM +* +* Definition: +* =========== +* +* REAL FUNCTION SLARMM( ANORM, BNORM, CNORM ) +* +* .. Scalar Arguments .. +* REAL ANORM, BNORM, CNORM +* .. +* +*> \par Purpose: +* ======= +*> +*> \verbatim +*> +*> SLARMM returns a factor s in (0, 1] such that the linear updates +*> +*> (s * C) - A * (s * B) and (s * C) - (s * A) * B +*> +*> cannot overflow, where A, B, and C are matrices of conforming +*> dimensions. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========= +* +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The infinity norm of A. ANORM >= 0. +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] BNORM +*> \verbatim +*> BNORM is REAL +*> The infinity norm of B. BNORM >= 0. +*> \endverbatim +*> +*> \param[in] CNORM +*> \verbatim +*> CNORM is REAL +*> The infinity norm of C. CNORM >= 0. +*> \endverbatim +*> +*> +* ===================================================================== +*> References: +*> C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for +*> Robust Solution of Triangular Linear Systems. In: International +*> Conference on Parallel Processing and Applied Mathematics, pages +*> 68--78. Springer, 2017. +*> +*> \ingroup OTHERauxiliary +* ===================================================================== + + REAL FUNCTION SLARMM( ANORM, BNORM, CNORM ) + IMPLICIT NONE +* .. Scalar Arguments .. + REAL ANORM, BNORM, CNORM +* .. Parameters .. + REAL ONE, HALF, FOUR + PARAMETER ( ONE = 1.0E0, HALF = 0.5E+0, FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL BIGNUM, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ( ONE / SMLNUM ) / FOUR +* +* Compute a scale factor. +* + SLARMM = ONE + IF( BNORM .LE. ONE ) THEN + IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN + SLARMM = HALF + END IF + ELSE + IF( ANORM .GT. 
(BIGNUM - CNORM) / BNORM ) THEN + SLARMM = HALF / BNORM + END IF + END IF + RETURN +* +* ==== End of SLARMM ==== +* + END diff --git a/lapack-netlib/SRC/slatrs3.c b/lapack-netlib/SRC/slatrs3.c new file mode 100644 index 000000000..e5c48a55b --- /dev/null +++ b/lapack-netlib/SRC/slatrs3.c @@ -0,0 +1,1262 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + 
ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. + */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, */ +/* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) */ + +/* CHARACTER DIAG, NORMIN, TRANS, UPLO */ +/* INTEGER INFO, LDA, LWORK, LDX, N, NRHS */ +/* REAL A( LDA, * ), CNORM( * ), SCALE( * ), */ +/* WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLATRS3 solves one of the triangular systems */ +/* > */ +/* > A * X = B * diag(scale) or A**T * X = B * diag(scale) */ +/* > */ +/* > with scaling to prevent overflow. Here A is an upper or lower */ +/* > triangular matrix, A**T denotes the transpose of A. X and B are */ +/* > n by nrhs matrices and scale is an nrhs element vector of scaling */ +/* > factors. A scaling factor scale(j) is usually less than or equal */ +/* > to 1, chosen such that X(:,j) is less than the overflow threshold. */ +/* > If the matrix A is singular (A(j,j) = 0 for some j), then */ +/* > a non-trivial solution to A*X = 0 is returned. If the system is */ +/* > so badly scaled that the solution cannot be represented as */ +/* > (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. */ +/* > */ +/* > This is a BLAS-3 version of LATRS for solving several right */ +/* > hand sides simultaneously. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the operation applied to A. */ +/* > = 'N': Solve A * x = s*b (No transpose) */ +/* > = 'T': Solve A**T* x = s*b (Transpose) */ +/* > = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. 
*/ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NORMIN */ +/* > \verbatim */ +/* > NORMIN is CHARACTER*1 */ +/* > Specifies whether CNORM has been set or not. */ +/* > = 'Y': CNORM contains the column norms on entry */ +/* > = 'N': CNORM is not set on entry. On exit, the norms will */ +/* > be computed and stored in CNORM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of the array A contains the upper */ +/* > triangular matrix, and the strictly lower triangular part of */ +/* > A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of the array A contains the lower triangular */ +/* > matrix, and the strictly upper triangular part of A is not */ +/* > referenced. If DIAG = 'U', the diagonal elements of A are */ +/* > also not referenced and are assumed to be 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > On entry, the right hand side B of the triangular system. */ +/* > On exit, X is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL array, dimension (NRHS) */ +/* > The scaling factor s(k) is for the triangular system */ +/* > A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). */ +/* > If SCALE = 0, the matrix A is singular or badly scaled. */ +/* > If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) */ +/* > that is an exact or approximate solution to A*x(:,k) = 0 */ +/* > is returned. If the system so badly scaled that solution */ +/* > cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 */ +/* > is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CNORM */ +/* > \verbatim */ +/* > CNORM is REAL array, dimension (N) */ +/* > */ +/* > If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* > contains the norm of the off-diagonal part of the j-th column */ +/* > of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* > to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* > must be greater than or equal to the 1-norm. */ +/* > */ +/* > If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* > returns the 1-norm of the offdiagonal part of the j-th column */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK). */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal size of */ +/* > WORK. 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > LWORK is INTEGER */ +/* > LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where */ +/* > NBA = (N + NB - 1)/NB and NB is the optimal block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ +/* > \par Further Details: */ +/* ===================== */ +/* \verbatim */ +/* The algorithm follows the structure of a block triangular solve. */ +/* The diagonal block is solved with a call to the robust the triangular */ +/* solver LATRS for every right-hand side RHS = 1, ..., NRHS */ +/* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), */ +/* where op( A ) = A or op( A ) = A**T. */ +/* The linear block updates operate on block columns of X, */ +/* B( I, K ) - op(A( I, J )) * X( J, K ) */ +/* and use GEMM. To avoid overflow in the linear block update, the worst case */ +/* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed */ +/* such that */ +/* || s * B( I, RHS )||_oo */ +/* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold */ + +/* Once all columns of a block column have been rescaled (BLAS-1), the linear */ +/* update is executed with GEMM without overflow. */ + +/* To limit rescaling, local scale factors track the scaling of column segments. */ +/* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA */ +/* per right-hand side column RHS = 1, ..., NRHS. The global scale factor */ +/* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) */ +/* I = 1, ..., NBA. */ +/* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) */ +/* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The */ +/* linear update of potentially inconsistently scaled vector segments */ +/* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) */ +/* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, */ +/* if necessary, rescales the blocks prior to calling GEMM. */ + +/* \endverbatim */ +/* ===================================================================== */ +/* References: */ +/* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). */ +/* Parallel robust solution of triangular linear systems. Concurrency */ +/* and Computation: Practice and Experience, 31(19), e5064. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
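The consistency scaling described in the Further Details above can be illustrated with a small standalone C sketch. The helper names below (rescale_segment, make_consistent) are hypothetical and are not part of this patch; the routine itself performs the same rescaling with SSCAL on column segments of X before the GEMM update.

#include <math.h>
#include <stddef.h>

/* Hypothetical helper: bring one vector segment from its local scale factor
 * *s_seg down to the common factor scamin, so that both operands of a block
 * update carry the same scaling. */
static void rescale_segment(float *seg, size_t len, float *s_seg, float scamin)
{
    float scal = scamin / *s_seg;        /* <= 1 because scamin <= *s_seg */
    if (scal != 1.0f) {
        for (size_t k = 0; k < len; ++k)
            seg[k] *= scal;
        *s_seg = scamin;
    }
}

/* Before B( I, K ) := B( I, K ) - op(A( I, J )) * X( J, K ), both segments
 * are brought to SCAMIN = MIN( s( I, RHS ), s( J, RHS ) ), so the GEMM acts
 * on consistently scaled data.  (The routine additionally folds in the
 * robust factor SCALOC obtained from the norm bounds.) */
static void make_consistent(float *b_i, size_t len_i, float *s_i,
                            float *x_j, size_t len_j, float *s_j)
{
    float scamin = fminf(*s_i, *s_j);
    rescale_segment(b_i, len_i, s_i, scamin);
    rescale_segment(x_j, len_j, s_j, scamin);
}
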
*/ + +/* ===================================================================== */ +/* Subroutine */ int slatrs3_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *nrhs, real *a, integer *lda, real *x, + integer *ldx, real *scale, real *cnorm, real *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8; + real r__1, r__2; + + /* Local variables */ + integer iinc, jinc; + real scal, anrm, bnrm; + integer awrk; + real tmax, xnrm[32]; + integer i__, j, k; + real w[64]; + extern logical lsame_(char *, char *); + real rscal; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer lanrm, ilast, jlast, i1; + logical upper; + integer i2, j1, j2, k1, k2, nb, ii, kk, lscale; + real scaloc; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern real slarmm_(real *, real *, real *); + integer ifirst; + logical notran; + integer jfirst; + extern /* Subroutine */ int slatrs_(char *, char *, char *, char *, + integer *, real *, integer *, real *, real *, real *, integer *); + real smlnum; + logical nounit, lquery; + integer nba, lds, nbx, rhs; + + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --scale; + --cnorm; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + lquery = *lwork == -1; + +/* Partition A and X into blocks. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "SLATRS", "", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + nb = f2cmin(64,nb); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*nrhs + 31) / 32; + nbx = f2cmax(i__1,i__2); + +/* Compute the workspace */ + +/* The workspace comprises two parts. */ +/* The first part stores the local scale factors. Each simultaneously */ +/* computed right-hand side requires one local scale factor per block */ +/* row. WORK( I + KK * LDS ) is the scale factor of the vector */ +/* segment associated with the I-th block row and the KK-th vector */ +/* in the block column. */ +/* Computing MAX */ + i__1 = nba, i__2 = f2cmin(*nrhs,32); + lscale = nba * f2cmax(i__1,i__2); + lds = nba; +/* The second part stores upper bounds of the triangular A. There are */ +/* a total of NBA x NBA blocks, of which only the upper triangular */ +/* part or the lower triangular part is referenced. The upper bound of */ +/* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). */ + lanrm = nba * nba; + awrk = lscale; + work[1] = (real) (lscale + lanrm); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! 
lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldx < f2cmax(1,*n)) { + *info = -10; + } else if (! lquery && (real) (*lwork) < work[1]) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLATRS3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize scaling factors */ + + i__1 = *nrhs; + for (kk = 1; kk <= i__1; ++kk) { + scale[kk] = 1.f; + } + +/* Quick return if possible */ + + if (f2cmin(*n,*nrhs) == 0) { + return 0; + } + +/* Determine machine dependent constant to control overflow. */ + + bignum = slamch_("Overflow"); + smlnum = slamch_("Safe Minimum"); + +/* Use unblocked code for small problems */ + + if (*nrhs < 2) { + slatrs_(uplo, trans, diag, normin, n, &a[a_offset], lda, &x[x_dim1 + + 1], &scale[1], &cnorm[1], info); + i__1 = *nrhs; + for (k = 2; k <= i__1; ++k) { + slatrs_(uplo, trans, diag, "Y", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Compute norms of blocks of A excluding diagonal blocks and find */ +/* the block with the largest norm TMAX. */ + + tmax = 0.f; + i__1 = nba; + for (j = 1; j <= i__1; ++j) { + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + if (upper) { + ifirst = 1; + ilast = j - 1; + } else { + ifirst = j + 1; + ilast = nba; + } + i__2 = ilast; + for (i__ = ifirst; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*n) + 1; + +/* Compute upper bound of A( I1:I2-1, J1:J2-1 ). */ + + if (notran) { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = slange_("I", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + i__ + (j - 1) * nba] = anrm; + } else { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = slange_("1", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + j + (i__ - 1) * nba] = anrm; + } + tmax = f2cmax(tmax,anrm); + } + } + + if (! (tmax <= slamch_("Overflow"))) { + +/* Some matrix entries have huge absolute value. At least one upper */ +/* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point */ +/* number, either due to overflow in LANGE or due to Inf in A. */ +/* Fall back to LATRS. Set normin = 'N' for every right-hand side to */ +/* force computation of TSCAL in LATRS to avoid the likely overflow */ +/* in the computation of the column norms CNORM. */ + + i__1 = *nrhs; + for (k = 1; k <= i__1; ++k) { + slatrs_(uplo, trans, diag, "N", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Every right-hand side requires workspace to store NBA local scale */ +/* factors. To save workspace, X is computed successively in block columns */ +/* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient */ +/* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. */ + i__1 = nbx; + for (k = 1; k <= i__1; ++k) { +/* Loop over block columns (index = K) of X and, for column-wise scalings, */ +/* over individual columns (index = KK). 
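The fallback test earlier in this routine, IF( .NOT. TMAX.LE.SLAMCH( 'Overflow' ) ) in the Fortran and the corresponding negated comparison in this translation, is written that way on purpose: every comparison involving NaN evaluates to false, so the negated form triggers the fallback for NaN as well as for Inf. A small self-contained demonstration follows; the overflow constant is an arbitrary stand-in, not SLAMCH output.

#include <math.h>
#include <stdio.h>

/* A plain "tmax > ovf" would be false for NaN, whereas !(tmax <= ovf) is
 * true for both Inf and NaN, which are exactly the cases in which the
 * blocked path must fall back to the unblocked LATRS solves. */
int main(void)
{
    float ovf = 3.0e38f;                 /* stand-in for SLAMCH('Overflow') */
    float vals[3] = { 1.0f, INFINITY, NAN };
    for (int i = 0; i < 3; ++i) {
        float tmax = vals[i];
        printf("tmax = %g  fallback = %d\n", tmax, !(tmax <= ovf));
    }
    return 0;                            /* prints fallback = 0, 1, 1 */
}
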
*/ +/* K1: column index of the first column in X( J, K ) */ +/* K2: column index of the first column in X( J, K+1 ) */ +/* so the K2 - K1 is the column count of the block X( J, K ) */ + k1 = (k - 1 << 5) + 1; +/* Computing MIN */ + i__2 = k << 5; + k2 = f2cmin(i__2,*nrhs) + 1; + +/* Initialize local scaling factors of current block column X( J, K ) */ + + i__2 = k2 - k1; + for (kk = 1; kk <= i__2; ++kk) { + i__3 = nba; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__ + kk * lds] = 1.f; + } + } + + if (notran) { + +/* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = nba; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = nba; + jinc = 1; + } + } else { + +/* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = 1; + jlast = nba; + jinc = 1; + } else { + jfirst = nba; + jlast = 1; + jinc = -1; + } + } + + i__2 = jlast; + i__3 = jinc; + for (j = jfirst; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { +/* J1: row index of the first row in A( J, J ) */ +/* J2: row index of the first row in A( J+1, J+1 ) */ +/* so that J2 - J1 is the row count of the block A( J, J ) */ + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) */ +/* for all right-hand sides in the current block column, */ +/* one RHS at a time. */ + + i__4 = k2 - k1; + for (kk = 1; kk <= i__4; ++kk) { + rhs = k1 + kk - 1; + if (kk == 1) { + i__5 = j2 - j1; + slatrs_(uplo, trans, diag, "N", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } else { + i__5 = j2 - j1; + slatrs_(uplo, trans, diag, "Y", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } +/* Find largest absolute value entry in the vector segment */ +/* X( J1:J2-1, RHS ) as an upper bound for the worst case */ +/* growth in the linear updates. */ + i__5 = j2 - j1; + xnrm[kk - 1] = slange_("I", &i__5, &c__1, &x[j1 + rhs * + x_dim1], ldx, w); + + if (scaloc == 0.f) { +/* LATRS found that A is singular through A(j,j) = 0. */ +/* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 */ +/* and compute A*x = 0 (or A**T*x = 0). Note that */ +/* X(J1:J2-1, KK) is set by LATRS. */ + scale[rhs] = 0.f; + i__5 = j1 - 1; + for (ii = 1; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.f; + } + i__5 = *n; + for (ii = j2; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.f; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.f; + } + scaloc = 1.f; + } else if (scaloc * work[j + kk * lds] == 0.f) { +/* LATRS computed a valid scale factor, but combined with */ +/* the current scaling the solution does not have a */ +/* scale factor > 0. */ + +/* Set WORK( J+KK*LDS ) to smallest valid scale */ +/* factor and increase SCALOC accordingly. */ + scal = work[j + kk * lds] / smlnum; + scaloc *= scal; + work[j + kk * lds] = smlnum; +/* If LATRS overestimated the growth, x may be */ +/* rescaled to preserve a valid combined scale */ +/* factor WORK( J, KK ) > 0. */ + rscal = 1.f / scaloc; + if (xnrm[kk - 1] * rscal <= bignum) { + xnrm[kk - 1] *= rscal; + i__5 = j2 - j1; + sscal_(&i__5, &rscal, &x[j1 + rhs * x_dim1], &c__1); + scaloc = 1.f; + } else { +/* The system op(A) * x = b is badly scaled and its */ +/* solution cannot be represented as (1/scale) * x. */ +/* Set x to zero. 
This approach deviates from LATRS */ +/* where a completely meaningless non-zero vector */ +/* is returned that is not a solution to op(A) * x = b. */ + scale[rhs] = 0.f; + i__5 = *n; + for (ii = 1; ii <= i__5; ++ii) { + x[ii + kk * x_dim1] = 0.f; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.f; + } + scaloc = 1.f; + } + } + scaloc *= work[j + kk * lds]; + work[j + kk * lds] = scaloc; + } + +/* Linear block updates */ + + if (notran) { + if (upper) { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } else { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } + } else { + if (upper) { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } else { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } + } + + i__4 = ilast; + i__5 = iinc; + for (i__ = ifirst; i__5 < 0 ? i__ >= i__4 : i__ <= i__4; i__ += + i__5) { +/* I1: row index of the first column in X( I, K ) */ +/* I2: row index of the first column in X( I+1, K ) */ +/* so the I2 - I1 is the row count of the block X( I, K ) */ + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__6 = i__ * nb; + i2 = f2cmin(i__6,*n) + 1; + +/* Prepare the linear update to be executed with GEMM. */ +/* For each column, compute a consistent scaling, a */ +/* scaling factor to survive the linear update, and */ +/* rescale the column segments, if necesssary. Then */ +/* the linear update is safely executed. */ + + i__6 = k2 - k1; + for (kk = 1; kk <= i__6; ++kk) { + rhs = k1 + kk - 1; +/* Compute consistent scaling */ +/* Computing MIN */ + r__1 = work[i__ + kk * lds], r__2 = work[j + kk * lds]; + scamin = f2cmin(r__1,r__2); + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__7 = i2 - i1; + bnrm = slange_("I", &i__7, &c__1, &x[i1 + rhs * x_dim1], + ldx, w); + bnrm *= scamin / work[i__ + kk * lds]; + xnrm[kk - 1] *= scamin / work[j + kk * lds]; + anrm = work[awrk + i__ + (j - 1) * nba]; + scaloc = slarmm_(&anrm, &xnrm[kk - 1], &bnrm); + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to B( I, KK ) and B( J, KK ). 
*/ + + scal = scamin / work[i__ + kk * lds] * scaloc; + if (scal != 1.f) { + i__7 = i2 - i1; + sscal_(&i__7, &scal, &x[i1 + rhs * x_dim1], &c__1); + work[i__ + kk * lds] = scamin * scaloc; + } + + scal = scamin / work[j + kk * lds] * scaloc; + if (scal != 1.f) { + i__7 = j2 - j1; + sscal_(&i__7, &scal, &x[j1 + rhs * x_dim1], &c__1); + work[j + kk * lds] = scamin * scaloc; + } + } + + if (notran) { + +/* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + sgemm_("N", "N", &i__6, &i__7, &i__8, &c_b35, &a[i1 + j1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b36, + &x[i1 + k1 * x_dim1], ldx); + } else { + +/* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + sgemm_("T", "N", &i__6, &i__7, &i__8, &c_b35, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b36, + &x[i1 + k1 * x_dim1], ldx); + } + } + } + +/* Reduce local scaling factors */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MIN */ + r__1 = scale[rhs], r__2 = work[i__ + kk * lds]; + scale[rhs] = f2cmin(r__1,r__2); + } + } + +/* Realize consistent scaling */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + if (scale[rhs] != 1.f && scale[rhs] != 0.f) { + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__5 = i__ * nb; + i2 = f2cmin(i__5,*n) + 1; + scal = scale[rhs] / work[i__ + kk * lds]; + if (scal != 1.f) { + i__5 = i2 - i1; + sscal_(&i__5, &scal, &x[i1 + rhs * x_dim1], &c__1); + } + } + } + } + } + return 0; + +/* End of SLATRS3 */ + +} /* slatrs3_ */ + diff --git a/lapack-netlib/SRC/slatrs3.f b/lapack-netlib/SRC/slatrs3.f new file mode 100644 index 000000000..c3a08e524 --- /dev/null +++ b/lapack-netlib/SRC/slatrs3.f @@ -0,0 +1,656 @@ +*> \brief \b SLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. +* +* Definition: +* =========== +* +* SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, +* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), CNORM( * ), SCALE( * ), +* WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATRS3 solves one of the triangular systems +*> +*> A * X = B * diag(scale) or A**T * X = B * diag(scale) +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A. X and B are +*> n by nrhs matrices and scale is an nrhs element vector of scaling +*> factors. A scaling factor scale(j) is usually less than or equal +*> to 1, chosen such that X(:,j) is less than the overflow threshold. +*> If the matrix A is singular (A(j,j) = 0 for some j), then +*> a non-trivial solution to A*X = 0 is returned. If the system is +*> so badly scaled that the solution cannot be represented as +*> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. +*> +*> This is a BLAS-3 version of LATRS for solving several right +*> hand sides simultaneously. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. 
+*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the right hand side B of the triangular system. +*> On exit, X is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max (1,N). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (NRHS) +*> The scaling factor s(k) is for the triangular system +*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). +*> If SCALE = 0, the matrix A is singular or badly scaled. +*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) +*> that is an exact or approximate solution to A*x(:,k) = 0 +*> is returned. If the system so badly scaled that solution +*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 +*> is returned. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK). +*> On exit, if INFO = 0, WORK(1) returns the optimal size of +*> WORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> LWORK is INTEGER +*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where +*> NBA = (N + NB - 1)/NB and NB is the optimal block size. 
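The optimal LWORK can be obtained at run time with the usual LAPACK workspace query described next. A C usage sketch against the f2c-style interface declared in slatrs3.c earlier in this patch follows; it assumes a 32-bit integer (LP64) build and omits the hidden character-length arguments that some Fortran compilers append, so treat it as an illustration rather than a portable binding.

#include <stdlib.h>

typedef int   integer;   /* assumption: LP64 build, 32-bit LAPACK integers */
typedef float real;

/* Prototype as written in the f2c translation slatrs3.c above. */
extern int slatrs3_(char *uplo, char *trans, char *diag, char *normin,
                    integer *n, integer *nrhs, real *a, integer *lda,
                    real *x, integer *ldx, real *scale, real *cnorm,
                    real *work, integer *lwork, integer *info);

/* Workspace-query idiom: call once with LWORK = -1, read the required size
 * from WORK(1), allocate, then solve. */
static void solve_upper_notrans(integer n, integer nrhs, real *a, integer lda,
                                real *x, integer ldx, real *scale, real *cnorm)
{
    integer info, lwork = -1;
    real query;
    slatrs3_("U", "N", "N", "N", &n, &nrhs, a, &lda, x, &ldx,
             scale, cnorm, &query, &lwork, &info);
    lwork = (integer)query;
    real *work = malloc((size_t)lwork * sizeof *work);
    slatrs3_("U", "N", "N", "N", &n, &nrhs, a, &lda, x, &ldx,
             scale, cnorm, work, &lwork, &info);
    free(work);
}
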
+*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +*> \par Further Details: +* ===================== +* \verbatim +* The algorithm follows the structure of a block triangular solve. +* The diagonal block is solved with a call to the robust the triangular +* solver LATRS for every right-hand side RHS = 1, ..., NRHS +* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), +* where op( A ) = A or op( A ) = A**T. +* The linear block updates operate on block columns of X, +* B( I, K ) - op(A( I, J )) * X( J, K ) +* and use GEMM. To avoid overflow in the linear block update, the worst case +* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed +* such that +* || s * B( I, RHS )||_oo +* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold +* +* Once all columns of a block column have been rescaled (BLAS-1), the linear +* update is executed with GEMM without overflow. +* +* To limit rescaling, local scale factors track the scaling of column segments. +* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA +* per right-hand side column RHS = 1, ..., NRHS. The global scale factor +* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) +* I = 1, ..., NBA. +* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) +* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The +* linear update of potentially inconsistently scaled vector segments +* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) +* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, +* if necessary, rescales the blocks prior to calling GEMM. +* +* \endverbatim +* ===================================================================== +* References: +* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). +* Parallel robust solution of triangular linear systems. Concurrency +* and Computation: Practice and Experience, 31(19), e5064. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, + $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, NORMIN, UPLO + INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), CNORM( * ), X( LDX, * ), + $ SCALE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN + PARAMETER ( NRHSMIN = 2, NBRHS = 32 ) + PARAMETER ( NBMIN = 8, NBMAX = 64 ) +* .. +* .. Local Arrays .. + REAL W( NBMAX ), XNRM( NBRHS ) +* .. +* .. Local Scalars .. 
+ LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER + INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J, + $ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2, + $ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS + REAL ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC, + $ SCAMIN, SMLNUM, TMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE, SLARMM + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE, SLARMM +* .. +* .. External Subroutines .. + EXTERNAL SLATRS, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Partition A and X into blocks. +* + NB = MAX( 8, ILAENV( 1, 'SLATRS', '', N, N, -1, -1 ) ) + NB = MIN( NBMAX, NB ) + NBA = MAX( 1, (N + NB - 1) / NB ) + NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS ) +* +* Compute the workspace +* +* The workspace comprises two parts. +* The first part stores the local scale factors. Each simultaneously +* computed right-hand side requires one local scale factor per block +* row. WORK( I + KK * LDS ) is the scale factor of the vector +* segment associated with the I-th block row and the KK-th vector +* in the block column. + LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) ) + LDS = NBA +* The second part stores upper bounds of the triangular A. There are +* a total of NBA x NBA blocks, of which only the upper triangular +* part or the lower triangular part is referenced. The upper bound of +* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). + LANRM = NBA * NBA + AWRK = LSCALE + WORK( 1 ) = LSCALE + LANRM +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATRS3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize scaling factors +* + DO KK = 1, NRHS + SCALE( KK ) = ONE + END DO +* +* Quick return if possible +* + IF( MIN( N, NRHS ).EQ.0 ) + $ RETURN +* +* Determine machine dependent constant to control overflow. +* + BIGNUM = SLAMCH( 'Overflow' ) + SMLNUM = SLAMCH( 'Safe Minimum' ) +* +* Use unblocked code for small problems +* + IF( NRHS.LT.NRHSMIN ) THEN + CALL SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1), + $ SCALE( 1 ), CNORM, INFO ) + DO K = 2, NRHS + CALL SLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Compute norms of blocks of A excluding diagonal blocks and find +* the block with the largest norm TMAX. +* + TMAX = ZERO + DO J = 1, NBA + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 + IF ( UPPER ) THEN + IFIRST = 1 + ILAST = J - 1 + ELSE + IFIRST = J + 1 + ILAST = NBA + END IF + DO I = IFIRST, ILAST + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Compute upper bound of A( I1:I2-1, J1:J2-1 ). 
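The I1/I2 and J1/J2 indices used throughout this loop come from the uniform block partition set up above: NBA = MAX( 1, (N + NB - 1)/NB ) block rows of width NB, with the last one possibly shorter. A tiny standalone demonstration of those formulas, with arbitrarily chosen values:

#include <stdio.h>

/* Block I covers rows I1 = (I-1)*NB + 1 through I2 - 1 = MIN( I*NB, N ),
 * 1-based and half-open as in the code above.  Demo for N = 10, NB = 4. */
int main(void)
{
    int n = 10, nb = 4;
    int nba = (n + nb - 1) / nb;                    /* = 3 */
    for (int i = 1; i <= nba; ++i) {
        int i1 = (i - 1) * nb + 1;
        int i2 = (i * nb < n ? i * nb : n) + 1;
        printf("block %d: rows %d..%d (%d rows)\n", i, i1, i2 - 1, i2 - i1);
    }
    return 0;   /* blocks cover rows 1..4, 5..8 and 9..10 */
}
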
+* + IF( NOTRAN ) THEN + ANRM = SLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + I+(J-1)*NBA ) = ANRM + ELSE + ANRM = SLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + J+(I-1)*NBA ) = ANRM + END IF + TMAX = MAX( TMAX, ANRM ) + END DO + END DO +* + IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN +* +* Some matrix entries have huge absolute value. At least one upper +* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point +* number, either due to overflow in LANGE or due to Inf in A. +* Fall back to LATRS. Set normin = 'N' for every right-hand side to +* force computation of TSCAL in LATRS to avoid the likely overflow +* in the computation of the column norms CNORM. +* + DO K = 1, NRHS + CALL SLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Every right-hand side requires workspace to store NBA local scale +* factors. To save workspace, X is computed successively in block columns +* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient +* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. + DO K = 1, NBX +* Loop over block columns (index = K) of X and, for column-wise scalings, +* over individual columns (index = KK). +* K1: column index of the first column in X( J, K ) +* K2: column index of the first column in X( J, K+1 ) +* so the K2 - K1 is the column count of the block X( J, K ) + K1 = (K-1)*NBRHS + 1 + K2 = MIN( K*NBRHS, NRHS ) + 1 +* +* Initialize local scaling factors of current block column X( J, K ) +* + DO KK = 1, K2 - K1 + DO I = 1, NBA + WORK( I+KK*LDS ) = ONE + END DO + END DO +* + IF( NOTRAN ) THEN +* +* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = NBA + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = NBA + JINC = 1 + END IF + ELSE +* +* Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = NBA + JINC = 1 + ELSE + JFIRST = NBA + JLAST = 1 + JINC = -1 + END IF + END IF +* + DO J = JFIRST, JLAST, JINC +* J1: row index of the first row in A( J, J ) +* J2: row index of the first row in A( J+1, J+1 ) +* so that J2 - J1 is the row count of the block A( J, J ) + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 +* +* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) +* for all right-hand sides in the current block column, +* one RHS at a time. +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( KK.EQ.1 ) THEN + CALL SLATRS( UPLO, TRANS, DIAG, 'N', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + ELSE + CALL SLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + END IF +* Find largest absolute value entry in the vector segment +* X( J1:J2-1, RHS ) as an upper bound for the worst case +* growth in the linear updates. + XNRM( KK ) = SLANGE( 'I', J2-J1, 1, X( J1, RHS ), + $ LDX, W ) +* + IF( SCALOC .EQ. ZERO ) THEN +* LATRS found that A is singular through A(j,j) = 0. +* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 +* and compute A*x = 0 (or A**T*x = 0). Note that +* X(J1:J2-1, KK) is set by LATRS. + SCALE( RHS ) = ZERO + DO II = 1, J1-1 + X( II, KK ) = ZERO + END DO + DO II = J2, N + X( II, KK ) = ZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. 
ZERO ) THEN +* LATRS computed a valid scale factor, but combined with +* the current scaling the solution does not have a +* scale factor > 0. +* +* Set WORK( J+KK*LDS ) to smallest valid scale +* factor and increase SCALOC accordingly. + SCAL = WORK( J+KK*LDS ) / SMLNUM + SCALOC = SCALOC * SCAL + WORK( J+KK*LDS ) = SMLNUM +* If LATRS overestimated the growth, x may be +* rescaled to preserve a valid combined scale +* factor WORK( J, KK ) > 0. + RSCAL = ONE / SCALOC + IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN + XNRM( KK ) = XNRM( KK ) * RSCAL + CALL SSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 ) + SCALOC = ONE + ELSE +* The system op(A) * x = b is badly scaled and its +* solution cannot be represented as (1/scale) * x. +* Set x to zero. This approach deviates from LATRS +* where a completely meaningless non-zero vector +* is returned that is not a solution to op(A) * x = b. + SCALE( RHS ) = ZERO + DO II = 1, N + X( II, KK ) = ZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + END IF + END IF + SCALOC = SCALOC * WORK( J+KK*LDS ) + WORK( J+KK*LDS ) = SCALOC + END DO +* +* Linear block updates +* + IF( NOTRAN ) THEN + IF( UPPER ) THEN + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + ELSE + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + END IF + ELSE + IF( UPPER ) THEN + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + ELSE + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + END IF + END IF +* + DO I = IFIRST, ILAST, IINC +* I1: row index of the first column in X( I, K ) +* I2: row index of the first column in X( I+1, K ) +* so the I2 - I1 is the row count of the block X( I, K ) + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Prepare the linear update to be executed with GEMM. +* For each column, compute a consistent scaling, a +* scaling factor to survive the linear update, and +* rescale the column segments, if necesssary. Then +* the linear update is safely executed. +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 +* Compute consistent scaling + SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + BNRM = SLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W ) + BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) ) + XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS )) + ANRM = WORK( AWRK + I+(J-1)*NBA ) + SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM ) +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to B( I, KK ) and B( J, KK ). +* + SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + WORK( I+KK*LDS ) = SCAMIN*SCALOC + END IF +* + SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL SSCAL( J2-J1, SCAL, X( J1, RHS ), 1 ) + WORK( J+KK*LDS ) = SCAMIN*SCALOC + END IF + END DO +* + IF( NOTRAN ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) +* + CALL SGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE, + $ A( I1, J1 ), LDA, X( J1, K1 ), LDX, + $ ONE, X( I1, K1 ), LDX ) + ELSE +* +* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) +* + CALL SGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ ONE, X( I1, K1 ), LDX ) + END IF + END DO + END DO +* +* Reduce local scaling factors +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + DO I = 1, NBA + SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) ) + END DO + END DO +* +* Realize consistent scaling +* + DO KK = 1, K2-K1 + RHS = K1 + KK - 1 + IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. 
ZERO ) THEN + DO I = 1, NBA + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 + SCAL = SCALE( RHS ) / WORK( I+KK*LDS ) + IF( SCAL.NE.ONE ) + $ CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + END DO + END IF + END DO + END DO + RETURN +* +* End of SLATRS3 +* + END diff --git a/lapack-netlib/SRC/strsyl3.c b/lapack-netlib/SRC/strsyl3.c new file mode 100644 index 000000000..8ce30ed56 --- /dev/null +++ b/lapack-netlib/SRC/strsyl3.c @@ -0,0 +1,2066 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + 
doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b STRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > STRSYL3 solves the real Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**T, and A and B are both upper quasi- */ +/* > triangular. A is M-by-M and B is N-by-N; the right hand side C and */ +/* > the solution X are M-by-N; and scale is an output scale factor, set */ +/* > <= 1 to avoid overflow in X. */ +/* > */ +/* > A and B must be in Schur canonical form (as returned by SHSEQR), that */ +/* > is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ +/* > each 2-by-2 diagonal block has its diagonal elements equal and its */ +/* > off-diagonal elements of opposite sign. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'T': op(A) = A**T (Transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'T': op(B) = B**T (Transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. 
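Given the equation documented in the Purpose section above, a computed solution can be sanity-checked by forming the residual explicitly. The sketch below is an illustration only and not part of the patch: it handles the no-transpose case with plain loops over column-major arrays and double accumulation; a real test would use GEMM and compare the result against eps times suitable norm bounds.

#include <math.h>
#include <stddef.h>

/* Largest absolute entry of A*X + isgn*X*B - scale*C for column-major
 * single-precision inputs (no-transpose case only). */
static double sylvester_residual(int m, int n, int isgn, float scale,
                                 const float *A, int lda,
                                 const float *B, int ldb,
                                 const float *X, int ldx,
                                 const float *C, int ldc)
{
    double rmax = 0.0;
    for (int j = 0; j < n; ++j) {
        for (int i = 0; i < m; ++i) {
            double r = -(double)scale * C[i + (size_t)j * ldc];
            for (int k = 0; k < m; ++k)              /* (A*X)(i,j)      */
                r += (double)A[i + (size_t)k * lda] * X[k + (size_t)j * ldx];
            for (int k = 0; k < n; ++k)              /* isgn*(X*B)(i,j) */
                r += (double)isgn * X[i + (size_t)k * ldx] * B[k + (size_t)j * ldb];
            rmax = fmax(rmax, fabs(r));
        }
    }
    return rmax;
}
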
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,M) */ +/* > The upper quasi-triangular matrix A, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > The upper quasi-triangular matrix B, in Schur canonical form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) */ +/* > + ((N + NB - 1) / NB + 1), where NB is the optimal block size. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimension of the IWORK array, */ +/* > returns this value as the first entry of the IWORK array, and */ +/* > no error message related to LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is REAL array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. 
Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int strsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb, + real *c__, integer *ldc, real *scale, integer *iwork, integer *liwork, + real *swork, integer *ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2, r__3; + + /* Local variables */ + real scal, anrm, bnrm, cnrm; + integer awrk, bwrk; + logical skip; + real *wnrm, xnrm; + integer i__, j, k, l; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer i1, i2, j1, j2, k1, k2, l1; +// extern integer myexp_(real *); + integer l2, nb, pc, jj, ll; + real scaloc; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real scamin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern real slarmm_(real *, real *, real *); + logical notrna, notrnb; + real smlnum; + logical lquery; + extern /* Subroutine */ int strsyl_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *); + integer nba, nbb; + real buf, sgn; + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --iwork; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "STRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *liwork == -1 || *ldswork == -1; + iwork[1] = nba + nbb + 2; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "T") && ! lsame_( + trana, "C")) { + *info = -1; + } else if (! notrnb && ! lsame_(tranb, "T") && ! 
+ lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (! lquery && *liwork < iwork[1]) { + *info = -14; + } else if (! lquery && *ldswork < f2cmax(nba,nbb)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + +/* Use unblocked code for small problems or if insufficient */ +/* workspaces are provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb) || *liwork < iwork[1]) { + strsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + + +/* REAL WNRM( MAX( M, N ) ) */ + wnrm=(real*)malloc (f2cmax(*m,*n)*sizeof(real)); + +/* Set constants to control overflow */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + +/* Partition A such that 2-by-2 blocks on the diagonal are not split */ + + skip = FALSE_; + i__1 = nba; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = (i__ - 1) * nb + 1; + } + iwork[nba + 1] = *m + 1; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[k]; + l2 = iwork[k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *m) { +/* A( M, M ) is a 1-by-1 block */ + mycycle_(); + } + if (a[l + (l + 1) * a_dim1] != 0.f && a[l + 1 + l * a_dim1] != + 0.f) { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[k + 1]) { + ++iwork[k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[nba + 1] = *m + 1; + if (iwork[nba] >= iwork[nba + 1]) { + iwork[nba] = iwork[nba + 1]; + --nba; + } + +/* Partition B such that 2-by-2 blocks on the diagonal are not split */ + + pc = nba + 1; + skip = FALSE_; + i__1 = nbb; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[pc + i__] = (i__ - 1) * nb + 1; + } + iwork[pc + nbb + 1] = *n + 1; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + l1 = iwork[pc + k]; + l2 = iwork[pc + k + 1] - 1; + i__2 = l2; + for (l = l1; l <= i__2; ++l) { + if (skip) { + skip = FALSE_; + mycycle_(); + } + if (l >= *n) { +/* B( N, N ) is a 1-by-1 block */ + mycycle_(); + } + if (b[l + (l + 1) * b_dim1] != 0.f && b[l + 1 + l * b_dim1] != + 0.f) { +/* Check if 2-by-2 block is split */ + if (l + 1 == iwork[pc + k + 1]) { + ++iwork[pc + k + 1]; + mycycle_(); + } + skip = TRUE_; + } + } + } + iwork[pc + nbb + 1] = *n + 1; + if (iwork[pc + nbb] >= iwork[pc + nbb + 1]) { + iwork[pc + nbb] = iwork[pc + nbb + 1]; + --nbb; + } + +/* Set local scaling factors - must never attain zero. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.f; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. 
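The query/solve protocol implemented above (LIWORK = -1 or LDSWORK = -1 requests a workspace query; IWORK(1) then returns the optimal LIWORK, and SWORK(1,1)/SWORK(2,1) return the optimal ROWS and COLS of SWORK) can be driven from C roughly as in the following sketch. It is illustrative only, not part of the patch; it assumes the common f2c mappings integer = int and real = float (ILP64 builds differ), column-major arrays, and omits malloc error handling.

/* Hypothetical caller sketch for the two-phase STRSYL3 call. */
#include <stdlib.h>

extern int strsyl3_(char *trana, char *tranb, int *isgn,
                    int *m, int *n, float *a, int *lda,
                    float *b, int *ldb, float *c, int *ldc,
                    float *scale, int *iwork, int *liwork,
                    float *swork, int *ldswork, int *info);

int solve_sylvester(int m, int n, float *a, int lda, float *b, int ldb,
                    float *c, int ldc, float *scale)
{
    int isgn = 1, info = 0;
    int liwork = -1, ldswork = -1;       /* trigger the workspace query */
    int iwork_q[1];
    float swork_q[2];

    /* Phase 1: query.  IWORK(1) = optimal LIWORK, SWORK(1,1)/SWORK(2,1)
     * = optimal ROWS/COLS of SWORK. */
    strsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
             scale, iwork_q, &liwork, swork_q, &ldswork, &info);
    if (info != 0)
        return info;

    liwork  = iwork_q[0];
    ldswork = (int) swork_q[0];          /* optimal ROWS */
    int cols = (int) swork_q[1];         /* optimal COLS */

    int   *iwork = malloc((size_t) liwork * sizeof *iwork);
    float *swork = malloc((size_t) ldswork * (size_t) cols * sizeof *swork);

    /* Phase 2: solve A*X + X*B = scale*C; the solution X overwrites C. */
    strsyl3_("N", "N", &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
             scale, iwork, &liwork, swork, &ldswork, &info);

    free(iwork);
    free(swork);
    return info;
}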
*/ + + buf = 1.f; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = iwork[l]; + l2 = iwork[l + 1]; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = slange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = slange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[pc + k]; + k2 = iwork[pc + k + 1]; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = slange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = slange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (real) (*isgn); + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
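For orientation, the SWORK matrix populated above doubles as bookkeeping storage: columns 1..NBB hold the local scale factors of the solution blocks, the next NBA columns (offset AWRK = NBB) hold norm bounds of the blocks of A, and the last NBB columns (offset BWRK = NBB + NBA) hold norm bounds of the blocks of B, which is why the workspace query reports MAX(NBA, NBB) rows and NBA + 2*NBB columns. The hypothetical accessors below, assuming real = float and column-major storage, only make that layout explicit; they are not part of the patch.

#include <stddef.h>

/* 1-based (k,l) access into column-major SWORK with leading dimension ldswork */
static inline float swork_at(const float *swork, int ldswork, int k, int l)
{
    return swork[(size_t)(k - 1) + (size_t)(l - 1) * (size_t) ldswork];
}

/* Local scale factor of solution block X(K,L): columns 1..NBB. */
static inline float scale_kl(const float *swork, int ldswork, int k, int l)
{
    return swork_at(swork, ldswork, k, l);
}

/* Norm bound of block A(I,K): columns NBB+1..NBB+NBA (AWRK = NBB). */
static inline float anrm_ik(const float *swork, int ldswork, int nbb, int i, int k)
{
    return swork_at(swork, ldswork, i, nbb + k);
}

/* Norm bound of block B(L,J): columns NBB+NBA+1..NBA+2*NBB (BWRK = NBB+NBA). */
static inline float bnrm_lj(const float *swork, int ldswork, int nbb, int nba,
                            int l, int j)
{
    return swork_at(swork, ldswork, l, nbb + nba + j);
}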
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = slange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = slange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + sscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + sgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = slange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "N", &i__3, &i__4, &i__5, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**T*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__3 = k2 - k1; + i__4 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = slange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = slange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + sscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + sgemm_("T", "N", &i__4, &i__5, &i__6, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = slange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__4 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + sscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "N", &i__4, &i__5, &i__6, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**T*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__2 = k2 - k1; + i__3 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = slange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = slange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + sscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + sgemm_("T", "N", &i__3, &i__4, &i__5, &c_b31, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = slange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__3 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "T", &i__3, &i__4, &i__5, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**T = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = iwork[k]; + k2 = iwork[k + 1]; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + + i__1 = k2 - k1; + i__2 = l2 - l1; + strsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.f) { + if (scaloc == 0.f) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.f; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. 
*/ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * swork_dim1] + / pow_ri(&c_b19, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = slange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = iwork[i__]; + i2 = iwork[i__ + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = slange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[i__ + l * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = slarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + sscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + sgemm_("N", "N", &i__2, &i__3, &i__4, &c_b31, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, & + c_b32, &c__[i1 + l1 * c_dim1], ldc); + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T */ + + j1 = iwork[pc + j]; + j2 = iwork[pc + j + 1]; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = slange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + r__1 = swork[k + j * swork_dim1], r__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(r__1,r__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = slarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.f) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__2 = myexp_(&scaloc); + buf *= pow_ri(&c_b19, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + r__1 = bignum, r__2 = swork[ll + jj * + swork_dim1] / pow_ri(&c_b19, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(r__1,r__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_ri(&c_b19, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_ri(&c_b19, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.f) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + sscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + r__1 = -sgn; + sgemm_("N", "T", &i__2, &i__3, &i__4, &r__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b32, + &c__[k1 + j1 * c_dim1], ldc); + } + } + } + + } + + free(wnrm); +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + r__1 = *scale, r__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(r__1,r__2); + } + } + + if (*scale == 0.f) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is REAL. Set SCALE to zero and give up. */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = iwork[k]; + k2 = iwork[k + 1]; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = iwork[pc + l]; + l2 = iwork[pc + l + 1]; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.f) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + sscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1.f && buf > 0.f) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + r__1 = *scale / smlnum, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + *scale /= scaloc; + } + if (buf != 1.f && buf > 0.f) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + + scal = c__[c_dim1 + 1]; + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + r__2 = scal, r__3 = (r__1 = c__[k + l * c_dim1], abs(r__1)); + scal = f2cmax(r__2,r__3); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. 
*/ + +/* Computing MIN */ + r__1 = bignum / scal, r__2 = 1.f / buf; + scaloc = f2cmin(r__1,r__2); + buf *= scaloc; + slascl_("G", &c_n1, &c_n1, &c_b32, &scaloc, m, n, &c__[c_offset], ldc, + &iwork[1]); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. */ + + *scale *= buf; + +/* Restore workspace dimensions */ + + iwork[1] = nba + nbb + 2; + swork[swork_dim1 + 1] = (real) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (real) ((nbb << 1) + nba); + + return 0; + +/* End of STRSYL3 */ + +} /* strsyl3_ */ + diff --git a/lapack-netlib/SRC/strsyl3.f b/lapack-netlib/SRC/strsyl3.f new file mode 100644 index 000000000..28762c2ed --- /dev/null +++ b/lapack-netlib/SRC/strsyl3.f @@ -0,0 +1,1244 @@ +*> \brief \b STRSYL3 +* +* Definition: +* =========== +* +* +*> \par Purpose +* ============= +*> +*> \verbatim +*> +*> STRSYL3 solves the real Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**T, and A and B are both upper quasi- +*> triangular. A is M-by-M and B is N-by-N; the right hand side C and +*> the solution X are M-by-N; and scale is an output scale factor, set +*> <= 1 to avoid overflow in X. +*> +*> A and B must be in Schur canonical form (as returned by SHSEQR), that +*> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +*> each 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> +*> This is the block version of the algorithm. +*> \endverbatim +* +* Arguments +* ========= +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'T': op(A) = A**T (Transpose) +*> = 'C': op(A) = A**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'T': op(B) = B**T (Transpose) +*> = 'C': op(B) = B**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> The upper quasi-triangular matrix A, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> The upper quasi-triangular matrix B, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. 
LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> IWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1) +*> + ((N + NB - 1) / NB + 1), where NB is the optimal block size. +*> +*> If LIWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimension of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is REAL array, dimension (MAX(2, ROWS), +*> MAX(1,COLS)). +*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS +*> and SWORK(2) returns the optimal COLS. +*> \endverbatim +*> +*> \param[in] LDSWORK +*> \verbatim +*> LDSWORK is INTEGER +*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) +*> and NB is the optimal block size. +*> +*> If LDSWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the SWORK matrix, +*> returns these values as the first and second entry of the SWORK +*> matrix, and no error message related LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* ===================================================================== +* References: +* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of +* algorithms: The triangular Sylvester equation, ACM Transactions +* on Mathematical Software (TOMS), volume 29, pages 218--243. +* +* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel +* Solution of the Triangular Sylvester Equation. Lecture Notes in +* Computer Science, vol 12043, pages 82--92, Springer. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK, + $ INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N, + $ LIWORK, LDSWORK + REAL SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ SWORK( LDSWORK, * ) +* .. +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB, LQUERY, SKIP + INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ, + $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB, PC + REAL ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC, + $ SCAMIN, SGN, XNRM, BUF, SMLNUM +* .. +* .. Local Arrays .. + REAL WNRM( MAX( M, N ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLANGE, SLAMCH, SLARMM + EXTERNAL SLANGE, SLAMCH, SLARMM, ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLASCL, SSCAL, STRSYL, XERBLA +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC ABS, EXPONENT, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* +* Use the same block size for all matrices. +* + NB = MAX(8, ILAENV( 1, 'STRSYL', '', M, N, -1, -1) ) +* +* Compute number of blocks in A and B +* + NBA = MAX( 1, (M + NB - 1) / NB ) + NBB = MAX( 1, (N + NB - 1) / NB ) +* +* Compute workspace +* + INFO = 0 + LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 ) + IWORK( 1 ) = NBA + NBB + 2 + IF( LQUERY ) THEN + LDSWORK = 2 + SWORK( 1, 1 ) = MAX( NBA, NBB ) + SWORK( 2, 1 ) = 2 * NBB + NBA + END IF +* +* Test the input arguments +* + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( .NOT.LQUERY .AND. LIWORK.LT.IWORK(1) ) THEN + INFO = -14 + ELSE IF( .NOT.LQUERY .AND. LDSWORK.LT.MAX( NBA, NBB ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSYL3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Use unblocked code for small problems or if insufficient +* workspaces are provided +* + IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) .OR. + $ LIWORK.LT.IWORK(1) ) THEN + CALL STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, INFO ) + RETURN + END IF +* +* Set constants to control overflow +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Partition A such that 2-by-2 blocks on the diagonal are not split +* + SKIP = .FALSE. + DO I = 1, NBA + IWORK( I ) = ( I - 1 ) * NB + 1 + END DO + IWORK( NBA + 1 ) = M + 1 + DO K = 1, NBA + L1 = IWORK( K ) + L2 = IWORK( K + 1 ) - 1 + DO L = L1, L2 + IF( SKIP ) THEN + SKIP = .FALSE. + CYCLE + END IF + IF( L.GE.M ) THEN +* A( M, M ) is a 1-by-1 block + CYCLE + END IF + IF( A( L, L+1 ).NE.ZERO .AND. A( L+1, L ).NE.ZERO ) THEN +* Check if 2-by-2 block is split + IF( L + 1 .EQ. IWORK( K + 1 ) ) THEN + IWORK( K + 1 ) = IWORK( K + 1 ) + 1 + CYCLE + END IF + SKIP = .TRUE. + END IF + END DO + END DO + IWORK( NBA + 1 ) = M + 1 + IF( IWORK( NBA ).GE.IWORK( NBA + 1 ) ) THEN + IWORK( NBA ) = IWORK( NBA + 1 ) + NBA = NBA - 1 + END IF +* +* Partition B such that 2-by-2 blocks on the diagonal are not split +* + PC = NBA + 1 + SKIP = .FALSE. + DO I = 1, NBB + IWORK( PC + I ) = ( I - 1 ) * NB + 1 + END DO + IWORK( PC + NBB + 1 ) = N + 1 + DO K = 1, NBB + L1 = IWORK( PC + K ) + L2 = IWORK( PC + K + 1 ) - 1 + DO L = L1, L2 + IF( SKIP ) THEN + SKIP = .FALSE. + CYCLE + END IF + IF( L.GE.N ) THEN +* B( N, N ) is a 1-by-1 block + CYCLE + END IF + IF( B( L, L+1 ).NE.ZERO .AND. B( L+1, L ).NE.ZERO ) THEN +* Check if 2-by-2 block is split + IF( L + 1 .EQ. IWORK( PC + K + 1 ) ) THEN + IWORK( PC + K + 1 ) = IWORK( PC + K + 1 ) + 1 + CYCLE + END IF + SKIP = .TRUE. + END IF + END DO + END DO + IWORK( PC + NBB + 1 ) = N + 1 + IF( IWORK( PC + NBB ).GE.IWORK( PC + NBB + 1 ) ) THEN + IWORK( PC + NBB ) = IWORK( PC + NBB + 1 ) + NBB = NBB - 1 + END IF +* +* Set local scaling factors - must never attain zero. 
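The partitioning above can be summarized as: start from equally sized blocks of NB rows and push any boundary forward by one row when it would cut through a 2-by-2 diagonal block (recognizable in Schur canonical form by a nonzero subdiagonal entry), merging an emptied trailing block into its predecessor. The helper below is a simplified, hypothetical C sketch of that idea, not part of the patch; it assumes column-major storage, NB >= 2, and 1-based start indices as stored in IWORK.

static void partition_blocks(int m, int nb, const float *a, int lda,
                             int *starts,   /* length >= m/nb + 2 */
                             int *nblocks)
{
    int nba = (m + nb - 1) / nb;
    if (nba < 1)
        nba = 1;
    for (int i = 0; i < nba; i++)
        starts[i] = i * nb + 1;        /* first row of block i (1-based) */
    starts[nba] = m + 1;               /* sentinel: one past the last row */

    for (int k = 1; k < nba; k++) {
        int b = starts[k];
        /* A(b, b-1) != 0 means rows b-1 and b belong to one 2-by-2 block. */
        if (a[(size_t)(b - 1) + (size_t)(b - 2) * (size_t) lda] != 0.0f)
            starts[k] = b + 1;         /* move the boundary past the pair */
    }

    /* If the last block became empty, merge it into its predecessor. */
    if (nba > 1 && starts[nba - 1] >= starts[nba]) {
        starts[nba - 1] = starts[nba];
        --nba;
    }
    *nblocks = nba;
}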
+* + DO L = 1, NBB + DO K = 1, NBA + SWORK( K, L ) = ONE + END DO + END DO +* +* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. +* This scaling is to ensure compatibility with TRSYL and may get flushed. +* + BUF = ONE +* +* Compute upper bounds of blocks of A and B +* + AWRK = NBB + DO K = 1, NBA + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = K, NBA + L1 = IWORK( L ) + L2 = IWORK( L + 1 ) + IF( NOTRNA ) THEN + SWORK( K, AWRK + L ) = SLANGE( 'I', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + ELSE + SWORK( L, AWRK + K ) = SLANGE( '1', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + END IF + END DO + END DO + BWRK = NBB + NBA + DO K = 1, NBB + K1 = IWORK( PC + K ) + K2 = IWORK( PC + K + 1 ) + DO L = K, NBB + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) + IF( NOTRNB ) THEN + SWORK( K, BWRK + L ) = SLANGE( 'I', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + ELSE + SWORK( L, BWRK + K ) = SLANGE( '1', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + END IF + END DO + END DO +* + SGN = REAL( ISGN ) +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF ( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K - 1, 1, -1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. 
+ BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO JJ = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK(L, BWRK + J) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B = scale*C. 
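Before every GEMM update in the loop above, the two participating blocks of C are brought onto a common local scale factor and SLARMM supplies an extra factor that keeps the update below overflow. The sketch below condenses that bookkeeping for a single update C(I,L) := C(I,L) - A(I,K)*C(K,L); it is illustrative only, assumes the f2c mapping real = float used in the C translation earlier in this patch, and omits the underflow branch that absorbs powers of two into BUF.

/* SLARMM is added by this patch; prototype as in the f2c translation. */
extern float slarmm_(float *anrm, float *xnrm, float *cnrm);

static float consistent_update_scale(float anrm,      /* norm bound of A(I,K) */
                                     float xnrm,      /* norm of C(K,L)       */
                                     float cnrm,      /* norm of C(I,L)       */
                                     float *swork_il, /* scale of C(I,L)      */
                                     float *swork_kl) /* scale of C(K,L)      */
{
    float scamin = (*swork_il < *swork_kl) ? *swork_il : *swork_kl;

    /* Rescale the norms as if both blocks already carried scamin. */
    cnrm *= scamin / *swork_il;
    xnrm *= scamin / *swork_kl;

    /* Robust factor from SLARMM so that the scaled update cannot overflow. */
    float scaloc = slarmm_(&anrm, &xnrm, &cnrm);

    /* The caller scales C(K,L) by (scamin/swork_kl)*scaloc and C(I,L) by
     * (scamin/swork_il)*scaloc before the GEMM; afterwards both blocks
     * share the recorded factor scamin*scaloc. */
    *swork_il = scamin * scaloc;
    *swork_kl = scamin * scaloc;
    return scaloc;
}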
+* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. 
ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL SGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. 
The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL SGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( NOTRNA .AND. 
.NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=K+1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) +* + CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = 1, K - 1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = IWORK( I ) + I2 = IWORK( I + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = SLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF (SCAL .NE. ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF (SCAL .NE. 
ONE) THEN + DO LL = L1, L2-1 + CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ ONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T +* + J1 = IWORK( PC + J ) + J2 = IWORK( PC + J + 1 ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = SLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.E0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL SGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ ONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO +* + END IF +* +* Reduce local scaling factors +* + SCALE = SWORK( 1, 1 ) + DO K = 1, NBA + DO L = 1, NBB + SCALE = MIN( SCALE, SWORK( K, L ) ) + END DO + END DO +* + IF( SCALE .EQ. ZERO ) THEN +* +* The magnitude of the largest entry of the solution is larger +* than the product of BIGNUM**2 and cannot be represented in the +* form (1/SCALE)*X if SCALE is REAL. Set SCALE to zero and give up. +* + IWORK(1) = NBA + NBB + 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + RETURN + END IF +* +* Realize consistent scaling +* + DO K = 1, NBA + K1 = IWORK( K ) + K2 = IWORK( K + 1 ) + DO L = 1, NBB + L1 = IWORK( PC + L ) + L2 = IWORK( PC + L + 1 ) + SCAL = SCALE / SWORK( K, L ) + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF + END DO + END DO +* + IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN +* +* Decrease SCALE as much as possible. +* + SCALOC = MIN( SCALE / SMLNUM, ONE / BUF ) + BUF = BUF * SCALOC + SCALE = SCALE / SCALOC + END IF + + IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN +* +* In case of overly aggressive scaling during the computation, +* flushing of the global scale factor may be prevented by +* undoing some of the scaling. This step is to ensure that +* this routine flushes only scale factors that TRSYL also +* flushes and be usable as a drop-in replacement. +* +* How much can the normwise largest entry be upscaled? +* + SCAL = C( 1, 1 ) + DO K = 1, M + DO L = 1, N + SCAL = MAX( SCAL, ABS( C( K, L ) ) ) + END DO + END DO +* +* Increase BUF as close to 1 as possible and apply scaling. 
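+*        Illustrative example (values are hypothetical): if the
+*        intermediate computation left BUF = 2**(-10) and the largest
+*        entry of C has magnitude SCAL = 2**(-40), then BIGNUM / SCAL
+*        exceeds ONE / BUF, so SCALOC = ONE / BUF = 2**10; BUF is
+*        restored to ONE, C is upscaled by 2**10, and SCALE is not
+*        flushed when it is multiplied by BUF below.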
+* + SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) + BUF = BUF * SCALOC + CALL SLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK ) + END IF +* +* Combine with buffer scaling factor. SCALE will be flushed if +* BUF is less than one here. +* + SCALE = SCALE * BUF +* +* Restore workspace dimensions +* + IWORK(1) = NBA + NBB + 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA +* + RETURN +* +* End of STRSYL3 +* + END diff --git a/lapack-netlib/SRC/zlatrs3.c b/lapack-netlib/SRC/zlatrs3.c new file mode 100644 index 000000000..0cb1cda54 --- /dev/null +++ b/lapack-netlib/SRC/zlatrs3.c @@ -0,0 +1,1283 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + 
+#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = 
{pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. + */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, */ +/* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) */ + +/* CHARACTER DIAG, NORMIN, TRANS, UPLO */ +/* INTEGER INFO, LDA, LWORK, LDX, N, NRHS */ +/* DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * ) */ +/* COMPLEX*16 A( LDA, * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLATRS3 solves one of the triangular systems */ +/* > */ +/* > A * X = B * diag(scale), A**T * X = B * diag(scale), or */ +/* > A**H * X = B * diag(scale) */ +/* > */ +/* > with scaling to prevent overflow. Here A is an upper or lower */ +/* > triangular matrix, A**T denotes the transpose of A, A**H denotes the */ +/* > conjugate transpose of A. X and B are n-by-nrhs matrices and scale */ +/* > is an nrhs-element vector of scaling factors. A scaling factor scale(j) */ +/* > is usually less than or equal to 1, chosen such that X(:,j) is less */ +/* > than the overflow threshold. If the matrix A is singular (A(j,j) = 0 */ +/* > for some j), then a non-trivial solution to A*X = 0 is returned. If */ +/* > the system is so badly scaled that the solution cannot be represented */ +/* > as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. */ +/* > */ +/* > This is a BLAS-3 version of LATRS for solving several right */ +/* > hand sides simultaneously. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the operation applied to A. 
*/ +/* > = 'N': Solve A * x = s*b (No transpose) */ +/* > = 'T': Solve A**T* x = s*b (Transpose) */ +/* > = 'C': Solve A**T* x = s*b (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. */ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NORMIN */ +/* > \verbatim */ +/* > NORMIN is CHARACTER*1 */ +/* > Specifies whether CNORM has been set or not. */ +/* > = 'Y': CNORM contains the column norms on entry */ +/* > = 'N': CNORM is not set on entry. On exit, the norms will */ +/* > be computed and stored in CNORM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of the array A contains the upper */ +/* > triangular matrix, and the strictly lower triangular part of */ +/* > A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of the array A contains the lower triangular */ +/* > matrix, and the strictly upper triangular part of A is not */ +/* > referenced. If DIAG = 'U', the diagonal elements of A are */ +/* > also not referenced and are assumed to be 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > On entry, the right hand side B of the triangular system. */ +/* > On exit, X is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The scaling factor s(k) is for the triangular system */ +/* > A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). */ +/* > If SCALE = 0, the matrix A is singular or badly scaled. */ +/* > If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) */ +/* > that is an exact or approximate solution to A*x(:,k) = 0 */ +/* > is returned. If the system so badly scaled that solution */ +/* > cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 */ +/* > is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CNORM */ +/* > \verbatim */ +/* > CNORM is DOUBLE PRECISION array, dimension (N) */ +/* > */ +/* > If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* > contains the norm of the off-diagonal part of the j-th column */ +/* > of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* > to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* > must be greater than or equal to the 1-norm. */ +/* > */ +/* > If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* > returns the 1-norm of the offdiagonal part of the j-th column */ +/* > of A. 
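+   As an illustrative example: for an upper triangular 2-by-2 matrix,
+   the off-diagonal part of column 1 is empty and that of column 2 is
+   the single entry A(1,2), so CNORM(1) = 0 and CNORM(2) = abs(A(1,2)).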
*/ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK). */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal size of */ +/* > WORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > LWORK is INTEGER */ +/* > LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where */ +/* > NBA = (N + NB - 1)/NB and NB is the optimal block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ +/* > \par Further Details: */ +/* ===================== */ +/* \verbatim */ +/* The algorithm follows the structure of a block triangular solve. */ +/* The diagonal block is solved with a call to the robust the triangular */ +/* solver LATRS for every right-hand side RHS = 1, ..., NRHS */ +/* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), */ +/* where op( A ) = A or op( A ) = A**T or op( A ) = A**H. */ +/* The linear block updates operate on block columns of X, */ +/* B( I, K ) - op(A( I, J )) * X( J, K ) */ +/* and use GEMM. To avoid overflow in the linear block update, the worst case */ +/* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed */ +/* such that */ +/* || s * B( I, RHS )||_oo */ +/* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold */ + +/* Once all columns of a block column have been rescaled (BLAS-1), the linear */ +/* update is executed with GEMM without overflow. */ + +/* To limit rescaling, local scale factors track the scaling of column segments. */ +/* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA */ +/* per right-hand side column RHS = 1, ..., NRHS. The global scale factor */ +/* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) */ +/* I = 1, ..., NBA. */ +/* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) */ +/* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The */ +/* linear update of potentially inconsistently scaled vector segments */ +/* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) */ +/* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, */ +/* if necessary, rescales the blocks prior to calling GEMM. */ + +/* \endverbatim */ +/* ===================================================================== */ +/* References: */ +/* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). */ +/* Parallel robust solution of triangular linear systems. Concurrency */ +/* and Computation: Practice and Experience, 31(19), e5064. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden. 
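+   Worked example of the consistent scaling described above (numbers
+   are hypothetical): with local scale factors s( I, RHS ) = 1 and
+   s( J, RHS ) = 1/4, SCAMIN = 1/4 and the segment of block row I is
+   multiplied by SCAMIN / s( I, RHS ) = 1/4 before the GEMM update, so
+   that both segments carry the common factor 1/4 (possibly reduced
+   further by the robust update factor returned by DLARMM).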
*/ + +/* ===================================================================== */ +/* Subroutine */ int zlatrs3_(char *uplo, char *trans, char *diag, char * + normin, integer *n, integer *nrhs, doublecomplex *a, integer *lda, + doublecomplex *x, integer *ldx, doublereal *scale, doublereal *cnorm, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + integer iinc, jinc; + doublereal scal, anrm, bnrm; + integer awrk; + doublereal tmax, xnrm[32]; + integer i__, j, k; + doublereal w[64]; + extern logical lsame_(char *, char *); + doublereal rscal; + integer lanrm, ilast, jlast; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer i1; + logical upper; + integer i2, j1, j2, k1, k2, nb, ii, kk; + extern doublereal dlamch_(char *); + integer lscale; + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + integer ifirst; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit; + extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublereal *, integer *); + logical lquery; + integer nba, lds, nbx, rhs; + + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --scale; + --cnorm; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + lquery = *lwork == -1; + +/* Partition A and X into blocks. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "ZLATRS", "", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + nb = f2cmin(64,nb); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*nrhs + 31) / 32; + nbx = f2cmax(i__1,i__2); + +/* Compute the workspace */ + +/* The workspace comprises two parts. */ +/* The first part stores the local scale factors. Each simultaneously */ +/* computed right-hand side requires one local scale factor per block */ +/* row. WORK( I + KK * LDS ) is the scale factor of the vector */ +/* segment associated with the I-th block row and the KK-th vector */ +/* in the block column. */ +/* Computing MAX */ + i__1 = nba, i__2 = f2cmin(*nrhs,32); + lscale = nba * f2cmax(i__1,i__2); + lds = nba; +/* The second part stores upper bounds of the triangular A. There are */ +/* a total of NBA x NBA blocks, of which only the upper triangular */ +/* part or the lower triangular part is referenced. The upper bound of */ +/* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). 
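+   As an illustration (with an assumed block size nb = 64): for n = 1000
+   and nrhs >= 32, nba = 16, so the scale factors occupy
+   lscale = 16 * 32 = 512 entries, the block norms occupy
+   lanrm = 16 * 16 = 256 entries, and WORK( 1 ) is set to 768 below.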
*/ + lanrm = nba * nba; + awrk = lscale; + work[1] = (doublereal) (lscale + lanrm); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldx < f2cmax(1,*n)) { + *info = -10; + } else if (! lquery && (doublereal) (*lwork) < work[1]) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLATRS3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize scaling factors */ + + i__1 = *nrhs; + for (kk = 1; kk <= i__1; ++kk) { + scale[kk] = 1.; + } + +/* Quick return if possible */ + + if (f2cmin(*n,*nrhs) == 0) { + return 0; + } + +/* Determine machine dependent constant to control overflow. */ + + bignum = dlamch_("Overflow"); + smlnum = dlamch_("Safe Minimum"); + +/* Use unblocked code for small problems */ + + if (*nrhs < 2) { + zlatrs_(uplo, trans, diag, normin, n, &a[a_offset], lda, &x[x_dim1 + + 1], &scale[1], &cnorm[1], info); + i__1 = *nrhs; + for (k = 2; k <= i__1; ++k) { + zlatrs_(uplo, trans, diag, "Y", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Compute norms of blocks of A excluding diagonal blocks and find */ +/* the block with the largest norm TMAX. */ + + tmax = 0.; + i__1 = nba; + for (j = 1; j <= i__1; ++j) { + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + if (upper) { + ifirst = 1; + ilast = j - 1; + } else { + ifirst = j + 1; + ilast = nba; + } + i__2 = ilast; + for (i__ = ifirst; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*n) + 1; + +/* Compute upper bound of A( I1:I2-1, J1:J2-1 ). */ + + if (notran) { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = zlange_("I", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + i__ + (j - 1) * nba] = anrm; + } else { + i__3 = i2 - i1; + i__4 = j2 - j1; + anrm = zlange_("1", &i__3, &i__4, &a[i1 + j1 * a_dim1], lda, + w); + work[awrk + j + (i__ - 1) * nba] = anrm; + } + tmax = f2cmax(tmax,anrm); + } + } + + if (! (tmax <= dlamch_("Overflow"))) { + +/* Some matrix entries have huge absolute value. At least one upper */ +/* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point */ +/* number, either due to overflow in LANGE or due to Inf in A. */ +/* Fall back to LATRS. Set normin = 'N' for every right-hand side to */ +/* force computation of TSCAL in LATRS to avoid the likely overflow */ +/* in the computation of the column norms CNORM. */ + + i__1 = *nrhs; + for (k = 1; k <= i__1; ++k) { + zlatrs_(uplo, trans, diag, "N", n, &a[a_offset], lda, &x[k * + x_dim1 + 1], &scale[k], &cnorm[1], info); + } + return 0; + } + +/* Every right-hand side requires workspace to store NBA local scale */ +/* factors. To save workspace, X is computed successively in block columns */ +/* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient */ +/* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. */ + i__1 = nbx; + for (k = 1; k <= i__1; ++k) { +/* Loop over block columns (index = K) of X and, for column-wise scalings, */ +/* over individual columns (index = KK). 
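+   For example (illustrative): with NRHS = 50 and the block width of 32
+   used here, NBX = 2; the block column K = 1 covers columns 1:32 and
+   K = 2 covers columns 33:50.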
*/ +/* K1: column index of the first column in X( J, K ) */ +/* K2: column index of the first column in X( J, K+1 ) */ +/* so the K2 - K1 is the column count of the block X( J, K ) */ + k1 = (k - 1 << 5) + 1; +/* Computing MIN */ + i__2 = k << 5; + k2 = f2cmin(i__2,*nrhs) + 1; + +/* Initialize local scaling factors of current block column X( J, K ) */ + + i__2 = k2 - k1; + for (kk = 1; kk <= i__2; ++kk) { + i__3 = nba; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__ + kk * lds] = 1.; + } + } + + if (notran) { + +/* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ + + if (upper) { + jfirst = nba; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = nba; + jinc = 1; + } + } else { + +/* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) */ +/* where op(A) = A**T or op(A) = A**H */ + + if (upper) { + jfirst = 1; + jlast = nba; + jinc = 1; + } else { + jfirst = nba; + jlast = 1; + jinc = -1; + } + } + i__2 = jlast; + i__3 = jinc; + for (j = jfirst; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { +/* J1: row index of the first row in A( J, J ) */ +/* J2: row index of the first row in A( J+1, J+1 ) */ +/* so that J2 - J1 is the row count of the block A( J, J ) */ + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) */ + + i__4 = k2 - k1; + for (kk = 1; kk <= i__4; ++kk) { + rhs = k1 + kk - 1; + if (kk == 1) { + i__5 = j2 - j1; + zlatrs_(uplo, trans, diag, "N", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } else { + i__5 = j2 - j1; + zlatrs_(uplo, trans, diag, "Y", &i__5, &a[j1 + j1 * + a_dim1], lda, &x[j1 + rhs * x_dim1], &scaloc, & + cnorm[1], info); + } +/* Find largest absolute value entry in the vector segment */ +/* X( J1:J2-1, RHS ) as an upper bound for the worst case */ +/* growth in the linear updates. */ + i__5 = j2 - j1; + xnrm[kk - 1] = zlange_("I", &i__5, &c__1, &x[j1 + rhs * + x_dim1], ldx, w); + + if (scaloc == 0.) { +/* LATRS found that A is singular through A(j,j) = 0. */ +/* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 */ +/* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is */ +/* set by LATRS. */ + scale[rhs] = 0.; + i__5 = j1 - 1; + for (ii = 1; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0., x[i__6].i = 0.; + } + i__5 = *n; + for (ii = j2; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0., x[i__6].i = 0.; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.; + } + scaloc = 1.; + } else if (scaloc * work[j + kk * lds] == 0.) { +/* LATRS computed a valid scale factor, but combined with */ +/* the current scaling the solution does not have a */ +/* scale factor > 0. */ + +/* Set WORK( J+KK*LDS ) to smallest valid scale */ +/* factor and increase SCALOC accordingly. */ + scal = work[j + kk * lds] / smlnum; + scaloc *= scal; + work[j + kk * lds] = smlnum; +/* If LATRS overestimated the growth, x may be */ +/* rescaled to preserve a valid combined scale */ +/* factor WORK( J, KK ) > 0. */ + rscal = 1. / scaloc; + if (xnrm[kk - 1] * rscal <= bignum) { + xnrm[kk - 1] *= rscal; + i__5 = j2 - j1; + zdscal_(&i__5, &rscal, &x[j1 + rhs * x_dim1], &c__1); + scaloc = 1.; + } else { +/* The system op(A) * x = b is badly scaled and its */ +/* solution cannot be represented as (1/scale) * x. */ +/* Set x to zero. 
This approach deviates from LATRS */ +/* where a completely meaningless non-zero vector */ +/* is returned that is not a solution to op(A) * x = b. */ + scale[rhs] = 0.; + i__5 = *n; + for (ii = 1; ii <= i__5; ++ii) { + i__6 = ii + kk * x_dim1; + x[i__6].r = 0., x[i__6].i = 0.; + } +/* Discard the local scale factors. */ + i__5 = nba; + for (ii = 1; ii <= i__5; ++ii) { + work[ii + kk * lds] = 1.; + } + scaloc = 1.; + } + } + scaloc *= work[j + kk * lds]; + work[j + kk * lds] = scaloc; + } + +/* Linear block updates */ + + if (notran) { + if (upper) { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } else { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } + } else { + if (upper) { + ifirst = j + 1; + ilast = nba; + iinc = 1; + } else { + ifirst = j - 1; + ilast = 1; + iinc = -1; + } + } + + i__4 = ilast; + i__5 = iinc; + for (i__ = ifirst; i__5 < 0 ? i__ >= i__4 : i__ <= i__4; i__ += + i__5) { +/* I1: row index of the first column in X( I, K ) */ +/* I2: row index of the first column in X( I+1, K ) */ +/* so the I2 - I1 is the row count of the block X( I, K ) */ + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__6 = i__ * nb; + i2 = f2cmin(i__6,*n) + 1; + +/* Prepare the linear update to be executed with GEMM. */ +/* For each column, compute a consistent scaling, a */ +/* scaling factor to survive the linear update, and */ +/* rescale the column segments, if necesssary. Then */ +/* the linear update is safely executed. */ + + i__6 = k2 - k1; + for (kk = 1; kk <= i__6; ++kk) { + rhs = k1 + kk - 1; +/* Compute consistent scaling */ +/* Computing MIN */ + d__1 = work[i__ + kk * lds], d__2 = work[j + kk * lds]; + scamin = f2cmin(d__1,d__2); + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__7 = i2 - i1; + bnrm = zlange_("I", &i__7, &c__1, &x[i1 + rhs * x_dim1], + ldx, w); + bnrm *= scamin / work[i__ + kk * lds]; + xnrm[kk - 1] *= scamin / work[j + kk * lds]; + anrm = work[awrk + i__ + (j - 1) * nba]; + scaloc = dlarmm_(&anrm, &xnrm[kk - 1], &bnrm); + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to X( I, KK ) and X( J, KK ). */ + + scal = scamin / work[i__ + kk * lds] * scaloc; + if (scal != 1.) { + i__7 = i2 - i1; + zdscal_(&i__7, &scal, &x[i1 + rhs * x_dim1], &c__1); + work[i__ + kk * lds] = scamin * scaloc; + } + + scal = scamin / work[j + kk * lds] * scaloc; + if (scal != 1.) 
{ + i__7 = j2 - j1; + zdscal_(&i__7, &scal, &x[j1 + rhs * x_dim1], &c__1); + work[j + kk * lds] = scamin * scaloc; + } + } + + if (notran) { + +/* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__6, &i__7, &i__8, &z__1, &a[i1 + j1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b1, & + x[i1 + k1 * x_dim1], ldx); + } else if (lsame_(trans, "T")) { + +/* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + z__1.r = -1., z__1.i = 0.; + zgemm_("T", "N", &i__6, &i__7, &i__8, &z__1, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b1, & + x[i1 + k1 * x_dim1], ldx); + } else { + +/* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K ) */ + + i__6 = i2 - i1; + i__7 = k2 - k1; + i__8 = j2 - j1; + z__1.r = -1., z__1.i = 0.; + zgemm_("C", "N", &i__6, &i__7, &i__8, &z__1, &a[j1 + i1 * + a_dim1], lda, &x[j1 + k1 * x_dim1], ldx, &c_b1, & + x[i1 + k1 * x_dim1], ldx); + } + } + } + +/* Reduce local scaling factors */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MIN */ + d__1 = scale[rhs], d__2 = work[i__ + kk * lds]; + scale[rhs] = f2cmin(d__1,d__2); + } + } + +/* Realize consistent scaling */ + + i__3 = k2 - k1; + for (kk = 1; kk <= i__3; ++kk) { + rhs = k1 + kk - 1; + if (scale[rhs] != 1. && scale[rhs] != 0.) { + i__2 = nba; + for (i__ = 1; i__ <= i__2; ++i__) { + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__5 = i__ * nb; + i2 = f2cmin(i__5,*n) + 1; + scal = scale[rhs] / work[i__ + kk * lds]; + if (scal != 1.) { + i__5 = i2 - i1; + zdscal_(&i__5, &scal, &x[i1 + rhs * x_dim1], &c__1); + } + } + } + } + } + return 0; + +/* End of ZLATRS3 */ + +} /* zlatrs3_ */ + diff --git a/lapack-netlib/SRC/zlatrs3.f b/lapack-netlib/SRC/zlatrs3.f new file mode 100644 index 000000000..fc1be0517 --- /dev/null +++ b/lapack-netlib/SRC/zlatrs3.f @@ -0,0 +1,667 @@ +*> \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow. +* +* Definition: +* =========== +* +* SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, +* X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * ) +* COMPLEX*16 A( LDA, * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATRS3 solves one of the triangular systems +*> +*> A * X = B * diag(scale), A**T * X = B * diag(scale), or +*> A**H * X = B * diag(scale) +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, A**H denotes the +*> conjugate transpose of A. X and B are n-by-nrhs matrices and scale +*> is an nrhs-element vector of scaling factors. A scaling factor scale(j) +*> is usually less than or equal to 1, chosen such that X(:,j) is less +*> than the overflow threshold. If the matrix A is singular (A(j,j) = 0 +*> for some j), then a non-trivial solution to A*X = 0 is returned. If +*> the system is so badly scaled that the solution cannot be represented +*> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned. +*> +*> This is a BLAS-3 version of LATRS for solving several right +*> hand sides simultaneously. 
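+*>
+*> For illustration: if on exit scale(j) = 2**(-100) for some column j,
+*> then X(:,j) represents the solution y of A * y = B(:,j) in the form
+*> y = X(:,j) / scale(j), whose entries may be too large to be formed
+*> explicitly without overflow.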
+*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the right hand side B of the triangular system. +*> On exit, X is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max (1,N). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (NRHS) +*> The scaling factor s(k) is for the triangular system +*> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k). +*> If SCALE = 0, the matrix A is singular or badly scaled. +*> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k) +*> that is an exact or approximate solution to A*x(:,k) = 0 +*> is returned. If the system so badly scaled that solution +*> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0 +*> is returned. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK). 
+*> On exit, if INFO = 0, WORK(1) returns the optimal size of +*> WORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> LWORK is INTEGER +*> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where +*> NBA = (N + NB - 1)/NB and NB is the optimal block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +*> \par Further Details: +* ===================== +* \verbatim +* The algorithm follows the structure of a block triangular solve. +* The diagonal block is solved with a call to the robust the triangular +* solver LATRS for every right-hand side RHS = 1, ..., NRHS +* op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ), +* where op( A ) = A or op( A ) = A**T or op( A ) = A**H. +* The linear block updates operate on block columns of X, +* B( I, K ) - op(A( I, J )) * X( J, K ) +* and use GEMM. To avoid overflow in the linear block update, the worst case +* growth is estimated. For every RHS, a scale factor s <= 1.0 is computed +* such that +* || s * B( I, RHS )||_oo +* + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold +* +* Once all columns of a block column have been rescaled (BLAS-1), the linear +* update is executed with GEMM without overflow. +* +* To limit rescaling, local scale factors track the scaling of column segments. +* There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA +* per right-hand side column RHS = 1, ..., NRHS. The global scale factor +* SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS ) +* I = 1, ..., NBA. +* A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ) +* updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The +* linear update of potentially inconsistently scaled vector segments +* s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) ) +* computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and, +* if necessary, rescales the blocks prior to calling GEMM. +* +* \endverbatim +* ===================================================================== +* References: +* C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019). +* Parallel robust solution of triangular linear systems. Concurrency +* and Computation: Practice and Experience, 31(19), e5064. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA, + $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, NORMIN, UPLO + INTEGER INFO, LDA, LWORK, LDX, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( LDX, * ) + DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. 
+ DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN + PARAMETER ( NRHSMIN = 2, NBRHS = 32 ) + PARAMETER ( NBMIN = 8, NBMAX = 64 ) +* .. +* .. Local Arrays .. + DOUBLE PRECISION W( NBMAX ), XNRM( NBRHS ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER + INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J, + $ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2, + $ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS + DOUBLE PRECISION ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC, + $ SCAMIN, SMLNUM, TMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE, DLARMM + EXTERNAL ILAENV, LSAME, DLAMCH, ZLANGE, DLARMM +* .. +* .. External Subroutines .. + EXTERNAL ZLATRS, ZDSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Partition A and X into blocks. +* + NB = MAX( NBMIN, ILAENV( 1, 'ZLATRS', '', N, N, -1, -1 ) ) + NB = MIN( NBMAX, NB ) + NBA = MAX( 1, (N + NB - 1) / NB ) + NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS ) +* +* Compute the workspace +* +* The workspace comprises two parts. +* The first part stores the local scale factors. Each simultaneously +* computed right-hand side requires one local scale factor per block +* row. WORK( I + KK * LDS ) is the scale factor of the vector +* segment associated with the I-th block row and the KK-th vector +* in the block column. + LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) ) + LDS = NBA +* The second part stores upper bounds of the triangular A. There are +* a total of NBA x NBA blocks, of which only the upper triangular +* part or the lower triangular part is referenced. The upper bound of +* the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ). + LANRM = NBA * NBA + AWRK = LSCALE + WORK( 1 ) = LSCALE + LANRM +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATRS3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize scaling factors +* + DO KK = 1, NRHS + SCALE( KK ) = ONE + END DO +* +* Quick return if possible +* + IF( MIN( N, NRHS ).EQ.0 ) + $ RETURN +* +* Determine machine dependent constant to control overflow. +* + BIGNUM = DLAMCH( 'Overflow' ) + SMLNUM = DLAMCH( 'Safe Minimum' ) +* +* Use unblocked code for small problems +* + IF( NRHS.LT.NRHSMIN ) THEN + CALL ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1), + $ SCALE( 1 ), CNORM, INFO ) + DO K = 2, NRHS + CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Compute norms of blocks of A excluding diagonal blocks and find +* the block with the largest norm TMAX. 
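+* The entry WORK( AWRK + I+(J-1)*NBA ) bounds the infinity norm of
+* op(A)( I, J ): for TRANS = 'N' it is the infinity norm of the block
+* A( I, J ), and for TRANS = 'T' or 'C' it is the 1-norm of A( J, I ),
+* which equals the infinity norm of its (conjugate) transpose.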
+* + TMAX = ZERO + DO J = 1, NBA + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 + IF ( UPPER ) THEN + IFIRST = 1 + ILAST = J - 1 + ELSE + IFIRST = J + 1 + ILAST = NBA + END IF + DO I = IFIRST, ILAST + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Compute upper bound of A( I1:I2-1, J1:J2-1 ). +* + IF( NOTRAN ) THEN + ANRM = ZLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + I+(J-1)*NBA ) = ANRM + ELSE + ANRM = ZLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W ) + WORK( AWRK + J+(I-1) * NBA ) = ANRM + END IF + TMAX = MAX( TMAX, ANRM ) + END DO + END DO +* + IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN +* +* Some matrix entries have huge absolute value. At least one upper +* bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point +* number, either due to overflow in LANGE or due to Inf in A. +* Fall back to LATRS. Set normin = 'N' for every right-hand side to +* force computation of TSCAL in LATRS to avoid the likely overflow +* in the computation of the column norms CNORM. +* + DO K = 1, NRHS + CALL ZLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ), + $ SCALE( K ), CNORM, INFO ) + END DO + RETURN + END IF +* +* Every right-hand side requires workspace to store NBA local scale +* factors. To save workspace, X is computed successively in block columns +* of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient +* workspace is available, larger values of NBRHS or NBRHS = NRHS are viable. + DO K = 1, NBX +* Loop over block columns (index = K) of X and, for column-wise scalings, +* over individual columns (index = KK). +* K1: column index of the first column in X( J, K ) +* K2: column index of the first column in X( J, K+1 ) +* so the K2 - K1 is the column count of the block X( J, K ) + K1 = (K-1)*NBRHS + 1 + K2 = MIN( K*NBRHS, NRHS ) + 1 +* +* Initialize local scaling factors of current block column X( J, K ) +* + DO KK = 1, K2 - K1 + DO I = 1, NBA + WORK( I+KK*LDS ) = ONE + END DO + END DO +* + IF( NOTRAN ) THEN +* +* Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* + IF( UPPER ) THEN + JFIRST = NBA + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = NBA + JINC = 1 + END IF + ELSE +* +* Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1)) +* where op(A) = A**T or op(A) = A**H +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = NBA + JINC = 1 + ELSE + JFIRST = NBA + JLAST = 1 + JINC = -1 + END IF + END IF + + DO J = JFIRST, JLAST, JINC +* J1: row index of the first row in A( J, J ) +* J2: row index of the first row in A( J+1, J+1 ) +* so that J2 - J1 is the row count of the block A( J, J ) + J1 = (J-1)*NB + 1 + J2 = MIN( J*NB, N ) + 1 +* +* Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS ) +* + DO KK = 1, K2 - K1 + RHS = K1 + KK - 1 + IF( KK.EQ.1 ) THEN + CALL ZLATRS( UPLO, TRANS, DIAG, 'N', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + ELSE + CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1, + $ A( J1, J1 ), LDA, X( J1, RHS ), + $ SCALOC, CNORM, INFO ) + END IF +* Find largest absolute value entry in the vector segment +* X( J1:J2-1, RHS ) as an upper bound for the worst case +* growth in the linear updates. + XNRM( KK ) = ZLANGE( 'I', J2-J1, 1, X( J1, RHS ), + $ LDX, W ) +* + IF( SCALOC .EQ. ZERO ) THEN +* LATRS found that A is singular through A(j,j) = 0. +* Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0 +* and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is +* set by LATRS. 
+ SCALE( RHS ) = ZERO + DO II = 1, J1-1 + X( II, KK ) = CZERO + END DO + DO II = J2, N + X( II, KK ) = CZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN +* LATRS computed a valid scale factor, but combined with +* the current scaling the solution does not have a +* scale factor > 0. +* +* Set WORK( J+KK*LDS ) to smallest valid scale +* factor and increase SCALOC accordingly. + SCAL = WORK( J+KK*LDS ) / SMLNUM + SCALOC = SCALOC * SCAL + WORK( J+KK*LDS ) = SMLNUM +* If LATRS overestimated the growth, x may be +* rescaled to preserve a valid combined scale +* factor WORK( J, KK ) > 0. + RSCAL = ONE / SCALOC + IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN + XNRM( KK ) = XNRM( KK ) * RSCAL + CALL ZDSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 ) + SCALOC = ONE + ELSE +* The system op(A) * x = b is badly scaled and its +* solution cannot be represented as (1/scale) * x. +* Set x to zero. This approach deviates from LATRS +* where a completely meaningless non-zero vector +* is returned that is not a solution to op(A) * x = b. + SCALE( RHS ) = ZERO + DO II = 1, N + X( II, KK ) = CZERO + END DO +* Discard the local scale factors. + DO II = 1, NBA + WORK( II+KK*LDS ) = ONE + END DO + SCALOC = ONE + END IF + END IF + SCALOC = SCALOC * WORK( J+KK*LDS ) + WORK( J+KK*LDS ) = SCALOC + END DO +* +* Linear block updates +* + IF( NOTRAN ) THEN + IF( UPPER ) THEN + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + ELSE + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + END IF + ELSE + IF( UPPER ) THEN + IFIRST = J + 1 + ILAST = NBA + IINC = 1 + ELSE + IFIRST = J - 1 + ILAST = 1 + IINC = -1 + END IF + END IF +* + DO I = IFIRST, ILAST, IINC +* I1: row index of the first column in X( I, K ) +* I2: row index of the first column in X( I+1, K ) +* so the I2 - I1 is the row count of the block X( I, K ) + I1 = (I-1)*NB + 1 + I2 = MIN( I*NB, N ) + 1 +* +* Prepare the linear update to be executed with GEMM. +* For each column, compute a consistent scaling, a +* scaling factor to survive the linear update, and +* rescale the column segments, if necesssary. Then +* the linear update is safely executed. +* + DO KK = 1, K2 - K1 + RHS = K1 + KK - 1 +* Compute consistent scaling + SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) ) +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + BNRM = ZLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W ) + BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) ) + XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) ) + ANRM = WORK( AWRK + I+(J-1)*NBA ) + SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM ) +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to X( I, KK ) and X( J, KK ). 
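+*                 Illustrative example: if WORK( I+KK*LDS ) = ONE,
+*                 WORK( J+KK*LDS ) = 0.25 and DLARMM returns
+*                 SCALOC = 0.5, then SCAMIN = 0.25; the segment in
+*                 block row I is scaled by 0.125, the one in block
+*                 row J by 0.5, and both local factors become
+*                 SCAMIN*SCALOC = 0.125.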
+* + SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + WORK( I+KK*LDS ) = SCAMIN*SCALOC + END IF +* + SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC + IF( SCAL.NE.ONE ) THEN + CALL ZDSCAL( J2-J1, SCAL, X( J1, RHS ), 1 ) + WORK( J+KK*LDS ) = SCAMIN*SCALOC + END IF + END DO +* + IF( NOTRAN ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J ) * X( J, K ) +* + CALL ZGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( I1, J1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K ) +* + CALL ZGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + ELSE +* +* B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K ) +* + CALL ZGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE, + $ A( J1, I1 ), LDA, X( J1, K1 ), LDX, + $ CONE, X( I1, K1 ), LDX ) + END IF + END DO + END DO + +* +* Reduce local scaling factors +* + DO KK = 1, K2 - K1 + RHS = K1 + KK - 1 + DO I = 1, NBA + SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) ) + END DO + END DO +* +* Realize consistent scaling +* + DO KK = 1, K2 - K1 + RHS = K1 + KK - 1 + IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN + DO I = 1, NBA + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, N ) + 1 + SCAL = SCALE( RHS ) / WORK( I+KK*LDS ) + IF( SCAL.NE.ONE ) + $ CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 ) + END DO + END IF + END DO + END DO + RETURN +* +* End of ZLATRS3 +* + END diff --git a/lapack-netlib/SRC/ztrsyl3.c b/lapack-netlib/SRC/ztrsyl3.c new file mode 100644 index 000000000..314b0f98d --- /dev/null +++ b/lapack-netlib/SRC/ztrsyl3.c @@ -0,0 +1,2027 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + 
ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? 
floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) +#define myexp_(w) my_expfunc(w) + +static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;} + + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + 
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZTRSYL3 */ + +/* Definition: */ +/* =========== */ + + +/* > \par Purpose */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTRSYL3 solves the complex Sylvester matrix equation: */ +/* > */ +/* > op(A)*X + X*op(B) = scale*C or */ +/* > op(A)*X - X*op(B) = scale*C, */ +/* > */ +/* > where op(A) = A or A**H, and A and B are both upper triangular. A is */ +/* > M-by-M and B is N-by-N; the right hand side C and the solution X are */ +/* > M-by-N; and scale is an output scale factor, set <= 1 to avoid */ +/* > overflow in X. */ +/* > */ +/* > This is the block version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] TRANA */ +/* > \verbatim */ +/* > TRANA is CHARACTER*1 */ +/* > Specifies the option op(A): */ +/* > = 'N': op(A) = A (No transpose) */ +/* > = 'C': op(A) = A**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANB */ +/* > \verbatim */ +/* > TRANB is CHARACTER*1 */ +/* > Specifies the option op(B): */ +/* > = 'N': op(B) = B (No transpose) */ +/* > = 'C': op(B) = B**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER */ +/* > Specifies the sign in the equation: */ +/* > = +1: solve op(A)*X + X*op(B) = scale*C */ +/* > = -1: solve op(A)*X - X*op(B) = scale*C */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The order of the matrix A, and the number of rows in the */ +/* > matrices X and C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B, and the number of columns in the */ +/* > matrices X and C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,M) */ +/* > The upper triangular matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). 
*/ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > The upper triangular matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N right hand side matrix C. */ +/* > On exit, C is overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > The scale factor, scale, set <= 1 to avoid overflow in X. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SWORK */ +/* > \verbatim */ +/* > SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), */ +/* > MAX(1,COLS)). */ +/* > On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS */ +/* > and SWORK(2) returns the optimal COLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDSWORK */ +/* > \verbatim */ +/* > LDSWORK is INTEGER */ +/* > LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) */ +/* > and NB is the optimal block size. */ +/* > */ +/* > If LDSWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal dimensions of the SWORK matrix, */ +/* > returns these values as the first and second entry of the SWORK */ +/* > matrix, and no error message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1: A and B have common or very close eigenvalues; perturbed */ +/* > values were used to solve the equation (but the matrices */ +/* > A and B are unchanged). */ +/* > \endverbatim */ + +/* > \ingroup complex16SYcomputational */ + +/* ===================================================================== */ +/* References: */ +/* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of */ +/* algorithms: The triangular Sylvester equation, ACM Transactions */ +/* on Mathematical Software (TOMS), volume 29, pages 218--243. */ + +/* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel */ +/* Solution of the Triangular Sylvester Equation. Lecture Notes in */ +/* Computer Science, vol 12043, pages 82--92, Springer. */ + +/* Contributor: */ +/* Angelika Schwarz, Umea University, Sweden.
*/ + +/* ===================================================================== */ +/* Subroutine */ int ztrsyl3_(char *trana, char *tranb, integer *isgn, + integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex + *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, + doublereal *swork, integer *ldswork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, swork_dim1, + swork_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + doublereal scal; + doublecomplex csgn; + doublereal anrm, bnrm, cnrm; + integer awrk, bwrk; + doublereal *wnrm, xnrm; + integer i__, j, k, l; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer i1, i2, j1, j2, k1, k2, l1, l2; +// extern integer myexp_(doublereal *); + integer nb, jj, ll; + extern doublereal dlamch_(char *); + doublereal scaloc, scamin; + extern doublereal dlarmm_(doublereal *, doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex * + , integer *, integer *); + logical notrna, notrnb; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int ztrsyl_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *); + integer nba, nbb; + doublereal buf, sgn; + + + +/* Decode and Test input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + swork_dim1 = *ldswork; + swork_offset = 1 + swork_dim1 * 1; + swork -= swork_offset; + + /* Function Body */ + notrna = lsame_(trana, "N"); + notrnb = lsame_(tranb, "N"); + +/* Use the same block size for all matrices. */ + +/* Computing MAX */ + i__1 = 8, i__2 = ilaenv_(&c__1, "ZTRSYL", "", m, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)0); + nb = f2cmax(i__1,i__2); + +/* Compute number of blocks in A and B */ + +/* Computing MAX */ + i__1 = 1, i__2 = (*m + nb - 1) / nb; + nba = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (*n + nb - 1) / nb; + nbb = f2cmax(i__1,i__2); + +/* Compute workspace */ + + *info = 0; + lquery = *ldswork == -1; + if (lquery) { + *ldswork = 2; + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + } + +/* Test the input arguments */ + + if (! notrna && ! lsame_(trana, "C")) { + *info = -1; + } else if (! notrnb && ! 
lsame_(tranb, "C")) { + *info = -2; + } else if (*isgn != 1 && *isgn != -1) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRSYL3", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + wnrm = (doublereal*)malloc(f2cmax(*m,*n)*sizeof(doublereal)); +/* Use unblocked code for small problems or if insufficient */ +/* workspace is provided */ + + if (f2cmin(nba,nbb) == 1 || *ldswork < f2cmax(nba,nbb)) { + ztrsyl_(trana, tranb, isgn, m, n, &a[a_offset], lda, &b[b_offset], + ldb, &c__[c_offset], ldc, scale, info); + return 0; + } + +/* Set constants to control overflow */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Set local scaling factors. */ + + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + i__2 = nba; + for (k = 1; k <= i__2; ++k) { + swork[k + l * swork_dim1] = 1.; + } + } + +/* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. */ +/* This scaling is to ensure compatibility with TRSYL and may get flushed. */ + + buf = 1.; + +/* Compute upper bounds of blocks of A and B */ + + awrk = nbb; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nba; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*m) + 1; + if (notrna) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (awrk + l) * swork_dim1] = zlange_("I", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (awrk + k) * swork_dim1] = zlange_("1", &i__3, & + i__4, &a[k1 + l1 * a_dim1], lda, wnrm); + } + } + } + bwrk = nbb + nba; + i__1 = nbb; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*n) + 1; + i__2 = nbb; + for (l = k; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + if (notrnb) { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[k + (bwrk + l) * swork_dim1] = zlange_("I", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } else { + i__3 = k2 - k1; + i__4 = l2 - l1; + swork[l + (bwrk + k) * swork_dim1] = zlange_("1", &i__3, & + i__4, &b[k1 + l1 * b_dim1], ldb, wnrm); + } + } + } + + sgn = (doublereal) (*isgn); + z__1.r = sgn, z__1.i = 0.; + csgn.r = z__1.r, csgn.i = z__1.i; + + if (notrna && notrnb) { + +/* Solve A*X + ISGN*X*B = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-left corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* M L-1 */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. 
*/ +/* I=K+1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + i__1 = nbb; + for (l = 1; l <= i__1; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = zlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + for (i__ = k - 1; i__ >= 1; --i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = zlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L ). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
{ + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + zdscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__2, &i__3, &i__4, &z__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__2 = nbb; + for (j = l + 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = zlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "N", &i__3, &i__4, &i__5, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && notrnb) { + +/* Solve A**H *X + ISGN*X*B = scale*C. 
*/ + +/* The (K,L)th block of X is determined starting from */ +/* upper-left corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 L-1 */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ +/* I=1 J=1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + + i__3 = k2 - k1; + i__4 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__3, &i__4, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + } + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__3 = k2 - k1; + i__4 = l2 - l1; + xnrm = zlange_("I", &i__3, &i__4, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__3 = nba; + for (i__ = k + 1; i__ <= i__3; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__4 = i__ * nb; + i2 = f2cmin(i__4,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = i2 - i1; + i__5 = l2 - l1; + cnrm = zlange_("I", &i__4, &i__5, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. 
*/ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = i2 - i1; + zdscal_(&i__5, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__4 = i2 - i1; + i__5 = l2 - l1; + i__6 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("C", "N", &i__4, &i__5, &i__6, &z__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__3 = nbb; + for (j = l + 1; j <= i__3; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__4 = j * nb; + j2 = f2cmin(i__4,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__4 = k2 - k1; + i__5 = j2 - j1; + cnrm = zlange_("I", &i__4, &i__5, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__4 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__4); + i__4 = nbb; + for (jj = 1; jj <= i__4; ++jj) { + i__5 = nba; + for (ll = 1; ll <= i__5; ++ll) { +/* Computing MIN */ + i__6 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__6); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__4 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__4); + i__4 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__4); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__4 = l2 - 1; + for (ll = l1; ll <= i__4; ++ll) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) 
{ + i__4 = j2 - 1; + for (jj = j1; jj <= i__4; ++jj) { + i__5 = k2 - k1; + zdscal_(&i__5, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__4 = k2 - k1; + i__5 = j2 - j1; + i__6 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "N", &i__4, &i__5, &i__6, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[l1 + j1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (! notrna && ! notrnb) { + +/* Solve A**H *X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* top-right corner column by column by */ + +/* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* K-1 N */ +/* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__2 = l * nb; + l2 = f2cmin(i__2,*n) + 1; + + i__2 = k2 - k1; + i__3 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__2, &i__3, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + } + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__2 = k2 - k1; + i__3 = l2 - l1; + xnrm = zlange_("I", &i__2, &i__3, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__2 = nba; + for (i__ = k + 1; i__ <= i__2; ++i__) { + +/* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__3 = i__ * nb; + i2 = f2cmin(i__3,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__3 = i2 - i1; + i__4 = l2 - l1; + cnrm = zlange_("I", &i__3, &i__4, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = i2 - i1; + zdscal_(&i__4, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__3 = i2 - i1; + i__4 = l2 - l1; + i__5 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("C", "N", &i__3, &i__4, &i__5, &z__1, &a[k1 + i1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + } + + i__2 = l - 1; + for (j = 1; j <= i__2; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__3 = j * nb; + j2 = f2cmin(i__3,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__3 = k2 - k1; + i__4 = j2 - j1; + cnrm = zlange_("I", &i__3, &i__4, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__3 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__3); + i__3 = nbb; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = nba; + for (ll = 1; ll <= i__4; ++ll) { +/* Computing MIN */ + i__5 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__5); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__3 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__3); + i__3 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__3); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
{ + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__3 = j2 - 1; + for (jj = j1; jj <= i__3; ++jj) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__3 = k2 - k1; + i__4 = j2 - j1; + i__5 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "C", &i__3, &i__4, &i__5, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + } else if (notrna && ! notrnb) { + +/* Solve A*X + ISGN*X*B**H = scale*C. */ + +/* The (K,L)th block of X is determined starting from */ +/* bottom-right corner column by column by */ + +/* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) */ + +/* Where */ +/* M N */ +/* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. */ +/* I=K+1 J=L+1 */ + +/* Start loop over block rows (index = K) and block columns (index = L) */ + + for (k = nba; k >= 1; --k) { + +/* K1: row index of the first row in X( K, L ) */ +/* K2: row index of the first row in X( K+1, L ) */ +/* so the K2 - K1 is the column count of the block X( K, L ) */ + + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__1 = k * nb; + k2 = f2cmin(i__1,*m) + 1; + for (l = nbb; l >= 1; --l) { + +/* L1: column index of the first column in X( K, L ) */ +/* L2: column index of the first column in X( K, L + 1) */ +/* so that L2 - L1 is the row count of the block X( K, L ) */ + + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__1 = l * nb; + l2 = f2cmin(i__1,*n) + 1; + + i__1 = k2 - k1; + i__2 = l2 - l1; + ztrsyl_(trana, tranb, isgn, &i__1, &i__2, &a[k1 + k1 * a_dim1] + , lda, &b[l1 + l1 * b_dim1], ldb, &c__[k1 + l1 * + c_dim1], ldc, &scaloc, &iinfo); + *info = f2cmax(*info,iinfo); + + if (scaloc * swork[k + l * swork_dim1] == 0.) { + if (scaloc == 0.) { +/* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) */ +/* is larger than the product of BIGNUM**2 and cannot be */ +/* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). */ +/* Mark the computation as pointless. */ + buf = 0.; + } else { +/* Use second scaling factor to prevent flushing to zero. */ + i__1 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__1); + } + i__1 = nbb; + for (jj = 1; jj <= i__1; ++jj) { + i__2 = nba; + for (ll = 1; ll <= i__2; ++ll) { +/* Bound by BIGNUM to not introduce Inf. The value */ +/* is irrelevant; corresponding entries of the */ +/* solution will be flushed in consistency scaling. */ +/* Computing MIN */ + i__3 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * swork_dim1] + / pow_di(&c_b18, &i__3); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + } + swork[k + l * swork_dim1] = scaloc * swork[k + l * swork_dim1] + ; + i__1 = k2 - k1; + i__2 = l2 - l1; + xnrm = zlange_("I", &i__1, &i__2, &c__[k1 + l1 * c_dim1], ldc, + wnrm); + + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) */ + + i1 = (i__ - 1) * nb + 1; +/* Computing MIN */ + i__2 = i__ * nb; + i2 = f2cmin(i__2,*m) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. 
*/ + + i__2 = i2 - i1; + i__3 = l2 - l1; + cnrm = zlange_("I", &i__2, &i__3, &c__[i1 + l1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[i__ + l * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[i__ + l * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + anrm = swork[i__ + (awrk + k) * swork_dim1]; + scaloc = dlarmm_(&anrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( I, L ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + ll * c_dim1], & + c__1); + } + } + + scal = scamin / swork[i__ + l * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = l2 - 1; + for (ll = l1; ll <= i__2; ++ll) { + i__3 = i2 - i1; + zdscal_(&i__3, &scal, &c__[i1 + ll * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[i__ + l * swork_dim1] = scamin * scaloc; + + i__2 = i2 - i1; + i__3 = l2 - l1; + i__4 = k2 - k1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__2, &i__3, &i__4, &z__1, &a[i1 + k1 * + a_dim1], lda, &c__[k1 + l1 * c_dim1], ldc, &c_b1, + &c__[i1 + l1 * c_dim1], ldc) + ; + + } + + i__1 = l - 1; + for (j = 1; j <= i__1; ++j) { + +/* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H */ + + j1 = (j - 1) * nb + 1; +/* Computing MIN */ + i__2 = j * nb; + j2 = f2cmin(i__2,*n) + 1; + +/* Compute scaling factor to survive the linear update */ +/* simulating consistent scaling. */ + + i__2 = k2 - k1; + i__3 = j2 - j1; + cnrm = zlange_("I", &i__2, &i__3, &c__[k1 + j1 * c_dim1], + ldc, wnrm); +/* Computing MIN */ + d__1 = swork[k + j * swork_dim1], d__2 = swork[k + l * + swork_dim1]; + scamin = f2cmin(d__1,d__2); + cnrm *= scamin / swork[k + j * swork_dim1]; + xnrm *= scamin / swork[k + l * swork_dim1]; + bnrm = swork[l + (bwrk + j) * swork_dim1]; + scaloc = dlarmm_(&bnrm, &xnrm, &cnrm); + if (scaloc * scamin == 0.) { +/* Use second scaling factor to prevent flushing to zero. */ + i__2 = myexp_(&scaloc); + buf *= pow_di(&c_b18, &i__2); + i__2 = nbb; + for (jj = 1; jj <= i__2; ++jj) { + i__3 = nba; + for (ll = 1; ll <= i__3; ++ll) { +/* Computing MIN */ + i__4 = myexp_(&scaloc); + d__1 = bignum, d__2 = swork[ll + jj * + swork_dim1] / pow_di(&c_b18, &i__4); + swork[ll + jj * swork_dim1] = f2cmin(d__1,d__2); + } + } + i__2 = myexp_(&scaloc); + scamin /= pow_di(&c_b18, &i__2); + i__2 = myexp_(&scaloc); + scaloc /= pow_di(&c_b18, &i__2); + } + cnrm *= scaloc; + xnrm *= scaloc; + +/* Simultaneously apply the robust update factor and the */ +/* consistency scaling factor to C( K, J ) and C( K, L). */ + + scal = scamin / swork[k + l * swork_dim1] * scaloc; + if (scal != 1.) 
{ + i__2 = l2 - 1; + for (jj = l1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + + scal = scamin / swork[k + j * swork_dim1] * scaloc; + if (scal != 1.) { + i__2 = j2 - 1; + for (jj = j1; jj <= i__2; ++jj) { + i__3 = k2 - k1; + zdscal_(&i__3, &scal, &c__[k1 + jj * c_dim1], & + c__1); + } + } + +/* Record current scaling factor */ + + swork[k + l * swork_dim1] = scamin * scaloc; + swork[k + j * swork_dim1] = scamin * scaloc; + + i__2 = k2 - k1; + i__3 = j2 - j1; + i__4 = l2 - l1; + z__1.r = -csgn.r, z__1.i = -csgn.i; + zgemm_("N", "C", &i__2, &i__3, &i__4, &z__1, &c__[k1 + l1 + * c_dim1], ldc, &b[j1 + l1 * b_dim1], ldb, &c_b1, + &c__[k1 + j1 * c_dim1], ldc) + ; + } + } + } + + } + + free(wnrm); + +/* Reduce local scaling factors */ + + *scale = swork[swork_dim1 + 1]; + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { +/* Computing MIN */ + d__1 = *scale, d__2 = swork[k + l * swork_dim1]; + *scale = f2cmin(d__1,d__2); + } + } + if (*scale == 0.) { + +/* The magnitude of the largest entry of the solution is larger */ +/* than the product of BIGNUM**2 and cannot be represented in the */ +/* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to */ +/* zero and give up. */ + + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + return 0; + } + +/* Realize consistent scaling */ + + i__1 = nba; + for (k = 1; k <= i__1; ++k) { + k1 = (k - 1) * nb + 1; +/* Computing MIN */ + i__2 = k * nb; + k2 = f2cmin(i__2,*m) + 1; + i__2 = nbb; + for (l = 1; l <= i__2; ++l) { + l1 = (l - 1) * nb + 1; +/* Computing MIN */ + i__3 = l * nb; + l2 = f2cmin(i__3,*n) + 1; + scal = *scale / swork[k + l * swork_dim1]; + if (scal != 1.) { + i__3 = l2 - 1; + for (ll = l1; ll <= i__3; ++ll) { + i__4 = k2 - k1; + zdscal_(&i__4, &scal, &c__[k1 + ll * c_dim1], &c__1); + } + } + } + } + + if (buf != 1. && buf > 0.) { + +/* Decrease SCALE as much as possible. */ + +/* Computing MIN */ + d__1 = *scale / smlnum, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + *scale /= scaloc; + } + + if (buf != 1. && buf > 0.) { + +/* In case of overly aggressive scaling during the computation, */ +/* flushing of the global scale factor may be prevented by */ +/* undoing some of the scaling. This step is to ensure that */ +/* this routine flushes only scale factors that TRSYL also */ +/* flushes and be usable as a drop-in replacement. */ + +/* How much can the normwise largest entry be upscaled? */ + +/* Computing MAX */ + i__1 = c_dim1 + 1; + d__3 = (d__1 = c__[i__1].r, abs(d__1)), d__4 = (d__2 = d_imag(&c__[ + c_dim1 + 1]), abs(d__2)); + scal = f2cmax(d__3,d__4); + i__1 = *m; + for (k = 1; k <= i__1; ++k) { + i__2 = *n; + for (l = 1; l <= i__2; ++l) { +/* Computing MAX */ + i__3 = k + l * c_dim1; + d__3 = scal, d__4 = (d__1 = c__[i__3].r, abs(d__1)), d__3 = + f2cmax(d__3,d__4), d__4 = (d__2 = d_imag(&c__[k + l * + c_dim1]), abs(d__2)); + scal = f2cmax(d__3,d__4); + } + } + +/* Increase BUF as close to 1 as possible and apply scaling. */ + +/* Computing MIN */ + d__1 = bignum / scal, d__2 = 1. / buf; + scaloc = f2cmin(d__1,d__2); + buf *= scaloc; + zlascl_("G", &c_n1, &c_n1, &c_b106, &scaloc, m, n, &c__[c_offset], + ldc, &iinfo); + } + +/* Combine with buffer scaling factor. SCALE will be flushed if */ +/* BUF is less than one here. 
*/ + + *scale *= buf; + +/* Restore workspace dimensions */ + + swork[swork_dim1 + 1] = (doublereal) f2cmax(nba,nbb); + swork[swork_dim1 + 2] = (doublereal) ((nbb << 1) + nba); + + return 0; + +/* End of ZTRSYL3 */ + +} /* ztrsyl3_ */ + diff --git a/lapack-netlib/SRC/ztrsyl3.f b/lapack-netlib/SRC/ztrsyl3.f new file mode 100644 index 000000000..b5a058da4 --- /dev/null +++ b/lapack-netlib/SRC/ztrsyl3.f @@ -0,0 +1,1142 @@ +*> \brief \b ZTRSYL3 +* +* Definition: +* =========== +* +* +*> \par Purpose +* ============= +*> +*> \verbatim +*> +*> ZTRSYL3 solves the complex Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**H, and A and B are both upper triangular. A is +*> M-by-M and B is N-by-N; the right hand side C and the solution X are +*> M-by-N; and scale is an output scale factor, set <= 1 to avoid +*> overflow in X. +*> +*> This is the block version of the algorithm. +*> \endverbatim +* +* Arguments +* ========= +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'C': op(A) = A**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'C': op(B) = B**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS), +*> MAX(1,COLS)). +*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS +*> and SWORK(2) returns the optimal COLS. +*> \endverbatim +*> +*> \param[in] LDSWORK +*> \verbatim +*> LDSWORK is INTEGER +*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1) +*> and NB is the optimal block size. 
+*> +*> If LDSWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal dimensions of the SWORK matrix, +*> returns these values as the first and second entry of the SWORK +*> matrix, and no error message related LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== +* References: +* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of +* algorithms: The triangular Sylvester equation, ACM Transactions +* on Mathematical Software (TOMS), volume 29, pages 218--243. +* +* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel +* Solution of the Triangular Sylvester Equation. Lecture Notes in +* Computer Science, vol 12043, pages 82--92, Springer. +* +* Contributor: +* Angelika Schwarz, Umea University, Sweden. +* +* ===================================================================== + SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, SWORK, LDSWORK, INFO ) + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) + DOUBLE PRECISION SWORK( LDSWORK, * ) +* .. +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB, LQUERY + INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ, + $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB + DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC, + $ SCAMIN, SGN, XNRM, BUF, SMLNUM + COMPLEX*16 CSGN +* .. +* .. Local Arrays .. + DOUBLE PRECISION WNRM( MAX( M, N ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLARMM, ZLANGE + EXTERNAL DLAMCH, DLARMM, ILAENV, LSAME, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMM, ZLASCL, ZTRSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, EXPONENT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* +* Use the same block size for all matrices. +* + NB = MAX( 8, ILAENV( 1, 'ZTRSYL', '', M, N, -1, -1) ) +* +* Compute number of blocks in A and B +* + NBA = MAX( 1, (M + NB - 1) / NB ) + NBB = MAX( 1, (N + NB - 1) / NB ) +* +* Compute workspace +* + INFO = 0 + LQUERY = ( LDSWORK.EQ.-1 ) + IF( LQUERY ) THEN + LDSWORK = 2 + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + END IF +* +* Test the input arguments +* + IF( .NOT.NOTRNA .AND. .NOT. LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT. LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. 
ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSYL3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Use unblocked code for small problems or if insufficient +* workspace is provided +* + IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) ) THEN + CALL ZTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, INFO ) + RETURN + END IF +* +* Set constants to control overflow +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Set local scaling factors. +* + DO L = 1, NBB + DO K = 1, NBA + SWORK( K, L ) = ONE + END DO + END DO +* +* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero. +* This scaling is to ensure compatibility with TRSYL and may get flushed. +* + BUF = ONE +* +* Compute upper bounds of blocks of A and B +* + AWRK = NBB + DO K = 1, NBA + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = K, NBA + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, M ) + 1 + IF( NOTRNA ) THEN + SWORK( K, AWRK + L ) = ZLANGE( 'I', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + ELSE + SWORK( L, AWRK + K ) = ZLANGE( '1', K2-K1, L2-L1, + $ A( K1, L1 ), LDA, WNRM ) + END IF + END DO + END DO + BWRK = NBB + NBA + DO K = 1, NBB + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, N ) + 1 + DO L = K, NBB + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 + IF( NOTRNB ) THEN + SWORK( K, BWRK + L ) = ZLANGE( 'I', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + ELSE + SWORK( L, BWRK + K ) = ZLANGE( '1', K2-K1, L2-L1, + $ B( K1, L1 ), LDB, WNRM ) + END IF + END DO + END DO +* + SGN = DBLE( ISGN ) + CSGN = DCMPLX( SGN, ZERO ) +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL ZTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. 
+ SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = ZLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K - 1, 1, -1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L ). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( I2-I1, SCAL, C( I1, LL ), 1) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK(L, BWRK + J) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B = scale*C. 
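The linear updates in these loops all follow the same scale-bookkeeping pattern: the two C blocks involved are first brought to a common local scale SCAMIN, the minimum of their SWORK entries; DLARMM then supplies a further factor SCALOC that keeps the GEMM below the overflow threshold; both blocks are rescaled column by column by (SCAMIN / SWORK(..)) * SCALOC; and both record SCAMIN * SCALOC as their new scale. When SCAMIN * SCALOC would itself underflow, the correction is folded into BUF as an exact power of two, 2**EXPONENT(SCALOC), so no rounding is introduced. A rough C sketch of this bookkeeping, with the DLARMM result treated as a given scalar (this is not an implementation of DLARMM):

    /* Sketch of the scale bookkeeping performed before each block update
       C(I,L) := C(I,L) - A(I,K)*C(K,L).  swork_i and swork_k are the
       current local scale factors of the two blocks; scaloc is the factor
       returned by DLARMM and is simply taken as an input here. */
    #include <math.h>

    /* Fortran's 2.0**EXPONENT(x): the power of two p with 0.5 <= |x|/p < 1. */
    static double pow2_exponent(double x)
    {
        int e;
        frexp(x, &e);
        return ldexp(1.0, e);
    }

    static double reconcile_scales(double swork_i, double swork_k,
                                   double scaloc,        /* from DLARMM   */
                                   double *scal_i, double *scal_k)
    {
        double scamin = swork_i < swork_k ? swork_i : swork_k;
        if (scaloc * scamin == 0.0) {
            /* The routine also multiplies BUF by this power of two and
               divides every SWORK entry by it; omitted in this sketch.   */
            double p = pow2_exponent(scaloc);
            scamin /= p;
            scaloc /= p;
        }
        *scal_k = (scamin / swork_k) * scaloc;  /* applied to C(K,L)       */
        *scal_i = (scamin / swork_i) * scaloc;  /* applied to C(I,L)       */
        return scamin * scaloc;                 /* new common scale factor */
    }

The column updates C(K,J) follow the same pattern with the cached B block norms in place of the A block norms.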
+* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL ZTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = ZLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. 
ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'C', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) + END DO +* + DO J = L + 1, NBB +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J ) +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( L1, J1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B**H = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. +* I=1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = 1, NBA +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL ZTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. 
The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = ZLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = K + 1, NBA +* +* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'C', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( K1, I1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'C', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO + ELSE IF( NOTRNA .AND. 
.NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**H = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H]. +* I=K+1 J=L+1 +* +* Start loop over block rows (index = K) and block columns (index = L) +* + DO K = NBA, 1, -1 +* +* K1: row index of the first row in X( K, L ) +* K2: row index of the first row in X( K+1, L ) +* so the K2 - K1 is the column count of the block X( K, L ) +* + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = NBB, 1, -1 +* +* L1: column index of the first column in X( K, L ) +* L2: column index of the first column in X( K, L + 1) +* so that L2 - L1 is the row count of the block X( K, L ) +* + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 +* + CALL ZTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1, + $ A( K1, K1 ), LDA, + $ B( L1, L1 ), LDB, + $ C( K1, L1 ), LDC, SCALOC, IINFO ) + INFO = MAX( INFO, IINFO ) +* + IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN + IF( SCALOC .EQ. ZERO ) THEN +* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1) +* is larger than the product of BIGNUM**2 and cannot be +* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1). +* Mark the computation as pointless. + BUF = ZERO + ELSE +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + END IF + DO JJ = 1, NBB + DO LL = 1, NBA +* Bound by BIGNUM to not introduce Inf. The value +* is irrelevant; corresponding entries of the +* solution will be flushed in consistency scaling. + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + END IF + SWORK( K, L ) = SCALOC * SWORK( K, L ) + XNRM = ZLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC, + $ WNRM ) +* + DO I = 1, K - 1 +* +* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L ) +* + I1 = (I - 1) * NB + 1 + I2 = MIN( I * NB, M ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( I, L ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + ANRM = SWORK( I, AWRK + K ) + SCALOC = DLARMM( ANRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( I, L ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC + IF( SCAL .NE. 
ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( I2-I1, SCAL, C( I1, LL ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( I, L ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -CONE, + $ A( I1, K1 ), LDA, C( K1, L1 ), LDC, + $ CONE, C( I1, L1 ), LDC ) +* + END DO +* + DO J = 1, L - 1 +* +* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H +* + J1 = (J - 1) * NB + 1 + J2 = MIN( J * NB, N ) + 1 +* +* Compute scaling factor to survive the linear update +* simulating consistent scaling. +* + CNRM = ZLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ), + $ LDC, WNRM ) + SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) ) + CNRM = CNRM * ( SCAMIN / SWORK( K, J ) ) + XNRM = XNRM * ( SCAMIN / SWORK( K, L ) ) + BNRM = SWORK( L, BWRK + J ) + SCALOC = DLARMM( BNRM, XNRM, CNRM ) + IF( SCALOC * SCAMIN .EQ. ZERO ) THEN +* Use second scaling factor to prevent flushing to zero. + BUF = BUF*2.D0**EXPONENT( SCALOC ) + DO JJ = 1, NBB + DO LL = 1, NBA + SWORK( LL, JJ ) = MIN( BIGNUM, + $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) ) + END DO + END DO + SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC ) + SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC ) + END IF + CNRM = CNRM * SCALOC + XNRM = XNRM * SCALOC +* +* Simultaneously apply the robust update factor and the +* consistency scaling factor to C( K, J ) and C( K, L). +* + SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* + SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC + IF( SCAL .NE. ONE ) THEN + DO JJ = J1, J2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, JJ ), 1 ) + END DO + ENDIF +* +* Record current scaling factor +* + SWORK( K, L ) = SCAMIN * SCALOC + SWORK( K, J ) = SCAMIN * SCALOC +* + CALL ZGEMM( 'N', 'C', K2-K1, J2-J1, L2-L1, -CSGN, + $ C( K1, L1 ), LDC, B( J1, L1 ), LDB, + $ CONE, C( K1, J1 ), LDC ) + END DO + END DO + END DO +* + END IF +* +* Reduce local scaling factors +* + SCALE = SWORK( 1, 1 ) + DO K = 1, NBA + DO L = 1, NBB + SCALE = MIN( SCALE, SWORK( K, L ) ) + END DO + END DO + IF( SCALE .EQ. ZERO ) THEN +* +* The magnitude of the largest entry of the solution is larger +* than the product of BIGNUM**2 and cannot be represented in the +* form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to +* zero and give up. +* + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA + RETURN + END IF +* +* Realize consistent scaling +* + DO K = 1, NBA + K1 = (K - 1) * NB + 1 + K2 = MIN( K * NB, M ) + 1 + DO L = 1, NBB + L1 = (L - 1) * NB + 1 + L2 = MIN( L * NB, N ) + 1 + SCAL = SCALE / SWORK( K, L ) + IF( SCAL .NE. ONE ) THEN + DO LL = L1, L2-1 + CALL ZDSCAL( K2-K1, SCAL, C( K1, LL ), 1 ) + END DO + ENDIF + END DO + END DO +* + IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN +* +* Decrease SCALE as much as possible. +* + SCALOC = MIN( SCALE / SMLNUM, ONE / BUF ) + BUF = BUF * SCALOC + SCALE = SCALE / SCALOC + END IF +* + IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN +* +* In case of overly aggressive scaling during the computation, +* flushing of the global scale factor may be prevented by +* undoing some of the scaling. This step is to ensure that +* this routine flushes only scale factors that TRSYL also +* flushes and be usable as a drop-in replacement. +* +* How much can the normwise largest entry be upscaled? 
+* + SCAL = MAX( ABS( DBLE( C( 1, 1 ) ) ), + $ ABS( DIMAG( C ( 1, 1 ) ) ) ) + DO K = 1, M + DO L = 1, N + SCAL = MAX( SCAL, ABS( DBLE ( C( K, L ) ) ), + $ ABS( DIMAG ( C( K, L ) ) ) ) + END DO + END DO +* +* Increase BUF as close to 1 as possible and apply scaling. +* + SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) + BUF = BUF * SCALOC + CALL ZLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IINFO ) + END IF +* +* Combine with buffer scaling factor. SCALE will be flushed if +* BUF is less than one here. +* + SCALE = SCALE * BUF +* +* Restore workspace dimensions +* + SWORK(1,1) = MAX( NBA, NBB ) + SWORK(2,1) = 2 * NBB + NBA +* + RETURN +* +* End of ZTRSYL3 +* + END diff --git a/lapack-netlib/TESTING/EIG/CMakeLists.txt b/lapack-netlib/TESTING/EIG/CMakeLists.txt index 226004a90..d252c7fa9 100644 --- a/lapack-netlib/TESTING/EIG/CMakeLists.txt +++ b/lapack-netlib/TESTING/EIG/CMakeLists.txt @@ -40,7 +40,7 @@ set(SEIGTST schkee.F sget54.f sglmts.f sgqrts.f sgrqts.f sgsvts3.f shst01.f slarfy.f slarhs.f slatm4.f slctes.f slctsx.f slsets.f sort01.f sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f - sstt22.f ssyt21.f ssyt22.f) + sstt22.f ssyl01.f ssyt21.f ssyt22.f) set(CEIGTST cchkee.F cbdt01.f cbdt02.f cbdt03.f cbdt05.f @@ -56,7 +56,7 @@ set(CEIGTST cchkee.F cget54.f cglmts.f cgqrts.f cgrqts.f cgsvts3.f chbt21.f chet21.f chet22.f chpt21.f chst01.f clarfy.f clarhs.f clatm4.f clctes.f clctsx.f clsets.f csbmv.f - csgt01.f cslect.f + csgt01.f cslect.f csyl01.f cstt21.f cstt22.f cunt01.f cunt03.f) set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f @@ -77,7 +77,7 @@ set(DEIGTST dchkee.F dget54.f dglmts.f dgqrts.f dgrqts.f dgsvts3.f dhst01.f dlarfy.f dlarhs.f dlatm4.f dlctes.f dlctsx.f dlsets.f dort01.f dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f - dstt22.f dsyt21.f dsyt22.f) + dstt22.f dsyl01.f dsyt21.f dsyt22.f) set(ZEIGTST zchkee.F zbdt01.f zbdt02.f zbdt03.f zbdt05.f @@ -93,7 +93,7 @@ set(ZEIGTST zchkee.F zget54.f zglmts.f zgqrts.f zgrqts.f zgsvts3.f zhbt21.f zhet21.f zhet22.f zhpt21.f zhst01.f zlarfy.f zlarhs.f zlatm4.f zlctes.f zlctsx.f zlsets.f zsbmv.f - zsgt01.f zslect.f + zsgt01.f zslect.f zsyl01.f zstt21.f zstt22.f zunt01.f zunt03.f) macro(add_eig_executable name) diff --git a/lapack-netlib/TESTING/EIG/Makefile b/lapack-netlib/TESTING/EIG/Makefile index bccfccf95..942ae6982 100644 --- a/lapack-netlib/TESTING/EIG/Makefile +++ b/lapack-netlib/TESTING/EIG/Makefile @@ -62,7 +62,7 @@ SEIGTST = schkee.o \ sget54.o sglmts.o sgqrts.o sgrqts.o sgsvts3.o \ shst01.o slarfy.o slarhs.o slatm4.o slctes.o slctsx.o slsets.o sort01.o \ sort03.o ssbt21.o ssgt01.o sslect.o sspt21.o sstt21.o \ - sstt22.o ssyt21.o ssyt22.o + sstt22.o ssyl01.o ssyt21.o ssyt22.o CEIGTST = cchkee.o \ cbdt01.o cbdt02.o cbdt03.o cbdt05.o \ @@ -78,7 +78,7 @@ CEIGTST = cchkee.o \ cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts3.o \ chbt21.o chet21.o chet22.o chpt21.o chst01.o \ clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \ - csgt01.o cslect.o \ + csgt01.o cslect.o csyl01.o\ cstt21.o cstt22.o cunt01.o cunt03.o DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \ @@ -99,7 +99,7 @@ DEIGTST = dchkee.o \ dget54.o dglmts.o dgqrts.o dgrqts.o dgsvts3.o \ dhst01.o dlarfy.o dlarhs.o dlatm4.o dlctes.o dlctsx.o dlsets.o dort01.o \ dort03.o dsbt21.o dsgt01.o dslect.o dspt21.o dstt21.o \ - dstt22.o dsyt21.o dsyt22.o + dstt22.o dsyl01.o dsyt21.o dsyt22.o ZEIGTST = zchkee.o \ zbdt01.o zbdt02.o zbdt03.o zbdt05.o \ @@ -115,7 +115,7 @@ ZEIGTST = zchkee.o \ zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts3.o \ 
zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \ zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \ - zsgt01.o zslect.o \ + zsgt01.o zslect.o zsyl01.o\ zstt21.o zstt22.o zunt01.o zunt03.o .PHONY: all diff --git a/lapack-netlib/TESTING/EIG/cchkec.f b/lapack-netlib/TESTING/EIG/cchkec.f index 6727a0954..c892b0a54 100644 --- a/lapack-netlib/TESTING/EIG/cchkec.f +++ b/lapack-netlib/TESTING/EIG/cchkec.f @@ -23,7 +23,7 @@ *> \verbatim *> *> CCHKEC tests eigen- condition estimation routines -*> CTRSYL, CTREXC, CTRSNA, CTRSEN +*> CTRSYL, CTRSYL3, CTREXC, CTRSNA, CTRSEN *> *> In all cases, the routine runs through a fixed set of numerical *> examples, subjects them to various tests, and compares the test @@ -88,17 +88,17 @@ * .. Local Scalars .. LOGICAL OK CHARACTER*3 PATH - INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL, - $ NTESTS, NTREXC, NTRSYL - REAL EPS, RTREXC, RTRSYL, SFMIN + INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3, + $ LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL + REAL EPS, RTREXC, SFMIN * .. * .. Local Arrays .. - INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ), - $ NTRSNA( 3 ) - REAL RTRSEN( 3 ), RTRSNA( 3 ) + INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ), + $ LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 ) + REAL RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 ) * .. * .. External Subroutines .. - EXTERNAL CERREC, CGET35, CGET36, CGET37, CGET38 + EXTERNAL CERREC, CGET35, CGET36, CGET37, CGET38, CSYL01 * .. * .. External Functions .. REAL SLAMCH @@ -120,10 +120,24 @@ $ CALL CERREC( PATH, NOUT ) * OK = .TRUE. - CALL CGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN ) - IF( RTRSYL.GT.THRESH ) THEN + CALL CGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN ) + IF( RTRSYL( 1 ).GT.THRESH ) THEN OK = .FALSE. - WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL + WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL + END IF +* + CALL CSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 ) + IF( FTRSYL( 1 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH + END IF + IF( FTRSYL( 2 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH + END IF + IF( FTRSYL( 3 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9972 )FTRSYL( 3 ) END IF * CALL CGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN ) @@ -169,6 +183,12 @@ $ / ' Safe minimum (SFMIN) = ', E16.6, / ) 9992 FORMAT( ' Routines pass computational tests if test ratio is ', $ 'less than', F8.2, / / ) + 9972 FORMAT( 'CTRSYL and CTRSYL3 compute an inconsistent scale ', + $ 'factor in ', I8, ' tests.') + 9971 FORMAT( 'Error in CTRSYL3: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) + 9970 FORMAT( 'Error in CTRSYL: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) RETURN * * End of CCHKEC diff --git a/lapack-netlib/TESTING/EIG/cerrec.f b/lapack-netlib/TESTING/EIG/cerrec.f index 650ab2b6e..6e2e1d38a 100644 --- a/lapack-netlib/TESTING/EIG/cerrec.f +++ b/lapack-netlib/TESTING/EIG/cerrec.f @@ -23,7 +23,7 @@ *> *> CERREC tests the error exits for the routines for eigen- condition *> estimation for REAL matrices: -*> CTRSYL, CTREXC, CTRSNA and CTRSEN. +*> CTRSYL, CTRSYL3, CTREXC, CTRSNA and CTRSEN. *> \endverbatim * * Arguments: @@ -77,12 +77,12 @@ * .. * .. Local Arrays .. LOGICAL SEL( NMAX ) - REAL RW( LW ), S( NMAX ), SEP( NMAX ) + REAL RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX ) COMPLEX A( NMAX, NMAX ), B( NMAX, NMAX ), $ C( NMAX, NMAX ), WORK( LW ), X( NMAX ) * .. * .. External Subroutines .. 
- EXTERNAL CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL + EXTERNAL CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL, CTRSYL3 * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -141,6 +141,43 @@ CALL CHKXER( 'CTRSYL', INFOT, NOUT, LERR, OK ) NT = NT + 8 * +* Test CTRSYL3 +* + SRNAMT = 'CTRSYL3' + INFOT = 1 + CALL CTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL CTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * Test CTREXC * SRNAMT = 'CTREXC' diff --git a/lapack-netlib/TESTING/EIG/csyl01.f b/lapack-netlib/TESTING/EIG/csyl01.f new file mode 100644 index 000000000..e21f1a7a0 --- /dev/null +++ b/lapack-netlib/TESTING/EIG/csyl01.f @@ -0,0 +1,294 @@ +*> \brief \b CSYL01 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) +* +* .. Scalar Arguments .. +* INTEGER KNT +* REAL THRESH +* .. +* .. Array Arguments .. +* INTEGER NFAIL( 3 ), NINFO( 2 ) +* REAL RMAX( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYL01 tests CTRSYL and CTRSYL3, routines for solving the Sylvester matrix +*> equation +*> +*> op(A)*X + ISGN*X*op(B) = scale*C, +*> +*> where op(A) and op(B) are both upper triangular form, op() represents an +*> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output +*> less than or equal to 1, chosen to avoid overflow in X. +*> +*> The test code verifies that the following residual does not exceed +*> the provided threshold: +*> +*> norm(op(A)*X + ISGN*X*op(B) - scale*C) / +*> (EPS*max(norm(A),norm(B))*norm(X)) +*> +*> This routine complements CGET35 by testing with larger, +*> random matrices, of which some require rescaling of X to avoid overflow. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the residual, computed as +*> described above, exceeds THRESH. +*> \endverbatim +*> +*> \param[out] NFAIL +*> \verbatim +*> NFAIL is INTEGER array, dimension (3) +*> NFAIL(1) = No. of times residual CTRSYL exceeds threshold THRESH +*> NFAIL(2) = No. of times residual CTRSYL3 exceeds threshold THRESH +*> NFAIL(3) = No. 
of times CTRSYL3 and CTRSYL deviate +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> RMAX is DOUBLE PRECISION array, dimension (2) +*> RMAX(1) = Value of the largest test ratio of CTRSYL +*> RMAX(2) = Value of the largest test ratio of CTRSYL3 +*> \endverbatim +*> +*> \param[out] NINFO +*> \verbatim +*> NINFO is INTEGER array, dimension (2) +*> NINFO(1) = No. of times CTRSYL where INFO is nonzero +*> NINFO(2) = No. of times CTRSYL3 where INFO is nonzero +*> \endverbatim +*> +*> \param[out] KNT +*> \verbatim +*> KNT is INTEGER +*> Total number of examples tested. +*> \endverbatim + +* +* -- LAPACK test routine -- + SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER KNT + REAL THRESH +* .. +* .. Array Arguments .. + INTEGER NFAIL( 3 ), NINFO( 2 ) + REAL RMAX( 2 ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ONE, ZERO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER MAXM, MAXN, LDSWORK + PARAMETER ( MAXM = 101, MAXN = 138, LDSWORK = 18 ) +* .. +* .. Local Scalars .. + CHARACTER TRANA, TRANB + INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA, + $ KUA, KLB, KUB, M, N + REAL ANRM, BNRM, BIGNUM, EPS, RES, RES1, + $ SCALE, SCALE3, SMLNUM, TNRM, XNRM + COMPLEX RMUL +* .. +* .. Local Arrays .. + COMPLEX A( MAXM, MAXM ), B( MAXN, MAXN ), + $ C( MAXM, MAXN ), CC( MAXM, MAXN ), + $ X( MAXM, MAXN ), + $ DUML( MAXM ), DUMR( MAXN ), + $ D( MIN( MAXM, MAXN ) ) + REAL SWORK( LDSWORK, 54 ), DUM( MAXN ), VM( 2 ) + INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) +* .. +* .. External Functions .. + LOGICAL SISNAN + REAL SLAMCH, CLANGE + EXTERNAL SISNAN, SLAMCH, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CLATMR, CLACPY, CGEMM, CTRSYL, CTRSYL3 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX +* .. +* .. Executable Statements .. 
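The residual ratio described above is evaluated at a possibly reduced scale: when norm(X) and max(norm(A), norm(B)) are both large, the whole residual is formed with an extra factor RMUL = 1/max of the two norms so the verification GEMMs cannot overflow, and the denominator is floored by SMLNUM terms so that a tiny X does not inflate the ratio. A small C sketch of these two guards, using FLT_EPSILON and FLT_MIN as stand-ins for SLAMCH('P') and SLAMCH('S') (close for IEEE single precision, but an approximation):

    /* Sketch: the overflow guard RMUL and the guarded test ratio used by
       the Sylvester tests.  res1 is the max-norm of the scaled residual
       rmul*(op(A)*X + isgn*X*op(B) - scale*C); xnrm is the max-norm of X
       and tnrm the larger of the max-norms of A and B (from xLANGE('M',..)). */
    #include <math.h>
    #include <float.h>

    static float choose_rmul(float xnrm, float tnrm)
    {
        float smlnum = FLT_MIN / FLT_EPSILON;   /* ~ SLAMCH('S')/EPS */
        float bignum = 1.0f / smlnum;
        if (xnrm > 1.0f && tnrm > 1.0f && xnrm > bignum / tnrm)
            return 1.0f / (xnrm > tnrm ? xnrm : tnrm);
        return 1.0f;
    }

    static float test_ratio(float res1, float xnrm, float tnrm, float rmul)
    {
        float eps    = FLT_EPSILON;             /* ~ SLAMCH('P') */
        float smlnum = FLT_MIN / eps;
        float den    = smlnum;
        if (smlnum * xnrm > den)
            den = smlnum * xnrm;
        if ((fabsf(rmul) * tnrm) * eps * xnrm > den)
            den = (fabsf(rmul) * tnrm) * eps * xnrm;
        return res1 / den;                      /* compared against THRESH */
    }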
+* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* +* Expect INFO = 0 + VM( 1 ) = ONE +* Expect INFO = 1 + VM( 2 ) = 0.5E+0 +* +* Begin test loop +* + NINFO( 1 ) = 0 + NINFO( 2 ) = 0 + NFAIL( 1 ) = 0 + NFAIL( 2 ) = 0 + NFAIL( 3 ) = 0 + RMAX( 1 ) = ZERO + RMAX( 2 ) = ZERO + KNT = 0 + ISEED( 1 ) = 1 + ISEED( 2 ) = 1 + ISEED( 3 ) = 1 + ISEED( 4 ) = 1 + SCALE = ONE + SCALE3 = ONE + DO J = 1, 2 + DO ISGN = -1, 1, 2 +* Reset seed (overwritten by LATMR) + ISEED( 1 ) = 1 + ISEED( 2 ) = 1 + ISEED( 3 ) = 1 + ISEED( 4 ) = 1 + DO M = 32, MAXM, 23 + KLA = 0 + KUA = M - 1 + CALL CLATMR( M, M, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLA, KUA, ZERO, + $ ONE, 'NO', A, MAXM, IWORK, + $ IINFO ) + DO I = 1, M + A( I, I ) = A( I, I ) * VM( J ) + END DO + ANRM = CLANGE( 'M', M, M, A, MAXM, DUM ) + DO N = 51, MAXN, 29 + KLB = 0 + KUB = N - 1 + CALL CLATMR( N, N, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLB, KUB, ZERO, + $ ONE, 'NO', B, MAXN, IWORK, + $ IINFO ) + DO I = 1, N + B( I, I ) = B( I, I ) * VM ( J ) + END DO + BNRM = CLANGE( 'M', N, N, B, MAXN, DUM ) + TNRM = MAX( ANRM, BNRM ) + CALL CLATMR( M, N, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, M, N, ZERO, ONE, + $ 'NO', C, MAXM, IWORK, IINFO ) + DO ITRANA = 1, 2 + IF( ITRANA.EQ.1 ) + $ TRANA = 'N' + IF( ITRANA.EQ.2 ) + $ TRANA = 'C' + DO ITRANB = 1, 2 + IF( ITRANB.EQ.1 ) + $ TRANB = 'N' + IF( ITRANB.EQ.2 ) + $ TRANB = 'C' + KNT = KNT + 1 +* + CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM) + CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM) + CALL CTRSYL( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE, IINFO ) + IF( IINFO.NE.0 ) + $ NINFO( 1 ) = NINFO( 1 ) + 1 + XNRM = CLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = CONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = CONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL CGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE*RMUL, + $ CC, MAXM ) + CALL CGEMM( 'N', TRANB, M, N, N, + $ REAL( ISGN )*RMUL, X, MAXM, B, + $ MAXN, CONE, CC, MAXM ) + RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM ) + IF( RES.GT.THRESH ) + $ NFAIL( 1 ) = NFAIL( 1 ) + 1 + IF( RES.GT.RMAX( 1 ) ) + $ RMAX( 1 ) = RES +* + CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM ) + CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM ) + CALL CTRSYL3( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE3, SWORK, LDSWORK, INFO) + IF( INFO.NE.0 ) + $ NINFO( 2 ) = NINFO( 2 ) + 1 + XNRM = CLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = CONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = CONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL CGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE3*RMUL, + $ CC, MAXM ) + CALL CGEMM( 'N', TRANB, M, N, N, + $ REAL( ISGN )*RMUL, X, MAXM, B, + $ MAXN, CONE, CC, MAXM ) + RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM ) +* Verify that TRSYL3 only flushes if TRSYL flushes (but +* there may be cases where TRSYL3 avoid flushing). + IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. + $ IINFO.NE.INFO ) THEN + NFAIL( 3 ) = NFAIL( 3 ) + 1 + END IF + IF( RES.GT.THRESH .OR. 
SISNAN( RES ) ) + $ NFAIL( 2 ) = NFAIL( 2 ) + 1 + IF( RES.GT.RMAX( 2 ) ) + $ RMAX( 2 ) = RES + END DO + END DO + END DO + END DO + END DO + END DO +* + RETURN +* +* End of CSYL01 +* + END diff --git a/lapack-netlib/TESTING/EIG/dchkec.f b/lapack-netlib/TESTING/EIG/dchkec.f index 854961884..c4451a627 100644 --- a/lapack-netlib/TESTING/EIG/dchkec.f +++ b/lapack-netlib/TESTING/EIG/dchkec.f @@ -90,21 +90,23 @@ LOGICAL OK CHARACTER*3 PATH INTEGER KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC, - $ KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2, - $ LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR, - $ NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC + $ KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC, + $ LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL, + $ NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC, + $ LTGEXC DOUBLE PRECISION EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2, - $ RTREXC, RTRSYL, SFMIN, RTGEXC + $ RTREXC, SFMIN, RTGEXC * .. * .. Local Arrays .. - INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ), - $ NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ), + INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ), + $ LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ), + $ NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ), $ NTRSNA( 3 ) - DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ) + DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 ) * .. * .. External Subroutines .. EXTERNAL DERREC, DGET31, DGET32, DGET33, DGET34, DGET35, - $ DGET36, DGET37, DGET38, DGET39, DGET40 + $ DGET36, DGET37, DGET38, DGET39, DGET40, DSYL01 * .. * .. External Functions .. DOUBLE PRECISION DLAMCH @@ -153,10 +155,24 @@ WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC END IF * - CALL DGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL ) - IF( RTRSYL.GT.THRESH ) THEN + CALL DGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL ) + IF( RTRSYL( 1 ).GT.THRESH ) THEN OK = .FALSE. - WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL + WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL + END IF +* + CALL DSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 ) + IF( FTRSYL( 1 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH + END IF + IF( FTRSYL( 2 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH + END IF + IF( FTRSYL( 3 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9972 )FTRSYL( 3 ) END IF * CALL DGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN ) @@ -227,7 +243,13 @@ 9987 FORMAT( ' Routines pass computational tests if test ratio is les', $ 's than', F8.2, / / ) 9986 FORMAT( ' Error in DTGEXC: RMAX =', D12.3, / ' LMAX = ', I8, ' N', - $ 'INFO=', I8, ' KNT=', I8 ) + $ 'INFO=', 2I8, ' KNT=', I8 ) + 9972 FORMAT( 'DTRSYL and DTRSYL3 compute an inconsistent result ', + $ 'factor in ', I8, ' tests.') + 9971 FORMAT( 'Error in DTRSYL3: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) + 9970 FORMAT( 'Error in DTRSYL: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) * * End of DCHKEC * diff --git a/lapack-netlib/TESTING/EIG/derrec.f b/lapack-netlib/TESTING/EIG/derrec.f index d5863ad42..f11f48887 100644 --- a/lapack-netlib/TESTING/EIG/derrec.f +++ b/lapack-netlib/TESTING/EIG/derrec.f @@ -23,7 +23,7 @@ *> *> DERREC tests the error exits for the routines for eigen- condition *> estimation for DOUBLE PRECISION matrices: -*> DTRSYL, DTREXC, DTRSNA and DTRSEN. +*> DTRSYL, DTRSYL3, DTREXC, DTRSNA and DTRSEN. *> \endverbatim * * Arguments: @@ -82,7 +82,7 @@ $ WI( NMAX ), WORK( NMAX ), WR( NMAX ) * .. * .. External Subroutines .. 
- EXTERNAL CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL + EXTERNAL CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL, DTRSYL3 * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -141,6 +141,43 @@ CALL CHKXER( 'DTRSYL', INFOT, NOUT, LERR, OK ) NT = NT + 8 * +* Test DTRSYL3 +* + SRNAMT = 'DTRSYL3' + INFOT = 1 + CALL DTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL DTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * Test DTREXC * SRNAMT = 'DTREXC' diff --git a/lapack-netlib/TESTING/EIG/dsyl01.f b/lapack-netlib/TESTING/EIG/dsyl01.f new file mode 100644 index 000000000..782d2cd42 --- /dev/null +++ b/lapack-netlib/TESTING/EIG/dsyl01.f @@ -0,0 +1,288 @@ +*> \brief \b DSYL01 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) +* +* .. Scalar Arguments .. +* INTEGER KNT +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* INTEGER NFAIL( 3 ), NINFO( 2 ) +* DOUBLE PRECISION RMAX( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYL01 tests DTRSYL and DTRSYL3, routines for solving the Sylvester matrix +*> equation +*> +*> op(A)*X + ISGN*X*op(B) = scale*C, +*> +*> A and B are assumed to be in Schur canonical form, op() represents an +*> optional transpose, and ISGN can be -1 or +1. Scale is an output +*> less than or equal to 1, chosen to avoid overflow in X. +*> +*> The test code verifies that the following residual does not exceed +*> the provided threshold: +*> +*> norm(op(A)*X + ISGN*X*op(B) - scale*C) / +*> (EPS*max(norm(A),norm(B))*norm(X)) +*> +*> This routine complements DGET35 by testing with larger, +*> random matrices, of which some require rescaling of X to avoid overflow. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the residual, computed as +*> described above, exceeds THRESH. +*> \endverbatim +*> +*> \param[out] NFAIL +*> \verbatim +*> NFAIL is INTEGER array, dimension (3) +*> NFAIL(1) = No. of times residual DTRSYL exceeds threshold THRESH +*> NFAIL(2) = No. of times residual DTRSYL3 exceeds threshold THRESH +*> NFAIL(3) = No. 
of times DTRSYL3 and DTRSYL deviate +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> RMAX is DOUBLE PRECISION, dimension (2) +*> RMAX(1) = Value of the largest test ratio of DTRSYL +*> RMAX(2) = Value of the largest test ratio of DTRSYL3 +*> \endverbatim +*> +*> \param[out] NINFO +*> \verbatim +*> NINFO is INTEGER array, dimension (2) +*> NINFO(1) = No. of times DTRSYL returns an expected INFO +*> NINFO(2) = No. of times DTRSYL3 returns an expected INFO +*> \endverbatim +*> +*> \param[out] KNT +*> \verbatim +*> KNT is INTEGER +*> Total number of examples tested. +*> \endverbatim + +* +* -- LAPACK test routine -- + SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER KNT + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + INTEGER NFAIL( 3 ), NINFO( 2 ) + DOUBLE PRECISION RMAX( 2 ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + INTEGER MAXM, MAXN, LDSWORK + PARAMETER ( MAXM = 245, MAXN = 192, LDSWORK = 36 ) +* .. +* .. Local Scalars .. + CHARACTER TRANA, TRANB + INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA, + $ KUA, KLB, KUB, LIWORK, M, N + DOUBLE PRECISION ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL, + $ SCALE, SCALE3, SMLNUM, TNRM, XNRM +* .. +* .. Local Arrays .. + DOUBLE PRECISION A( MAXM, MAXM ), B( MAXN, MAXN ), + $ C( MAXM, MAXN ), CC( MAXM, MAXN ), + $ X( MAXM, MAXN ), + $ DUML( MAXM ), DUMR( MAXN ), + $ D( MAX( MAXM, MAXN ) ), DUM( MAXN ), + $ SWORK( LDSWORK, 126 ), VM( 2 ) + INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 ) +* .. +* .. External Functions .. + LOGICAL DISNAN + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLATMR, DLACPY, DGEMM, DTRSYL, DTRSYL3 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. 
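Besides the residuals, the driver records in NFAIL(3) whether DTRSYL3 and DTRSYL disagree in a way that would make the blocked routine unsafe as a drop-in replacement: DTRSYL3 may avoid a flush (it may return a positive SCALE where DTRSYL returns zero), but it must never flush where DTRSYL succeeds, and the two INFO values are expected to agree. The second pass of the loop also scales the diagonal of the random upper triangular A down by VM(2) = 1e-6 while leaving its off-diagonal part alone, presumably to drive growth in X and exercise the rescaling and nonzero-INFO paths. A trivial C sketch of the deviation predicate (the function name is illustrative only):

    /* Sketch: the consistency check behind NFAIL(3).  scale and iinfo come
       from DTRSYL, scale3 and info from DTRSYL3 run on the same data. */
    static int trsyl3_deviates(double scale, int iinfo, double scale3, int info)
    {
        /* A flush by the blocked routine where the unblocked one succeeded,
           or differing INFO diagnostics, counts as a deviation. */
        return (scale3 == 0.0 && scale > 0.0) || (iinfo != info);
    }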
+* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + VM( 1 ) = ONE + VM( 2 ) = 0.000001D+0 +* +* Begin test loop +* + NINFO( 1 ) = 0 + NINFO( 2 ) = 0 + NFAIL( 1 ) = 0 + NFAIL( 2 ) = 0 + NFAIL( 3 ) = 0 + RMAX( 1 ) = ZERO + RMAX( 2 ) = ZERO + KNT = 0 + DO I = 1, 4 + ISEED( I ) = 1 + END DO + SCALE = ONE + SCALE3 = ONE + LIWORK = MAXM + MAXN + 2 + DO J = 1, 2 + DO ISGN = -1, 1, 2 +* Reset seed (overwritten by LATMR) + DO I = 1, 4 + ISEED( I ) = 1 + END DO + DO M = 32, MAXM, 71 + KLA = 0 + KUA = M - 1 + CALL DLATMR( M, M, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLA, KUA, ZERO, + $ ONE, 'NO', A, MAXM, IWORK, IINFO ) + DO I = 1, M + A( I, I ) = A( I, I ) * VM( J ) + END DO + ANRM = DLANGE( 'M', M, M, A, MAXM, DUM ) + DO N = 51, MAXN, 47 + KLB = 0 + KUB = N - 1 + CALL DLATMR( N, N, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLB, KUB, ZERO, + $ ONE, 'NO', B, MAXN, IWORK, IINFO ) + BNRM = DLANGE( 'M', N, N, B, MAXN, DUM ) + TNRM = MAX( ANRM, BNRM ) + CALL DLATMR( M, N, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, M, N, ZERO, ONE, + $ 'NO', C, MAXM, IWORK, IINFO ) + DO ITRANA = 1, 2 + IF( ITRANA.EQ.1 ) THEN + TRANA = 'N' + END IF + IF( ITRANA.EQ.2 ) THEN + TRANA = 'T' + END IF + DO ITRANB = 1, 2 + IF( ITRANB.EQ.1 ) THEN + TRANB = 'N' + END IF + IF( ITRANB.EQ.2 ) THEN + TRANB = 'T' + END IF + KNT = KNT + 1 +* + CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM) + CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM) + CALL DTRSYL( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE, IINFO ) + IF( IINFO.NE.0 ) + $ NINFO( 1 ) = NINFO( 1 ) + 1 + XNRM = DLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = ONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = ONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL DGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE*RMUL, + $ CC, MAXM ) + CALL DGEMM( 'N', TRANB, M, N, N, + $ DBLE( ISGN )*RMUL, X, MAXM, B, + $ MAXN, ONE, CC, MAXM ) + RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( RMUL*TNRM )*EPS )*XNRM ) + IF( RES.GT.THRESH ) + $ NFAIL( 1 ) = NFAIL( 1 ) + 1 + IF( RES.GT.RMAX( 1 ) ) + $ RMAX( 1 ) = RES +* + CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM ) + CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM ) + CALL DTRSYL3( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE3, IWORK, LIWORK, + $ SWORK, LDSWORK, INFO) + IF( INFO.NE.0 ) + $ NINFO( 2 ) = NINFO( 2 ) + 1 + XNRM = DLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = ONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = ONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL DGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE3*RMUL, + $ CC, MAXM ) + CALL DGEMM( 'N', TRANB, M, N, N, + $ DBLE( ISGN )*RMUL, X, MAXM, B, + $ MAXN, ONE, CC, MAXM ) + RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( RMUL*TNRM )*EPS )*XNRM ) +* Verify that TRSYL3 only flushes if TRSYL flushes (but +* there may be cases where TRSYL3 avoid flushing). + IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. + $ IINFO.NE.INFO ) THEN + NFAIL( 3 ) = NFAIL( 3 ) + 1 + END IF + IF( RES.GT.THRESH .OR. 
DISNAN( RES ) ) + $ NFAIL( 2 ) = NFAIL( 2 ) + 1 + IF( RES.GT.RMAX( 2 ) ) + $ RMAX( 2 ) = RES + END DO + END DO + END DO + END DO + END DO + END DO +* + RETURN +* +* End of DSYL01 +* + END diff --git a/lapack-netlib/TESTING/EIG/schkec.f b/lapack-netlib/TESTING/EIG/schkec.f index e6123e1ad..59abb2466 100644 --- a/lapack-netlib/TESTING/EIG/schkec.f +++ b/lapack-netlib/TESTING/EIG/schkec.f @@ -90,21 +90,23 @@ LOGICAL OK CHARACTER*3 PATH INTEGER KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC, - $ KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2, - $ LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR, - $ NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC + $ KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC, + $ LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL, + $ NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC, + $ LTGEXC REAL EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2, - $ RTREXC, RTRSYL, SFMIN, RTGEXC + $ RTREXC, SFMIN, RTGEXC * .. * .. Local Arrays .. - INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ), - $ NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ), + INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ), + $ LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ), + $ NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ), $ NTRSNA( 3 ) - REAL RTRSEN( 3 ), RTRSNA( 3 ) + REAL RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 ) * .. * .. External Subroutines .. EXTERNAL SERREC, SGET31, SGET32, SGET33, SGET34, SGET35, - $ SGET36, SGET37, SGET38, SGET39, SGET40 + $ SGET36, SGET37, SGET38, SGET39, SGET40, SSYL01 * .. * .. External Functions .. REAL SLAMCH @@ -153,10 +155,24 @@ WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC END IF * - CALL SGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL ) - IF( RTRSYL.GT.THRESH ) THEN + CALL SGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL ) + IF( RTRSYL( 1 ).GT.THRESH ) THEN OK = .FALSE. - WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL + WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL + END IF +* + CALL SSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 ) + IF( FTRSYL( 1 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH + END IF + IF( FTRSYL( 2 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH + END IF + IF( FTRSYL( 3 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9972 )FTRSYL( 3 ) END IF * CALL SGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN ) @@ -227,7 +243,13 @@ 9987 FORMAT( ' Routines pass computational tests if test ratio is les', $ 's than', F8.2, / / ) 9986 FORMAT( ' Error in STGEXC: RMAX =', E12.3, / ' LMAX = ', I8, ' N', - $ 'INFO=', I8, ' KNT=', I8 ) + $ 'INFO=', 2I8, ' KNT=', I8 ) + 9972 FORMAT( 'STRSYL and STRSYL3 compute an inconsistent result ', + $ 'factor in ', I8, ' tests.') + 9971 FORMAT( 'Error in STRSYL3: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) + 9970 FORMAT( 'Error in STRSYL: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) * * End of SCHKEC * diff --git a/lapack-netlib/TESTING/EIG/serrec.f b/lapack-netlib/TESTING/EIG/serrec.f index 249f0e642..9a7ceb362 100644 --- a/lapack-netlib/TESTING/EIG/serrec.f +++ b/lapack-netlib/TESTING/EIG/serrec.f @@ -23,7 +23,7 @@ *> *> SERREC tests the error exits for the routines for eigen- condition *> estimation for REAL matrices: -*> STRSYL, STREXC, STRSNA and STRSEN. +*> STRSYL, STRSYL3, STREXC, STRSNA and STRSEN. *> \endverbatim * * Arguments: @@ -82,7 +82,7 @@ $ WI( NMAX ), WORK( NMAX ), WR( NMAX ) * .. * .. External Subroutines .. 
- EXTERNAL CHKXER, STREXC, STRSEN, STRSNA, STRSYL + EXTERNAL CHKXER, STREXC, STRSEN, STRSNA, STRSYL, STRSYL3 * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -141,6 +141,43 @@ CALL CHKXER( 'STRSYL', INFOT, NOUT, LERR, OK ) NT = NT + 8 * +* Test STRSYL3 +* + SRNAMT = 'STRSYL3' + INFOT = 1 + CALL STRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL STRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL STRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL STRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL STRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL STRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE, + $ IWORK, NMAX, WORK, NMAX, INFO ) + CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * Test STREXC * SRNAMT = 'STREXC' diff --git a/lapack-netlib/TESTING/EIG/ssyl01.f b/lapack-netlib/TESTING/EIG/ssyl01.f new file mode 100644 index 000000000..22d089dc8 --- /dev/null +++ b/lapack-netlib/TESTING/EIG/ssyl01.f @@ -0,0 +1,288 @@ +*> \brief \b SSYL01 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) +* +* .. Scalar Arguments .. +* INTEGER KNT +* REAL THRESH +* .. +* .. Array Arguments .. +* INTEGER NFAIL( 3 ), NINFO( 2 ) +* REAL RMAX( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYL01 tests STRSYL and STRSYL3, routines for solving the Sylvester matrix +*> equation +*> +*> op(A)*X + ISGN*X*op(B) = scale*C, +*> +*> A and B are assumed to be in Schur canonical form, op() represents an +*> optional transpose, and ISGN can be -1 or +1. Scale is an output +*> less than or equal to 1, chosen to avoid overflow in X. +*> +*> The test code verifies that the following residual does not exceed +*> the provided threshold: +*> +*> norm(op(A)*X + ISGN*X*op(B) - scale*C) / +*> (EPS*max(norm(A),norm(B))*norm(X)) +*> +*> This routine complements SGET35 by testing with larger, +*> random matrices, of which some require rescaling of X to avoid overflow. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the residual, computed as +*> described above, exceeds THRESH. +*> \endverbatim +*> +*> \param[out] NFAIL +*> \verbatim +*> NFAIL is INTEGER array, dimension (3) +*> NFAIL(1) = No. of times residual STRSYL exceeds threshold THRESH +*> NFAIL(2) = No. of times residual STRSYL3 exceeds threshold THRESH +*> NFAIL(3) = No. 
of times STRSYL3 and STRSYL deviate +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> RMAX is REAL, dimension (2) +*> RMAX(1) = Value of the largest test ratio of STRSYL +*> RMAX(2) = Value of the largest test ratio of STRSYL3 +*> \endverbatim +*> +*> \param[out] NINFO +*> \verbatim +*> NINFO is INTEGER array, dimension (2) +*> NINFO(1) = No. of times STRSYL returns an expected INFO +*> NINFO(2) = No. of times STRSYL3 returns an expected INFO +*> \endverbatim +*> +*> \param[out] KNT +*> \verbatim +*> KNT is INTEGER +*> Total number of examples tested. +*> \endverbatim + +* +* -- LAPACK test routine -- + SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER KNT + REAL THRESH +* .. +* .. Array Arguments .. + INTEGER NFAIL( 3 ), NINFO( 2 ) + REAL RMAX( 2 ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER MAXM, MAXN, LDSWORK + PARAMETER ( MAXM = 101, MAXN = 138, LDSWORK = 18 ) +* .. +* .. Local Scalars .. + CHARACTER TRANA, TRANB + INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA, + $ KUA, KLB, KUB, LIWORK, M, N + REAL ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL, + $ SCALE, SCALE3, SMLNUM, TNRM, XNRM +* .. +* .. Local Arrays .. + REAL A( MAXM, MAXM ), B( MAXN, MAXN ), + $ C( MAXM, MAXN ), CC( MAXM, MAXN ), + $ X( MAXM, MAXN ), + $ DUML( MAXM ), DUMR( MAXN ), + $ D( MAX( MAXM, MAXN ) ), DUM( MAXN ), + $ SWORK( LDSWORK, 54 ), VM( 2 ) + INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 ) +* .. +* .. External Functions .. + LOGICAL SISNAN + REAL SLAMCH, SLANGE + EXTERNAL SISNAN, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLATMR, SLACPY, SGEMM, STRSYL, STRSYL3 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX +* .. +* .. Executable Statements .. 
+* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + VM( 1 ) = ONE + VM( 2 ) = 0.05E+0 +* +* Begin test loop +* + NINFO( 1 ) = 0 + NINFO( 2 ) = 0 + NFAIL( 1 ) = 0 + NFAIL( 2 ) = 0 + NFAIL( 3 ) = 0 + RMAX( 1 ) = ZERO + RMAX( 2 ) = ZERO + KNT = 0 + DO I = 1, 4 + ISEED( I ) = 1 + END DO + SCALE = ONE + SCALE3 = ONE + LIWORK = MAXM + MAXN + 2 + DO J = 1, 2 + DO ISGN = -1, 1, 2 +* Reset seed (overwritten by LATMR) + DO I = 1, 4 + ISEED( I ) = 1 + END DO + DO M = 32, MAXM, 71 + KLA = 0 + KUA = M - 1 + CALL SLATMR( M, M, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLA, KUA, ZERO, + $ ONE, 'NO', A, MAXM, IWORK, IINFO ) + DO I = 1, M + A( I, I ) = A( I, I ) * VM( J ) + END DO + ANRM = SLANGE( 'M', M, M, A, MAXM, DUM ) + DO N = 51, MAXN, 47 + KLB = 0 + KUB = N - 1 + CALL SLATMR( N, N, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLB, KUB, ZERO, + $ ONE, 'NO', B, MAXN, IWORK, IINFO ) + BNRM = SLANGE( 'M', N, N, B, MAXN, DUM ) + TNRM = MAX( ANRM, BNRM ) + CALL SLATMR( M, N, 'S', ISEED, 'N', D, + $ 6, ONE, ONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, M, N, ZERO, ONE, + $ 'NO', C, MAXM, IWORK, IINFO ) + DO ITRANA = 1, 2 + IF( ITRANA.EQ.1 ) THEN + TRANA = 'N' + END IF + IF( ITRANA.EQ.2 ) THEN + TRANA = 'T' + END IF + DO ITRANB = 1, 2 + IF( ITRANB.EQ.1 ) THEN + TRANB = 'N' + END IF + IF( ITRANB.EQ.2 ) THEN + TRANB = 'T' + END IF + KNT = KNT + 1 +* + CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM) + CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM) + CALL STRSYL( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE, IINFO ) + IF( IINFO.NE.0 ) + $ NINFO( 1 ) = NINFO( 1 ) + 1 + XNRM = SLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = ONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = ONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL SGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE*RMUL, + $ C, MAXM ) + CALL SGEMM( 'N', TRANB, M, N, N, + $ REAL( ISGN )*RMUL, X, MAXM, B, + $ MAXN, ONE, C, MAXM ) + RES1 = SLANGE( 'M', M, N, C, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( RMUL*TNRM )*EPS )*XNRM ) + IF( RES.GT.THRESH ) + $ NFAIL( 1 ) = NFAIL( 1 ) + 1 + IF( RES.GT.RMAX( 1 ) ) + $ RMAX( 1 ) = RES +* + CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM ) + CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM ) + CALL STRSYL3( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE3, IWORK, LIWORK, + $ SWORK, LDSWORK, INFO) + IF( INFO.NE.0 ) + $ NINFO( 2 ) = NINFO( 2 ) + 1 + XNRM = SLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = ONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = ONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL SGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE3*RMUL, + $ CC, MAXM ) + CALL SGEMM( 'N', TRANB, M, N, N, + $ REAL( ISGN )*RMUL, X, MAXM, B, + $ MAXN, ONE, CC, MAXM ) + RES1 = SLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( RMUL*TNRM )*EPS )*XNRM ) +* Verify that TRSYL3 only flushes if TRSYL flushes (but +* there may be cases where TRSYL3 avoid flushing). + IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. + $ IINFO.NE.INFO ) THEN + NFAIL( 3 ) = NFAIL( 3 ) + 1 + END IF + IF( RES.GT.THRESH .OR. 
SISNAN( RES ) ) + $ NFAIL( 2 ) = NFAIL( 2 ) + 1 + IF( RES.GT.RMAX( 2 ) ) + $ RMAX( 2 ) = RES + END DO + END DO + END DO + END DO + END DO + END DO +* + RETURN +* +* End of SSYL01 +* + END diff --git a/lapack-netlib/TESTING/EIG/zchkec.f b/lapack-netlib/TESTING/EIG/zchkec.f index 1e1c29e0d..62a76d357 100644 --- a/lapack-netlib/TESTING/EIG/zchkec.f +++ b/lapack-netlib/TESTING/EIG/zchkec.f @@ -88,17 +88,17 @@ * .. Local Scalars .. LOGICAL OK CHARACTER*3 PATH - INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL, - $ NTESTS, NTREXC, NTRSYL - DOUBLE PRECISION EPS, RTREXC, RTRSYL, SFMIN + INTEGER KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3, + $ LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL + DOUBLE PRECISION EPS, RTREXC, SFMIN * .. * .. Local Arrays .. - INTEGER LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ), - $ NTRSNA( 3 ) - DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ) + INTEGER FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ), + $ LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 ) + DOUBLE PRECISION RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 ) * .. * .. External Subroutines .. - EXTERNAL ZERREC, ZGET35, ZGET36, ZGET37, ZGET38 + EXTERNAL ZERREC, ZGET35, ZGET36, ZGET37, ZGET38, ZSYL01 * .. * .. External Functions .. DOUBLE PRECISION DLAMCH @@ -120,10 +120,24 @@ $ CALL ZERREC( PATH, NOUT ) * OK = .TRUE. - CALL ZGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN ) - IF( RTRSYL.GT.THRESH ) THEN + CALL ZGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN ) + IF( RTRSYL( 1 ).GT.THRESH ) THEN OK = .FALSE. - WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL + WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL + END IF +* + CALL ZSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 ) + IF( FTRSYL( 1 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH + END IF + IF( FTRSYL( 2 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH + END IF + IF( FTRSYL( 3 ).GT.0 ) THEN + OK = .FALSE. + WRITE( NOUT, FMT = 9972 )FTRSYL( 3 ) END IF * CALL ZGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN ) @@ -148,7 +162,7 @@ WRITE( NOUT, FMT = 9996 )RTRSEN, LTRSEN, NTRSEN, KTRSEN END IF * - NTESTS = KTRSYL + KTREXC + KTRSNA + KTRSEN + NTESTS = KTRSYL + KTRSYL3 + KTREXC + KTRSNA + KTRSEN IF( OK ) $ WRITE( NOUT, FMT = 9995 )PATH, NTESTS * @@ -169,6 +183,12 @@ $ / ' Safe minimum (SFMIN) = ', D16.6, / ) 9992 FORMAT( ' Routines pass computational tests if test ratio is ', $ 'less than', F8.2, / / ) + 9970 FORMAT( 'Error in ZTRSYL: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) + 9971 FORMAT( 'Error in ZTRSYL3: ', I8, ' tests fail the threshold.', / + $ 'Maximum test ratio =', D12.3, ' threshold =', D12.3 ) + 9972 FORMAT( 'ZTRSYL and ZTRSYL3 compute an inconsistent scale ', + $ 'factor in ', I8, ' tests.') RETURN * * End of ZCHKEC diff --git a/lapack-netlib/TESTING/EIG/zerrec.f b/lapack-netlib/TESTING/EIG/zerrec.f index dc6129da9..e1938f57d 100644 --- a/lapack-netlib/TESTING/EIG/zerrec.f +++ b/lapack-netlib/TESTING/EIG/zerrec.f @@ -23,7 +23,7 @@ *> *> ZERREC tests the error exits for the routines for eigen- condition *> estimation for DOUBLE PRECISION matrices: -*> ZTRSYL, ZTREXC, ZTRSNA and ZTRSEN. +*> ZTRSYL, ZTRSYL3, ZTREXC, ZTRSNA and ZTRSEN. *> \endverbatim * * Arguments: @@ -77,7 +77,7 @@ * .. * .. Local Arrays .. LOGICAL SEL( NMAX ) - DOUBLE PRECISION RW( LW ), S( NMAX ), SEP( NMAX ) + DOUBLE PRECISION RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX ) COMPLEX*16 A( NMAX, NMAX ), B( NMAX, NMAX ), $ C( NMAX, NMAX ), WORK( LW ), X( NMAX ) * .. 
@@ -141,6 +141,43 @@ CALL CHKXER( 'ZTRSYL', INFOT, NOUT, LERR, OK ) NT = NT + 8 * +* Test ZTRSYL3 +* + SRNAMT = 'ZTRSYL3' + INFOT = 1 + CALL ZTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL ZTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE, + $ SWORK, NMAX, INFO ) + CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * Test ZTREXC * SRNAMT = 'ZTREXC' diff --git a/lapack-netlib/TESTING/EIG/zsyl01.f b/lapack-netlib/TESTING/EIG/zsyl01.f new file mode 100644 index 000000000..1e8619a34 --- /dev/null +++ b/lapack-netlib/TESTING/EIG/zsyl01.f @@ -0,0 +1,294 @@ +*> \brief \b ZSYL01 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) +* +* .. Scalar Arguments .. +* INTEGER KNT +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* INTEGER NFAIL( 3 ), NINFO( 2 ) +* DOUBLE PRECISION RMAX( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYL01 tests ZTRSYL and ZTRSYL3, routines for solving the Sylvester matrix +*> equation +*> +*> op(A)*X + ISGN*X*op(B) = scale*C, +*> +*> where op(A) and op(B) are both upper triangular form, op() represents an +*> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output +*> less than or equal to 1, chosen to avoid overflow in X. +*> +*> The test code verifies that the following residual does not exceed +*> the provided threshold: +*> +*> norm(op(A)*X + ISGN*X*op(B) - scale*C) / +*> (EPS*max(norm(A),norm(B))*norm(X)) +*> +*> This routine complements ZGET35 by testing with larger, +*> random matrices, of which some require rescaling of X to avoid overflow. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the residual, computed as +*> described above, exceeds THRESH. +*> \endverbatim +*> +*> \param[out] NFAIL +*> \verbatim +*> NFAIL is INTEGER array, dimension (3) +*> NFAIL(1) = No. of times residual ZTRSYL exceeds threshold THRESH +*> NFAIL(2) = No. of times residual ZTRSYL3 exceeds threshold THRESH +*> NFAIL(3) = No. 
of times ZTRSYL3 and ZTRSYL deviate +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> RMAX is DOUBLE PRECISION array, dimension (2) +*> RMAX(1) = Value of the largest test ratio of ZTRSYL +*> RMAX(2) = Value of the largest test ratio of ZTRSYL3 +*> \endverbatim +*> +*> \param[out] NINFO +*> \verbatim +*> NINFO is INTEGER array, dimension (2) +*> NINFO(1) = No. of times ZTRSYL returns an expected INFO +*> NINFO(2) = No. of times ZTRSYL3 returns an expected INFO +*> \endverbatim +*> +*> \param[out] KNT +*> \verbatim +*> KNT is INTEGER +*> Total number of examples tested. +*> \endverbatim + +* +* -- LAPACK test routine -- + SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT ) + IMPLICIT NONE +* +* -- LAPACK test routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER KNT + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + INTEGER NFAIL( 3 ), NINFO( 2 ) + DOUBLE PRECISION RMAX( 2 ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D+0 ) ) + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER MAXM, MAXN, LDSWORK + PARAMETER ( MAXM = 185, MAXN = 192, LDSWORK = 36 ) +* .. +* .. Local Scalars .. + CHARACTER TRANA, TRANB + INTEGER I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA, + $ KUA, KLB, KUB, M, N + DOUBLE PRECISION ANRM, BNRM, BIGNUM, EPS, RES, RES1, + $ SCALE, SCALE3, SMLNUM, TNRM, XNRM + COMPLEX*16 RMUL +* .. +* .. Local Arrays .. + COMPLEX*16 A( MAXM, MAXM ), B( MAXN, MAXN ), + $ C( MAXM, MAXN ), CC( MAXM, MAXN ), + $ X( MAXM, MAXN ), + $ DUML( MAXM ), DUMR( MAXN ), + $ D( MIN( MAXM, MAXN ) ) + DOUBLE PRECISION SWORK( LDSWORK, 103 ), DUM( MAXN ), VM( 2 ) + INTEGER ISEED( 4 ), IWORK( MAXM + MAXN + 2 ) +* .. +* .. External Functions .. + LOGICAL DISNAN + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL DISNAN, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL ZLATMR, ZLACPY, ZGEMM, ZTRSYL, ZTRSYL3 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. 
+* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* +* Expect INFO = 0 + VM( 1 ) = ONE +* Expect INFO = 1 + VM( 2 ) = 0.05D+0 +* +* Begin test loop +* + NINFO( 1 ) = 0 + NINFO( 2 ) = 0 + NFAIL( 1 ) = 0 + NFAIL( 2 ) = 0 + NFAIL( 3 ) = 0 + RMAX( 1 ) = ZERO + RMAX( 2 ) = ZERO + KNT = 0 + ISEED( 1 ) = 1 + ISEED( 2 ) = 1 + ISEED( 3 ) = 1 + ISEED( 4 ) = 1 + SCALE = ONE + SCALE3 = ONE + DO J = 1, 2 + DO ISGN = -1, 1, 2 +* Reset seed (overwritten by LATMR) + ISEED( 1 ) = 1 + ISEED( 2 ) = 1 + ISEED( 3 ) = 1 + ISEED( 4 ) = 1 + DO M = 32, MAXM, 51 + KLA = 0 + KUA = M - 1 + CALL ZLATMR( M, M, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLA, KUA, ZERO, + $ ONE, 'NO', A, MAXM, IWORK, + $ IINFO ) + DO I = 1, M + A( I, I ) = A( I, I ) * VM( J ) + END DO + ANRM = ZLANGE( 'M', M, M, A, MAXM, DUM ) + DO N = 51, MAXN, 47 + KLB = 0 + KUB = N - 1 + CALL ZLATMR( N, N, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, KLB, KUB, ZERO, + $ ONE, 'NO', B, MAXN, IWORK, + $ IINFO ) + DO I = 1, N + B( I, I ) = B( I, I ) * VM ( J ) + END DO + BNRM = ZLANGE( 'M', N, N, B, MAXN, DUM ) + TNRM = MAX( ANRM, BNRM ) + CALL ZLATMR( M, N, 'S', ISEED, 'N', D, + $ 6, ONE, CONE, 'T', 'N', + $ DUML, 1, ONE, DUMR, 1, ONE, + $ 'N', IWORK, M, N, ZERO, ONE, + $ 'NO', C, MAXM, IWORK, IINFO ) + DO ITRANA = 1, 2 + IF( ITRANA.EQ.1 ) + $ TRANA = 'N' + IF( ITRANA.EQ.2 ) + $ TRANA = 'C' + DO ITRANB = 1, 2 + IF( ITRANB.EQ.1 ) + $ TRANB = 'N' + IF( ITRANB.EQ.2 ) + $ TRANB = 'C' + KNT = KNT + 1 +* + CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM) + CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM) + CALL ZTRSYL( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE, IINFO ) + IF( IINFO.NE.0 ) + $ NINFO( 1 ) = NINFO( 1 ) + 1 + XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = CONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = CONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL ZGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE*RMUL, + $ CC, MAXM ) + CALL ZGEMM( 'N', TRANB, M, N, N, + $ DBLE( ISGN )*RMUL, X, MAXM, B, + $ MAXN, CONE, CC, MAXM ) + RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM ) + IF( RES.GT.THRESH ) + $ NFAIL( 1 ) = NFAIL( 1 ) + 1 + IF( RES.GT.RMAX( 1 ) ) + $ RMAX( 1 ) = RES +* + CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM ) + CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM ) + CALL ZTRSYL3( TRANA, TRANB, ISGN, M, N, + $ A, MAXM, B, MAXN, X, MAXM, + $ SCALE3, SWORK, LDSWORK, INFO) + IF( INFO.NE.0 ) + $ NINFO( 2 ) = NINFO( 2 ) + 1 + XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM ) + RMUL = CONE + IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN + IF( XNRM.GT.BIGNUM / TNRM ) THEN + RMUL = CONE / MAX( XNRM, TNRM ) + END IF + END IF + CALL ZGEMM( TRANA, 'N', M, N, M, RMUL, + $ A, MAXM, X, MAXM, -SCALE3*RMUL, + $ CC, MAXM ) + CALL ZGEMM( 'N', TRANB, M, N, N, + $ DBLE( ISGN )*RMUL, X, MAXM, B, + $ MAXN, CONE, CC, MAXM ) + RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM ) + RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM, + $ ( ( ABS( RMUL )*TNRM )*EPS )*XNRM ) +* Verify that TRSYL3 only flushes if TRSYL flushes (but +* there may be cases where TRSYL3 avoid flushing). + IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. + $ IINFO.NE.INFO ) THEN + NFAIL( 3 ) = NFAIL( 3 ) + 1 + END IF + IF( RES.GT.THRESH .OR. 
DISNAN( RES ) ) + $ NFAIL( 2 ) = NFAIL( 2 ) + 1 + IF( RES.GT.RMAX( 2 ) ) + $ RMAX( 2 ) = RES + END DO + END DO + END DO + END DO + END DO + END DO +* + RETURN +* +* End of ZSYL01 +* + END diff --git a/lapack-netlib/TESTING/LIN/cchktr.f b/lapack-netlib/TESTING/LIN/cchktr.f index ce1ecf761..c55b07643 100644 --- a/lapack-netlib/TESTING/LIN/cchktr.f +++ b/lapack-netlib/TESTING/LIN/cchktr.f @@ -31,7 +31,7 @@ *> *> \verbatim *> -*> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS +*> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS(3) *> \endverbatim * * Arguments: @@ -184,7 +184,7 @@ INTEGER NTYPE1, NTYPES PARAMETER ( NTYPE1 = 10, NTYPES = 18 ) INTEGER NTESTS - PARAMETER ( NTESTS = 9 ) + PARAMETER ( NTESTS = 10 ) INTEGER NTRAN PARAMETER ( NTRAN = 3 ) REAL ONE, ZERO @@ -195,13 +195,13 @@ CHARACTER*3 PATH INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN, $ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN - REAL AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI, - $ RCONDO, SCALE + REAL AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC, + $ RCONDI, RCONDO, RES, SCALE, SLAMCH * .. * .. Local Arrays .. CHARACTER TRANSS( NTRAN ), UPLOS( 2 ) INTEGER ISEED( 4 ), ISEEDY( 4 ) - REAL RESULT( NTESTS ) + REAL RESULT( NTESTS ), SCALE3( 2 ) * .. * .. External Functions .. LOGICAL LSAME @@ -210,9 +210,9 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASUM, CCOPY, CERRTR, CGET04, - $ CLACPY, CLARHS, CLATRS, CLATTR, CTRCON, CTRRFS, - $ CTRT01, CTRT02, CTRT03, CTRT05, CTRT06, CTRTRI, - $ CTRTRS, XLAENV + $ CLACPY, CLARHS, CLATRS, CLATRS3, CLATTR, + $ CSSCAL, CTRCON, CTRRFS, CTRT01, CTRT02, CTRT03, + $ CTRT05, CTRT06, CTRTRI, CTRTRS, XLAENV, SLAMCH * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -236,6 +236,7 @@ * PATH( 1: 1 ) = 'Complex precision' PATH( 2: 3 ) = 'TR' + BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision') NRUN = 0 NFAIL = 0 NERRS = 0 @@ -380,7 +381,7 @@ * This line is needed on a Sun SPARCstation. * IF( N.GT.0 ) - $ DUMMY = A( 1 ) + $ DUMMY = REAL( A( 1 ) ) * CALL CTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA, $ X, LDA, B, LDA, WORK, RWORK, @@ -535,6 +536,32 @@ $ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK, $ RESULT( 9 ) ) * +*+ TEST 10 +* Solve op(A)*X = B. +* + SRNAMT = 'CLATRS3' + CALL CCOPY( N, X, 1, B, 1 ) + CALL CCOPY( N, X, 1, B, 1 ) + CALL CSCAL( N, BIGNUM, B( N+1 ), 1 ) + CALL CLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, + $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, + $ INFO ) +* +* Check error code from CLATRS3. +* + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'CLATRS3', INFO, 0, + $ UPLO // TRANS // DIAG // 'Y', N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, + $ X, LDA, WORK, RESULT( 10 ) ) + CALL CSSCAL( N, BIGNUM, X, 1 ) + CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, + $ X, LDA, WORK, RESULT( 10 ) ) + RESULT( 10 ) = MAX( RESULT( 10 ), RES ) +* * Print information about the tests that did not pass * the threshold. * @@ -552,7 +579,14 @@ $ DIAG, 'Y', N, IMAT, 9, RESULT( 9 ) NFAIL = NFAIL + 1 END IF - NRUN = NRUN + 2 + IF( RESULT( 10 ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. 
NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9996 )'CLATRS3', UPLO, TRANS, + $ DIAG, 'N', N, IMAT, 10, RESULT( 10 ) + NFAIL = NFAIL + 1 + END IF + NRUN = NRUN + 3 90 CONTINUE 100 CONTINUE 110 CONTINUE diff --git a/lapack-netlib/TESTING/LIN/cerrtr.f b/lapack-netlib/TESTING/LIN/cerrtr.f index db65edd88..9ba784f62 100644 --- a/lapack-netlib/TESTING/LIN/cerrtr.f +++ b/lapack-netlib/TESTING/LIN/cerrtr.f @@ -82,9 +82,10 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, CLATBS, CLATPS, CLATRS, CTBCON, - $ CTBRFS, CTBTRS, CTPCON, CTPRFS, CTPTRI, CTPTRS, - $ CTRCON, CTRRFS, CTRTI2, CTRTRI, CTRTRS + EXTERNAL ALAESM, CHKXER, CLATBS, CLATPS, CLATRS, + $ CLATRS3, CTBCON, CTBRFS, CTBTRS, CTPCON, + $ CTPRFS, CTPTRI, CTPTRS, CTRCON, CTRRFS, CTRTI2, + $ CTRTRI, CTRTRS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -240,6 +241,46 @@ CALL CLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO ) CALL CHKXER( 'CLATRS', INFOT, NOUT, LERR, OK ) * +* CLATRS3 +* + SRNAMT = 'CLATRS3' + INFOT = 1 + CALL CLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 14 + CALL CLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 0, INFO ) + CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK ) +* * Test error exits for the packed triangular routines. * ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN diff --git a/lapack-netlib/TESTING/LIN/dchktr.f b/lapack-netlib/TESTING/LIN/dchktr.f index a4a1150c0..57e87326b 100644 --- a/lapack-netlib/TESTING/LIN/dchktr.f +++ b/lapack-netlib/TESTING/LIN/dchktr.f @@ -30,7 +30,7 @@ *> *> \verbatim *> -*> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS +*> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS(3) *> \endverbatim * * Arguments: @@ -187,7 +187,7 @@ INTEGER NTYPE1, NTYPES PARAMETER ( NTYPE1 = 10, NTYPES = 18 ) INTEGER NTESTS - PARAMETER ( NTESTS = 9 ) + PARAMETER ( NTESTS = 10 ) INTEGER NTRAN PARAMETER ( NTRAN = 3 ) DOUBLE PRECISION ONE, ZERO @@ -198,13 +198,13 @@ CHARACTER*3 PATH INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN, $ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN - DOUBLE PRECISION AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI, - $ RCONDO, SCALE + DOUBLE PRECISION AINVNM, ANORM, BIGNUM, DLAMCH, DUMMY, RCOND, + $ RCONDC, RCONDI, RCONDO, RES, SCALE * .. * .. Local Arrays .. 
CHARACTER TRANSS( NTRAN ), UPLOS( 2 ) INTEGER ISEED( 4 ), ISEEDY( 4 ) - DOUBLE PRECISION RESULT( NTESTS ) + DOUBLE PRECISION RESULT( NTESTS ), SCALE3( 2 ) * .. * .. External Functions .. LOGICAL LSAME @@ -213,9 +213,9 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASUM, DCOPY, DERRTR, DGET04, - $ DLACPY, DLARHS, DLATRS, DLATTR, DTRCON, DTRRFS, - $ DTRT01, DTRT02, DTRT03, DTRT05, DTRT06, DTRTRI, - $ DTRTRS, XLAENV + $ DLACPY, DLAMCH, DSCAL, DLARHS, DLATRS, DLATRS3, + $ DLATTR, DTRCON, DTRRFS, DTRT01, DTRT02, DTRT03, + $ DTRT05, DTRT06, DTRTRI, DTRTRS, XLAENV * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -239,6 +239,7 @@ * PATH( 1: 1 ) = 'Double precision' PATH( 2: 3 ) = 'TR' + BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision') NRUN = 0 NFAIL = 0 NERRS = 0 @@ -539,6 +540,32 @@ $ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK, $ RESULT( 9 ) ) * +*+ TEST 10 +* Solve op(A)*X = B +* + SRNAMT = 'DLATRS3' + CALL DCOPY( N, X, 1, B, 1 ) + CALL DCOPY( N, X, 1, B( N+1 ), 1 ) + CALL DSCAL( N, BIGNUM, B( N+1 ), 1 ) + CALL DLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, + $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, + $ INFO ) +* +* Check error code from DLATRS3. +* + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'DLATRS3', INFO, 0, + $ UPLO // TRANS // DIAG // 'N', N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, + $ X, LDA, WORK, RESULT( 10 ) ) + CALL DSCAL( N, BIGNUM, X, 1 ) + CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, + $ X, LDA, WORK, RES ) + RESULT( 10 ) = MAX( RESULT( 10 ), RES ) +* * Print information about the tests that did not pass * the threshold. * @@ -556,7 +583,14 @@ $ DIAG, 'Y', N, IMAT, 9, RESULT( 9 ) NFAIL = NFAIL + 1 END IF - NRUN = NRUN + 2 + IF( RESULT( 10 ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9996 )'DLATRS3', UPLO, TRANS, + $ DIAG, 'N', N, IMAT, 10, RESULT( 10 ) + NFAIL = NFAIL + 1 + END IF + NRUN = NRUN + 3 90 CONTINUE 100 CONTINUE 110 CONTINUE @@ -569,8 +603,8 @@ 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=', $ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 ) 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1, - $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', - $ test(', I2, ')= ', G12.5 ) + $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(', + $ I2, ')= ', G12.5 ) 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',', $ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 ) 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''', diff --git a/lapack-netlib/TESTING/LIN/derrtr.f b/lapack-netlib/TESTING/LIN/derrtr.f index a667f0d2b..d0580497d 100644 --- a/lapack-netlib/TESTING/LIN/derrtr.f +++ b/lapack-netlib/TESTING/LIN/derrtr.f @@ -83,9 +83,10 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, DLATBS, DLATPS, DLATRS, DTBCON, - $ DTBRFS, DTBTRS, DTPCON, DTPRFS, DTPTRI, DTPTRS, - $ DTRCON, DTRRFS, DTRTI2, DTRTRI, DTRTRS + EXTERNAL ALAESM, CHKXER, DLATBS, DLATPS, DLATRS, + $ DLATRS3, DTBCON, DTBRFS, DTBTRS, DTPCON, + $ DTPRFS, DTPTRI, DTPTRS, DTRCON, DTRRFS, + $ DTRTI2, DTRTRI, DTRTRS * .. * .. Scalars in Common .. 
LOGICAL LERR, OK @@ -244,6 +245,46 @@ INFOT = 7 CALL DLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO ) CALL CHKXER( 'DLATRS', INFOT, NOUT, LERR, OK ) +* +* DLATRS3 +* + SRNAMT = 'DLATRS3' + INFOT = 1 + CALL DLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 14 + CALL DLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 0, INFO ) + CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/schktr.f b/lapack-netlib/TESTING/LIN/schktr.f index 66fa0bee7..5aeb1ce88 100644 --- a/lapack-netlib/TESTING/LIN/schktr.f +++ b/lapack-netlib/TESTING/LIN/schktr.f @@ -30,7 +30,7 @@ *> *> \verbatim *> -*> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS +*> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS(3) *> \endverbatim * * Arguments: @@ -187,7 +187,7 @@ INTEGER NTYPE1, NTYPES PARAMETER ( NTYPE1 = 10, NTYPES = 18 ) INTEGER NTESTS - PARAMETER ( NTESTS = 9 ) + PARAMETER ( NTESTS = 10 ) INTEGER NTRAN PARAMETER ( NTRAN = 3 ) REAL ONE, ZERO @@ -198,13 +198,13 @@ CHARACTER*3 PATH INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN, $ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN - REAL AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI, - $ RCONDO, SCALE + REAL AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC, + $ RCONDI, RCONDO, RES, SCALE, SLAMCH * .. * .. Local Arrays .. CHARACTER TRANSS( NTRAN ), UPLOS( 2 ) INTEGER ISEED( 4 ), ISEEDY( 4 ) - REAL RESULT( NTESTS ) + REAL RESULT( NTESTS ), SCALE3( 2 ) * .. * .. External Functions .. LOGICAL LSAME @@ -213,9 +213,9 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASUM, SCOPY, SERRTR, SGET04, - $ SLACPY, SLARHS, SLATRS, SLATTR, STRCON, STRRFS, - $ STRT01, STRT02, STRT03, STRT05, STRT06, STRTRI, - $ STRTRS, XLAENV + $ SLACPY, SLARHS, SLATRS, SLATRS3, SLATTR, SSCAL, + $ STRCON, STRRFS, STRT01, STRT02, STRT03, STRT05, + $ STRT06, STRTRI, STRTRS, XLAENV, SLAMCH * .. * .. Scalars in Common .. 
LOGICAL LERR, OK @@ -239,6 +239,7 @@ * PATH( 1: 1 ) = 'Single precision' PATH( 2: 3 ) = 'TR' + BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision') NRUN = 0 NFAIL = 0 NERRS = 0 @@ -539,6 +540,33 @@ $ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK, $ RESULT( 9 ) ) * +*+ TEST 10 +* Solve op(A)*X = B +* + SRNAMT = 'SLATRS3' + CALL SCOPY( N, X, 1, B, 1 ) + CALL SCOPY( N, X, 1, B( N+1 ), 1 ) + CALL SSCAL( N, BIGNUM, B( N+1 ), 1 ) + CALL SLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, + $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, + $ INFO ) +* +* Check error code from SLATRS3. +* + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'SLATRS3', INFO, 0, + $ UPLO // TRANS // DIAG // 'Y', N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) +* + CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA, + $ X, LDA, WORK, RESULT( 10 ) ) + CALL SSCAL( N, BIGNUM, X, 1 ) + CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, + $ X, LDA, WORK, RES ) + RESULT( 10 ) = MAX( RESULT( 10 ), RES ) +* * Print information about the tests that did not pass * the threshold. * @@ -556,7 +584,14 @@ $ DIAG, 'Y', N, IMAT, 9, RESULT( 9 ) NFAIL = NFAIL + 1 END IF - NRUN = NRUN + 2 + IF( RESULT( 10 ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9996 )'SLATRS3', UPLO, TRANS, + $ DIAG, 'N', N, IMAT, 10, RESULT( 10 ) + NFAIL = NFAIL + 1 + END IF + NRUN = NRUN + 3 90 CONTINUE 100 CONTINUE 110 CONTINUE @@ -569,8 +604,8 @@ 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=', $ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 ) 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1, - $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', - $ test(', I2, ')= ', G12.5 ) + $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(', + $ I2, ')= ', G12.5 ) 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',', $ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 ) 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''', diff --git a/lapack-netlib/TESTING/LIN/serrtr.f b/lapack-netlib/TESTING/LIN/serrtr.f index f0d0a0ef2..af1ce0a8e 100644 --- a/lapack-netlib/TESTING/LIN/serrtr.f +++ b/lapack-netlib/TESTING/LIN/serrtr.f @@ -83,9 +83,10 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, SLATBS, SLATPS, SLATRS, STBCON, - $ STBRFS, STBTRS, STPCON, STPRFS, STPTRI, STPTRS, - $ STRCON, STRRFS, STRTI2, STRTRI, STRTRS + EXTERNAL ALAESM, CHKXER, SLATBS, SLATPS, SLATRS, + $ SLATRS3, STBCON, STBRFS, STBTRS, STPCON, + $ STPRFS, STPTRI, STPTRS, STRCON, STRRFS, STRTI2, + $ STRTRI, STRTRS * .. * .. Scalars in Common .. 
LOGICAL LERR, OK @@ -244,6 +245,46 @@ INFOT = 7 CALL SLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO ) CALL CHKXER( 'SLATRS', INFOT, NOUT, LERR, OK ) +* +* SLATRS3 +* + SRNAMT = 'SLATRS3' + INFOT = 1 + CALL SLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W, + $ W( 2 ), 1, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 14 + CALL SLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W, + $ W( 2 ), 0, INFO ) + CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK ) * ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN * diff --git a/lapack-netlib/TESTING/LIN/zchktr.f b/lapack-netlib/TESTING/LIN/zchktr.f index 0a6f47b1e..275ca2857 100644 --- a/lapack-netlib/TESTING/LIN/zchktr.f +++ b/lapack-netlib/TESTING/LIN/zchktr.f @@ -31,7 +31,7 @@ *> *> \verbatim *> -*> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS +*> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS(3) *> \endverbatim * * Arguments: @@ -184,7 +184,7 @@ INTEGER NTYPE1, NTYPES PARAMETER ( NTYPE1 = 10, NTYPES = 18 ) INTEGER NTESTS - PARAMETER ( NTESTS = 9 ) + PARAMETER ( NTESTS = 10 ) INTEGER NTRAN PARAMETER ( NTRAN = 3 ) DOUBLE PRECISION ONE, ZERO @@ -195,13 +195,13 @@ CHARACTER*3 PATH INTEGER I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN, $ IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN - DOUBLE PRECISION AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI, - $ RCONDO, SCALE + DOUBLE PRECISION AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC, + $ RCONDI, RCONDO, RES, SCALE, DLAMCH * .. * .. Local Arrays .. CHARACTER TRANSS( NTRAN ), UPLOS( 2 ) INTEGER ISEED( 4 ), ISEEDY( 4 ) - DOUBLE PRECISION RESULT( NTESTS ) + DOUBLE PRECISION RESULT( NTESTS ), SCALE3( 2 ) * .. * .. External Functions .. LOGICAL LSAME @@ -209,10 +209,10 @@ EXTERNAL LSAME, ZLANTR * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, XLAENV, ZCOPY, ZERRTR, - $ ZGET04, ZLACPY, ZLARHS, ZLATRS, ZLATTR, ZTRCON, - $ ZTRRFS, ZTRT01, ZTRT02, ZTRT03, ZTRT05, ZTRT06, - $ ZTRTRI, ZTRTRS + EXTERNAL ALAERH, ALAHD, ALASUM, DLAMCH, XLAENV, ZCOPY, + $ ZDSCAL, ZERRTR, ZGET04, ZLACPY, ZLARHS, ZLATRS, + $ ZLATRS3, ZLATTR, ZTRCON, ZTRRFS, ZTRT01, + $ ZTRT02, ZTRT03, ZTRT05, ZTRT06, ZTRTRI, ZTRTRS * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -236,6 +236,7 @@ * PATH( 1: 1 ) = 'Zomplex precision' PATH( 2: 3 ) = 'TR' + BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision') NRUN = 0 NFAIL = 0 NERRS = 0 @@ -380,7 +381,7 @@ * This line is needed on a Sun SPARCstation. 
* IF( N.GT.0 ) - $ DUMMY = A( 1 ) + $ DUMMY = DBLE( A( 1 ) ) * CALL ZTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA, $ X, LDA, B, LDA, WORK, RWORK, @@ -535,6 +536,32 @@ $ RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK, $ RESULT( 9 ) ) * +*+ TEST 10 +* Solve op(A)*X = B +* + SRNAMT = 'ZLATRS3' + CALL ZCOPY( N, X, 1, B, 1 ) + CALL ZCOPY( N, X, 1, B( N+1 ), 1 ) + CALL ZDSCAL( N, BIGNUM, B( N+1 ), 1 ) + CALL ZLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA, + $ B, MAX(1, N), SCALE3, RWORK, WORK, NMAX, + $ INFO ) +* +* Check error code from ZLATRS3. +* + IF( INFO.NE.0 ) + $ CALL ALAERH( PATH, 'ZLATRS3', INFO, 0, + $ UPLO // TRANS // DIAG // 'N', N, N, + $ -1, -1, -1, IMAT, NFAIL, NERRS, NOUT ) + CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA, + $ X, LDA, WORK, RESULT( 10 ) ) + CALL ZDSCAL( N, BIGNUM, X, 1 ) + CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA, + $ SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA, + $ X, LDA, WORK, RES ) + RESULT( 10 ) = MAX( RESULT( 10 ), RES ) +* * Print information about the tests that did not pass * the threshold. * @@ -552,7 +579,14 @@ $ DIAG, 'Y', N, IMAT, 9, RESULT( 9 ) NFAIL = NFAIL + 1 END IF - NRUN = NRUN + 2 + IF( RESULT( 10 ).GE.THRESH ) THEN + IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 ) + $ CALL ALAHD( NOUT, PATH ) + WRITE( NOUT, FMT = 9996 )'ZLATRS3', UPLO, TRANS, + $ DIAG, 'N', N, IMAT, 10, RESULT( 10 ) + NFAIL = NFAIL + 1 + END IF + NRUN = NRUN + 3 90 CONTINUE 100 CONTINUE 110 CONTINUE @@ -565,8 +599,8 @@ 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=', $ I4, ', type ', I2, ', test(', I2, ')= ', G12.5 ) 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1, - $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', - $ test(', I2, ')= ', G12.5 ) + $ ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(', + $ I2, ')= ', G12.5 ) 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',', $ 11X, ' type ', I2, ', test(', I2, ')=', G12.5 ) 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''', diff --git a/lapack-netlib/TESTING/LIN/zerrtr.f b/lapack-netlib/TESTING/LIN/zerrtr.f index 098040ace..211b92154 100644 --- a/lapack-netlib/TESTING/LIN/zerrtr.f +++ b/lapack-netlib/TESTING/LIN/zerrtr.f @@ -82,9 +82,10 @@ EXTERNAL LSAMEN * .. * .. External Subroutines .. - EXTERNAL ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS, ZTBCON, - $ ZTBRFS, ZTBTRS, ZTPCON, ZTPRFS, ZTPTRI, ZTPTRS, - $ ZTRCON, ZTRRFS, ZTRTI2, ZTRTRI, ZTRTRS + EXTERNAL ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS, + $ ZLATRS3, ZTBCON, ZTBRFS, ZTBTRS, ZTPCON, + $ ZTPRFS, ZTPTRI, ZTPTRS, ZTRCON, ZTRRFS, ZTRTI2, + $ ZTRTRI, ZTRTRS * .. * .. Scalars in Common .. 
LOGICAL LERR, OK @@ -240,6 +241,46 @@ CALL ZLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO ) CALL CHKXER( 'ZLATRS', INFOT, NOUT, LERR, OK ) * +* ZLATRS3 +* + SRNAMT = 'ZLATRS3' + INFOT = 1 + CALL ZLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW, + $ RW( 2 ), 1, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) + INFOT = 14 + CALL ZLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW, + $ RW( 2 ), 0, INFO ) + CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK ) +* * Test error exits for the packed triangular routines. * ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
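
The two stand-alone programs below are not part of the patch; they are minimal sketches of how the routines exercised by the new test drivers might be called directly. They assume a LAPACK build that exports DTRSYL3 and DLATRS3 (the routines added above) and a BLAS providing DGEMM and DLANGE. Program names, matrix data, and workspace sizes are illustrative only; in particular the IWORK/SWORK/WORK dimensions are simply over-allocated rather than derived from the documented minimum-size formulas. The first sketch mirrors the check performed by SSYL01/ZSYL01: it solves A*X + X*B = SCALE*C for upper triangular A and B and prints the max-norm of A*X + X*B - SCALE*C; the test drivers additionally divide that residual by EPS*max(norm(A),norm(B))*norm(X) before comparing it with THRESH.

*     Sketch 1 (assumed example, not part of the patch): solve
*     A*X + X*B = SCALE*C with DTRSYL3 and form the raw residual.
      PROGRAM TRSYL3D
      IMPLICIT NONE
      INTEGER            M, N, LDSWORK, LIWORK
      PARAMETER          ( M = 2, N = 2, LDSWORK = 18, LIWORK = 16 )
      DOUBLE PRECISION   A( M, M ), B( N, N ), C( M, N ), X( M, N )
      DOUBLE PRECISION   SWORK( LDSWORK, 54 ), DUM( 1 )
      DOUBLE PRECISION   SCALE, RES, DLANGE
      INTEGER            IWORK( LIWORK ), INFO, I, J
      EXTERNAL           DTRSYL3, DGEMM, DLANGE
*     A and B are upper triangular (trivially in Schur form); the
*     IWORK/SWORK sizes are over-allocated for this tiny case.
      DATA A / 2.0D0, 0.0D0, 1.0D0, 3.0D0 /
      DATA B / 1.0D0, 0.0D0, -1.0D0, 4.0D0 /
      DATA C / 1.0D0, 2.0D0, 3.0D0, 4.0D0 /
*     DTRSYL3 overwrites the right-hand side with X, so keep C intact.
      DO J = 1, N
         DO I = 1, M
            X( I, J ) = C( I, J )
         END DO
      END DO
      CALL DTRSYL3( 'N', 'N', 1, M, N, A, M, B, N, X, M, SCALE,
     $              IWORK, LIWORK, SWORK, LDSWORK, INFO )
*     Residual C := A*X - SCALE*C, then C := X*B + C.
      CALL DGEMM( 'N', 'N', M, N, M, 1.0D0, A, M, X, M, -SCALE, C, M )
      CALL DGEMM( 'N', 'N', M, N, N, 1.0D0, X, M, B, N, 1.0D0, C, M )
      RES = DLANGE( 'M', M, N, C, M, DUM )
      WRITE( *, * ) 'INFO=', INFO, ' SCALE=', SCALE, ' RESID=', RES
      END

The second sketch shows the multi-right-hand-side robust triangular solver DLATRS3 that TEST 10 in the *CHKTR drivers above exercises: each column K of the right-hand side is solved as op(A)*x = SCALES(K)*b_K with its own scale factor chosen to avoid overflow, and CNORM carries the column norms used for the overflow analysis, as in DLATRS.

*     Sketch 2 (assumed example, not part of the patch): triangular
*     solve with two right-hand sides and per-column scale factors.
      PROGRAM LATRS3D
      IMPLICIT NONE
      INTEGER            N, NRHS, LWORK
      PARAMETER          ( N = 3, NRHS = 2, LWORK = 1000 )
      DOUBLE PRECISION   A( N, N ), X( N, NRHS ), SCALES( NRHS ),
     $                   CNORM( N ), WORK( LWORK )
      INTEGER            INFO
      EXTERNAL           DLATRS3
*     Upper triangular A (stored column-major) and the right-hand
*     sides; LWORK is over-allocated for this tiny case.
      DATA A / 4.0D0, 0.0D0, 0.0D0,
     $         1.0D0, 3.0D0, 0.0D0,
     $         2.0D0, 1.0D0, 2.0D0 /
      DATA X / 1.0D0, 1.0D0, 1.0D0,
     $         2.0D0, 2.0D0, 2.0D0 /
*     On exit X( :, K ) solves A * x = SCALES( K ) * b_K, with each
*     SCALES( K ) <= 1 chosen to avoid overflow.
      CALL DLATRS3( 'U', 'N', 'N', 'N', N, NRHS, A, N, X, N,
     $              SCALES, CNORM, WORK, LWORK, INFO )
      WRITE( *, * ) 'INFO=', INFO, ' SCALES=', SCALES( 1 ), SCALES( 2 )
      END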