Merge pull request #3213 from martin-frbg/lapack382

Avoid allocating the transposed triangular matrix in LAPACKE_xlantr_work (Reference-LAPACK 382)
This commit is contained in:
Martin Kroeker 2021-05-02 18:45:15 +02:00 committed by GitHub
commit b8232c9054
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 80 additions and 74 deletions

View File

@ -41,45 +41,46 @@ float LAPACKE_clantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0; lapack_int info = 0;
float res = 0.; float res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function */
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a, &lda, work ); res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_float* a_t = NULL;
float* work_lapack = NULL; float* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < n ) { if( lda < n ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_clantr_work", info ); LAPACKE_xerbla( "LAPACKE_clantr_work", info );
return info; return info;
} }
/* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
a_t = (lapack_complex_float*) norm_lapack = 'i';
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,MAX(m,n)) ); } else if( LAPACKE_lsame( norm, 'i' ) ) {
if( a_t == NULL ) { norm_lapack = '1';
info = LAPACK_TRANSPOSE_MEMORY_ERROR; } else {
goto exit_level_0; norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
} }
/* Allocate memory for work array(s) */ /* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) { if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) ); work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) );
if( work_lapack == NULL ) { if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR; info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1; goto exit_level_0;
} }
} }
/* Transpose input matrices */ /* Call LAPACK function */
LAPACKE_ctr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); res = LAPACK_clantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Call LAPACK function and adjust info */
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Release memory and exit */ /* Release memory and exit */
if( work_lapack ) { if( work_lapack ) {
LAPACKE_free( work_lapack ); LAPACKE_free( work_lapack );
} }
exit_level_1:
LAPACKE_free( a_t );
exit_level_0: exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_clantr_work", info ); LAPACKE_xerbla( "LAPACKE_clantr_work", info );
} }
} else { } else {

View File

@ -40,44 +40,46 @@ double LAPACKE_dlantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0; lapack_int info = 0;
double res = 0.; double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function */
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work ); res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
double* a_t = NULL;
double* work_lapack = NULL; double* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < n ) { if( lda < n ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_dlantr_work", info ); LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
return info; return info;
} }
/* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,MAX(m,n)) ); norm_lapack = 'i';
if( a_t == NULL ) { } else if( LAPACKE_lsame( norm, 'i' ) ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR; norm_lapack = '1';
goto exit_level_0; } else {
norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
} }
/* Allocate memory for work array(s) */ /* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) { if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) ); work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) );
if( work_lapack == NULL ) { if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR; info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1; goto exit_level_0;
} }
} }
/* Transpose input matrices */ /* Call LAPACK function */
LAPACKE_dtr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); res = LAPACK_dlantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Call LAPACK function and adjust info */
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Release memory and exit */ /* Release memory and exit */
if( work_lapack ) { if( work_lapack ) {
LAPACKE_free( work_lapack ); LAPACKE_free( work_lapack );
} }
exit_level_1:
LAPACKE_free( a_t );
exit_level_0: exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dlantr_work", info ); LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
} }
} else { } else {

View File

@ -40,44 +40,46 @@ float LAPACKE_slantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0; lapack_int info = 0;
float res = 0.; float res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function */
res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work ); res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
float* a_t = NULL;
float* work_lapack = NULL; float* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < n ) { if( lda < n ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_slantr_work", info ); LAPACKE_xerbla( "LAPACKE_slantr_work", info );
return info; return info;
} }
/* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,MAX(m,n)) ); norm_lapack = 'i';
if( a_t == NULL ) { } else if( LAPACKE_lsame( norm, 'i' ) ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR; norm_lapack = '1';
goto exit_level_0; } else {
norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
} }
/* Allocate memory for work array(s) */ /* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) { if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) ); work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) );
if( work_lapack == NULL ) { if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR; info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1; goto exit_level_0;
} }
} }
/* Transpose input matrices */ /* Call LAPACK function */
LAPACKE_str_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); res = LAPACK_slantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Call LAPACK function and adjust info */
res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Release memory and exit */ /* Release memory and exit */
if( work_lapack ) { if( work_lapack ) {
LAPACKE_free( work_lapack ); LAPACKE_free( work_lapack );
} }
exit_level_1:
LAPACKE_free( a_t );
exit_level_0: exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_slantr_work", info ); LAPACKE_xerbla( "LAPACKE_slantr_work", info );
} }
} else { } else {

View File

@ -41,45 +41,46 @@ double LAPACKE_zlantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0; lapack_int info = 0;
double res = 0.; double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) { if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */ /* Call LAPACK function */
res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work ); res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) { } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_double* a_t = NULL;
double* work_lapack = NULL; double* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */ /* Check leading dimension(s) */
if( lda < n ) { if( lda < n ) {
info = -8; info = -8;
LAPACKE_xerbla( "LAPACKE_zlantr_work", info ); LAPACKE_xerbla( "LAPACKE_zlantr_work", info );
return info; return info;
} }
/* Allocate memory for temporary array(s) */ if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
a_t = (lapack_complex_double*) norm_lapack = 'i';
LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,MAX(m,n)) ); } else if( LAPACKE_lsame( norm, 'i' ) ) {
if( a_t == NULL ) { norm_lapack = '1';
info = LAPACK_TRANSPOSE_MEMORY_ERROR; } else {
goto exit_level_0; norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
} }
/* Allocate memory for work array(s) */ /* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) { if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) ); work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) );
if( work_lapack == NULL ) { if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR; info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1; goto exit_level_0;
} }
} }
/* Transpose input matrices */ /* Call LAPACK function */
LAPACKE_ztr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); res = LAPACK_zlantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Call LAPACK function and adjust info */
res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Release memory and exit */ /* Release memory and exit */
if( work_lapack ) { if( work_lapack ) {
LAPACKE_free( work_lapack ); LAPACKE_free( work_lapack );
} }
exit_level_1:
LAPACKE_free( a_t );
exit_level_0: exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_zlantr_work", info ); LAPACKE_xerbla( "LAPACKE_zlantr_work", info );
} }
} else { } else {