From c62aad62e551cc238cee2e4f78169c62df88bc63 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 14 Aug 2020 00:35:45 +0200 Subject: [PATCH 01/10] Fix incorrect calls to DLASET Reference-LAPACK issue 429 --- lapack-netlib/TESTING/EIG/cchkhb2stg.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/TESTING/EIG/cchkhb2stg.f b/lapack-netlib/TESTING/EIG/cchkhb2stg.f index 61537f44b..cd884febf 100644 --- a/lapack-netlib/TESTING/EIG/cchkhb2stg.f +++ b/lapack-netlib/TESTING/EIG/cchkhb2stg.f @@ -680,8 +680,8 @@ * the one from above. Compare it with D1 computed * using the DSBTRD. * - CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) - CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU ) LH = MAX(1, 4*N) LW = LWORK - LH @@ -753,8 +753,8 @@ * the one from above. Compare it with D1 computed * using the DSBTRD. * - CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) - CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU ) LH = MAX(1, 4*N) LW = LWORK - LH From d64f1ef26bc7c7f3ee6b54aaa2d394cf7842456d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 14 Aug 2020 00:40:24 +0200 Subject: [PATCH 02/10] Fix incorrect argument to SLASET Reference-LAPACK issue 425 (and 318) --- lapack-netlib/TESTING/EIG/schksb2stg.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack-netlib/TESTING/EIG/schksb2stg.f b/lapack-netlib/TESTING/EIG/schksb2stg.f index 07b6fa95c..7308bb690 100644 --- a/lapack-netlib/TESTING/EIG/schksb2stg.f +++ b/lapack-netlib/TESTING/EIG/schksb2stg.f @@ -670,8 +670,8 @@ * the one from above. Compare it with D1 computed * using the SSBTRD. * - CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) - CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N ) CALL SLACPY( ' ', K+1, N, A, LDA, U, LDU ) LH = MAX(1, 4*N) LW = LWORK - LH From 597010a9688c9f5688dc459ba92ef8a28ea20769 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 14 Aug 2020 00:41:56 +0200 Subject: [PATCH 03/10] Fix incorrect argument to SLASET Reference-LAPACK issue 425 (and 318) --- lapack-netlib/TESTING/EIG/schkst2stg.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lapack-netlib/TESTING/EIG/schkst2stg.f b/lapack-netlib/TESTING/EIG/schkst2stg.f index f386ab43c..83edb9dce 100644 --- a/lapack-netlib/TESTING/EIG/schkst2stg.f +++ b/lapack-netlib/TESTING/EIG/schkst2stg.f @@ -999,8 +999,8 @@ * the one from above. Compare it with D1 computed * using the 1-stage. * - CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) - CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, N ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, N ) CALL SLACPY( "U", N, N, A, LDA, V, LDU ) LH = MAX(1, 4*N) LW = LWORK - LH From 0ce2aa3163fd2225c746cb5b8b1d82dc1a6fbceb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 2 Sep 2020 23:41:51 +0200 Subject: [PATCH 04/10] Fix data type of rwork array --- lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c index 91458136c..c5eca535e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c @@ -47,8 +47,8 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp, lapack_complex_float* cwork = NULL; lapack_complex_float cwork_query; lapack_int lrwork = -1; - double* rwork = NULL; - double rwork_query; + float* rwork = NULL; + float rwork_query; lapack_int i; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 ); @@ -84,7 +84,7 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp, info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } - rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork ); + rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork ); if( rwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; From c31b72965ecf2b745c3a515f8abf889f9dd24473 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 2 Sep 2020 23:44:44 +0200 Subject: [PATCH 05/10] Fix data type of work array in zgesvdq prototype --- lapack-netlib/LAPACKE/include/lapack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index 4f48b7c87..c045892df 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -2513,7 +2513,7 @@ void LAPACK_zgesvdq( lapack_complex_double* U, lapack_int const* ldu, lapack_complex_double* V, lapack_int const* ldv, lapack_int* numrank, lapack_int* iwork, lapack_int const* liwork, - lapack_complex_float* cwork, lapack_int* lcwork, + lapack_complex_double* cwork, lapack_int* lcwork, double* rwork, lapack_int const* lrwork, lapack_int* info ); From 860247b5da58debb2082353a730f64049018bf35 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 2 Sep 2020 22:38:56 +0200 Subject: [PATCH 06/10] Follow-up to lapack#434 & lapack#409: fix signature mismatches --- lapack-netlib/LAPACKE/include/lapack.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index c045892df..9a8e1a218 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -3665,7 +3665,7 @@ lapack_int LAPACK_dggsvd( char const* jobu, char const* jobv, char const* jobq, lapack_int* lda, double* b, lapack_int* ldb, double* alpha, double* beta, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, - lapack_int* ldq, float* work, lapack_int* iwork, lapack_int* info ); + lapack_int* ldq, double* work, lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD) lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq, @@ -3676,7 +3676,7 @@ lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq, float* alpha, float* beta, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, - lapack_int* ldq, float* work, lapack_int* rwork, lapack_int* iwork, lapack_int *info ); + lapack_int* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* info ); #define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD) lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq, @@ -3688,7 +3688,7 @@ lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, - float* work, lapack_int* rwork, lapack_int* iwork, lapack_int* info ); + lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd3 LAPACK_GLOBAL(cggsvd3,CGGSVD3) void LAPACK_cggsvd3( @@ -3780,7 +3780,7 @@ lapack_int LAPACK_cggsvp( char const* jobu, char const* jobv, char const* jobq, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, - lapack_int* iwork, lapack_int* rwork, + lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* info); @@ -3793,7 +3793,7 @@ lapack_int LAPACK_zggsvp( char const* jobu, char const* jobv, char const* jobq, lapack_int* l, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, - lapack_int* ldq, lapack_int* iwork, lapack_int* rwork, + lapack_int* ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* info); From 1c6c71fa853226073779aba4cc5c08a2ba22300c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 2 Sep 2020 22:41:50 +0200 Subject: [PATCH 07/10] Follow-up to lapack#434 & lapack#409: add missing 'const' in signatures Based on how the surrounding functions in lapack.h are handling the parameters, particularly the ?ggsv?3-variants of the affected functions --- lapack-netlib/LAPACKE/include/lapack.h | 80 +++++++++++++------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index 9a8e1a218..f0af3795d 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -3651,43 +3651,43 @@ void LAPACK_zggrqf( #define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD) lapack_int LAPACK_sggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* n, lapack_int* p, + lapack_int const* m, lapack_int const* n, lapack_int const* p, lapack_int* k, lapack_int* l, float* a, - lapack_int* lda, float* b, lapack_int* ldb, - float* alpha, float* beta, float* u, lapack_int* ldu, - float* v, lapack_int* ldv, float* q, lapack_int* ldq, + lapack_int const* lda, float* b, lapack_int const* ldb, + float* alpha, float* beta, float* u, lapack_int const* ldu, + float* v, lapack_int const* ldv, float* q, lapack_int const* ldq, float* work, lapack_int* iwork, lapack_int* info ); #define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD) lapack_int LAPACK_dggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* n, lapack_int* p, + lapack_int const* m, lapack_int const* n, lapack_int const* p, lapack_int* k, lapack_int* l, double* a, - lapack_int* lda, double* b, lapack_int* ldb, + lapack_int const* lda, double* b, lapack_int const* ldb, double* alpha, double* beta, double* u, - lapack_int* ldu, double* v, lapack_int* ldv, double* q, - lapack_int* ldq, double* work, lapack_int* iwork, lapack_int* info ); + lapack_int const* ldu, double* v, lapack_int const* ldv, double* q, + lapack_int const* ldq, double* work, lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD) lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* n, lapack_int* p, + lapack_int const* m, lapack_int const* n, lapack_int const* p, lapack_int* k, lapack_int* l, - lapack_complex_float* a, lapack_int* lda, - lapack_complex_float* b, lapack_int* ldb, + lapack_complex_float* a, lapack_int const* lda, + lapack_complex_float* b, lapack_int const* ldb, float* alpha, float* beta, lapack_complex_float* u, - lapack_int* ldu, lapack_complex_float* v, - lapack_int* ldv, lapack_complex_float* q, - lapack_int* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* info ); + lapack_int const* ldu, lapack_complex_float* v, + lapack_int const* ldv, lapack_complex_float* q, + lapack_int const* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* info ); #define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD) lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* n, lapack_int* p, + lapack_int const* m, lapack_int const* n, lapack_int const* p, lapack_int* k, lapack_int* l, - lapack_complex_double* a, lapack_int* lda, - lapack_complex_double* b, lapack_int* ldb, + lapack_complex_double* a, lapack_int const* lda, + lapack_complex_double* b, lapack_int const* ldb, double* alpha, double* beta, - lapack_complex_double* u, lapack_int* ldu, - lapack_complex_double* v, lapack_int* ldv, - lapack_complex_double* q, lapack_int* ldq, + lapack_complex_double* u, lapack_int const* ldu, + lapack_complex_double* v, lapack_int const* ldv, + lapack_complex_double* q, lapack_int const* ldq, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd3 LAPACK_GLOBAL(cggsvd3,CGGSVD3) @@ -3754,46 +3754,46 @@ void LAPACK_zggsvd3( #define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP) lapack_int LAPACK_sggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* p, lapack_int* n, float* a, - lapack_int* lda, float* b, lapack_int* ldb, float* tola, + lapack_int const* m, lapack_int const* p, lapack_int const* n, float* a, + lapack_int const* lda, float* b, lapack_int const* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, float* u, - lapack_int* ldu, float* v, lapack_int* ldv, float* q, - lapack_int* ldq, lapack_int* iwork, float* tau, + lapack_int const* ldu, float* v, lapack_int const* ldv, float* q, + lapack_int const* ldq, lapack_int* iwork, float* tau, float* work, lapack_int* info); #define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP) lapack_int LAPACK_dggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* p, lapack_int* n, double* a, - lapack_int* lda, double* b, lapack_int* ldb, + lapack_int const* m, lapack_int const* p, lapack_int const* n, double* a, + lapack_int const* lda, double* b, lapack_int const* ldb, double* tola, double* tolb, lapack_int* k, - lapack_int* l, double* u, lapack_int* ldu, double* v, - lapack_int* ldv, double* q, lapack_int* ldq, + lapack_int* l, double* u, lapack_int const* ldu, double* v, + lapack_int const* ldv, double* q, lapack_int const* ldq, lapack_int* iwork, double* tau, double* work, lapack_int* info); #define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP) lapack_int LAPACK_cggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* p, lapack_int* n, - lapack_complex_float* a, lapack_int* lda, - lapack_complex_float* b, lapack_int* ldb, float* tola, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + lapack_complex_float* a, lapack_int const* lda, + lapack_complex_float* b, lapack_int const* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, - lapack_complex_float* u, lapack_int* ldu, - lapack_complex_float* v, lapack_int* ldv, - lapack_complex_float* q, lapack_int* ldq, + lapack_complex_float* u, lapack_int const* ldu, + lapack_complex_float* v, lapack_int const* ldv, + lapack_complex_float* q, lapack_int const* ldq, lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* info); #define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP) lapack_int LAPACK_zggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int* m, lapack_int* p, lapack_int* n, - lapack_complex_double* a, lapack_int* lda, - lapack_complex_double* b, lapack_int* ldb, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + lapack_complex_double* a, lapack_int const* lda, + lapack_complex_double* b, lapack_int const* ldb, double* tola, double* tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, - lapack_int* ldu, lapack_complex_double* v, - lapack_int* ldv, lapack_complex_double* q, - lapack_int* ldq, lapack_int* iwork, double* rwork, + lapack_int const* ldu, lapack_complex_double* v, + lapack_int const* ldv, lapack_complex_double* q, + lapack_int const* ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* info); From 3426519ae2e4210dc6088b484ce7b8f1abd1d38d Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 2 Sep 2020 22:46:47 +0200 Subject: [PATCH 08/10] adapt ?ggsv?-functions to ambient code style in LAPACKE/include/lapack.h --- lapack-netlib/LAPACKE/include/lapack.h | 162 ++++++++++++++----------- 1 file changed, 92 insertions(+), 70 deletions(-) diff --git a/lapack-netlib/LAPACKE/include/lapack.h b/lapack-netlib/LAPACKE/include/lapack.h index f0af3795d..aedaa308d 100644 --- a/lapack-netlib/LAPACKE/include/lapack.h +++ b/lapack-netlib/LAPACKE/include/lapack.h @@ -3650,45 +3650,58 @@ void LAPACK_zggrqf( lapack_int* info ); #define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD) -lapack_int LAPACK_sggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* n, lapack_int const* p, - lapack_int* k, lapack_int* l, float* a, - lapack_int const* lda, float* b, lapack_int const* ldb, - float* alpha, float* beta, float* u, lapack_int const* ldu, - float* v, lapack_int const* ldv, float* q, lapack_int const* ldq, - float* work, lapack_int* iwork, lapack_int* info ); +lapack_int LAPACK_sggsvd( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* n, lapack_int const* p, + lapack_int* k, lapack_int* l, + float* a, lapack_int const* lda, + float* b, lapack_int const* ldb, + float* alpha, float* beta, + float* u, lapack_int const* ldu, + float* v, lapack_int const* ldv, + float* q, lapack_int const* ldq, + float* work, lapack_int* iwork, lapack_int* info ); #define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD) -lapack_int LAPACK_dggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* n, lapack_int const* p, - lapack_int* k, lapack_int* l, double* a, - lapack_int const* lda, double* b, lapack_int const* ldb, - double* alpha, double* beta, double* u, - lapack_int const* ldu, double* v, lapack_int const* ldv, double* q, - lapack_int const* ldq, double* work, lapack_int* iwork, lapack_int* info ); +lapack_int LAPACK_dggsvd( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* n, lapack_int const* p, + lapack_int* k, lapack_int* l, + double* a, lapack_int const* lda, + double* b, lapack_int const* ldb, + double* alpha, double* beta, + double* u, lapack_int const* ldu, + double* v, lapack_int const* ldv, + double* q, lapack_int const* ldq, + double* work, lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD) -lapack_int LAPACK_cggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* n, lapack_int const* p, - lapack_int* k, lapack_int* l, - lapack_complex_float* a, lapack_int const* lda, - lapack_complex_float* b, lapack_int const* ldb, - float* alpha, float* beta, lapack_complex_float* u, - lapack_int const* ldu, lapack_complex_float* v, - lapack_int const* ldv, lapack_complex_float* q, - lapack_int const* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* info ); +lapack_int LAPACK_cggsvd( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* n, lapack_int const* p, + lapack_int* k, lapack_int* l, + lapack_complex_float* a, lapack_int const* lda, + lapack_complex_float* b, lapack_int const* ldb, + float* alpha, float* beta, + lapack_complex_float* u, lapack_int const* ldu, + lapack_complex_float* v, lapack_int const* ldv, + lapack_complex_float* q, lapack_int const* ldq, + lapack_complex_float* work, float* rwork, + lapack_int* iwork, lapack_int* info ); #define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD) -lapack_int LAPACK_zggsvd( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* n, lapack_int const* p, - lapack_int* k, lapack_int* l, - lapack_complex_double* a, lapack_int const* lda, - lapack_complex_double* b, lapack_int const* ldb, - double* alpha, double* beta, - lapack_complex_double* u, lapack_int const* ldu, - lapack_complex_double* v, lapack_int const* ldv, - lapack_complex_double* q, lapack_int const* ldq, - lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* info ); +lapack_int LAPACK_zggsvd( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* n, lapack_int const* p, + lapack_int* k, lapack_int* l, + lapack_complex_double* a, lapack_int const* lda, + lapack_complex_double* b, lapack_int const* ldb, + double* alpha, double* beta, + lapack_complex_double* u, lapack_int const* ldu, + lapack_complex_double* v, lapack_int const* ldv, + lapack_complex_double* q, lapack_int const* ldq, + lapack_complex_double* work, double* rwork, + lapack_int* iwork, lapack_int* info ); #define LAPACK_cggsvd3 LAPACK_GLOBAL(cggsvd3,CGGSVD3) void LAPACK_cggsvd3( @@ -3753,49 +3766,58 @@ void LAPACK_zggsvd3( lapack_int* info ); #define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP) -lapack_int LAPACK_sggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* p, lapack_int const* n, float* a, - lapack_int const* lda, float* b, lapack_int const* ldb, float* tola, - float* tolb, lapack_int* k, lapack_int* l, float* u, - lapack_int const* ldu, float* v, lapack_int const* ldv, float* q, - lapack_int const* ldq, lapack_int* iwork, float* tau, - float* work, lapack_int* info); +lapack_int LAPACK_sggsvp( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + float* a, lapack_int const* lda, + float* b, lapack_int const* ldb, + float* tola, float* tolb, + lapack_int* k, lapack_int* l, + float* u, lapack_int const* ldu, + float* v, lapack_int const* ldv, + float* q, lapack_int const* ldq, + lapack_int* iwork, float* tau, + float* work, lapack_int* info ); #define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP) -lapack_int LAPACK_dggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* p, lapack_int const* n, double* a, - lapack_int const* lda, double* b, lapack_int const* ldb, - double* tola, double* tolb, lapack_int* k, - lapack_int* l, double* u, lapack_int const* ldu, double* v, - lapack_int const* ldv, double* q, lapack_int const* ldq, - lapack_int* iwork, double* tau, double* work, - lapack_int* info); +lapack_int LAPACK_dggsvp( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + double* a, lapack_int const* lda, + double* b, lapack_int const* ldb, + double* tola, double* tolb, + lapack_int* k, lapack_int* l, + double* u, lapack_int const* ldu, + double* v, lapack_int const* ldv, + double* q, lapack_int const* ldq, + lapack_int* iwork, double* tau, + double* work, lapack_int* info ); #define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP) -lapack_int LAPACK_cggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* p, lapack_int const* n, - lapack_complex_float* a, lapack_int const* lda, - lapack_complex_float* b, lapack_int const* ldb, float* tola, - float* tolb, lapack_int* k, lapack_int* l, - lapack_complex_float* u, lapack_int const* ldu, - lapack_complex_float* v, lapack_int const* ldv, - lapack_complex_float* q, lapack_int const* ldq, - lapack_int* iwork, float* rwork, - lapack_complex_float* tau, lapack_complex_float* work, - lapack_int* info); +lapack_int LAPACK_cggsvp( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + lapack_complex_float* a, lapack_int const* lda, + lapack_complex_float* b, lapack_int const* ldb, + float* tola, float* tolb, lapack_int* k, lapack_int* l, + lapack_complex_float* u, lapack_int const* ldu, + lapack_complex_float* v, lapack_int const* ldv, + lapack_complex_float* q, lapack_int const* ldq, + lapack_int* iwork, float* rwork, lapack_complex_float* tau, + lapack_complex_float* work, lapack_int* info ); #define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP) -lapack_int LAPACK_zggsvp( char const* jobu, char const* jobv, char const* jobq, - lapack_int const* m, lapack_int const* p, lapack_int const* n, - lapack_complex_double* a, lapack_int const* lda, - lapack_complex_double* b, lapack_int const* ldb, - double* tola, double* tolb, lapack_int* k, - lapack_int* l, lapack_complex_double* u, - lapack_int const* ldu, lapack_complex_double* v, - lapack_int const* ldv, lapack_complex_double* q, - lapack_int const* ldq, lapack_int* iwork, double* rwork, - lapack_complex_double* tau, lapack_complex_double* work, - lapack_int* info); +lapack_int LAPACK_zggsvp( + char const* jobu, char const* jobv, char const* jobq, + lapack_int const* m, lapack_int const* p, lapack_int const* n, + lapack_complex_double* a, lapack_int const* lda, + lapack_complex_double* b, lapack_int const* ldb, + double* tola, double* tolb, lapack_int* k, lapack_int* l, + lapack_complex_double* u, lapack_int const* ldu, + lapack_complex_double* v, lapack_int const* ldv, + lapack_complex_double* q, lapack_int const* ldq, + lapack_int* iwork, double* rwork, lapack_complex_double* tau, + lapack_complex_double* work, lapack_int* info ); #define LAPACK_cggsvp3 LAPACK_GLOBAL(cggsvp3,CGGSVP3) void LAPACK_cggsvp3( From 718f67421aaf83fb33722e4267a2be40185f63de Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Fri, 4 Sep 2020 10:36:19 -0500 Subject: [PATCH 09/10] POWER9: Fix mcpu option with clang Adding check for compiler type before checking GCC version in Makefile. This allows clang to use power9 instead of power8 when CORE is POWER9. --- Makefile.power | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile.power b/Makefile.power index 37a02d692..e766f8499 100644 --- a/Makefile.power +++ b/Makefile.power @@ -17,6 +17,7 @@ endif ifeq ($(CORE), POWER9) ifneq ($(C_COMPILER), PGI) CCOMMON_OPT += -Ofast -mvsx -fno-fast-math +ifeq ($(C_COMPILER), GCC) ifneq ($(GCCVERSIONGT4), 1) $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) CCOMMON_OPT += -mcpu=power8 -mtune=power8 @@ -24,10 +25,14 @@ else CCOMMON_OPT += -mcpu=power9 -mtune=power9 endif else +CCOMMON_OPT += -mcpu=power9 -mtune=power9 +endif +else CCOMMON_OPT += -fast -Mvect=simd -Mcache_align endif ifneq ($(F_COMPILER), PGI) FCOMMON_OPT += -O2 -frecursive -fno-fast-math +ifeq ($(C_COMPILER), GCC) ifneq ($(GCCVERSIONGT4), 1) $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) FCOMMON_OPT += -mcpu=power8 -mtune=power8 @@ -35,6 +40,9 @@ else FCOMMON_OPT += -mcpu=power9 -mtune=power9 endif else +FCOMMON_OPT += -mcpu=power9 -mtune=power9 +endif +else FCOMMON_OPT += -O2 -Mrecursive endif endif From 330044d82147a9a08fd10d503fec7f406cde2861 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 5 Sep 2020 09:44:33 +0200 Subject: [PATCH 10/10] Fix potentiol domain error in sqrt --- driver/level3/level3_syrk_threaded.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index a041abac3..d7dcd68a3 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -526,7 +526,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG width, i, j, k; BLASLONG n, n_from, n_to; int mode, mask; - double dnum; + double dnum, di, dinum; if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); @@ -601,9 +601,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (nthreads - num_cpu > 1) { - double di = (double)i; + di = (double)i; - width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) ); + dinum = di * di + dnum; + + if (dinum > 0) + width = (((BLASLONG)((sqrt(dinum) - di) + mask)/(mask+1)) * (mask+1) ); + else + width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1) ); if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) ); @@ -643,10 +648,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO if (nthreads - num_cpu > 1) { - double di = (double)i; + di = (double)i; - width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); + dinum = di * di +dnum; + if (dinum > 0) + width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); + else + width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1)); + if ((width > n - i) || (width < mask)) width = n - i; } else {