Merge pull request #797 from wernsaar/develop

bugfixes for lapack and lapacke
This commit is contained in:
wernsaar 2016-03-07 16:44:17 +01:00
commit 711ecb8bd5
24 changed files with 120 additions and 62 deletions

View File

@ -389,7 +389,7 @@ DGEMVTKERNEL = dgemv_t.S
endif
ifndef CGEMVNKERNEL
CGEMVNKERNEL = cgemv_n.S
CGEMVNKERNEL = cgemv_n_4.c
endif
ifndef CGEMVTKERNEL
@ -397,11 +397,11 @@ CGEMVTKERNEL = cgemv_t_4.c
endif
ifndef ZGEMVNKERNEL
ZGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n_4.c
endif
ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t_4.c
endif
ifndef QGEMVNKERNEL

View File

@ -1,6 +1,3 @@
ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t.S
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c

View File

@ -18,7 +18,7 @@ SSYMV_L_KERNEL = ssymv_L.c
SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c
ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVNKERNEL = zgemv_n_4.c
ZGEMVTKERNEL = zgemv_t_4.c
DGEMVNKERNEL = dgemv_n_bulldozer.S

View File

@ -51,8 +51,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@ -63,8 +62,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_clantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:

View File

@ -51,8 +51,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@ -63,8 +62,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_dlantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:

View File

@ -38,10 +38,10 @@ double LAPACKE_dlantr_work( int matrix_layout, char norm, char uplo,
const double* a, lapack_int lda, double* work )
{
lapack_int info = 0;
double res = 0.;
double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
if( info < 0 ) {
info = info - 1;
}

View File

@ -74,11 +74,10 @@ lapack_int LAPACKE_dormbr_work( int matrix_layout, char vect, char side,
}
/* Allocate memory for temporary array(s) */
if( LAPACKE_lsame( vect, 'q' ) ) {
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * k );
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,k) );
} else {
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * nq );
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,nq) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
@ -89,11 +88,7 @@ lapack_int LAPACKE_dormbr_work( int matrix_layout, char vect, char side,
goto exit_level_1;
}
/* Transpose input matrices */
if( LAPACKE_lsame( vect, 'q' ) ) {
LAPACKE_dge_trans( matrix_layout, nq, k, a, lda, a_t, lda_t );
} else {
LAPACKE_dge_trans( matrix_layout, k, nq, a, lda, a_t, lda_t );
}
LAPACKE_dge_trans( matrix_layout, r, MIN(nq,k), a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_dormbr( &vect, &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t,

View File

@ -87,12 +87,7 @@ lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans,
goto exit_level_1;
}
/* Transpose input matrices */
if( LAPACKE_lsame( side, 'l' ) ){
LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
} else {
LAPACKE_dge_trans( matrix_layout, k, n, a, lda, a_t, lda_t );
}
LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,

View File

@ -51,8 +51,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@ -63,8 +62,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_slantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:

View File

@ -41,7 +41,7 @@ float LAPACKE_slantr_work( int matrix_layout, char norm, char uplo,
float res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
if( info < 0 ) {
info = info - 1;
}

View File

@ -73,8 +73,11 @@ lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side,
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)
LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,MIN(nq,k)) );
if( LAPACKE_lsame( vect, 'q' ) ) {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,k) );
} else {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,nq) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;

View File

@ -72,7 +72,11 @@ lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans,
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
if( LAPACKE_lsame( side, 'l' ) ) {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
} else {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;

View File

@ -51,8 +51,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@ -63,8 +62,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_zlantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:

View File

@ -39,7 +39,7 @@ double LAPACKE_zlantr_work( int matrix_layout, char norm, char uplo,
double* work )
{
lapack_int info = 0;
double res = 0.;
double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );

View File

@ -405,9 +405,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from CHSEQR, then quit
* If INFO .NE. 0 from CHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN

View File

@ -145,15 +145,33 @@
INTRINSIC ABS, CMPLX, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = CMPLX( SMLNUM, ZERO )
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN
*

View File

@ -339,16 +339,16 @@
$ LDVL, VR, LDVR, WORK, -1, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL CHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK,
$ -1, WORK, IERR )
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
ELSE
CALL CGGHD3( 'N', 'N', N, 1, N, A, LDA, B, LDB, VL, LDVL,
$ VR, LDVR, WORK, -1, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL CHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK,
$ -1, WORK, IERR )
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
END IF
WORK( 1 ) = CMPLX( LWKOPT )

View File

@ -418,9 +418,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from DHSEQR, then quit
* If INFO .NE. 0 from DHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN

View File

@ -145,15 +145,33 @@
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = SMLNUM
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN.
*

View File

@ -418,9 +418,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from SHSEQR, then quit
* If INFO .NE. 0 from SHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN

View File

@ -145,15 +145,33 @@
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = SMLNUM
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN.
*

View File

@ -404,9 +404,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from ZHSEQR, then quit
* If INFO .NE. 0 from ZHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN

View File

@ -145,15 +145,33 @@
INTRINSIC ABS, DCMPLX, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = DCMPLX( SMLNUM, ZERO )
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN
*

View File

@ -340,7 +340,7 @@
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL ZHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ WORK, IERR )
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
ELSE
CALL ZGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL,
@ -348,7 +348,7 @@
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL ZHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ WORK, IERR )
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
END IF
WORK( 1 ) = DCMPLX( LWKOPT )