Refs #309. Fixed trtri_U single-thread computational bug.
This commit is contained in:
parent
6d9d70c55c
commit
73770e60b8
|
@ -60,7 +60,6 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *
|
|||
};
|
||||
#endif
|
||||
|
||||
extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info);
|
||||
|
||||
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||
|
||||
|
@ -133,18 +132,6 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In
|
|||
if (args.nthreads == 1) {
|
||||
#endif
|
||||
|
||||
#if DOUBLE
|
||||
// double-precision trtri_U computes wrong results in the single-thread path
|
||||
// call dtrtri from lapack as a workaround.
|
||||
if(uplo==0){
|
||||
BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info);
|
||||
#ifndef PPC440
|
||||
blas_memory_free(buffer);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
*Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
|
||||
|
||||
#ifdef SMP
|
||||
|
|
|
@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing
|
|||
|
||||
XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX)
|
||||
|
||||
DBLASOBJS += dtrtri_lapack.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX)
|
||||
|
@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c
|
|||
dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F)
|
||||
|
||||
dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f
|
||||
$(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
|
||||
|
||||
dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F)
|
||||
|
||||
|
|
|
@ -1,242 +0,0 @@
|
|||
*> \brief \b DTRTRI
|
||||
*
|
||||
* =========== DOCUMENTATION ===========
|
||||
*
|
||||
* Online html documentation available at
|
||||
* http://www.netlib.org/lapack/explore-html/
|
||||
*
|
||||
*> \htmlonly
|
||||
*> Download DTRTRI + dependencies
|
||||
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f">
|
||||
*> [TGZ]</a>
|
||||
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f">
|
||||
*> [ZIP]</a>
|
||||
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f">
|
||||
*> [TXT]</a>
|
||||
*> \endhtmlonly
|
||||
*
|
||||
* Definition:
|
||||
* ===========
|
||||
*
|
||||
* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
* CHARACTER DIAG, UPLO
|
||||
* INTEGER INFO, LDA, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
* DOUBLE PRECISION A( LDA, * )
|
||||
* ..
|
||||
*
|
||||
*
|
||||
*> \par Purpose:
|
||||
* =============
|
||||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> DTRTRI computes the inverse of a real upper or lower triangular
|
||||
*> matrix A.
|
||||
*>
|
||||
*> This is the Level 3 BLAS version of the algorithm.
|
||||
*> \endverbatim
|
||||
*
|
||||
* Arguments:
|
||||
* ==========
|
||||
*
|
||||
*> \param[in] UPLO
|
||||
*> \verbatim
|
||||
*> UPLO is CHARACTER*1
|
||||
*> = 'U': A is upper triangular;
|
||||
*> = 'L': A is lower triangular.
|
||||
*> \endverbatim
|
||||
*>
|
||||
*> \param[in] DIAG
|
||||
*> \verbatim
|
||||
*> DIAG is CHARACTER*1
|
||||
*> = 'N': A is non-unit triangular;
|
||||
*> = 'U': A is unit triangular.
|
||||
*> \endverbatim
|
||||
*>
|
||||
*> \param[in] N
|
||||
*> \verbatim
|
||||
*> N is INTEGER
|
||||
*> The order of the matrix A. N >= 0.
|
||||
*> \endverbatim
|
||||
*>
|
||||
*> \param[in,out] A
|
||||
*> \verbatim
|
||||
*> A is DOUBLE PRECISION array, dimension (LDA,N)
|
||||
*> On entry, the triangular matrix A. If UPLO = 'U', the
|
||||
*> leading N-by-N upper triangular part of the array A contains
|
||||
*> the upper triangular matrix, and the strictly lower
|
||||
*> triangular part of A is not referenced. If UPLO = 'L', the
|
||||
*> leading N-by-N lower triangular part of the array A contains
|
||||
*> the lower triangular matrix, and the strictly upper
|
||||
*> triangular part of A is not referenced. If DIAG = 'U', the
|
||||
*> diagonal elements of A are also not referenced and are
|
||||
*> assumed to be 1.
|
||||
*> On exit, the (triangular) inverse of the original matrix, in
|
||||
*> the same storage format.
|
||||
*> \endverbatim
|
||||
*>
|
||||
*> \param[in] LDA
|
||||
*> \verbatim
|
||||
*> LDA is INTEGER
|
||||
*> The leading dimension of the array A. LDA >= max(1,N).
|
||||
*> \endverbatim
|
||||
*>
|
||||
*> \param[out] INFO
|
||||
*> \verbatim
|
||||
*> INFO is INTEGER
|
||||
*> = 0: successful exit
|
||||
*> < 0: if INFO = -i, the i-th argument had an illegal value
|
||||
*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular
|
||||
*> matrix is singular and its inverse can not be computed.
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
* ========
|
||||
*
|
||||
*> \author Univ. of Tennessee
|
||||
*> \author Univ. of California Berkeley
|
||||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*
|
||||
*> \ingroup doubleOTHERcomputational
|
||||
*
|
||||
* =====================================================================
|
||||
SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO )
|
||||
*
|
||||
* -- LAPACK computational routine (version 3.4.0) --
|
||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
CHARACTER DIAG, UPLO
|
||||
INTEGER INFO, LDA, N
|
||||
* ..
|
||||
* .. Array Arguments ..
|
||||
DOUBLE PRECISION A( LDA, * )
|
||||
* ..
|
||||
*
|
||||
* =====================================================================
|
||||
*
|
||||
* .. Parameters ..
|
||||
DOUBLE PRECISION ONE, ZERO
|
||||
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
|
||||
* ..
|
||||
* .. Local Scalars ..
|
||||
LOGICAL NOUNIT, UPPER
|
||||
INTEGER J, JB, NB, NN
|
||||
* ..
|
||||
* .. External Functions ..
|
||||
LOGICAL LSAME
|
||||
INTEGER ILAENV
|
||||
EXTERNAL LSAME, ILAENV
|
||||
* ..
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA
|
||||
* ..
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* ..
|
||||
* .. Executable Statements ..
|
||||
*
|
||||
* Test the input parameters.
|
||||
*
|
||||
INFO = 0
|
||||
UPPER = LSAME( UPLO, 'U' )
|
||||
NOUNIT = LSAME( DIAG, 'N' )
|
||||
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
|
||||
INFO = -1
|
||||
ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
|
||||
INFO = -2
|
||||
ELSE IF( N.LT.0 ) THEN
|
||||
INFO = -3
|
||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
||||
INFO = -5
|
||||
END IF
|
||||
IF( INFO.NE.0 ) THEN
|
||||
CALL XERBLA( 'DTRTRI', -INFO )
|
||||
RETURN
|
||||
END IF
|
||||
*
|
||||
* Quick return if possible
|
||||
*
|
||||
IF( N.EQ.0 )
|
||||
$ RETURN
|
||||
*
|
||||
* Check for singularity if non-unit.
|
||||
*
|
||||
IF( NOUNIT ) THEN
|
||||
DO 10 INFO = 1, N
|
||||
IF( A( INFO, INFO ).EQ.ZERO )
|
||||
$ RETURN
|
||||
10 CONTINUE
|
||||
INFO = 0
|
||||
END IF
|
||||
*
|
||||
* Determine the block size for this environment.
|
||||
*
|
||||
NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 )
|
||||
IF( NB.LE.1 .OR. NB.GE.N ) THEN
|
||||
*
|
||||
* Use unblocked code
|
||||
*
|
||||
CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
|
||||
ELSE
|
||||
*
|
||||
* Use blocked code
|
||||
*
|
||||
IF( UPPER ) THEN
|
||||
*
|
||||
* Compute inverse of upper triangular matrix
|
||||
*
|
||||
DO 20 J = 1, N, NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
*
|
||||
* Compute rows 1:j-1 of current block column
|
||||
*
|
||||
CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
|
||||
$ JB, ONE, A, LDA, A( 1, J ), LDA )
|
||||
CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
|
||||
$ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
|
||||
*
|
||||
* Compute inverse of current diagonal block
|
||||
*
|
||||
CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
|
||||
20 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Compute inverse of lower triangular matrix
|
||||
*
|
||||
NN = ( ( N-1 ) / NB )*NB + 1
|
||||
DO 30 J = NN, 1, -NB
|
||||
JB = MIN( NB, N-J+1 )
|
||||
IF( J+JB.LE.N ) THEN
|
||||
*
|
||||
* Compute rows j+jb:n of current block column
|
||||
*
|
||||
CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG,
|
||||
$ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
|
||||
$ A( J+JB, J ), LDA )
|
||||
CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG,
|
||||
$ N-J-JB+1, JB, -ONE, A( J, J ), LDA,
|
||||
$ A( J+JB, J ), LDA )
|
||||
END IF
|
||||
*
|
||||
* Compute inverse of current diagonal block
|
||||
*
|
||||
CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
|
||||
30 CONTINUE
|
||||
END IF
|
||||
END IF
|
||||
*
|
||||
RETURN
|
||||
*
|
||||
* End of DTRTRILAPACK
|
||||
*
|
||||
END
|
|
@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
if (ls == i + bk) {
|
||||
NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
|
||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
|
||||
GEMM_BETA(min_i, bk, 0, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
||||
|
||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
|
@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
|
|||
min_i = i - is;
|
||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
||||
|
||||
NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
|
||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
||||
GEMM_BETA(min_i, bk, 0, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
#endif
|
||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
||||
|
||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
||||
#ifdef COMPLEX
|
||||
ZERO,
|
||||
|
|
Loading…
Reference in New Issue