Merge pull request #409 from wernsaar/develop

some fixes for Lapack and ARM platform
This commit is contained in:
Zhang Xianyi 2014-07-09 21:11:00 +08:00
commit 2e2473f390
8 changed files with 123 additions and 106 deletions

View File

@ -25,9 +25,20 @@ VERSION = 0.2.10.rc2
# FC = gfortran
# Even you can specify cross compiler. Meanwhile, please set HOSTCC.
# cross compiler for Windows
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran
# cross compiler for 32bit ARM
# CC = arm-linux-gnueabihf-gcc
# FC = arm-linux-gnueabihf-gfortran
# cross compiler for 64bit ARM
# CC = aarch64-linux-gnu-gcc
# FC = aarch64-linux-gnu-gfortran
# If you use the cross compiler, please set this host compiler.
# HOSTCC = gcc

View File

@ -1,5 +1,7 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c
DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c
@ -96,12 +98,12 @@ ZSWAPKERNEL = swap_vfp.S
# BAD SGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
# CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S
# BAD SGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
# CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
STRMMKERNEL = strmm_kernel_4x2_vfp.S

View File

@ -1,5 +1,7 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c
#################################################################################
@ -77,12 +79,12 @@ ZSCALKERNEL = zscal.c
# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
#CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S
# BAD SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
#CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S

View File

@ -321,24 +321,24 @@
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for CGEQRF
CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CGEQRF=DUM(1)
CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEQRF=CDUM(1)
* Compute space needed for CUNGQR
CALL CUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGQR_N=DUM(1)
CALL CUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGQR_M=DUM(1)
CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGQR_N=CDUM(1)
CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGQR_M=CDUM(1)
* Compute space needed for CGEBRD
CALL CGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
* Compute space needed for CUNGBR
CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'Q', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
IF( M.GE.MNTHR ) THEN
@ -444,20 +444,20 @@
*
* Path 10 (M at least N, but not much larger)
*
CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
MAXWRK = 2*N + LWORK_CGEBRD
IF( WNTUS .OR. WNTUO ) THEN
CALL CUNGBR( 'Q', M, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
END IF
IF( WNTUA ) THEN
CALL CUNGBR( 'Q', M, M, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
END IF
IF( .NOT.WNTVN ) THEN
@ -471,25 +471,26 @@
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for CGELQF
CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CGELQF=DUM(1)
CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CGELQF=CDUM(1)
* Compute space needed for CUNGLQ
CALL CUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGLQ_N=DUM(1)
CALL CUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGLQ_M=DUM(1)
CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
$ IERR )
LWORK_CUNGLQ_N=CDUM(1)
CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGLQ_M=CDUM(1)
* Compute space needed for CGEBRD
CALL CGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
* Compute space needed for CUNGBR P
CALL CUNGBR( 'P', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
* Compute space needed for CUNGBR Q
CALL CUNGBR( 'Q', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
IF( N.GE.MNTHR ) THEN
IF( WNTVN ) THEN
*
@ -593,21 +594,21 @@
*
* Path 10t(N greater than M, but not much larger)
*
CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
MAXWRK = 2*M + LWORK_CGEBRD
IF( WNTVS .OR. WNTVO ) THEN
* Compute space needed for CUNGBR P
CALL CUNGBR( 'P', M, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
END IF
IF( WNTVA ) THEN
CALL CUNGBR( 'P', N, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
END IF
IF( .NOT.WNTUN ) THEN

View File

@ -286,7 +286,7 @@
CLANHF = ZERO
RETURN
ELSE IF( N.EQ.1 ) THEN
CLANHF = ABS( A(0) )
CLANHF = ABS(REAL(A(0)))
RETURN
END IF
*

View File

@ -321,24 +321,24 @@
*
MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for ZGEQRF
CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZGEQRF=DUM(1)
CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEQRF=CDUM(1)
* Compute space needed for ZUNGQR
CALL ZUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGQR_N=DUM(1)
CALL ZUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGQR_M=DUM(1)
CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGQR_N=CDUM(1)
CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGQR_M=CDUM(1)
* Compute space needed for ZGEBRD
CALL ZGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
* Compute space needed for ZUNGBR
CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'Q', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
*
IF( M.GE.MNTHR ) THEN
IF( WNTUN ) THEN
@ -443,20 +443,20 @@
*
* Path 10 (M at least N, but not much larger)
*
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
MAXWRK = 2*N + LWORK_ZGEBRD
IF( WNTUS .OR. WNTUO ) THEN
CALL ZUNGBR( 'Q', M, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
END IF
IF( WNTUA ) THEN
CALL ZUNGBR( 'Q', M, M, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
END IF
IF( .NOT.WNTVN ) THEN
@ -470,25 +470,26 @@
*
MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for ZGELQF
CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZGELQF=DUM(1)
CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGELQF=CDUM(1)
* Compute space needed for ZUNGLQ
CALL ZUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGLQ_N=DUM(1)
CALL ZUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGLQ_M=DUM(1)
CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
$ IERR )
LWORK_ZUNGLQ_N=CDUM(1)
CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGLQ_M=CDUM(1)
* Compute space needed for ZGEBRD
CALL ZGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
* Compute space needed for ZUNGBR P
CALL ZUNGBR( 'P', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
* Compute space needed for ZUNGBR Q
CALL ZUNGBR( 'Q', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
IF( N.GE.MNTHR ) THEN
IF( WNTVN ) THEN
*
@ -592,21 +593,21 @@
*
* Path 10t(N greater than M, but not much larger)
*
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
MAXWRK = 2*M + LWORK_ZGEBRD
IF( WNTVS .OR. WNTVO ) THEN
* Compute space needed for ZUNGBR P
CALL ZUNGBR( 'P', M, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
END IF
IF( WNTVA ) THEN
CALL ZUNGBR( 'P', N, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
END IF
IF( .NOT.WNTUN ) THEN

View File

@ -286,7 +286,7 @@
ZLANHF = ZERO
RETURN
ELSE IF( N.EQ.1 ) THEN
ZLANHF = ABS( A(0) )
ZLANHF = ABS(DBLE(A(0)))
RETURN
END IF
*

View File

@ -526,10 +526,10 @@
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 2
ISUPPZ(2*M) = 2
ELSE
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M) = 1
END IF
ELSE
ISUPPZ(2*M-1) = 2
@ -550,10 +550,10 @@
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 2
ISUPPZ(2*M) = 2
ELSE
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M) = 1
END IF
ELSE
ISUPPZ(2*M-1) = 2