Improve FMA usage in ?LAQR5 (Reference-LAPACK PR681)

This commit is contained in:
Martin Kroeker 2022-11-20 19:37:28 +01:00 committed by GitHub
parent f63c93274c
commit 6f09e4c121
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 223 additions and 174 deletions

View File

@ -279,7 +279,7 @@
PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 )
* .. * ..
* .. Local Scalars .. * .. Local Scalars ..
COMPLEX ALPHA, BETA, CDUM, REFSUM COMPLEX ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
$ SMLNUM, TST1, TST2, ULP $ SMLNUM, TST1, TST2, ULP
INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN, INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
@ -424,12 +424,12 @@
* ==== Perform update from right within * ==== Perform update from right within
* . computational window. ==== * . computational window. ====
* *
T1 = V( 1, M22 )
T2 = T1*CONJG( V( 2, M22 ) )
DO 30 J = JTOP, MIN( KBOT, K+3 ) DO 30 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
$ H( J, K+2 ) ) H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+2 ) = H( J, K+2 ) -
$ REFSUM*CONJG( V( 2, M22 ) )
30 CONTINUE 30 CONTINUE
* *
* ==== Perform update from left within * ==== Perform update from left within
@ -442,12 +442,13 @@
ELSE ELSE
JBOT = KBOT JBOT = KBOT
END IF END IF
T1 = CONJG( V( 1, M22 ) )
T2 = T1*V( 2, M22 )
DO 40 J = K+1, JBOT DO 40 J = K+1, JBOT
REFSUM = CONJG( V( 1, M22 ) )* REFSUM = H( K+1, J ) +
$ ( H( K+1, J )+CONJG( V( 2, M22 ) )* $ CONJG( V( 2, M22 ) )*H( K+2, J )
$ H( K+2, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
40 CONTINUE 40 CONTINUE
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
@ -610,25 +611,28 @@
* . deflation check. We still delay most of the * . deflation check. We still delay most of the
* . updates from the left for efficiency. ==== * . updates from the left for efficiency. ====
* *
T1 = V( 1, M )
T2 = T1*CONJG( V( 2, M ) )
T3 = T1*CONJG( V( 3, M ) )
DO 70 J = JTOP, MIN( KBOT, K+3 ) DO 70 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
$ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) $ + V( 3, M )*H( J, K+3 )
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+2 ) = H( J, K+2 ) - H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
$ REFSUM*CONJG( V( 2, M ) ) H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
H( J, K+3 ) = H( J, K+3 ) -
$ REFSUM*CONJG( V( 3, M ) )
70 CONTINUE 70 CONTINUE
* *
* ==== Perform update from left for subsequent * ==== Perform update from left for subsequent
* . column. ==== * . column. ====
* *
REFSUM = CONJG( V( 1, M ) )*( H( K+1, K+1 ) T1 = CONJG( V( 1, M ) )
$ +CONJG( V( 2, M ) )*H( K+2, K+1 ) T2 = T1*V( 2, M )
$ +CONJG( V( 3, M ) )*H( K+3, K+1 ) ) T3 = T1*V( 3, M )
H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM REFSUM = H( K+1, K+1 ) + CONJG( V( 2, M ) )*H( K+2, K+1 )
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) $ + CONJG( V( 3, M ) )*H( K+3, K+1 )
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
* . the tradition small-compared-to-nearby-diagonals * . the tradition small-compared-to-nearby-diagonals
@ -688,13 +692,15 @@
* *
DO 100 M = MBOT, MTOP, -1 DO 100 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = CONJG( V( 1, M ) )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
REFSUM = CONJG( V( 1, M ) )* REFSUM = H( K+1, J ) + CONJG( V( 2, M ) )*
$ ( H( K+1, J )+CONJG( V( 2, M ) )* $ H( K+2, J ) + CONJG( V( 3, M ) )*H( K+3, J )
$ H( K+2, J )+CONJG( V( 3, M ) )*H( K+3, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) H( K+3, J ) = H( K+3, J ) - REFSUM*T3
H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
90 CONTINUE 90 CONTINUE
100 CONTINUE 100 CONTINUE
* *
@ -712,14 +718,15 @@
I2 = MAX( 1, KTOP-INCOL ) I2 = MAX( 1, KTOP-INCOL )
I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
T1 = V( 1, M )
T2 = T1*CONJG( V( 2, M ) )
T3 = T1*CONJG( V( 3, M ) )
DO 110 J = I2, I4 DO 110 J = I2, I4
REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
$ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) $ + V( 3, M )*U( J, KMS+3 )
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+2 ) = U( J, KMS+2 ) - U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
$ REFSUM*CONJG( V( 2, M ) ) U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
U( J, KMS+3 ) = U( J, KMS+3 ) -
$ REFSUM*CONJG( V( 3, M ) )
110 CONTINUE 110 CONTINUE
120 CONTINUE 120 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
@ -730,14 +737,15 @@
* *
DO 140 M = MBOT, MTOP, -1 DO 140 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*CONJG( V( 2, M ) )
T3 = T1*CONJG( V( 3, M ) )
DO 130 J = ILOZ, IHIZ DO 130 J = ILOZ, IHIZ
REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
$ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) $ + V( 3, M )*Z( J, K+3 )
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+2 ) = Z( J, K+2 ) - Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
$ REFSUM*CONJG( V( 2, M ) ) Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
Z( J, K+3 ) = Z( J, K+3 ) -
$ REFSUM*CONJG( V( 3, M ) )
130 CONTINUE 130 CONTINUE
140 CONTINUE 140 CONTINUE
END IF END IF

View File

@ -286,8 +286,8 @@
* .. * ..
* .. Local Scalars .. * .. Local Scalars ..
DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM, DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
$ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
$ ULP $ T3, TST1, TST2, ULP
INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN, INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
$ JROW, JTOP, K, K1, KDU, KMS, KRCOL, $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
$ M, M22, MBOT, MTOP, NBMPS, NDCOL, $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
@ -447,11 +447,12 @@
* ==== Perform update from right within * ==== Perform update from right within
* . computational window. ==== * . computational window. ====
* *
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 30 J = JTOP, MIN( KBOT, K+3 ) DO 30 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
$ H( J, K+2 ) ) H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
30 CONTINUE 30 CONTINUE
* *
* ==== Perform update from left within * ==== Perform update from left within
@ -464,11 +465,12 @@
ELSE ELSE
JBOT = KBOT JBOT = KBOT
END IF END IF
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 40 J = K+1, JBOT DO 40 J = K+1, JBOT
REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* REFSUM = H( K+1, J ) + V( 2, M22 )*H( K+2, J )
$ H( K+2, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
40 CONTINUE 40 CONTINUE
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
@ -522,18 +524,20 @@
* *
IF( ACCUM ) THEN IF( ACCUM ) THEN
KMS = K - INCOL KMS = K - INCOL
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 50 J = MAX( 1, KTOP-INCOL ), KDU DO 50 J = MAX( 1, KTOP-INCOL ), KDU
REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ REFSUM = U( J, KMS+1 ) + V( 2, M22 )*U( J, KMS+2 )
$ V( 2, M22 )*U( J, KMS+2 ) ) U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M22 )
50 CONTINUE 50 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 60 J = ILOZ, IHIZ DO 60 J = ILOZ, IHIZ
REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* REFSUM = Z( J, K+1 )+V( 2, M22 )*Z( J, K+2 )
$ Z( J, K+2 ) ) Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
60 CONTINUE 60 CONTINUE
END IF END IF
END IF END IF
@ -631,22 +635,25 @@
* . deflation check. We still delay most of the * . deflation check. We still delay most of the
* . updates from the left for efficiency. ==== * . updates from the left for efficiency. ====
* *
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 70 J = JTOP, MIN( KBOT, K+3 ) DO 70 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
$ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) $ + V( 3, M )*H( J, K+3 )
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
70 CONTINUE 70 CONTINUE
* *
* ==== Perform update from left for subsequent * ==== Perform update from left for subsequent
* . column. ==== * . column. ====
* *
REFSUM = V( 1, M )*( H( K+1, K+1 )+V( 2, M )* REFSUM = H( K+1, K+1 ) + V( 2, M )*H( K+2, K+1 )
$ H( K+2, K+1 )+V( 3, M )*H( K+3, K+1 ) ) $ + V( 3, M )*H( K+3, K+1 )
H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
* . the tradition small-compared-to-nearby-diagonals * . the tradition small-compared-to-nearby-diagonals
@ -706,12 +713,15 @@
* *
DO 100 M = MBOT, MTOP, -1 DO 100 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* REFSUM = H( K+1, J ) + V( 2, M )*H( K+2, J )
$ H( K+2, J )+V( 3, M )*H( K+3, J ) ) $ + V( 3, M )*H( K+3, J )
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) H( K+3, J ) = H( K+3, J ) - REFSUM*T3
90 CONTINUE 90 CONTINUE
100 CONTINUE 100 CONTINUE
* *
@ -729,12 +739,15 @@
I2 = MAX( 1, KTOP-INCOL ) I2 = MAX( 1, KTOP-INCOL )
I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 110 J = I2, I4 DO 110 J = I2, I4
REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
$ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) $ + V( 3, M )*U( J, KMS+3 )
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
110 CONTINUE 110 CONTINUE
120 CONTINUE 120 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
@ -745,12 +758,15 @@
* *
DO 140 M = MBOT, MTOP, -1 DO 140 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 130 J = ILOZ, IHIZ DO 130 J = ILOZ, IHIZ
REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
$ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) $ + V( 3, M )*Z( J, K+3 )
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
130 CONTINUE 130 CONTINUE
140 CONTINUE 140 CONTINUE
END IF END IF

View File

@ -286,8 +286,8 @@
* .. * ..
* .. Local Scalars .. * .. Local Scalars ..
REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM, REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
$ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, T1, T2,
$ ULP $ T3, TST1, TST2, ULP
INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN, INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
$ JROW, JTOP, K, K1, KDU, KMS, KRCOL, $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
$ M, M22, MBOT, MTOP, NBMPS, NDCOL, $ M, M22, MBOT, MTOP, NBMPS, NDCOL,
@ -447,11 +447,12 @@
* ==== Perform update from right within * ==== Perform update from right within
* . computational window. ==== * . computational window. ====
* *
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 30 J = JTOP, MIN( KBOT, K+3 ) DO 30 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
$ H( J, K+2 ) ) H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
30 CONTINUE 30 CONTINUE
* *
* ==== Perform update from left within * ==== Perform update from left within
@ -464,11 +465,12 @@
ELSE ELSE
JBOT = KBOT JBOT = KBOT
END IF END IF
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 40 J = K+1, JBOT DO 40 J = K+1, JBOT
REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* REFSUM = H( K+1, J ) + V( 2, M22 )*H( K+2, J )
$ H( K+2, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
40 CONTINUE 40 CONTINUE
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
@ -522,18 +524,20 @@
* *
IF( ACCUM ) THEN IF( ACCUM ) THEN
KMS = K - INCOL KMS = K - INCOL
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 50 J = MAX( 1, KTOP-INCOL ), KDU DO 50 J = MAX( 1, KTOP-INCOL ), KDU
REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ REFSUM = U( J, KMS+1 ) + V( 2, M22 )*U( J, KMS+2 )
$ V( 2, M22 )*U( J, KMS+2 ) ) U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M22 )
50 CONTINUE 50 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
T1 = V( 1, M22 )
T2 = T1*V( 2, M22 )
DO 60 J = ILOZ, IHIZ DO 60 J = ILOZ, IHIZ
REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* REFSUM = Z( J, K+1 )+V( 2, M22 )*Z( J, K+2 )
$ Z( J, K+2 ) ) Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
60 CONTINUE 60 CONTINUE
END IF END IF
END IF END IF
@ -631,22 +635,25 @@
* . deflation check. We still delay most of the * . deflation check. We still delay most of the
* . updates from the left for efficiency. ==== * . updates from the left for efficiency. ====
* *
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 70 J = JTOP, MIN( KBOT, K+3 ) DO 70 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
$ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) $ + V( 3, M )*H( J, K+3 )
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
70 CONTINUE 70 CONTINUE
* *
* ==== Perform update from left for subsequent * ==== Perform update from left for subsequent
* . column. ==== * . column. ====
* *
REFSUM = V( 1, M )*( H( K+1, K+1 )+V( 2, M )* REFSUM = H( K+1, K+1 ) + V( 2, M )*H( K+2, K+1 )
$ H( K+2, K+1 )+V( 3, M )*H( K+3, K+1 ) ) $ + V( 3, M )*H( K+3, K+1 )
H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
* . the tradition small-compared-to-nearby-diagonals * . the tradition small-compared-to-nearby-diagonals
@ -706,12 +713,15 @@
* *
DO 100 M = MBOT, MTOP, -1 DO 100 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* REFSUM = H( K+1, J ) + V( 2, M )*H( K+2, J )
$ H( K+2, J )+V( 3, M )*H( K+3, J ) ) $ + V( 3, M )*H( K+3, J )
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) H( K+3, J ) = H( K+3, J ) - REFSUM*T3
90 CONTINUE 90 CONTINUE
100 CONTINUE 100 CONTINUE
* *
@ -729,12 +739,15 @@
I2 = MAX( 1, KTOP-INCOL ) I2 = MAX( 1, KTOP-INCOL )
I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 110 J = I2, I4 DO 110 J = I2, I4
REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
$ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) $ + V( 3, M )*U( J, KMS+3 )
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
110 CONTINUE 110 CONTINUE
120 CONTINUE 120 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
@ -745,12 +758,15 @@
* *
DO 140 M = MBOT, MTOP, -1 DO 140 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 130 J = ILOZ, IHIZ DO 130 J = ILOZ, IHIZ
REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
$ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) $ + V( 3, M )*Z( J, K+3 )
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
130 CONTINUE 130 CONTINUE
140 CONTINUE 140 CONTINUE
END IF END IF

View File

@ -279,7 +279,7 @@
PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 )
* .. * ..
* .. Local Scalars .. * .. Local Scalars ..
COMPLEX*16 ALPHA, BETA, CDUM, REFSUM COMPLEX*16 ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
$ SMLNUM, TST1, TST2, ULP $ SMLNUM, TST1, TST2, ULP
INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN, INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
@ -424,12 +424,12 @@
* ==== Perform update from right within * ==== Perform update from right within
* . computational window. ==== * . computational window. ====
* *
T1 = V( 1, M22 )
T2 = T1*DCONJG( V( 2, M22 ) )
DO 30 J = JTOP, MIN( KBOT, K+3 ) DO 30 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* REFSUM = H( J, K+1 ) + V( 2, M22 )*H( J, K+2 )
$ H( J, K+2 ) ) H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
H( J, K+2 ) = H( J, K+2 ) -
$ REFSUM*DCONJG( V( 2, M22 ) )
30 CONTINUE 30 CONTINUE
* *
* ==== Perform update from left within * ==== Perform update from left within
@ -442,12 +442,13 @@
ELSE ELSE
JBOT = KBOT JBOT = KBOT
END IF END IF
T1 = DCONJG( V( 1, M22 ) )
T2 = T1*V( 2, M22 )
DO 40 J = K+1, JBOT DO 40 J = K+1, JBOT
REFSUM = DCONJG( V( 1, M22 ) )* REFSUM = H( K+1, J ) +
$ ( H( K+1, J )+DCONJG( V( 2, M22 ) )* $ DCONJG( V( 2, M22 ) )*H( K+2, J )
$ H( K+2, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
40 CONTINUE 40 CONTINUE
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
@ -610,25 +611,29 @@
* . deflation check. We still delay most of the * . deflation check. We still delay most of the
* . updates from the left for efficiency. ==== * . updates from the left for efficiency. ====
* *
T1 = V( 1, M )
T2 = T1*DCONJG( V( 2, M ) )
T3 = T1*DCONJG( V( 3, M ) )
DO 70 J = JTOP, MIN( KBOT, K+3 ) DO 70 J = JTOP, MIN( KBOT, K+3 )
REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* REFSUM = H( J, K+1 ) + V( 2, M )*H( J, K+2 )
$ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) $ + V( 3, M )*H( J, K+3 )
H( J, K+1 ) = H( J, K+1 ) - REFSUM H( J, K+1 ) = H( J, K+1 ) - REFSUM*T1
H( J, K+2 ) = H( J, K+2 ) - H( J, K+2 ) = H( J, K+2 ) - REFSUM*T2
$ REFSUM*DCONJG( V( 2, M ) ) H( J, K+3 ) = H( J, K+3 ) - REFSUM*T3
H( J, K+3 ) = H( J, K+3 ) -
$ REFSUM*DCONJG( V( 3, M ) )
70 CONTINUE 70 CONTINUE
* *
* ==== Perform update from left for subsequent * ==== Perform update from left for subsequent
* . column. ==== * . column. ====
* *
REFSUM = DCONJG( V( 1, M ) )*( H( K+1, K+1 ) T1 = DCONJG( V( 1, M ) )
$ +DCONJG( V( 2, M ) )*H( K+2, K+1 ) T2 = T1*V( 2, M )
$ +DCONJG( V( 3, M ) )*H( K+3, K+1 ) ) T3 = T1*V( 3, M )
H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM REFSUM = H( K+1, K+1 )
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) $ + DCONJG( V( 2, M ) )*H( K+2, K+1 )
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) $ + DCONJG( V( 3, M ) )*H( K+3, K+1 )
H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM*T1
H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*T2
H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*T3
* *
* ==== The following convergence test requires that * ==== The following convergence test requires that
* . the tradition small-compared-to-nearby-diagonals * . the tradition small-compared-to-nearby-diagonals
@ -688,13 +693,15 @@
* *
DO 100 M = MBOT, MTOP, -1 DO 100 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = DCONJG( V( 1, M ) )
T2 = T1*V( 2, M )
T3 = T1*V( 3, M )
DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
REFSUM = DCONJG( V( 1, M ) )* REFSUM = H( K+1, J ) + DCONJG( V( 2, M ) )*H( K+2, J )
$ ( H( K+1, J )+DCONJG( V( 2, M ) )* $ + DCONJG( V( 3, M ) )*H( K+3, J )
$ H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) ) H( K+1, J ) = H( K+1, J ) - REFSUM*T1
H( K+1, J ) = H( K+1, J ) - REFSUM H( K+2, J ) = H( K+2, J ) - REFSUM*T2
H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) H( K+3, J ) = H( K+3, J ) - REFSUM*T3
H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
90 CONTINUE 90 CONTINUE
100 CONTINUE 100 CONTINUE
* *
@ -712,14 +719,15 @@
I2 = MAX( 1, KTOP-INCOL ) I2 = MAX( 1, KTOP-INCOL )
I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
T1 = V( 1, M )
T2 = T1*DCONJG( V( 2, M ) )
T3 = T1*DCONJG( V( 3, M ) )
DO 110 J = I2, I4 DO 110 J = I2, I4
REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* REFSUM = U( J, KMS+1 ) + V( 2, M )*U( J, KMS+2 )
$ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) $ + V( 3, M )*U( J, KMS+3 )
U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM*T1
U( J, KMS+2 ) = U( J, KMS+2 ) - U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*T2
$ REFSUM*DCONJG( V( 2, M ) ) U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*T3
U( J, KMS+3 ) = U( J, KMS+3 ) -
$ REFSUM*DCONJG( V( 3, M ) )
110 CONTINUE 110 CONTINUE
120 CONTINUE 120 CONTINUE
ELSE IF( WANTZ ) THEN ELSE IF( WANTZ ) THEN
@ -730,14 +738,15 @@
* *
DO 140 M = MBOT, MTOP, -1 DO 140 M = MBOT, MTOP, -1
K = KRCOL + 2*( M-1 ) K = KRCOL + 2*( M-1 )
T1 = V( 1, M )
T2 = T1*DCONJG( V( 2, M ) )
T3 = T1*DCONJG( V( 3, M ) )
DO 130 J = ILOZ, IHIZ DO 130 J = ILOZ, IHIZ
REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* REFSUM = Z( J, K+1 ) + V( 2, M )*Z( J, K+2 )
$ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) $ + V( 3, M )*Z( J, K+3 )
Z( J, K+1 ) = Z( J, K+1 ) - REFSUM Z( J, K+1 ) = Z( J, K+1 ) - REFSUM*T1
Z( J, K+2 ) = Z( J, K+2 ) - Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*T2
$ REFSUM*DCONJG( V( 2, M ) ) Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*T3
Z( J, K+3 ) = Z( J, K+3 ) -
$ REFSUM*DCONJG( V( 3, M ) )
130 CONTINUE 130 CONTINUE
140 CONTINUE 140 CONTINUE
END IF END IF