Rewrite to use FMA with Householder reflectors

This commit is contained in:
Martin Kroeker 2023-03-20 10:00:42 +01:00 committed by GitHub
parent e1c3c34178
commit 2a83ec1f79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 66 deletions

View File

@ -337,9 +337,9 @@
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WR2 $ WABS, WI, WR, WR2
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
DOUBLE PRECISION V( 3 ) DOUBLE PRECISION V( 3 )
@ -1127,25 +1127,27 @@
H( J+2, J-1 ) = ZERO H( J+2, J-1 ) = ZERO
END IF END IF
* *
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 230 JC = J, ILASTM DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) ) $ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) ) $ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2 T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE 230 CONTINUE
IF( ILQ ) THEN IF( ILQ ) THEN
DO 240 JR = 1, N DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) ) $ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE 240 CONTINUE
END IF END IF
* *
@ -1233,27 +1235,29 @@
* *
* Apply transformations from the right. * Apply transformations from the right.
* *
T2 = TAU*V(2)
T3 = TAU*V(3)
DO 260 JR = IFRSTM, MIN( J+3, ILAST ) DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) ) $ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE 260 CONTINUE
DO 270 JR = IFRSTM, J + 2 DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) ) $ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE 270 CONTINUE
IF( ILZ ) THEN IF( ILZ ) THEN
DO 280 JR = 1, N DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) ) $ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE 280 CONTINUE
END IF END IF
T( J+1, J ) = ZERO T( J+1, J ) = ZERO

View File

@ -337,9 +337,9 @@
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, $ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, $ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WR2 $ WABS, WI, WR, WR2
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
REAL V( 3 ) REAL V( 3 )
@ -1127,25 +1127,27 @@
H( J+2, J-1 ) = ZERO H( J+2, J-1 ) = ZERO
END IF END IF
* *
T2 = TAU * V( 2 )
T3 = TAU * V( 3 )
DO 230 JC = J, ILASTM DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) ) $ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) ) $ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2 T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE 230 CONTINUE
IF( ILQ ) THEN IF( ILQ ) THEN
DO 240 JR = 1, N DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) ) $ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE 240 CONTINUE
END IF END IF
* *
@ -1233,27 +1235,29 @@
* *
* Apply transformations from the right. * Apply transformations from the right.
* *
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 260 JR = IFRSTM, MIN( J+3, ILAST ) DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) ) $ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE 260 CONTINUE
DO 270 JR = IFRSTM, J + 2 DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) ) $ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE 270 CONTINUE
IF( ILZ ) THEN IF( ILZ ) THEN
DO 280 JR = 1, N DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) ) $ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE 280 CONTINUE
END IF END IF
T( J+1, J ) = ZERO T( J+1, J ) = ZERO