Rewrite to use FMA with Householder reflectors
This commit is contained in:
parent
e1c3c34178
commit
2a83ec1f79
|
@ -337,9 +337,9 @@
|
||||||
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
|
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
|
||||||
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
|
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
|
||||||
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
|
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
|
||||||
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
|
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
|
||||||
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
|
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
|
||||||
$ WR2
|
$ WABS, WI, WR, WR2
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
DOUBLE PRECISION V( 3 )
|
DOUBLE PRECISION V( 3 )
|
||||||
|
@ -1127,25 +1127,27 @@
|
||||||
H( J+2, J-1 ) = ZERO
|
H( J+2, J-1 ) = ZERO
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
|
T2 = TAU*V( 2 )
|
||||||
|
T3 = TAU*V( 3 )
|
||||||
DO 230 JC = J, ILASTM
|
DO 230 JC = J, ILASTM
|
||||||
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
|
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
|
||||||
$ H( J+2, JC ) )
|
$ H( J+2, JC )
|
||||||
H( J, JC ) = H( J, JC ) - TEMP
|
H( J, JC ) = H( J, JC ) - TEMP*TAU
|
||||||
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
|
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
|
||||||
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
|
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
|
||||||
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
|
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
|
||||||
$ T( J+2, JC ) )
|
$ T( J+2, JC )
|
||||||
T( J, JC ) = T( J, JC ) - TEMP2
|
T( J, JC ) = T( J, JC ) - TEMP2*TAU
|
||||||
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
|
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
|
||||||
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
|
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
|
||||||
230 CONTINUE
|
230 CONTINUE
|
||||||
IF( ILQ ) THEN
|
IF( ILQ ) THEN
|
||||||
DO 240 JR = 1, N
|
DO 240 JR = 1, N
|
||||||
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
|
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
|
||||||
$ Q( JR, J+2 ) )
|
$ Q( JR, J+2 )
|
||||||
Q( JR, J ) = Q( JR, J ) - TEMP
|
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
|
||||||
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
|
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
|
||||||
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
|
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
|
||||||
240 CONTINUE
|
240 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
|
@ -1233,27 +1235,29 @@
|
||||||
*
|
*
|
||||||
* Apply transformations from the right.
|
* Apply transformations from the right.
|
||||||
*
|
*
|
||||||
|
T2 = TAU*V(2)
|
||||||
|
T3 = TAU*V(3)
|
||||||
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
|
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
|
||||||
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
|
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
|
||||||
$ H( JR, J+2 ) )
|
$ H( JR, J+2 )
|
||||||
H( JR, J ) = H( JR, J ) - TEMP
|
H( JR, J ) = H( JR, J ) - TEMP*TAU
|
||||||
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
|
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
|
||||||
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
|
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
|
||||||
260 CONTINUE
|
260 CONTINUE
|
||||||
DO 270 JR = IFRSTM, J + 2
|
DO 270 JR = IFRSTM, J + 2
|
||||||
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
|
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
|
||||||
$ T( JR, J+2 ) )
|
$ T( JR, J+2 )
|
||||||
T( JR, J ) = T( JR, J ) - TEMP
|
T( JR, J ) = T( JR, J ) - TEMP*TAU
|
||||||
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
|
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
|
||||||
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
|
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
|
||||||
270 CONTINUE
|
270 CONTINUE
|
||||||
IF( ILZ ) THEN
|
IF( ILZ ) THEN
|
||||||
DO 280 JR = 1, N
|
DO 280 JR = 1, N
|
||||||
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
|
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
|
||||||
$ Z( JR, J+2 ) )
|
$ Z( JR, J+2 )
|
||||||
Z( JR, J ) = Z( JR, J ) - TEMP
|
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
|
||||||
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
|
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
|
||||||
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
|
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
|
||||||
280 CONTINUE
|
280 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
T( J+1, J ) = ZERO
|
T( J+1, J ) = ZERO
|
||||||
|
|
|
@ -337,9 +337,9 @@
|
||||||
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
|
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
|
||||||
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
|
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
|
||||||
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
|
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
|
||||||
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
|
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
|
||||||
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
|
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
|
||||||
$ WR2
|
$ WABS, WI, WR, WR2
|
||||||
* ..
|
* ..
|
||||||
* .. Local Arrays ..
|
* .. Local Arrays ..
|
||||||
REAL V( 3 )
|
REAL V( 3 )
|
||||||
|
@ -1127,25 +1127,27 @@
|
||||||
H( J+2, J-1 ) = ZERO
|
H( J+2, J-1 ) = ZERO
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
|
T2 = TAU * V( 2 )
|
||||||
|
T3 = TAU * V( 3 )
|
||||||
DO 230 JC = J, ILASTM
|
DO 230 JC = J, ILASTM
|
||||||
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
|
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
|
||||||
$ H( J+2, JC ) )
|
$ H( J+2, JC )
|
||||||
H( J, JC ) = H( J, JC ) - TEMP
|
H( J, JC ) = H( J, JC ) - TEMP*TAU
|
||||||
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
|
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
|
||||||
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
|
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
|
||||||
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
|
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
|
||||||
$ T( J+2, JC ) )
|
$ T( J+2, JC )
|
||||||
T( J, JC ) = T( J, JC ) - TEMP2
|
T( J, JC ) = T( J, JC ) - TEMP2*TAU
|
||||||
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
|
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
|
||||||
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
|
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
|
||||||
230 CONTINUE
|
230 CONTINUE
|
||||||
IF( ILQ ) THEN
|
IF( ILQ ) THEN
|
||||||
DO 240 JR = 1, N
|
DO 240 JR = 1, N
|
||||||
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
|
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
|
||||||
$ Q( JR, J+2 ) )
|
$ Q( JR, J+2 )
|
||||||
Q( JR, J ) = Q( JR, J ) - TEMP
|
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
|
||||||
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
|
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
|
||||||
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
|
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
|
||||||
240 CONTINUE
|
240 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
|
@ -1233,27 +1235,29 @@
|
||||||
*
|
*
|
||||||
* Apply transformations from the right.
|
* Apply transformations from the right.
|
||||||
*
|
*
|
||||||
|
T2 = TAU*V( 2 )
|
||||||
|
T3 = TAU*V( 3 )
|
||||||
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
|
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
|
||||||
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
|
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
|
||||||
$ H( JR, J+2 ) )
|
$ H( JR, J+2 )
|
||||||
H( JR, J ) = H( JR, J ) - TEMP
|
H( JR, J ) = H( JR, J ) - TEMP*TAU
|
||||||
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
|
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
|
||||||
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
|
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
|
||||||
260 CONTINUE
|
260 CONTINUE
|
||||||
DO 270 JR = IFRSTM, J + 2
|
DO 270 JR = IFRSTM, J + 2
|
||||||
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
|
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
|
||||||
$ T( JR, J+2 ) )
|
$ T( JR, J+2 )
|
||||||
T( JR, J ) = T( JR, J ) - TEMP
|
T( JR, J ) = T( JR, J ) - TEMP*TAU
|
||||||
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
|
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
|
||||||
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
|
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
|
||||||
270 CONTINUE
|
270 CONTINUE
|
||||||
IF( ILZ ) THEN
|
IF( ILZ ) THEN
|
||||||
DO 280 JR = 1, N
|
DO 280 JR = 1, N
|
||||||
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
|
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
|
||||||
$ Z( JR, J+2 ) )
|
$ Z( JR, J+2 )
|
||||||
Z( JR, J ) = Z( JR, J ) - TEMP
|
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
|
||||||
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
|
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
|
||||||
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
|
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
|
||||||
280 CONTINUE
|
280 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
T( J+1, J ) = ZERO
|
T( J+1, J ) = ZERO
|
||||||
|
|
Loading…
Reference in New Issue