Import packing improvements in LAPACK xLAQR from Reference-LAPACK PR 480+535

This commit is contained in:
Martin Kroeker
2021-04-30 13:50:55 +02:00
committed by GitHub
parent 3a30c12019
commit 87d2e314db
16 changed files with 1238 additions and 1641 deletions

View File

@@ -277,7 +277,7 @@
* . SLAHQR because of insufficient subdiagonal scratch space.
* . (This is a hard limit.) ====
INTEGER NTINY
PARAMETER ( NTINY = 11 )
PARAMETER ( NTINY = 15 )
*
* ==== Exceptional deflation windows: try to cure rare
* . slow convergence by varying the size of the
@@ -361,22 +361,22 @@
END IF
*
* ==== NWR = recommended deflation window size. At this
* . point, N .GT. NTINY = 11, so there is enough
* . point, N .GT. NTINY = 15, so there is enough
* . subdiagonal workspace for NWR.GE.2 as required.
* . (In fact, there is enough subdiagonal space for
* . NWR.GE.3.) ====
* . NWR.GE.4.) ====
*
NWR = ILAENV( 13, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK )
NWR = MAX( 2, NWR )
NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR )
*
* ==== NSR = recommended number of simultaneous shifts.
* . At this point N .GT. NTINY = 11, so there is at
* . At this point N .GT. NTINY = 15, so there is
* . enough subdiagonal workspace for NSR to be even
* . and greater than or equal to two as required. ====
*
NSR = ILAENV( 15, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK )
NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO )
NSR = MIN( NSR, ( N-3 ) / 6, IHI-ILO )
NSR = MAX( 2, NSR-MOD( NSR, 2 ) )
*
* ==== Estimate optimal workspace ====
@@ -424,7 +424,7 @@
* ==== NSMAX = the largest number of simultaneous shifts
* . for which there is sufficient workspace. ====
*
NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 )
NSMAX = MIN( ( N-3 ) / 6, 2*LWORK / 3 )
NSMAX = NSMAX - MOD( NSMAX, 2 )
*
* ==== NDFL: an iteration count restarted at deflation. ====
@@ -575,7 +575,7 @@
*
* ==== Got NS/2 or fewer shifts? Use SLAQR4 or
* . SLAHQR on a trailing principal submatrix to
* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9,
* . get more. (Since NS.LE.NSMAX.LE.(N-3)/6,
* . there is enough space below the subdiagonal
* . to fit an NS-by-NS scratch array.) ====
*
@@ -697,7 +697,7 @@
* . (NVE-by-KDU) vertical work array WV along
* . the left-hand-edge. ====
*
KDU = 3*NS - 3
KDU = 2*NS
KU = N - KDU + 1
KWH = KDU + 1
NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1