Skip to content

Commit e186ae4

Browse files
committed
Remove manipulation of lambda due to missing guard digit
* Rename dlamda -> dlambda * Remove those calls to lamc3 that are a workaround for old Cray machines * Document the purpose of the remaining lamc3 calls * Update documentation where dlambda has become an input rather than input & output Thanks to @langou and Prof Demmel for investigating the purpose of the lamc3 calls and suggesting a solution.
1 parent ba727de commit e186ae4

File tree

18 files changed

+202
-353
lines changed

18 files changed

+202
-353
lines changed

SRC/claed8.f

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Definition:
1919
* ===========
2020
*
21-
* SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
21+
* SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMBDA,
2222
* Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
2323
* GIVCOL, GIVNUM, INFO )
2424
*
@@ -29,7 +29,7 @@
2929
* .. Array Arguments ..
3030
* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ),
3131
* $ INDXQ( * ), PERM( * )
32-
* REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
32+
* REAL D( * ), DLAMBDA( * ), GIVNUM( 2, * ), W( * ),
3333
* $ Z( * )
3434
* COMPLEX Q( LDQ, * ), Q2( LDQ2, * )
3535
* ..
@@ -122,9 +122,9 @@
122122
*> destroyed during the updating process.
123123
*> \endverbatim
124124
*>
125-
*> \param[out] DLAMDA
125+
*> \param[out] DLAMBDA
126126
*> \verbatim
127-
*> DLAMDA is REAL array, dimension (N)
127+
*> DLAMBDA is REAL array, dimension (N)
128128
*> Contains a copy of the first K eigenvalues which will be used
129129
*> by SLAED3 to form the secular equation.
130130
*> \endverbatim
@@ -222,7 +222,7 @@
222222
*> \ingroup complexOTHERcomputational
223223
*
224224
* =====================================================================
225-
SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
225+
SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMBDA,
226226
$ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
227227
$ GIVCOL, GIVNUM, INFO )
228228
*
@@ -237,7 +237,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
237237
* .. Array Arguments ..
238238
INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ),
239239
$ INDXQ( * ), PERM( * )
240-
REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
240+
REAL D( * ), DLAMBDA( * ), GIVNUM( 2, * ), W( * ),
241241
$ Z( * )
242242
COMPLEX Q( LDQ, * ), Q2( LDQ2, * )
243243
* ..
@@ -322,14 +322,14 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
322322
INDXQ( I ) = INDXQ( I ) + CUTPNT
323323
20 CONTINUE
324324
DO 30 I = 1, N
325-
DLAMDA( I ) = D( INDXQ( I ) )
325+
DLAMBDA( I ) = D( INDXQ( I ) )
326326
W( I ) = Z( INDXQ( I ) )
327327
30 CONTINUE
328328
I = 1
329329
J = CUTPNT + 1
330-
CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
330+
CALL SLAMRG( N1, N2, DLAMBDA, 1, 1, INDX )
331331
DO 40 I = 1, N
332-
D( I ) = DLAMDA( INDX( I ) )
332+
D( I ) = DLAMBDA( INDX( I ) )
333333
Z( I ) = W( INDX( I ) )
334334
40 CONTINUE
335335
*
@@ -438,7 +438,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
438438
ELSE
439439
K = K + 1
440440
W( K ) = Z( JLAM )
441-
DLAMDA( K ) = D( JLAM )
441+
DLAMBDA( K ) = D( JLAM )
442442
INDXP( K ) = JLAM
443443
JLAM = J
444444
END IF
@@ -450,19 +450,19 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
450450
*
451451
K = K + 1
452452
W( K ) = Z( JLAM )
453-
DLAMDA( K ) = D( JLAM )
453+
DLAMBDA( K ) = D( JLAM )
454454
INDXP( K ) = JLAM
455455
*
456456
100 CONTINUE
457457
*
458-
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
458+
* Sort the eigenvalues and corresponding eigenvectors into DLAMBDA
459459
* and Q2 respectively. The eigenvalues/vectors which were not
460-
* deflated go into the first K slots of DLAMDA and Q2 respectively,
460+
* deflated go into the first K slots of DLAMBDA and Q2 respectively,
461461
* while those which were deflated go into the last N - K slots.
462462
*
463463
DO 110 J = 1, N
464464
JP = INDXP( J )
465-
DLAMDA( J ) = D( JP )
465+
DLAMBDA( J ) = D( JP )
466466
PERM( J ) = INDXQ( INDX( JP ) )
467467
CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
468468
110 CONTINUE
@@ -471,7 +471,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
471471
* into the last N - K slots of D and Q respectively.
472472
*
473473
IF( K.LT.N ) THEN
474-
CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
474+
CALL SCOPY( N-K, DLAMBDA( K+1 ), 1, D( K+1 ), 1 )
475475
CALL CLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ),
476476
$ LDQ )
477477
END IF

SRC/clals0.f

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,11 @@ SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
392392
$ ( POLES( I, 2 ).EQ.ZERO ) ) THEN
393393
RWORK( I ) = ZERO
394394
ELSE
395+
*
396+
* Use calls to the subroutine SLAMC3 to enforce the
397+
* parentheses (x+y)+z. The goal is to prevent
398+
* optimizing compilers from doing x+(y+z).
399+
*
395400
RWORK( I ) = POLES( I, 2 )*Z( I ) /
396401
$ ( SLAMC3( POLES( I, 2 ), DSIGJ )-
397402
$ DIFLJ ) / ( POLES( I, 2 )+DJ )
@@ -470,6 +475,11 @@ SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
470475
IF( Z( J ).EQ.ZERO ) THEN
471476
RWORK( I ) = ZERO
472477
ELSE
478+
*
479+
* Use calls to the subroutine SLAMC3 to enforce the
480+
* parentheses (x+y)+z. The goal is to prevent optimizing
481+
* compilers from doing x+(y+z).
482+
*
473483
RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1,
474484
$ 2 ) )-DIFR( I, 1 ) ) /
475485
$ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )

SRC/dlaed2.f

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Definition:
1919
* ===========
2020
*
21-
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
21+
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMBDA, W,
2222
* Q2, INDX, INDXC, INDXP, COLTYP, INFO )
2323
*
2424
* .. Scalar Arguments ..
@@ -28,7 +28,7 @@
2828
* .. Array Arguments ..
2929
* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
3030
* $ INDXQ( * )
31-
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
31+
* DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
3232
* $ W( * ), Z( * )
3333
* ..
3434
*
@@ -123,9 +123,9 @@
123123
*> process.
124124
*> \endverbatim
125125
*>
126-
*> \param[out] DLAMDA
126+
*> \param[out] DLAMBDA
127127
*> \verbatim
128-
*> DLAMDA is DOUBLE PRECISION array, dimension (N)
128+
*> DLAMBDA is DOUBLE PRECISION array, dimension (N)
129129
*> A copy of the first K eigenvalues which will be used by
130130
*> DLAED3 to form the secular equation.
131131
*> \endverbatim
@@ -148,7 +148,7 @@
148148
*> \param[out] INDX
149149
*> \verbatim
150150
*> INDX is INTEGER array, dimension (N)
151-
*> The permutation used to sort the contents of DLAMDA into
151+
*> The permutation used to sort the contents of DLAMBDA into
152152
*> ascending order.
153153
*> \endverbatim
154154
*>
@@ -207,7 +207,7 @@
207207
*> Modified by Francoise Tisseur, University of Tennessee
208208
*>
209209
* =====================================================================
210-
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
210+
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMBDA, W,
211211
$ Q2, INDX, INDXC, INDXP, COLTYP, INFO )
212212
*
213213
* -- LAPACK computational routine --
@@ -221,7 +221,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
221221
* .. Array Arguments ..
222222
INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
223223
$ INDXQ( * )
224-
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
224+
DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
225225
$ W( * ), Z( * )
226226
* ..
227227
*
@@ -300,9 +300,9 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
300300
* re-integrate the deflated parts from the last pass
301301
*
302302
DO 20 I = 1, N
303-
DLAMDA( I ) = D( INDXQ( I ) )
303+
DLAMBDA( I ) = D( INDXQ( I ) )
304304
20 CONTINUE
305-
CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
305+
CALL DLAMRG( N1, N2, DLAMBDA, 1, 1, INDXC )
306306
DO 30 I = 1, N
307307
INDX( I ) = INDXQ( INDXC( I ) )
308308
30 CONTINUE
@@ -324,11 +324,11 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
324324
DO 40 J = 1, N
325325
I = INDX( J )
326326
CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
327-
DLAMDA( J ) = D( I )
327+
DLAMBDA( J ) = D( I )
328328
IQ2 = IQ2 + N
329329
40 CONTINUE
330330
CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
331-
CALL DCOPY( N, DLAMDA, 1, D, 1 )
331+
CALL DCOPY( N, DLAMBDA, 1, D, 1 )
332332
GO TO 190
333333
END IF
334334
*
@@ -421,7 +421,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
421421
PJ = NJ
422422
ELSE
423423
K = K + 1
424-
DLAMDA( K ) = D( PJ )
424+
DLAMBDA( K ) = D( PJ )
425425
W( K ) = Z( PJ )
426426
INDXP( K ) = PJ
427427
PJ = NJ
@@ -433,7 +433,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
433433
* Record the last eigenvalue.
434434
*
435435
K = K + 1
436-
DLAMDA( K ) = D( PJ )
436+
DLAMBDA( K ) = D( PJ )
437437
W( K ) = Z( PJ )
438438
INDXP( K ) = PJ
439439
*
@@ -470,9 +470,9 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
470470
PSM( CT ) = PSM( CT ) + 1
471471
130 CONTINUE
472472
*
473-
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
473+
* Sort the eigenvalues and corresponding eigenvectors into DLAMBDA
474474
* and Q2 respectively. The eigenvalues/vectors which were not
475-
* deflated go into the first K slots of DLAMDA and Q2 respectively,
475+
* deflated go into the first K slots of DLAMBDA and Q2 respectively,
476476
* while those which were deflated go into the last N - K slots.
477477
*
478478
I = 1

SRC/dlaed3.f

Lines changed: 12 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Definition:
1919
* ===========
2020
*
21-
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
21+
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMBDA, Q2, INDX,
2222
* CTOT, W, S, INFO )
2323
*
2424
* .. Scalar Arguments ..
@@ -27,7 +27,7 @@
2727
* ..
2828
* .. Array Arguments ..
2929
* INTEGER CTOT( * ), INDX( * )
30-
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
30+
* DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
3131
* $ S( * ), W( * )
3232
* ..
3333
*
@@ -44,12 +44,6 @@
4444
*> being combined by the matrix of eigenvectors of the K-by-K system
4545
*> which is solved here.
4646
*>
47-
*> This code makes very mild assumptions about floating point
48-
*> arithmetic. It will work on machines with a guard digit in
49-
*> add/subtract, or on those binary machines without guard digits
50-
*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
51-
*> It could conceivably fail on hexadecimal or decimal machines
52-
*> without guard digits, but we know of none.
5347
*> \endverbatim
5448
*
5549
* Arguments:
@@ -104,14 +98,12 @@
10498
*> RHO >= 0 required.
10599
*> \endverbatim
106100
*>
107-
*> \param[in,out] DLAMDA
101+
*> \param[in] DLAMBDA
108102
*> \verbatim
109-
*> DLAMDA is DOUBLE PRECISION array, dimension (K)
103+
*> DLAMBDA is DOUBLE PRECISION array, dimension (K)
110104
*> The first K elements of this array contain the old roots
111105
*> of the deflated updating problem. These are the poles
112-
*> of the secular equation. May be changed on output by
113-
*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
114-
*> Cray-2, or Cray C-90, as described above.
106+
*> of the secular equation.
115107
*> \endverbatim
116108
*>
117109
*> \param[in] Q2
@@ -180,7 +172,7 @@
180172
*> Modified by Francoise Tisseur, University of Tennessee
181173
*>
182174
* =====================================================================
183-
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
175+
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMBDA, Q2, INDX,
184176
$ CTOT, W, S, INFO )
185177
*
186178
* -- LAPACK computational routine --
@@ -193,7 +185,7 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
193185
* ..
194186
* .. Array Arguments ..
195187
INTEGER CTOT( * ), INDX( * )
196-
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
188+
DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
197189
$ S( * ), W( * )
198190
* ..
199191
*
@@ -208,8 +200,8 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
208200
DOUBLE PRECISION TEMP
209201
* ..
210202
* .. External Functions ..
211-
DOUBLE PRECISION DLAMC3, DNRM2
212-
EXTERNAL DLAMC3, DNRM2
203+
DOUBLE PRECISION DNRM2
204+
EXTERNAL DNRM2
213205
* ..
214206
* .. External Subroutines ..
215207
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
@@ -240,29 +232,9 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
240232
IF( K.EQ.0 )
241233
$ RETURN
242234
*
243-
* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
244-
* be computed with high relative accuracy (barring over/underflow).
245-
* This is a problem on machines without a guard digit in
246-
* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
247-
* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
248-
* which on any of these machines zeros out the bottommost
249-
* bit of DLAMDA(I) if it is 1; this makes the subsequent
250-
* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
251-
* occurs. On binary machines with a guard digit (almost all
252-
* machines) it does not change DLAMDA(I) at all. On hexadecimal
253-
* and decimal machines with a guard digit, it slightly
254-
* changes the bottommost bits of DLAMDA(I). It does not account
255-
* for hexadecimal or decimal machines without guard digits
256-
* (we know of none). We use a subroutine call to compute
257-
* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
258-
* this code.
259-
*
260-
DO 10 I = 1, K
261-
DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
262-
10 CONTINUE
263235
*
264236
DO 20 J = 1, K
265-
CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
237+
CALL DLAED4( K, J, DLAMBDA, W, Q( 1, J ), RHO, D( J ), INFO )
266238
*
267239
* If the zero finder fails, the computation is terminated.
268240
*
@@ -293,10 +265,10 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
293265
CALL DCOPY( K, Q, LDQ+1, W, 1 )
294266
DO 60 J = 1, K
295267
DO 40 I = 1, J - 1
296-
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
268+
W( I ) = W( I )*( Q( I, J )/( DLAMBDA( I )-DLAMBDA( J ) ) )
297269
40 CONTINUE
298270
DO 50 I = J + 1, K
299-
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
271+
W( I ) = W( I )*( Q( I, J )/( DLAMBDA( I )-DLAMBDA( J ) ) )
300272
50 CONTINUE
301273
60 CONTINUE
302274
DO 70 I = 1, K

0 commit comments

Comments
 (0)