3 /* Table of constant values */
5 static integer c__1 = 1;
6 static real c_b30 = 0.f;
8 /* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer
9 *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer *
10 ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2,
11 real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc,
12 integer *idxq, integer *coltyp, integer *info)
14 /* System generated locals */
15 integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset,
16 vt2_dim1, vt2_offset, i__1;
27 integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
28 extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
29 integer *, real *, real *);
31 extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
33 extern doublereal slapy2_(real *, real *), slamch_(char *);
34 extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
35 integer *, integer *, real *, integer *, integer *, integer *);
37 extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
38 integer *, real *, integer *), slaset_(char *, integer *,
39 integer *, real *, real *, real *, integer *);
42 /* -- LAPACK auxiliary routine (version 3.1) -- */
43 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
46 /* .. Scalar Arguments .. */
48 /* .. Array Arguments .. */
54 /* SLASD2 merges the two sets of singular values together into a single */
55 /* sorted set. Then it tries to deflate the size of the problem. */
56 /* There are two ways in which deflation can occur: when two or more */
57 /* singular values are close together or if there is a tiny entry in the */
58 /* Z vector. For each such occurrence the order of the related secular */
59 /* equation problem is reduced by one. */
61 /* SLASD2 is called from SLASD1. */
66 /* NL (input) INTEGER */
67 /* The row dimension of the upper block. NL >= 1. */
69 /* NR (input) INTEGER */
70 /* The row dimension of the lower block. NR >= 1. */
72 /* SQRE (input) INTEGER */
73 /* = 0: the lower block is an NR-by-NR square matrix. */
74 /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
76 /* The bidiagonal matrix has N = NL + NR + 1 rows and */
77 /* M = N + SQRE >= N columns. */
79 /* K (output) INTEGER */
80 /* Contains the dimension of the non-deflated matrix, */
81 /* This is the order of the related secular equation. 1 <= K <=N. */
83 /* D (input/output) REAL array, dimension (N) */
84 /* On entry D contains the singular values of the two submatrices */
85 /* to be combined. On exit D contains the trailing (N-K) updated */
86 /* singular values (those which were deflated) sorted into */
87 /* increasing order. */
89 /* Z (output) REAL array, dimension (N) */
90 /* On exit Z contains the updating row vector in the secular */
93 /* ALPHA (input) REAL */
94 /* Contains the diagonal element associated with the added row. */
96 /* BETA (input) REAL */
97 /* Contains the off-diagonal element associated with the added */
100 /* U (input/output) REAL array, dimension (LDU,N) */
101 /* On entry U contains the left singular vectors of two */
102 /* submatrices in the two square blocks with corners at (1,1), */
103 /* (NL, NL), and (NL+2, NL+2), (N,N). */
104 /* On exit U contains the trailing (N-K) updated left singular */
105 /* vectors (those which were deflated) in its last N-K columns. */
107 /* LDU (input) INTEGER */
108 /* The leading dimension of the array U. LDU >= N. */
110 /* VT (input/output) REAL array, dimension (LDVT,M) */
111 /* On entry VT' contains the right singular vectors of two */
112 /* submatrices in the two square blocks with corners at (1,1), */
113 /* (NL+1, NL+1), and (NL+2, NL+2), (M,M). */
114 /* On exit VT' contains the trailing (N-K) updated right singular */
115 /* vectors (those which were deflated) in its last N-K columns. */
116 /* In case SQRE =1, the last row of VT spans the right null */
119 /* LDVT (input) INTEGER */
120 /* The leading dimension of the array VT. LDVT >= M. */
122 /* DSIGMA (output) REAL array, dimension (N) */
123 /* Contains a copy of the diagonal elements (K-1 singular values */
124 /* and one zero) in the secular equation. */
126 /* U2 (output) REAL array, dimension (LDU2,N) */
127 /* Contains a copy of the first K-1 left singular vectors which */
128 /* will be used by SLASD3 in a matrix multiply (SGEMM) to solve */
129 /* for the new left singular vectors. U2 is arranged into four */
130 /* blocks. The first block contains a column with 1 at NL+1 and */
131 /* zero everywhere else; the second block contains non-zero */
132 /* entries only at and above NL; the third contains non-zero */
133 /* entries only below NL+1; and the fourth is dense. */
135 /* LDU2 (input) INTEGER */
136 /* The leading dimension of the array U2. LDU2 >= N. */
138 /* VT2 (output) REAL array, dimension (LDVT2,N) */
139 /* VT2' contains a copy of the first K right singular vectors */
140 /* which will be used by SLASD3 in a matrix multiply (SGEMM) to */
141 /* solve for the new right singular vectors. VT2 is arranged into */
142 /* three blocks. The first block contains a row that corresponds */
143 /* to the special 0 diagonal element in SIGMA; the second block */
144 /* contains non-zeros only at and before NL +1; the third block */
145 /* contains non-zeros only at and after NL +2. */
147 /* LDVT2 (input) INTEGER */
148 /* The leading dimension of the array VT2. LDVT2 >= M. */
150 /* IDXP (workspace) INTEGER array, dimension (N) */
151 /* This will contain the permutation used to place deflated */
152 /* values of D at the end of the array. On output IDXP(2:K) */
153 /* points to the nondeflated D-values and IDXP(K+1:N) */
154 /* points to the deflated singular values. */
156 /* IDX (workspace) INTEGER array, dimension (N) */
157 /* This will contain the permutation used to sort the contents of */
158 /* D into ascending order. */
160 /* IDXC (output) INTEGER array, dimension (N) */
161 /* This will contain the permutation used to arrange the columns */
162 /* of the deflated U matrix into three groups: the first group */
163 /* contains non-zero entries only at and above NL, the second */
164 /* contains non-zero entries only below NL+2, and the third is */
167 /* IDXQ (input/output) INTEGER array, dimension (N) */
168 /* This contains the permutation which separately sorts the two */
169 /* sub-problems in D into ascending order. Note that entries in */
170 /* the first hlaf of this permutation must first be moved one */
171 /* position backward; and entries in the second half */
172 /* must first have NL+1 added to their values. */
174 /* COLTYP (workspace/output) INTEGER array, dimension (N) */
175 /* As workspace, this will contain a label which will indicate */
176 /* which of the following types a column in the U2 matrix or a */
177 /* row in the VT2 matrix is: */
178 /* 1 : non-zero in the upper half only */
179 /* 2 : non-zero in the lower half only */
183 /* On exit, it is an array of dimension 4, with COLTYP(I) being */
184 /* the dimension of the I-th type columns. */
186 /* INFO (output) INTEGER */
187 /* = 0: successful exit. */
188 /* < 0: if INFO = -i, the i-th argument had an illegal value. */
190 /* Further Details */
191 /* =============== */
193 /* Based on contributions by */
194 /* Ming Gu and Huan Ren, Computer Science Division, University of */
195 /* California at Berkeley, USA */
197 /* ===================================================================== */
199 /* .. Parameters .. */
201 /* .. Local Arrays .. */
203 /* .. Local Scalars .. */
205 /* .. External Functions .. */
207 /* .. External Subroutines .. */
209 /* .. Intrinsic Functions .. */
211 /* .. Executable Statements .. */
213 /* Test the input parameters. */
215 /* Parameter adjustments */
219 u_offset = 1 + u_dim1;
222 vt_offset = 1 + vt_dim1;
226 u2_offset = 1 + u2_dim1;
229 vt2_offset = 1 + vt2_dim1;
242 } else if (*nr < 1) {
244 } else if (*sqre != 1 && *sqre != 0) {
253 } else if (*ldvt < m) {
255 } else if (*ldu2 < n) {
257 } else if (*ldvt2 < m) {
262 xerbla_("SLASD2", &i__1);
269 /* Generate the first part of the vector Z; and move the singular */
270 /* values in the first part of D one position backward. */
272 z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
274 for (i__ = *nl; i__ >= 1; --i__) {
275 z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
276 d__[i__ + 1] = d__[i__];
277 idxq[i__ + 1] = idxq[i__] + 1;
281 /* Generate the second part of the vector Z. */
284 for (i__ = nlp2; i__ <= i__1; ++i__) {
285 z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
289 /* Initialize some reference arrays. */
292 for (i__ = 2; i__ <= i__1; ++i__) {
297 for (i__ = nlp2; i__ <= i__1; ++i__) {
302 /* Sort the singular values into increasing order */
305 for (i__ = nlp2; i__ <= i__1; ++i__) {
310 /* DSIGMA, IDXC, IDXC, and the first column of U2 */
311 /* are used as storage space. */
314 for (i__ = 2; i__ <= i__1; ++i__) {
315 dsigma[i__] = d__[idxq[i__]];
316 u2[i__ + u2_dim1] = z__[idxq[i__]];
317 idxc[i__] = coltyp[idxq[i__]];
321 slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
324 for (i__ = 2; i__ <= i__1; ++i__) {
326 d__[i__] = dsigma[idxi];
327 z__[i__] = u2[idxi + u2_dim1];
328 coltyp[i__] = idxc[idxi];
332 /* Calculate the allowable deflation tolerance */
334 eps = slamch_("Epsilon");
336 r__1 = dabs(*alpha), r__2 = dabs(*beta);
337 tol = dmax(r__1,r__2);
339 r__2 = (r__1 = d__[n], dabs(r__1));
340 tol = eps * 8.f * dmax(r__2,tol);
342 /* There are 2 kinds of deflation -- first a value in the z-vector */
343 /* is small, second two (or more) singular values are very close */
344 /* together (their difference is small). */
346 /* If the value in the z-vector is small, we simply permute the */
347 /* array so that the corresponding singular value is moved to the */
350 /* If two values in the D-vector are close, we perform a two-sided */
351 /* rotation designed to make one of the corresponding z-vector */
352 /* entries zero, and then permute the array so that the deflated */
353 /* singular value is moved to the end. */
355 /* If there are multiple singular values then the problem deflates. */
356 /* Here the number of equal singular values are found. As each equal */
357 /* singular value is found, an elementary reflector is computed to */
358 /* rotate the corresponding singular subspace so that the */
359 /* corresponding components of Z are zero in this new basis. */
364 for (j = 2; j <= i__1; ++j) {
365 if ((r__1 = z__[j], dabs(r__1)) <= tol) {
367 /* Deflate due to small z component. */
388 if ((r__1 = z__[j], dabs(r__1)) <= tol) {
390 /* Deflate due to small z component. */
397 /* Check if singular values are close enough to allow deflation. */
399 if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) {
401 /* Deflation is possible. */
406 /* Find sqrt(a**2+b**2) without overflow or */
407 /* destructive underflow. */
409 tau = slapy2_(&c__, &s);
415 /* Apply back the Givens rotation to the left and right */
416 /* singular vector matrices. */
418 idxjp = idxq[idx[jprev] + 1];
419 idxj = idxq[idx[j] + 1];
426 srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
428 srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
430 if (coltyp[j] != coltyp[jprev]) {
439 u2[*k + u2_dim1] = z__[jprev];
440 dsigma[*k] = d__[jprev];
448 /* Record the last singular value. */
451 u2[*k + u2_dim1] = z__[jprev];
452 dsigma[*k] = d__[jprev];
457 /* Count up the total number of the various types of columns, then */
458 /* form a permutation which positions the four column types into */
459 /* four groups of uniform structure (although one or more of these */
460 /* groups may be empty). */
462 for (j = 1; j <= 4; ++j) {
467 for (j = 2; j <= i__1; ++j) {
473 /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
476 psm[1] = ctot[0] + 2;
477 psm[2] = psm[1] + ctot[1];
478 psm[3] = psm[2] + ctot[2];
480 /* Fill out the IDXC array so that the permutation which it induces */
481 /* will place all type-1 columns first, all type-2 columns next, */
482 /* then all type-3's, and finally all type-4's, starting from the */
483 /* second column. This applies similarly to the rows of VT. */
486 for (j = 2; j <= i__1; ++j) {
489 idxc[psm[ct - 1]] = j;
494 /* Sort the singular values and corresponding singular vectors into */
495 /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */
496 /* which were not deflated go into the first K slots of DSIGMA, U2, */
497 /* and VT2 respectively, while those which were deflated go into the */
498 /* last N - K slots, except that the first column/row will be treated */
502 for (j = 2; j <= i__1; ++j) {
505 idxj = idxq[idx[idxp[idxc[j]]] + 1];
509 scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
510 scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
514 /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */
518 if (dabs(dsigma[2]) <= hlftol) {
522 z__[1] = slapy2_(&z1, &z__[m]);
532 if (dabs(z1) <= tol) {
539 /* Move the rest of the updating row to Z. */
542 scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
544 /* Determine the first column of U2, the first row of VT2 and the */
545 /* last row of VT. */
547 slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
548 u2[nlp1 + u2_dim1] = 1.f;
551 for (i__ = 1; i__ <= i__1; ++i__) {
552 vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
553 vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
557 for (i__ = nlp2; i__ <= i__1; ++i__) {
558 vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
559 vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
563 scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
566 scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
569 /* The deflated singular values and their corresponding vectors go */
570 /* into the back of D, U, and V respectively. */
574 scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
576 slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
579 slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 +
583 /* Copy CTOT into COLTYP for referencing in SLASD3. */
585 for (j = 1; j <= 4; ++j) {
586 coltyp[j] = ctot[j - 1];