/* Source: git.maemo.org Git - opencv/blob - 3rdparty/lapack/dlaed2.c */

   1 #include "clapack.h"
   2
   3 /* Table of constant values */
   4
   5 static doublereal c_b3 = -1.;
   6 static integer c__1 = 1;
   7
   8 /* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal *
   9         d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho,
  10         doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2,
  11         integer *indx, integer *indxc, integer *indxp, integer *coltyp,
  12         integer *info)
  13 {
  14     /* System generated locals */
  15     integer q_dim1, q_offset, i__1, i__2;
  16     doublereal d__1, d__2, d__3, d__4;
  17
  18     /* Builtin functions */
  19     double sqrt(doublereal);
  20
  21     /* Local variables */
  22     doublereal c__;
  23     integer i__, j;
  24     doublereal s, t;
  25     integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
  26     doublereal eps, tau, tol;
  27     integer psm[4], imax, jmax;
  28     extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
  29             doublereal *, integer *, doublereal *, doublereal *);
  30     integer ctot[4];
  31     extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
  32             integer *), dcopy_(integer *, doublereal *, integer *, doublereal
  33             *, integer *);
  34     extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *);
  35     extern integer idamax_(integer *, doublereal *, integer *);
  36     extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
  37             integer *, integer *, integer *), dlacpy_(char *, integer *,
  38             integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
  39
  40
  41 /*  -- LAPACK routine (version 3.1) -- */
  42 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  43 /*     November 2006 */
  44
  45 /*     .. Scalar Arguments .. */
  46 /*     .. */
  47 /*     .. Array Arguments .. */
  48 /*     .. */
  49
  50 /*  Purpose */
  51 /*  ======= */
  52
  53 /*  DLAED2 merges the two sets of eigenvalues together into a single */
  54 /*  sorted set.  Then it tries to deflate the size of the problem. */
  55 /*  There are two ways in which deflation can occur:  when two or more */
  56 /*  eigenvalues are close together or if there is a tiny entry in the */
  57 /*  Z vector.  For each such occurrence the order of the related secular */
  58 /*  equation problem is reduced by one. */
  59
  60 /*  Arguments */
  61 /*  ========= */
  62
  63 /*  K      (output) INTEGER */
  64 /*         The number of non-deflated eigenvalues, and the order of the */
  65 /*         related secular equation. 0 <= K <=N. */
  66
  67 /*  N      (input) INTEGER */
  68 /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */
  69
  70 /*  N1     (input) INTEGER */
  71 /*         The location of the last eigenvalue in the leading sub-matrix. */
  72 /*         min(1,N) <= N1 <= N/2. */
  73
  74 /*  D      (input/output) DOUBLE PRECISION array, dimension (N) */
  75 /*         On entry, D contains the eigenvalues of the two submatrices to */
  76 /*         be combined. */
  77 /*         On exit, D contains the trailing (N-K) updated eigenvalues */
  78 /*         (those which were deflated) sorted into increasing order. */
  79
  80 /*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
  81 /*         On entry, Q contains the eigenvectors of two submatrices in */
  82 /*         the two square blocks with corners at (1,1), (N1,N1) */
  83 /*         and (N1+1, N1+1), (N,N). */
  84 /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
  85 /*         (those which were deflated) in its last N-K columns. */
  86
  87 /*  LDQ    (input) INTEGER */
  88 /*         The leading dimension of the array Q.  LDQ >= max(1,N). */
  89
  90 /*  INDXQ  (input/output) INTEGER array, dimension (N) */
  91 /*         The permutation which separately sorts the two sub-problems */
  92 /*         in D into ascending order.  Note that elements in the second */
  93 /*         half of this permutation must first have N1 added to their */
  94 /*         values. Destroyed on exit. */
  95
  96 /*  RHO    (input/output) DOUBLE PRECISION */
  97 /*         On entry, the off-diagonal element associated with the rank-1 */
  98 /*         cut which originally split the two submatrices which are now */
  99 /*         being recombined. */
 100 /*         On exit, RHO has been modified to the value required by */
 101 /*         DLAED3. */
 102
 103 /*  Z      (input) DOUBLE PRECISION array, dimension (N) */
 104 /*         On entry, Z contains the updating vector (the last */
 105 /*         row of the first sub-eigenvector matrix and the first row of */
 106 /*         the second sub-eigenvector matrix). */
 107 /*         On exit, the contents of Z have been destroyed by the updating */
 108 /*         process. */
 109
 110 /*  DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
 111 /*         A copy of the first K eigenvalues which will be used by */
 112 /*         DLAED3 to form the secular equation. */
 113
 114 /*  W      (output) DOUBLE PRECISION array, dimension (N) */
 115 /*         The first k values of the final deflation-altered z-vector */
 116 /*         which will be passed to DLAED3. */
 117
 118 /*  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */
 119 /*         A copy of the first K eigenvectors which will be used by */
 120 /*         DLAED3 in a matrix multiply (DGEMM) to solve for the new */
 121 /*         eigenvectors. */
 122
 123 /*  INDX   (workspace) INTEGER array, dimension (N) */
 124 /*         The permutation used to sort the contents of DLAMDA into */
 125 /*         ascending order. */
 126
 127 /*  INDXC  (output) INTEGER array, dimension (N) */
 128 /*         The permutation used to arrange the columns of the deflated */
 129 /*         Q matrix into three groups:  the first group contains non-zero */
 130 /*         elements only at and above N1, the second contains */
 131 /*         non-zero elements only below N1, and the third is dense. */
 132
 133 /*  INDXP  (workspace) INTEGER array, dimension (N) */
 134 /*         The permutation used to place deflated values of D at the end */
 135 /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
 136 /*         and INDXP(K+1:N) points to the deflated eigenvalues. */
 137
 138 /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
 139 /*         During execution, a label which will indicate which of the */
 140 /*         following types a column in the Q2 matrix is: */
 141 /*         1 : non-zero in the upper half only; */
 142 /*         2 : dense; */
 143 /*         3 : non-zero in the lower half only; */
 144 /*         4 : deflated. */
 145 /*         On exit, COLTYP(i) is the number of columns of type i, */
 146 /*         for i=1 to 4 only. */
 147
 148 /*  INFO   (output) INTEGER */
 149 /*          = 0:  successful exit. */
 150 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 151
 152 /*  Further Details */
 153 /*  =============== */
 154
 155 /*  Based on contributions by */
 156 /*     Jeff Rutter, Computer Science Division, University of California */
 157 /*     at Berkeley, USA */
 158 /*  Modified by Francoise Tisseur, University of Tennessee. */
 159
 160 /*  ===================================================================== */
 161
 162 /*     .. Parameters .. */
 163 /*     .. */
 164 /*     .. Local Arrays .. */
 165 /*     .. */
 166 /*     .. Local Scalars .. */
 167 /*     .. */
 168 /*     .. External Functions .. */
 169 /*     .. */
 170 /*     .. External Subroutines .. */
 171 /*     .. */
 172 /*     .. Intrinsic Functions .. */
 173 /*     .. */
 174 /*     .. Executable Statements .. */
 175
 176 /*     Test the input parameters. */
 177
 178     /* Parameter adjustments */
 179     --d__;
 180     q_dim1 = *ldq;
 181     q_offset = 1 + q_dim1;
 182     q -= q_offset;
 183     --indxq;
 184     --z__;
 185     --dlamda;
 186     --w;
 187     --q2;
 188     --indx;
 189     --indxc;
 190     --indxp;
 191     --coltyp;
 192
 193     /* Function Body */
 194     *info = 0;
 195
 196     if (*n < 0) {
 197         *info = -2;
 198     } else if (*ldq < max(1,*n)) {
 199         *info = -6;
 200     } else /* if(complicated condition) */ {
 201 /* Computing MIN */
 202         i__1 = 1, i__2 = *n / 2;
 203         if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
 204             *info = -3;
 205         }
 206     }
 207     if (*info != 0) {
 208         i__1 = -(*info);
 209         xerbla_("DLAED2", &i__1);
 210         return 0;
 211     }
 212
 213 /*     Quick return if possible */
 214
 215     if (*n == 0) {
 216         return 0;
 217     }
 218
 219     n2 = *n - *n1;
 220     n1p1 = *n1 + 1;
 221
 222     if (*rho < 0.) {
 223         dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
 224     }
 225
 226 /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
 227 /*     two normalized vectors, norm2(z) = sqrt(2). */
 228
 229     t = 1. / sqrt(2.);
 230     dscal_(n, &t, &z__[1], &c__1);
 231
 232 /*     RHO = ABS( norm(z)**2 * RHO ) */
 233
 234     *rho = (d__1 = *rho * 2., abs(d__1));
 235
 236 /*     Sort the eigenvalues into increasing order */
 237
 238     i__1 = *n;
 239     for (i__ = n1p1; i__ <= i__1; ++i__) {
 240         indxq[i__] += *n1;
 241 /* L10: */
 242     }
 243
 244 /*     re-integrate the deflated parts from the last pass */
 245
 246     i__1 = *n;
 247     for (i__ = 1; i__ <= i__1; ++i__) {
 248         dlamda[i__] = d__[indxq[i__]];
 249 /* L20: */
 250     }
 251     dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
 252     i__1 = *n;
 253     for (i__ = 1; i__ <= i__1; ++i__) {
 254         indx[i__] = indxq[indxc[i__]];
 255 /* L30: */
 256     }
 257
 258 /*     Calculate the allowable deflation tolerance */
 259
 260     imax = idamax_(n, &z__[1], &c__1);
 261     jmax = idamax_(n, &d__[1], &c__1);
 262     eps = dlamch_("Epsilon");
 263 /* Computing MAX */
 264     d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2))
 265             ;
 266     tol = eps * 8. * max(d__3,d__4);
 267
 268 /*     If the rank-1 modifier is small enough, no more needs to be done */
 269 /*     except to reorganize Q so that its columns correspond with the */
 270 /*     elements in D. */
 271
 272     if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
 273         *k = 0;
 274         iq2 = 1;
 275         i__1 = *n;
 276         for (j = 1; j <= i__1; ++j) {
 277             i__ = indx[j];
 278             dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
 279             dlamda[j] = d__[i__];
 280             iq2 += *n;
 281 /* L40: */
 282         }
 283         dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
 284         dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
 285         goto L190;
 286     }
 287
 288 /*     If there are multiple eigenvalues then the problem deflates.  Here */
 289 /*     the number of equal eigenvalues are found.  As each equal */
 290 /*     eigenvalue is found, an elementary reflector is computed to rotate */
 291 /*     the corresponding eigensubspace so that the corresponding */
 292 /*     components of Z are zero in this new basis. */
 293
 294     i__1 = *n1;
 295     for (i__ = 1; i__ <= i__1; ++i__) {
 296         coltyp[i__] = 1;
 297 /* L50: */
 298     }
 299     i__1 = *n;
 300     for (i__ = n1p1; i__ <= i__1; ++i__) {
 301         coltyp[i__] = 3;
 302 /* L60: */
 303     }
 304
 305
 306     *k = 0;
 307     k2 = *n + 1;
 308     i__1 = *n;
 309     for (j = 1; j <= i__1; ++j) {
 310         nj = indx[j];
 311         if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
 312
 313 /*           Deflate due to small z component. */
 314
 315             --k2;
 316             coltyp[nj] = 4;
 317             indxp[k2] = nj;
 318             if (j == *n) {
 319                 goto L100;
 320             }
 321         } else {
 322             pj = nj;
 323             goto L80;
 324         }
 325 /* L70: */
 326     }
 327 L80:
 328     ++j;
 329     nj = indx[j];
 330     if (j > *n) {
 331         goto L100;
 332     }
 333     if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
 334
 335 /*        Deflate due to small z component. */
 336
 337         --k2;
 338         coltyp[nj] = 4;
 339         indxp[k2] = nj;
 340     } else {
 341
 342 /*        Check if eigenvalues are close enough to allow deflation. */
 343
 344         s = z__[pj];
 345         c__ = z__[nj];
 346
 347 /*        Find sqrt(a**2+b**2) without overflow or */
 348 /*        destructive underflow. */
 349
 350         tau = dlapy2_(&c__, &s);
 351         t = d__[nj] - d__[pj];
 352         c__ /= tau;
 353         s = -s / tau;
 354         if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
 355
 356 /*           Deflation is possible. */
 357
 358             z__[nj] = tau;
 359             z__[pj] = 0.;
 360             if (coltyp[nj] != coltyp[pj]) {
 361                 coltyp[nj] = 2;
 362             }
 363             coltyp[pj] = 4;
 364             drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
 365                     c__, &s);
 366 /* Computing 2nd power */
 367             d__1 = c__;
 368 /* Computing 2nd power */
 369             d__2 = s;
 370             t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
 371 /* Computing 2nd power */
 372             d__1 = s;
 373 /* Computing 2nd power */
 374             d__2 = c__;
 375             d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
 376             d__[pj] = t;
 377             --k2;
 378             i__ = 1;
 379 L90:
 380             if (k2 + i__ <= *n) {
 381                 if (d__[pj] < d__[indxp[k2 + i__]]) {
 382                     indxp[k2 + i__ - 1] = indxp[k2 + i__];
 383                     indxp[k2 + i__] = pj;
 384                     ++i__;
 385                     goto L90;
 386                 } else {
 387                     indxp[k2 + i__ - 1] = pj;
 388                 }
 389             } else {
 390                 indxp[k2 + i__ - 1] = pj;
 391             }
 392             pj = nj;
 393         } else {
 394             ++(*k);
 395             dlamda[*k] = d__[pj];
 396             w[*k] = z__[pj];
 397             indxp[*k] = pj;
 398             pj = nj;
 399         }
 400     }
 401     goto L80;
 402 L100:
 403
 404 /*     Record the last eigenvalue. */
 405
 406     ++(*k);
 407     dlamda[*k] = d__[pj];
 408     w[*k] = z__[pj];
 409     indxp[*k] = pj;
 410
 411 /*     Count up the total number of the various types of columns, then */
 412 /*     form a permutation which positions the four column types into */
 413 /*     four uniform groups (although one or more of these groups may be */
 414 /*     empty). */
 415
 416     for (j = 1; j <= 4; ++j) {
 417         ctot[j - 1] = 0;
 418 /* L110: */
 419     }
 420     i__1 = *n;
 421     for (j = 1; j <= i__1; ++j) {
 422         ct = coltyp[j];
 423         ++ctot[ct - 1];
 424 /* L120: */
 425     }
 426
 427 /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
 428
 429     psm[0] = 1;
 430     psm[1] = ctot[0] + 1;
 431     psm[2] = psm[1] + ctot[1];
 432     psm[3] = psm[2] + ctot[2];
 433     *k = *n - ctot[3];
 434
 435 /*     Fill out the INDXC array so that the permutation which it induces */
 436 /*     will place all type-1 columns first, all type-2 columns next, */
 437 /*     then all type-3's, and finally all type-4's. */
 438
 439     i__1 = *n;
 440     for (j = 1; j <= i__1; ++j) {
 441         js = indxp[j];
 442         ct = coltyp[js];
 443         indx[psm[ct - 1]] = js;
 444         indxc[psm[ct - 1]] = j;
 445         ++psm[ct - 1];
 446 /* L130: */
 447     }
 448
 449 /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
 450 /*     and Q2 respectively.  The eigenvalues/vectors which were not */
 451 /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
 452 /*     while those which were deflated go into the last N - K slots. */
 453
 454     i__ = 1;
 455     iq1 = 1;
 456     iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
 457     i__1 = ctot[0];
 458     for (j = 1; j <= i__1; ++j) {
 459         js = indx[i__];
 460         dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
 461         z__[i__] = d__[js];
 462         ++i__;
 463         iq1 += *n1;
 464 /* L140: */
 465     }
 466
 467     i__1 = ctot[1];
 468     for (j = 1; j <= i__1; ++j) {
 469         js = indx[i__];
 470         dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
 471         dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
 472         z__[i__] = d__[js];
 473         ++i__;
 474         iq1 += *n1;
 475         iq2 += n2;
 476 /* L150: */
 477     }
 478
 479     i__1 = ctot[2];
 480     for (j = 1; j <= i__1; ++j) {
 481         js = indx[i__];
 482         dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
 483         z__[i__] = d__[js];
 484         ++i__;
 485         iq2 += n2;
 486 /* L160: */
 487     }
 488
 489     iq1 = iq2;
 490     i__1 = ctot[3];
 491     for (j = 1; j <= i__1; ++j) {
 492         js = indx[i__];
 493         dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
 494         iq2 += *n;
 495         z__[i__] = d__[js];
 496         ++i__;
 497 /* L170: */
 498     }
 499
 500 /*     The deflated eigenvalues and their corresponding vectors go back */
 501 /*     into the last N - K slots of D and Q respectively. */
 502
 503     dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
 504     i__1 = *n - *k;
 505     dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
 506
 507 /*     Copy CTOT into COLTYP for referencing in DLAED3. */
 508
 509     for (j = 1; j <= 4; ++j) {
 510         coltyp[j] = ctot[j - 1];
 511 /* L180: */
 512     }
 513
 514 L190:
 515     return 0;
 516
 517 /*     End of DLAED2 */
 518
 519 } /* dlaed2_ */