git.maemo.org Git - opencv/blob - 3rdparty/lapack/slaed2.c

   1 #include "clapack.h"
   2
   3 /* Table of constant values */
   4
   5 static real c_b3 = -1.f;
   6 static integer c__1 = 1;
   7
   8 /* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__,
   9         real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *
  10         dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *
  11         indxp, integer *coltyp, integer *info)
  12 {
  13     /* System generated locals */
  14     integer q_dim1, q_offset, i__1, i__2;
  15     real r__1, r__2, r__3, r__4;
  16
  17     /* Builtin functions */
  18     double sqrt(doublereal);
  19
  20     /* Local variables */
  21     real c__;
  22     integer i__, j;
  23     real s, t;
  24     integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
  25     real eps, tau, tol;
  26     integer psm[4], imax, jmax, ctot[4];
  27     extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
  28             integer *, real *, real *), sscal_(integer *, real *, real *,
  29             integer *), scopy_(integer *, real *, integer *, real *, integer *
  30 );
  31     extern doublereal slapy2_(real *, real *), slamch_(char *);
  32     extern /* Subroutine */ int xerbla_(char *, integer *);
  33     extern integer isamax_(integer *, real *, integer *);
  34     extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
  35             *, integer *, integer *), slacpy_(char *, integer *, integer *,
  36             real *, integer *, real *, integer *);
  37
  38
  39 /*  -- LAPACK routine (version 3.1) -- */
  40 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  41 /*     November 2006 */
  42
  43 /*     .. Scalar Arguments .. */
  44 /*     .. */
  45 /*     .. Array Arguments .. */
  46 /*     .. */
  47
  48 /*  Purpose */
  49 /*  ======= */
  50
  51 /*  SLAED2 merges the two sets of eigenvalues together into a single */
  52 /*  sorted set.  Then it tries to deflate the size of the problem. */
  53 /*  There are two ways in which deflation can occur:  when two or more */
  54 /*  eigenvalues are close together or if there is a tiny entry in the */
  55 /*  Z vector.  For each such occurrence the order of the related secular */
  56 /*  equation problem is reduced by one. */
  57
  58 /*  Arguments */
  59 /*  ========= */
  60
  61 /*  K      (output) INTEGER */
  62 /*         The number of non-deflated eigenvalues, and the order of the */
  63 /*         related secular equation. 0 <= K <=N. */
  64
  65 /*  N      (input) INTEGER */
  66 /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */
  67
  68 /*  N1     (input) INTEGER */
  69 /*         The location of the last eigenvalue in the leading sub-matrix. */
  70 /*         min(1,N) <= N1 <= N/2. */
  71
  72 /*  D      (input/output) REAL array, dimension (N) */
  73 /*         On entry, D contains the eigenvalues of the two submatrices to */
  74 /*         be combined. */
  75 /*         On exit, D contains the trailing (N-K) updated eigenvalues */
  76 /*         (those which were deflated) sorted into increasing order. */
  77
  78 /*  Q      (input/output) REAL array, dimension (LDQ, N) */
  79 /*         On entry, Q contains the eigenvectors of two submatrices in */
  80 /*         the two square blocks with corners at (1,1), (N1,N1) */
  81 /*         and (N1+1, N1+1), (N,N). */
  82 /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
  83 /*         (those which were deflated) in its last N-K columns. */
  84
  85 /*  LDQ    (input) INTEGER */
  86 /*         The leading dimension of the array Q.  LDQ >= max(1,N). */
  87
  88 /*  INDXQ  (input/output) INTEGER array, dimension (N) */
  89 /*         The permutation which separately sorts the two sub-problems */
  90 /*         in D into ascending order.  Note that elements in the second */
  91 /*         half of this permutation must first have N1 added to their */
  92 /*         values. Destroyed on exit. */
  93
  94 /*  RHO    (input/output) REAL */
  95 /*         On entry, the off-diagonal element associated with the rank-1 */
  96 /*         cut which originally split the two submatrices which are now */
  97 /*         being recombined. */
  98 /*         On exit, RHO has been modified to the value required by */
  99 /*         SLAED3. */
 100
 101 /*  Z      (input) REAL array, dimension (N) */
 102 /*         On entry, Z contains the updating vector (the last */
 103 /*         row of the first sub-eigenvector matrix and the first row of */
 104 /*         the second sub-eigenvector matrix). */
 105 /*         On exit, the contents of Z have been destroyed by the updating */
 106 /*         process. */
 107
 108 /*  DLAMDA (output) REAL array, dimension (N) */
 109 /*         A copy of the first K eigenvalues which will be used by */
 110 /*         SLAED3 to form the secular equation. */
 111
 112 /*  W      (output) REAL array, dimension (N) */
 113 /*         The first k values of the final deflation-altered z-vector */
 114 /*         which will be passed to SLAED3. */
 115
 116 /*  Q2     (output) REAL array, dimension (N1**2+(N-N1)**2) */
 117 /*         A copy of the first K eigenvectors which will be used by */
 118 /*         SLAED3 in a matrix multiply (SGEMM) to solve for the new */
 119 /*         eigenvectors. */
 120
 121 /*  INDX   (workspace) INTEGER array, dimension (N) */
 122 /*         The permutation used to sort the contents of DLAMDA into */
 123 /*         ascending order. */
 124
 125 /*  INDXC  (output) INTEGER array, dimension (N) */
 126 /*         The permutation used to arrange the columns of the deflated */
 127 /*         Q matrix into three groups:  the first group contains non-zero */
 128 /*         elements only at and above N1, the second contains */
 129 /*         non-zero elements only below N1, and the third is dense. */
 130
 131 /*  INDXP  (workspace) INTEGER array, dimension (N) */
 132 /*         The permutation used to place deflated values of D at the end */
 133 /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
 134 /*         and INDXP(K+1:N) points to the deflated eigenvalues. */
 135
 136 /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
 137 /*         During execution, a label which will indicate which of the */
 138 /*         following types a column in the Q2 matrix is: */
 139 /*         1 : non-zero in the upper half only; */
 140 /*         2 : dense; */
 141 /*         3 : non-zero in the lower half only; */
 142 /*         4 : deflated. */
 143 /*         On exit, COLTYP(i) is the number of columns of type i, */
 144 /*         for i=1 to 4 only. */
 145
 146 /*  INFO   (output) INTEGER */
 147 /*          = 0:  successful exit. */
 148 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 149
 150 /*  Further Details */
 151 /*  =============== */
 152
 153 /*  Based on contributions by */
 154 /*     Jeff Rutter, Computer Science Division, University of California */
 155 /*     at Berkeley, USA */
 156 /*  Modified by Francoise Tisseur, University of Tennessee. */
 157
 158 /*  ===================================================================== */
 159
 160 /*     .. Parameters .. */
 161 /*     .. */
 162 /*     .. Local Arrays .. */
 163 /*     .. */
 164 /*     .. Local Scalars .. */
 165 /*     .. */
 166 /*     .. External Functions .. */
 167 /*     .. */
 168 /*     .. External Subroutines .. */
 169 /*     .. */
 170 /*     .. Intrinsic Functions .. */
 171 /*     .. */
 172 /*     .. Executable Statements .. */
 173
 174 /*     Test the input parameters. */
 175
 176     /* Parameter adjustments */
 177     --d__;
 178     q_dim1 = *ldq;
 179     q_offset = 1 + q_dim1;
 180     q -= q_offset;
 181     --indxq;
 182     --z__;
 183     --dlamda;
 184     --w;
 185     --q2;
 186     --indx;
 187     --indxc;
 188     --indxp;
 189     --coltyp;
 190
 191     /* Function Body */
 192     *info = 0;
 193
 194     if (*n < 0) {
 195         *info = -2;
 196     } else if (*ldq < max(1,*n)) {
 197         *info = -6;
 198     } else /* if(complicated condition) */ {
 199 /* Computing MIN */
 200         i__1 = 1, i__2 = *n / 2;
 201         if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
 202             *info = -3;
 203         }
 204     }
 205     if (*info != 0) {
 206         i__1 = -(*info);
 207         xerbla_("SLAED2", &i__1);
 208         return 0;
 209     }
 210
 211 /*     Quick return if possible */
 212
 213     if (*n == 0) {
 214         return 0;
 215     }
 216
 217     n2 = *n - *n1;
 218     n1p1 = *n1 + 1;
 219
 220     if (*rho < 0.f) {
 221         sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
 222     }
 223
 224 /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
 225 /*     two normalized vectors, norm2(z) = sqrt(2). */
 226
 227     t = 1.f / sqrt(2.f);
 228     sscal_(n, &t, &z__[1], &c__1);
 229
 230 /*     RHO = ABS( norm(z)**2 * RHO ) */
 231
 232     *rho = (r__1 = *rho * 2.f, dabs(r__1));
 233
 234 /*     Sort the eigenvalues into increasing order */
 235
 236     i__1 = *n;
 237     for (i__ = n1p1; i__ <= i__1; ++i__) {
 238         indxq[i__] += *n1;
 239 /* L10: */
 240     }
 241
 242 /*     re-integrate the deflated parts from the last pass */
 243
 244     i__1 = *n;
 245     for (i__ = 1; i__ <= i__1; ++i__) {
 246         dlamda[i__] = d__[indxq[i__]];
 247 /* L20: */
 248     }
 249     slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
 250     i__1 = *n;
 251     for (i__ = 1; i__ <= i__1; ++i__) {
 252         indx[i__] = indxq[indxc[i__]];
 253 /* L30: */
 254     }
 255
 256 /*     Calculate the allowable deflation tolerance */
 257
 258     imax = isamax_(n, &z__[1], &c__1);
 259     jmax = isamax_(n, &d__[1], &c__1);
 260     eps = slamch_("Epsilon");
 261 /* Computing MAX */
 262     r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
 263             r__2));
 264     tol = eps * 8.f * dmax(r__3,r__4);
 265
 266 /*     If the rank-1 modifier is small enough, no more needs to be done */
 267 /*     except to reorganize Q so that its columns correspond with the */
 268 /*     elements in D. */
 269
 270     if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
 271         *k = 0;
 272         iq2 = 1;
 273         i__1 = *n;
 274         for (j = 1; j <= i__1; ++j) {
 275             i__ = indx[j];
 276             scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
 277             dlamda[j] = d__[i__];
 278             iq2 += *n;
 279 /* L40: */
 280         }
 281         slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
 282         scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
 283         goto L190;
 284     }
 285
 286 /*     If there are multiple eigenvalues then the problem deflates.  Here */
 287 /*     the number of equal eigenvalues are found.  As each equal */
 288 /*     eigenvalue is found, an elementary reflector is computed to rotate */
 289 /*     the corresponding eigensubspace so that the corresponding */
 290 /*     components of Z are zero in this new basis. */
 291
 292     i__1 = *n1;
 293     for (i__ = 1; i__ <= i__1; ++i__) {
 294         coltyp[i__] = 1;
 295 /* L50: */
 296     }
 297     i__1 = *n;
 298     for (i__ = n1p1; i__ <= i__1; ++i__) {
 299         coltyp[i__] = 3;
 300 /* L60: */
 301     }
 302
 303
 304     *k = 0;
 305     k2 = *n + 1;
 306     i__1 = *n;
 307     for (j = 1; j <= i__1; ++j) {
 308         nj = indx[j];
 309         if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
 310
 311 /*           Deflate due to small z component. */
 312
 313             --k2;
 314             coltyp[nj] = 4;
 315             indxp[k2] = nj;
 316             if (j == *n) {
 317                 goto L100;
 318             }
 319         } else {
 320             pj = nj;
 321             goto L80;
 322         }
 323 /* L70: */
 324     }
 325 L80:
 326     ++j;
 327     nj = indx[j];
 328     if (j > *n) {
 329         goto L100;
 330     }
 331     if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
 332
 333 /*        Deflate due to small z component. */
 334
 335         --k2;
 336         coltyp[nj] = 4;
 337         indxp[k2] = nj;
 338     } else {
 339
 340 /*        Check if eigenvalues are close enough to allow deflation. */
 341
 342         s = z__[pj];
 343         c__ = z__[nj];
 344
 345 /*        Find sqrt(a**2+b**2) without overflow or */
 346 /*        destructive underflow. */
 347
 348         tau = slapy2_(&c__, &s);
 349         t = d__[nj] - d__[pj];
 350         c__ /= tau;
 351         s = -s / tau;
 352         if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
 353
 354 /*           Deflation is possible. */
 355
 356             z__[nj] = tau;
 357             z__[pj] = 0.f;
 358             if (coltyp[nj] != coltyp[pj]) {
 359                 coltyp[nj] = 2;
 360             }
 361             coltyp[pj] = 4;
 362             srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
 363                     c__, &s);
 364 /* Computing 2nd power */
 365             r__1 = c__;
 366 /* Computing 2nd power */
 367             r__2 = s;
 368             t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
 369 /* Computing 2nd power */
 370             r__1 = s;
 371 /* Computing 2nd power */
 372             r__2 = c__;
 373             d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
 374             d__[pj] = t;
 375             --k2;
 376             i__ = 1;
 377 L90:
 378             if (k2 + i__ <= *n) {
 379                 if (d__[pj] < d__[indxp[k2 + i__]]) {
 380                     indxp[k2 + i__ - 1] = indxp[k2 + i__];
 381                     indxp[k2 + i__] = pj;
 382                     ++i__;
 383                     goto L90;
 384                 } else {
 385                     indxp[k2 + i__ - 1] = pj;
 386                 }
 387             } else {
 388                 indxp[k2 + i__ - 1] = pj;
 389             }
 390             pj = nj;
 391         } else {
 392             ++(*k);
 393             dlamda[*k] = d__[pj];
 394             w[*k] = z__[pj];
 395             indxp[*k] = pj;
 396             pj = nj;
 397         }
 398     }
 399     goto L80;
 400 L100:
 401
 402 /*     Record the last eigenvalue. */
 403
 404     ++(*k);
 405     dlamda[*k] = d__[pj];
 406     w[*k] = z__[pj];
 407     indxp[*k] = pj;
 408
 409 /*     Count up the total number of the various types of columns, then */
 410 /*     form a permutation which positions the four column types into */
 411 /*     four uniform groups (although one or more of these groups may be */
 412 /*     empty). */
 413
 414     for (j = 1; j <= 4; ++j) {
 415         ctot[j - 1] = 0;
 416 /* L110: */
 417     }
 418     i__1 = *n;
 419     for (j = 1; j <= i__1; ++j) {
 420         ct = coltyp[j];
 421         ++ctot[ct - 1];
 422 /* L120: */
 423     }
 424
 425 /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
 426
 427     psm[0] = 1;
 428     psm[1] = ctot[0] + 1;
 429     psm[2] = psm[1] + ctot[1];
 430     psm[3] = psm[2] + ctot[2];
 431     *k = *n - ctot[3];
 432
 433 /*     Fill out the INDXC array so that the permutation which it induces */
 434 /*     will place all type-1 columns first, all type-2 columns next, */
 435 /*     then all type-3's, and finally all type-4's. */
 436
 437     i__1 = *n;
 438     for (j = 1; j <= i__1; ++j) {
 439         js = indxp[j];
 440         ct = coltyp[js];
 441         indx[psm[ct - 1]] = js;
 442         indxc[psm[ct - 1]] = j;
 443         ++psm[ct - 1];
 444 /* L130: */
 445     }
 446
 447 /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
 448 /*     and Q2 respectively.  The eigenvalues/vectors which were not */
 449 /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
 450 /*     while those which were deflated go into the last N - K slots. */
 451
 452     i__ = 1;
 453     iq1 = 1;
 454     iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
 455     i__1 = ctot[0];
 456     for (j = 1; j <= i__1; ++j) {
 457         js = indx[i__];
 458         scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
 459         z__[i__] = d__[js];
 460         ++i__;
 461         iq1 += *n1;
 462 /* L140: */
 463     }
 464
 465     i__1 = ctot[1];
 466     for (j = 1; j <= i__1; ++j) {
 467         js = indx[i__];
 468         scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
 469         scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
 470         z__[i__] = d__[js];
 471         ++i__;
 472         iq1 += *n1;
 473         iq2 += n2;
 474 /* L150: */
 475     }
 476
 477     i__1 = ctot[2];
 478     for (j = 1; j <= i__1; ++j) {
 479         js = indx[i__];
 480         scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
 481         z__[i__] = d__[js];
 482         ++i__;
 483         iq2 += n2;
 484 /* L160: */
 485     }
 486
 487     iq1 = iq2;
 488     i__1 = ctot[3];
 489     for (j = 1; j <= i__1; ++j) {
 490         js = indx[i__];
 491         scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
 492         iq2 += *n;
 493         z__[i__] = d__[js];
 494         ++i__;
 495 /* L170: */
 496     }
 497
 498 /*     The deflated eigenvalues and their corresponding vectors go back */
 499 /*     into the last N - K slots of D and Q respectively. */
 500
 501     slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
 502     i__1 = *n - *k;
 503     scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
 504
 505 /*     Copy CTOT into COLTYP for referencing in SLAED3. */
 506
 507     for (j = 1; j <= 4; ++j) {
 508         coltyp[j] = ctot[j - 1];
 509 /* L180: */
 510     }
 511
 512 L190:
 513     return 0;
 514
 515 /*     End of SLAED2 */
 516
 517 } /* slaed2_ */