git.maemo.org Git - opencv/blob - 3rdparty/lapack/slaed6.c

   1 #include "clapack.h"
   2
   3 /* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho,
   4         real *d__, real *z__, real *finit, real *tau, integer *info)
   5 {
   6     /* System generated locals */
   7     integer i__1;
   8     real r__1, r__2, r__3, r__4;
   9
  10     /* Builtin functions */
  11     double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *);
  12
  13     /* Local variables */
  14     real a, b, c__, f;
  15     integer i__;
  16     real fc, df, ddf, lbd, eta, ubd, eps, base;
  17     integer iter;
  18     real temp, temp1, temp2, temp3, temp4;
  19     logical scale;
  20     integer niter;
  21     real small1, small2, sminv1, sminv2, dscale[3], sclfac;
  22     extern doublereal slamch_(char *);
  23     real zscale[3], erretm, sclinv;
  24
  25
  26 /*  -- LAPACK routine (version 3.1.1) -- */
  27 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  28 /*     February 2007 */
  29
  30 /*     .. Scalar Arguments .. */
  31 /*     .. */
  32 /*     .. Array Arguments .. */
  33 /*     .. */
  34
  35 /*  Purpose */
  36 /*  ======= */
  37
  38 /*  SLAED6 computes the positive or negative root (closest to the origin) */
  39 /*  of */
  40 /*                   z(1)        z(2)        z(3) */
  41 /*  f(x) =   rho + --------- + ---------- + --------- */
  42 /*                  d(1)-x      d(2)-x      d(3)-x */
  43
  44 /*  It is assumed that */
  45
  46 /*        if ORGATI = .true. the root is between d(2) and d(3); */
  47 /*        otherwise it is between d(1) and d(2) */
  48
  49 /*  This routine will be called by SLAED4 when necessary. In most cases, */
  50 /*  the root sought is the smallest in magnitude, though it might not be */
  51 /*  in some extremely rare situations. */
  52
  53 /*  Arguments */
  54 /*  ========= */
  55
  56 /*  KNITER       (input) INTEGER */
  57 /*               Refer to SLAED4 for its significance. */
  58
  59 /*  ORGATI       (input) LOGICAL */
  60 /*               If ORGATI is true, the needed root is between d(2) and */
  61 /*               d(3); otherwise it is between d(1) and d(2).  See */
  62 /*               SLAED4 for further details. */
  63
  64 /*  RHO          (input) REAL */
  65 /*               Refer to the equation f(x) above. */
  66
  67 /*  D            (input) REAL array, dimension (3) */
  68 /*               D satisfies d(1) < d(2) < d(3). */
  69
  70 /*  Z            (input) REAL array, dimension (3) */
  71 /*               Each of the elements in z must be positive. */
  72
  73 /*  FINIT        (input) REAL */
  74 /*               The value of f at 0. It is more accurate than the one */
  75 /*               evaluated inside this routine (if someone wants to do */
  76 /*               so). */
  77
  78 /*  TAU          (output) REAL */
  79 /*               The root of the equation f(x). */
  80
  81 /*  INFO         (output) INTEGER */
  82 /*               = 0: successful exit */
  83 /*               > 0: if INFO = 1, failure to converge */
  84
  85 /*  Further Details */
  86 /*  =============== */
  87
  88 /*  30/06/99: Based on contributions by */
  89 /*     Ren-Cang Li, Computer Science Division, University of California */
  90 /*     at Berkeley, USA */
  91
  92 /*  10/02/03: This version has a few statements commented out for thread safety */
  93 /*     (machine parameters are computed on each entry). SJH. */
  94
  95 /*  05/10/06: Modified from a new version of Ren-Cang Li, use */
  96 /*     Gragg-Thornton-Warner cubic convergent scheme for better stability. */
  97
  98 /*  ===================================================================== */
  99
 100 /*     .. Parameters .. */
 101 /*     .. */
 102 /*     .. External Functions .. */
 103 /*     .. */
 104 /*     .. Local Arrays .. */
 105 /*     .. */
 106 /*     .. Local Scalars .. */
 107 /*     .. */
 108 /*     .. Intrinsic Functions .. */
 109 /*     .. */
 110 /*     .. Executable Statements .. */
 111
 112     /* Parameter adjustments */
 113     --z__;
 114     --d__;
 115
 116     /* Function Body */
 117     *info = 0;
 118
 119     if (*orgati) {
 120         lbd = d__[2];
 121         ubd = d__[3];
 122     } else {
 123         lbd = d__[1];
 124         ubd = d__[2];
 125     }
 126     if (*finit < 0.f) {
 127         lbd = 0.f;
 128     } else {
 129         ubd = 0.f;
 130     }
 131
 132     niter = 1;
 133     *tau = 0.f;
 134     if (*kniter == 2) {
 135         if (*orgati) {
 136             temp = (d__[3] - d__[2]) / 2.f;
 137             c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
 138             a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
 139             b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
 140         } else {
 141             temp = (d__[1] - d__[2]) / 2.f;
 142             c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
 143             a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
 144             b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
 145         }
 146 /* Computing MAX */
 147         r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
 148                 c__);
 149         temp = dmax(r__1,r__2);
 150         a /= temp;
 151         b /= temp;
 152         c__ /= temp;
 153         if (c__ == 0.f) {
 154             *tau = b / a;
 155         } else if (a <= 0.f) {
 156             *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
 157                     c__ * 2.f);
 158         } else {
 159             *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
 160                     r__1))));
 161         }
 162         if (*tau < lbd || *tau > ubd) {
 163             *tau = (lbd + ubd) / 2.f;
 164         }
 165         if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
 166             *tau = 0.f;
 167         } else {
 168             temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau
 169                     * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / (
 170                     d__[3] * (d__[3] - *tau));
 171             if (temp <= 0.f) {
 172                 lbd = *tau;
 173             } else {
 174                 ubd = *tau;
 175             }
 176             if (dabs(*finit) <= dabs(temp)) {
 177                 *tau = 0.f;
 178             }
 179         }
 180     }
 181
 182 /*     get machine parameters for possible scaling to avoid overflow */
 183
 184 /*     modified by Sven: parameters SMALL1, SMINV1, SMALL2, */
 185 /*     SMINV2, EPS are not SAVEd anymore between one call to the */
 186 /*     others but recomputed at each call */
 187
 188     eps = slamch_("Epsilon");
 189     base = slamch_("Base");
 190     i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f);
 191     small1 = pow_ri(&base, &i__1);
 192     sminv1 = 1.f / small1;
 193     small2 = small1 * small1;
 194     sminv2 = sminv1 * sminv1;
 195
 196 /*     Determine if scaling of inputs necessary to avoid overflow */
 197 /*     when computing 1/TEMP**3 */
 198
 199     if (*orgati) {
 200 /* Computing MIN */
 201         r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - *
 202                 tau, dabs(r__2));
 203         temp = dmin(r__3,r__4);
 204     } else {
 205 /* Computing MIN */
 206         r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - *
 207                 tau, dabs(r__2));
 208         temp = dmin(r__3,r__4);
 209     }
 210     scale = FALSE_;
 211     if (temp <= small1) {
 212         scale = TRUE_;
 213         if (temp <= small2) {
 214
 215 /*        Scale up by power of radix nearest 1/SAFMIN**(2/3) */
 216
 217             sclfac = sminv2;
 218             sclinv = small2;
 219         } else {
 220
 221 /*        Scale up by power of radix nearest 1/SAFMIN**(1/3) */
 222
 223             sclfac = sminv1;
 224             sclinv = small1;
 225         }
 226
 227 /*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
 228
 229         for (i__ = 1; i__ <= 3; ++i__) {
 230             dscale[i__ - 1] = d__[i__] * sclfac;
 231             zscale[i__ - 1] = z__[i__] * sclfac;
 232 /* L10: */
 233         }
 234         *tau *= sclfac;
 235         lbd *= sclfac;
 236         ubd *= sclfac;
 237     } else {
 238
 239 /*        Copy D and Z to DSCALE and ZSCALE */
 240
 241         for (i__ = 1; i__ <= 3; ++i__) {
 242             dscale[i__ - 1] = d__[i__];
 243             zscale[i__ - 1] = z__[i__];
 244 /* L20: */
 245         }
 246     }
 247
 248     fc = 0.f;
 249     df = 0.f;
 250     ddf = 0.f;
 251     for (i__ = 1; i__ <= 3; ++i__) {
 252         temp = 1.f / (dscale[i__ - 1] - *tau);
 253         temp1 = zscale[i__ - 1] * temp;
 254         temp2 = temp1 * temp;
 255         temp3 = temp2 * temp;
 256         fc += temp1 / dscale[i__ - 1];
 257         df += temp2;
 258         ddf += temp3;
 259 /* L30: */
 260     }
 261     f = *finit + *tau * fc;
 262
 263     if (dabs(f) <= 0.f) {
 264         goto L60;
 265     }
 266     if (f <= 0.f) {
 267         lbd = *tau;
 268     } else {
 269         ubd = *tau;
 270     }
 271
 272 /*        Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */
 273 /*                            scheme */
 274
 275 /*     It is not hard to see that */
 276
 277 /*           1) Iterations will go up monotonically */
 278 /*              if FINIT < 0; */
 279
 280 /*           2) Iterations will go down monotonically */
 281 /*              if FINIT > 0. */
 282
 283     iter = niter + 1;
 284
 285     for (niter = iter; niter <= 40; ++niter) {
 286
 287         if (*orgati) {
 288             temp1 = dscale[1] - *tau;
 289             temp2 = dscale[2] - *tau;
 290         } else {
 291             temp1 = dscale[0] - *tau;
 292             temp2 = dscale[1] - *tau;
 293         }
 294         a = (temp1 + temp2) * f - temp1 * temp2 * df;
 295         b = temp1 * temp2 * f;
 296         c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
 297 /* Computing MAX */
 298         r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
 299                 c__);
 300         temp = dmax(r__1,r__2);
 301         a /= temp;
 302         b /= temp;
 303         c__ /= temp;
 304         if (c__ == 0.f) {
 305             eta = b / a;
 306         } else if (a <= 0.f) {
 307             eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
 308                     c__ * 2.f);
 309         } else {
 310             eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
 311                     r__1))));
 312         }
 313         if (f * eta >= 0.f) {
 314             eta = -f / df;
 315         }
 316
 317         *tau += eta;
 318         if (*tau < lbd || *tau > ubd) {
 319             *tau = (lbd + ubd) / 2.f;
 320         }
 321
 322         fc = 0.f;
 323         erretm = 0.f;
 324         df = 0.f;
 325         ddf = 0.f;
 326         for (i__ = 1; i__ <= 3; ++i__) {
 327             temp = 1.f / (dscale[i__ - 1] - *tau);
 328             temp1 = zscale[i__ - 1] * temp;
 329             temp2 = temp1 * temp;
 330             temp3 = temp2 * temp;
 331             temp4 = temp1 / dscale[i__ - 1];
 332             fc += temp4;
 333             erretm += dabs(temp4);
 334             df += temp2;
 335             ddf += temp3;
 336 /* L40: */
 337         }
 338         f = *finit + *tau * fc;
 339         erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df;
 340         if (dabs(f) <= eps * erretm) {
 341             goto L60;
 342         }
 343         if (f <= 0.f) {
 344             lbd = *tau;
 345         } else {
 346             ubd = *tau;
 347         }
 348 /* L50: */
 349     }
 350     *info = 1;
 351 L60:
 352
 353 /*     Undo scaling */
 354
 355     if (scale) {
 356         *tau *= sclinv;
 357     }
 358     return 0;
 359
 360 /*     End of SLAED6 */
 361
 362 } /* slaed6_ */