git.maemo.org Git - opencv/blob - cxcore/src/cxsvd.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "_cxcore.h"
  43 #include <float.h>
  44
  45 /////////////////////////////////////////////////////////////////////////////////////////
  46
  47 #define icvGivens_64f( n, x, y, c, s ) \
  48 {                                      \
  49     int _i;                            \
  50     double* _x = (x);                  \
  51     double* _y = (y);                  \
  52                                        \
  53     for( _i = 0; _i < n; _i++ )        \
  54     {                                  \
  55         double t0 = _x[_i];            \
  56         double t1 = _y[_i];            \
  57         _x[_i] = t0*c + t1*s;          \
  58         _y[_i] = -t0*s + t1*c;         \
  59     }                                  \
  60 }
  61
  62
  63 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
  64 static  void
  65 icvMatrAXPY_64f( int m, int n, const double* x, int dx,
  66                  const double* a, double* y, int dy )
  67 {
  68     int i, j;
  69
  70     for( i = 0; i < m; i++, x += dx, y += dy )
  71     {
  72         double s = a[i];
  73
  74         for( j = 0; j <= n - 4; j += 4 )
  75         {
  76             double t0 = y[j]   + s*x[j];
  77             double t1 = y[j+1] + s*x[j+1];
  78             y[j]   = t0;
  79             y[j+1] = t1;
  80             t0 = y[j+2] + s*x[j+2];
  81             t1 = y[j+3] + s*x[j+3];
  82             y[j+2] = t0;
  83             y[j+3] = t1;
  84         }
  85
  86         for( ; j < n; j++ ) y[j] += s*x[j];
  87     }
  88 }
  89
  90
  91 /* y[1:m,-1] = h*y[1:m,0:n]*x[0:1,0:n]'*x[-1]  (this is used for U&V reconstruction)
  92    y[1:m,0:n] += h*y[1:m,0:n]*x[0:1,0:n]'*x[0:1,0:n] */
  93 static void
  94 icvMatrAXPY3_64f( int m, int n, const double* x, int l, double* y, double h )
  95 {
  96     int i, j;
  97
  98     for( i = 1; i < m; i++ )
  99     {
 100         double s = 0;
 101
 102         y += l;
 103
 104         for( j = 0; j <= n - 4; j += 4 )
 105             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
 106
 107         for( ; j < n; j++ )  s += x[j]*y[j];
 108
 109         s *= h;
 110         y[-1] = s*x[-1];
 111
 112         for( j = 0; j <= n - 4; j += 4 )
 113         {
 114             double t0 = y[j]   + s*x[j];
 115             double t1 = y[j+1] + s*x[j+1];
 116             y[j]   = t0;
 117             y[j+1] = t1;
 118             t0 = y[j+2] + s*x[j+2];
 119             t1 = y[j+3] + s*x[j+3];
 120             y[j+2] = t0;
 121             y[j+3] = t1;
 122         }
 123
 124         for( ; j < n; j++ ) y[j] += s*x[j];
 125     }
 126 }
 127
 128
 129 #define icvGivens_32f( n, x, y, c, s ) \
 130 {                                      \
 131     int _i;                            \
 132     float* _x = (x);                   \
 133     float* _y = (y);                   \
 134                                        \
 135     for( _i = 0; _i < n; _i++ )        \
 136     {                                  \
 137         double t0 = _x[_i];            \
 138         double t1 = _y[_i];            \
 139         _x[_i] = (float)(t0*c + t1*s); \
 140         _y[_i] = (float)(-t0*s + t1*c);\
 141     }                                  \
 142 }
 143
 144 static  void
 145 icvMatrAXPY_32f( int m, int n, const float* x, int dx,
 146                  const float* a, float* y, int dy )
 147 {
 148     int i, j;
 149
 150     for( i = 0; i < m; i++, x += dx, y += dy )
 151     {
 152         double s = a[i];
 153
 154         for( j = 0; j <= n - 4; j += 4 )
 155         {
 156             double t0 = y[j]   + s*x[j];
 157             double t1 = y[j+1] + s*x[j+1];
 158             y[j]   = (float)t0;
 159             y[j+1] = (float)t1;
 160             t0 = y[j+2] + s*x[j+2];
 161             t1 = y[j+3] + s*x[j+3];
 162             y[j+2] = (float)t0;
 163             y[j+3] = (float)t1;
 164         }
 165
 166         for( ; j < n; j++ )
 167             y[j] = (float)(y[j] + s*x[j]);
 168     }
 169 }
 170
 171
 172 static void
 173 icvMatrAXPY3_32f( int m, int n, const float* x, int l, float* y, double h )
 174 {
 175     int i, j;
 176
 177     for( i = 1; i < m; i++ )
 178     {
 179         double s = 0;
 180         y += l;
 181
 182         for( j = 0; j <= n - 4; j += 4 )
 183             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
 184
 185         for( ; j < n; j++ )  s += x[j]*y[j];
 186
 187         s *= h;
 188         y[-1] = (float)(s*x[-1]);
 189
 190         for( j = 0; j <= n - 4; j += 4 )
 191         {
 192             double t0 = y[j]   + s*x[j];
 193             double t1 = y[j+1] + s*x[j+1];
 194             y[j]   = (float)t0;
 195             y[j+1] = (float)t1;
 196             t0 = y[j+2] + s*x[j+2];
 197             t1 = y[j+3] + s*x[j+3];
 198             y[j+2] = (float)t0;
 199             y[j+3] = (float)t1;
 200         }
 201
 202         for( ; j < n; j++ ) y[j] = (float)(y[j] + s*x[j]);
 203     }
 204 }
 205
 206 /* accurate hypotenuse calculation */
 207 static double
 208 pythag( double a, double b )
 209 {
 210     a = fabs( a );
 211     b = fabs( b );
 212     if( a > b )
 213     {
 214         b /= a;
 215         a *= sqrt( 1. + b * b );
 216     }
 217     else if( b != 0 )
 218     {
 219         a /= b;
 220         a = b * sqrt( 1. + a * a );
 221     }
 222
 223     return a;
 224 }
 225
 226 /****************************************************************************************/
 227 /****************************************************************************************/
 228
 229 #define MAX_ITERS  30
 230
 231 static void
 232 icvSVD_64f( double* a, int lda, int m, int n,
 233             double* w,
 234             double* uT, int lduT, int nu,
 235             double* vT, int ldvT,
 236             double* buffer )
 237 {
 238     double* e;
 239     double* temp;
 240     double *w1, *e1;
 241     double *hv;
 242     double ku0 = 0, kv0 = 0;
 243     double anorm = 0;
 244     double *a1, *u0 = uT, *v0 = vT;
 245     double scale, h;
 246     int i, j, k, l;
 247     int nm, m1, n1;
 248     int nv = n;
 249     int iters = 0;
 250     double* hv0 = (double*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
 251
 252     e = buffer;
 253     w1 = w;
 254     e1 = e + 1;
 255     nm = n;
 256
 257     temp = buffer + nm;
 258
 259     memset( w, 0, nm * sizeof( w[0] ));
 260     memset( e, 0, nm * sizeof( e[0] ));
 261
 262     m1 = m;
 263     n1 = n;
 264
 265     /* transform a to bi-diagonal form */
 266     for( ;; )
 267     {
 268         int update_u;
 269         int update_v;
 270
 271         if( m1 == 0 )
 272             break;
 273
 274         scale = h = 0;
 275         update_u = uT && m1 > m - nu;
 276         hv = update_u ? uT : hv0;
 277
 278         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
 279         {
 280             double t = a1[0];
 281             scale += fabs( hv[j] = t );
 282         }
 283
 284         if( scale != 0 )
 285         {
 286             double f = 1./scale, g, s = 0;
 287
 288             for( j = 0; j < m1; j++ )
 289             {
 290                 double t = (hv[j] *= f);
 291                 s += t * t;
 292             }
 293
 294             g = sqrt( s );
 295             f = hv[0];
 296             if( f >= 0 )
 297                 g = -g;
 298             hv[0] = f - g;
 299             h = 1. / (f * g - s);
 300
 301             memset( temp, 0, n1 * sizeof( temp[0] ));
 302
 303             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
 304             icvMatrAXPY_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
 305             for( k = 1; k < n1; k++ ) temp[k] *= h;
 306
 307             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
 308             icvMatrAXPY_64f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
 309             *w1 = g*scale;
 310         }
 311         w1++;
 312
 313         /* store -2/(hv'*hv) */
 314         if( update_u )
 315         {
 316             if( m1 == m )
 317                 ku0 = h;
 318             else
 319                 hv[-1] = h;
 320         }
 321
 322         a++;
 323         n1--;
 324         if( vT )
 325             vT += ldvT + 1;
 326
 327         if( n1 == 0 )
 328             break;
 329
 330         scale = h = 0;
 331         update_v = vT && n1 > n - nv;
 332
 333         hv = update_v ? vT : hv0;
 334
 335         for( j = 0; j < n1; j++ )
 336         {
 337             double t = a[j];
 338             scale += fabs( hv[j] = t );
 339         }
 340
 341         if( scale != 0 )
 342         {
 343             double f = 1./scale, g, s = 0;
 344
 345             for( j = 0; j < n1; j++ )
 346             {
 347                 double t = (hv[j] *= f);
 348                 s += t * t;
 349             }
 350
 351             g = sqrt( s );
 352             f = hv[0];
 353             if( f >= 0 )
 354                 g = -g;
 355             hv[0] = f - g;
 356             h = 1. / (f * g - s);
 357             hv[-1] = 0.;
 358
 359             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
 360             icvMatrAXPY3_64f( m1, n1, hv, lda, a, h );
 361
 362             *e1 = g*scale;
 363         }
 364         e1++;
 365
 366         /* store -2/(hv'*hv) */
 367         if( update_v )
 368         {
 369             if( n1 == n )
 370                 kv0 = h;
 371             else
 372                 hv[-1] = h;
 373         }
 374
 375         a += lda;
 376         m1--;
 377         if( uT )
 378             uT += lduT + 1;
 379     }
 380
 381     m1 -= m1 != 0;
 382     n1 -= n1 != 0;
 383
 384     /* accumulate left transformations */
 385     if( uT )
 386     {
 387         m1 = m - m1;
 388         uT = u0 + m1 * lduT;
 389         for( i = m1; i < nu; i++, uT += lduT )
 390         {
 391             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
 392             uT[i] = 1.;
 393         }
 394
 395         for( i = m1 - 1; i >= 0; i-- )
 396         {
 397             double s;
 398             int lh = nu - i;
 399
 400             l = m - i;
 401
 402             hv = u0 + (lduT + 1) * i;
 403             h = i == 0 ? ku0 : hv[-1];
 404
 405             assert( h <= 0 );
 406
 407             if( h != 0 )
 408             {
 409                 uT = hv;
 410                 icvMatrAXPY3_64f( lh, l-1, hv+1, lduT, uT+1, h );
 411
 412                 s = hv[0] * h;
 413                 for( k = 0; k < l; k++ ) hv[k] *= s;
 414                 hv[0] += 1;
 415             }
 416             else
 417             {
 418                 for( j = 1; j < l; j++ )
 419                     hv[j] = 0;
 420                 for( j = 1; j < lh; j++ )
 421                     hv[j * lduT] = 0;
 422                 hv[0] = 1;
 423             }
 424         }
 425         uT = u0;
 426     }
 427
 428     /* accumulate right transformations */
 429     if( vT )
 430     {
 431         n1 = n - n1;
 432         vT = v0 + n1 * ldvT;
 433         for( i = n1; i < nv; i++, vT += ldvT )
 434         {
 435             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
 436             vT[i] = 1.;
 437         }
 438
 439         for( i = n1 - 1; i >= 0; i-- )
 440         {
 441             double s;
 442             int lh = nv - i;
 443
 444             l = n - i;
 445             hv = v0 + (ldvT + 1) * i;
 446             h = i == 0 ? kv0 : hv[-1];
 447
 448             assert( h <= 0 );
 449
 450             if( h != 0 )
 451             {
 452                 vT = hv;
 453                 icvMatrAXPY3_64f( lh, l-1, hv+1, ldvT, vT+1, h );
 454
 455                 s = hv[0] * h;
 456                 for( k = 0; k < l; k++ ) hv[k] *= s;
 457                 hv[0] += 1;
 458             }
 459             else
 460             {
 461                 for( j = 1; j < l; j++ )
 462                     hv[j] = 0;
 463                 for( j = 1; j < lh; j++ )
 464                     hv[j * ldvT] = 0;
 465                 hv[0] = 1;
 466             }
 467         }
 468         vT = v0;
 469     }
 470
 471     for( i = 0; i < nm; i++ )
 472     {
 473         double tnorm = fabs( w[i] );
 474         tnorm += fabs( e[i] );
 475
 476         if( anorm < tnorm )
 477             anorm = tnorm;
 478     }
 479
 480     anorm *= DBL_EPSILON;
 481
 482     /* diagonalization of the bidiagonal form */
 483     for( k = nm - 1; k >= 0; k-- )
 484     {
 485         double z = 0;
 486         iters = 0;
 487
 488         for( ;; )               /* do iterations */
 489         {
 490             double c, s, f, g, x, y;
 491             int flag = 0;
 492
 493             /* test for splitting */
 494             for( l = k; l >= 0; l-- )
 495             {
 496                 if( fabs(e[l]) <= anorm )
 497                 {
 498                     flag = 1;
 499                     break;
 500                 }
 501                 assert( l > 0 );
 502                 if( fabs(w[l - 1]) <= anorm )
 503                     break;
 504             }
 505
 506             if( !flag )
 507             {
 508                 c = 0;
 509                 s = 1;
 510
 511                 for( i = l; i <= k; i++ )
 512                 {
 513                     f = s * e[i];
 514
 515                     e[i] *= c;
 516
 517                     if( anorm + fabs( f ) == anorm )
 518                         break;
 519
 520                     g = w[i];
 521                     h = pythag( f, g );
 522                     w[i] = h;
 523                     c = g / h;
 524                     s = -f / h;
 525
 526                     if( uT )
 527                         icvGivens_64f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
 528                 }
 529             }
 530
 531             z = w[k];
 532             if( l == k || iters++ == MAX_ITERS )
 533                 break;
 534
 535             /* shift from bottom 2x2 minor */
 536             x = w[l];
 537             y = w[k - 1];
 538             g = e[k - 1];
 539             h = e[k];
 540             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
 541             g = pythag( f, 1 );
 542             if( f < 0 )
 543                 g = -g;
 544             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
 545             /* next QR transformation */
 546             c = s = 1;
 547
 548             for( i = l + 1; i <= k; i++ )
 549             {
 550                 g = e[i];
 551                 y = w[i];
 552                 h = s * g;
 553                 g *= c;
 554                 z = pythag( f, h );
 555                 e[i - 1] = z;
 556                 c = f / z;
 557                 s = h / z;
 558                 f = x * c + g * s;
 559                 g = -x * s + g * c;
 560                 h = y * s;
 561                 y *= c;
 562
 563                 if( vT )
 564                     icvGivens_64f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
 565
 566                 z = pythag( f, h );
 567                 w[i - 1] = z;
 568
 569                 /* rotation can be arbitrary if z == 0 */
 570                 if( z != 0 )
 571                 {
 572                     c = f / z;
 573                     s = h / z;
 574                 }
 575                 f = c * g + s * y;
 576                 x = -s * g + c * y;
 577
 578                 if( uT )
 579                     icvGivens_64f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
 580             }
 581
 582             e[l] = 0;
 583             e[k] = f;
 584             w[k] = x;
 585         }                       /* end of iteration loop */
 586
 587         if( iters > MAX_ITERS )
 588             break;
 589
 590         if( z < 0 )
 591         {
 592             w[k] = -z;
 593             if( vT )
 594             {
 595                 for( j = 0; j < n; j++ )
 596                     vT[j + k * ldvT] = -vT[j + k * ldvT];
 597             }
 598         }
 599     }                           /* end of diagonalization loop */
 600
 601     /* sort singular values and corresponding values */
 602     for( i = 0; i < nm; i++ )
 603     {
 604         k = i;
 605         for( j = i + 1; j < nm; j++ )
 606             if( w[k] < w[j] )
 607                 k = j;
 608
 609         if( k != i )
 610         {
 611             double t;
 612             CV_SWAP( w[i], w[k], t );
 613
 614             if( vT )
 615                 for( j = 0; j < n; j++ )
 616                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
 617
 618             if( uT )
 619                 for( j = 0; j < m; j++ )
 620                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
 621         }
 622     }
 623 }
 624
 625
 626 static void
 627 icvSVD_32f( float* a, int lda, int m, int n,
 628             float* w,
 629             float* uT, int lduT, int nu,
 630             float* vT, int ldvT,
 631             float* buffer )
 632 {
 633     float* e;
 634     float* temp;
 635     float *w1, *e1;
 636     float *hv;
 637     double ku0 = 0, kv0 = 0;
 638     double anorm = 0;
 639     float *a1, *u0 = uT, *v0 = vT;
 640     double scale, h;
 641     int i, j, k, l;
 642     int nm, m1, n1;
 643     int nv = n;
 644     int iters = 0;
 645     float* hv0 = (float*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
 646
 647     e = buffer;
 648
 649     w1 = w;
 650     e1 = e + 1;
 651     nm = n;
 652
 653     temp = buffer + nm;
 654
 655     memset( w, 0, nm * sizeof( w[0] ));
 656     memset( e, 0, nm * sizeof( e[0] ));
 657
 658     m1 = m;
 659     n1 = n;
 660
 661     /* transform a to bi-diagonal form */
 662     for( ;; )
 663     {
 664         int update_u;
 665         int update_v;
 666
 667         if( m1 == 0 )
 668             break;
 669
 670         scale = h = 0;
 671
 672         update_u = uT && m1 > m - nu;
 673         hv = update_u ? uT : hv0;
 674
 675         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
 676         {
 677             double t = a1[0];
 678             scale += fabs( hv[j] = (float)t );
 679         }
 680
 681         if( scale != 0 )
 682         {
 683             double f = 1./scale, g, s = 0;
 684
 685             for( j = 0; j < m1; j++ )
 686             {
 687                 double t = (hv[j] = (float)(hv[j]*f));
 688                 s += t * t;
 689             }
 690
 691             g = sqrt( s );
 692             f = hv[0];
 693             if( f >= 0 )
 694                 g = -g;
 695             hv[0] = (float)(f - g);
 696             h = 1. / (f * g - s);
 697
 698             memset( temp, 0, n1 * sizeof( temp[0] ));
 699
 700             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
 701             icvMatrAXPY_32f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
 702
 703             for( k = 1; k < n1; k++ ) temp[k] = (float)(temp[k]*h);
 704
 705             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
 706             icvMatrAXPY_32f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
 707             *w1 = (float)(g*scale);
 708         }
 709         w1++;
 710
 711         /* store -2/(hv'*hv) */
 712         if( update_u )
 713         {
 714             if( m1 == m )
 715                 ku0 = h;
 716             else
 717                 hv[-1] = (float)h;
 718         }
 719
 720         a++;
 721         n1--;
 722         if( vT )
 723             vT += ldvT + 1;
 724
 725         if( n1 == 0 )
 726             break;
 727
 728         scale = h = 0;
 729         update_v = vT && n1 > n - nv;
 730         hv = update_v ? vT : hv0;
 731
 732         for( j = 0; j < n1; j++ )
 733         {
 734             double t = a[j];
 735             scale += fabs( hv[j] = (float)t );
 736         }
 737
 738         if( scale != 0 )
 739         {
 740             double f = 1./scale, g, s = 0;
 741
 742             for( j = 0; j < n1; j++ )
 743             {
 744                 double t = (hv[j] = (float)(hv[j]*f));
 745                 s += t * t;
 746             }
 747
 748             g = sqrt( s );
 749             f = hv[0];
 750             if( f >= 0 )
 751                 g = -g;
 752             hv[0] = (float)(f - g);
 753             h = 1. / (f * g - s);
 754             hv[-1] = 0.f;
 755
 756             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
 757             icvMatrAXPY3_32f( m1, n1, hv, lda, a, h );
 758
 759             *e1 = (float)(g*scale);
 760         }
 761         e1++;
 762
 763         /* store -2/(hv'*hv) */
 764         if( update_v )
 765         {
 766             if( n1 == n )
 767                 kv0 = h;
 768             else
 769                 hv[-1] = (float)h;
 770         }
 771
 772         a += lda;
 773         m1--;
 774         if( uT )
 775             uT += lduT + 1;
 776     }
 777
 778     m1 -= m1 != 0;
 779     n1 -= n1 != 0;
 780
 781     /* accumulate left transformations */
 782     if( uT )
 783     {
 784         m1 = m - m1;
 785         uT = u0 + m1 * lduT;
 786         for( i = m1; i < nu; i++, uT += lduT )
 787         {
 788             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
 789             uT[i] = 1.;
 790         }
 791
 792         for( i = m1 - 1; i >= 0; i-- )
 793         {
 794             double s;
 795             int lh = nu - i;
 796
 797             l = m - i;
 798
 799             hv = u0 + (lduT + 1) * i;
 800             h = i == 0 ? ku0 : hv[-1];
 801
 802             assert( h <= 0 );
 803
 804             if( h != 0 )
 805             {
 806                 uT = hv;
 807                 icvMatrAXPY3_32f( lh, l-1, hv+1, lduT, uT+1, h );
 808
 809                 s = hv[0] * h;
 810                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
 811                 hv[0] += 1;
 812             }
 813             else
 814             {
 815                 for( j = 1; j < l; j++ )
 816                     hv[j] = 0;
 817                 for( j = 1; j < lh; j++ )
 818                     hv[j * lduT] = 0;
 819                 hv[0] = 1;
 820             }
 821         }
 822         uT = u0;
 823     }
 824
 825     /* accumulate right transformations */
 826     if( vT )
 827     {
 828         n1 = n - n1;
 829         vT = v0 + n1 * ldvT;
 830         for( i = n1; i < nv; i++, vT += ldvT )
 831         {
 832             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
 833             vT[i] = 1.;
 834         }
 835
 836         for( i = n1 - 1; i >= 0; i-- )
 837         {
 838             double s;
 839             int lh = nv - i;
 840
 841             l = n - i;
 842             hv = v0 + (ldvT + 1) * i;
 843             h = i == 0 ? kv0 : hv[-1];
 844
 845             assert( h <= 0 );
 846
 847             if( h != 0 )
 848             {
 849                 vT = hv;
 850                 icvMatrAXPY3_32f( lh, l-1, hv+1, ldvT, vT+1, h );
 851
 852                 s = hv[0] * h;
 853                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
 854                 hv[0] += 1;
 855             }
 856             else
 857             {
 858                 for( j = 1; j < l; j++ )
 859                     hv[j] = 0;
 860                 for( j = 1; j < lh; j++ )
 861                     hv[j * ldvT] = 0;
 862                 hv[0] = 1;
 863             }
 864         }
 865         vT = v0;
 866     }
 867
 868     for( i = 0; i < nm; i++ )
 869     {
 870         double tnorm = fabs( w[i] );
 871         tnorm += fabs( e[i] );
 872
 873         if( anorm < tnorm )
 874             anorm = tnorm;
 875     }
 876
 877     anorm *= FLT_EPSILON;
 878
 879     /* diagonalization of the bidiagonal form */
 880     for( k = nm - 1; k >= 0; k-- )
 881     {
 882         double z = 0;
 883         iters = 0;
 884
 885         for( ;; )               /* do iterations */
 886         {
 887             double c, s, f, g, x, y;
 888             int flag = 0;
 889
 890             /* test for splitting */
 891             for( l = k; l >= 0; l-- )
 892             {
 893                 if( fabs( e[l] ) <= anorm )
 894                 {
 895                     flag = 1;
 896                     break;
 897                 }
 898                 assert( l > 0 );
 899                 if( fabs( w[l - 1] ) <= anorm )
 900                     break;
 901             }
 902
 903             if( !flag )
 904             {
 905                 c = 0;
 906                 s = 1;
 907
 908                 for( i = l; i <= k; i++ )
 909                 {
 910                     f = s * e[i];
 911                     e[i] = (float)(e[i]*c);
 912
 913                     if( anorm + fabs( f ) == anorm )
 914                         break;
 915
 916                     g = w[i];
 917                     h = pythag( f, g );
 918                     w[i] = (float)h;
 919                     c = g / h;
 920                     s = -f / h;
 921
 922                     if( uT )
 923                         icvGivens_32f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
 924                 }
 925             }
 926
 927             z = w[k];
 928             if( l == k || iters++ == MAX_ITERS )
 929                 break;
 930
 931             /* shift from bottom 2x2 minor */
 932             x = w[l];
 933             y = w[k - 1];
 934             g = e[k - 1];
 935             h = e[k];
 936             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
 937             g = pythag( f, 1 );
 938             if( f < 0 )
 939                 g = -g;
 940             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
 941             /* next QR transformation */
 942             c = s = 1;
 943
 944             for( i = l + 1; i <= k; i++ )
 945             {
 946                 g = e[i];
 947                 y = w[i];
 948                 h = s * g;
 949                 g *= c;
 950                 z = pythag( f, h );
 951                 e[i - 1] = (float)z;
 952                 c = f / z;
 953                 s = h / z;
 954                 f = x * c + g * s;
 955                 g = -x * s + g * c;
 956                 h = y * s;
 957                 y *= c;
 958
 959                 if( vT )
 960                     icvGivens_32f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
 961
 962                 z = pythag( f, h );
 963                 w[i - 1] = (float)z;
 964
 965                 /* rotation can be arbitrary if z == 0 */
 966                 if( z != 0 )
 967                 {
 968                     c = f / z;
 969                     s = h / z;
 970                 }
 971                 f = c * g + s * y;
 972                 x = -s * g + c * y;
 973
 974                 if( uT )
 975                     icvGivens_32f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
 976             }
 977
 978             e[l] = 0;
 979             e[k] = (float)f;
 980             w[k] = (float)x;
 981         }                       /* end of iteration loop */
 982
 983         if( iters > MAX_ITERS )
 984             break;
 985
 986         if( z < 0 )
 987         {
 988             w[k] = (float)(-z);
 989             if( vT )
 990             {
 991                 for( j = 0; j < n; j++ )
 992                     vT[j + k * ldvT] = -vT[j + k * ldvT];
 993             }
 994         }
 995     }                           /* end of diagonalization loop */
 996
 997     /* sort singular values and corresponding vectors */
 998     for( i = 0; i < nm; i++ )
 999     {
1000         k = i;
1001         for( j = i + 1; j < nm; j++ )
1002             if( w[k] < w[j] )
1003                 k = j;
1004
1005         if( k != i )
1006         {
1007             float t;
1008             CV_SWAP( w[i], w[k], t );
1009
1010             if( vT )
1011                 for( j = 0; j < n; j++ )
1012                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
1013
1014             if( uT )
1015                 for( j = 0; j < m; j++ )
1016                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
1017         }
1018     }
1019 }
1020
1021
1022 static void
1023 icvSVBkSb_64f( int m, int n, const double* w,
1024                const double* uT, int lduT,
1025                const double* vT, int ldvT,
1026                const double* b, int ldb, int nb,
1027                double* x, int ldx, double* buffer )
1028 {
1029     double threshold = 0;
1030     int i, j, nm = MIN( m, n );
1031
1032     if( !b )
1033         nb = m;
1034
1035     for( i = 0; i < n; i++ )
1036         memset( x + i*ldx, 0, nb*sizeof(x[0]));
1037
1038     for( i = 0; i < nm; i++ )
1039         threshold += w[i];
1040     threshold *= 2*DBL_EPSILON;
1041
1042     /* vT * inv(w) * uT * b */
1043     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
1044     {
1045         double wi = w[i];
1046
1047         if( wi > threshold )
1048         {
1049             wi = 1./wi;
1050
1051             if( nb == 1 )
1052             {
1053                 double s = 0;
1054                 if( b )
1055                 {
1056                     if( ldb == 1 )
1057                     {
1058                         for( j = 0; j <= m - 4; j += 4 )
1059                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
1060                         for( ; j < m; j++ )
1061                             s += uT[j]*b[j];
1062                     }
1063                     else
1064                     {
1065                         for( j = 0; j < m; j++ )
1066                             s += uT[j]*b[j*ldb];
1067                     }
1068                 }
1069                 else
1070                     s = uT[0];
1071                 s *= wi;
1072                 if( ldx == 1 )
1073                 {
1074                     for( j = 0; j <= n - 4; j += 4 )
1075                     {
1076                         double t0 = x[j] + s*vT[j];
1077                         double t1 = x[j+1] + s*vT[j+1];
1078                         x[j] = t0;
1079                         x[j+1] = t1;
1080                         t0 = x[j+2] + s*vT[j+2];
1081                         t1 = x[j+3] + s*vT[j+3];
1082                         x[j+2] = t0;
1083                         x[j+3] = t1;
1084                     }
1085
1086                     for( ; j < n; j++ )
1087                         x[j] += s*vT[j];
1088                 }
1089                 else
1090                 {
1091                     for( j = 0; j < n; j++ )
1092                         x[j*ldx] += s*vT[j];
1093                 }
1094             }
1095             else
1096             {
1097                 if( b )
1098                 {
1099                     memset( buffer, 0, nb*sizeof(buffer[0]));
1100                     icvMatrAXPY_64f( m, nb, b, ldb, uT, buffer, 0 );
1101                     for( j = 0; j < nb; j++ )
1102                         buffer[j] *= wi;
1103                 }
1104                 else
1105                 {
1106                     for( j = 0; j < nb; j++ )
1107                         buffer[j] = uT[j]*wi;
1108                 }
1109                 icvMatrAXPY_64f( n, nb, buffer, 0, vT, x, ldx );
1110             }
1111         }
1112     }
1113 }
1114
1115
1116 static void
1117 icvSVBkSb_32f( int m, int n, const float* w,
1118                const float* uT, int lduT,
1119                const float* vT, int ldvT,
1120                const float* b, int ldb, int nb,
1121                float* x, int ldx, float* buffer )
1122 {
1123     float threshold = 0.f;
1124     int i, j, nm = MIN( m, n );
1125
1126     if( !b )
1127         nb = m;
1128
1129     for( i = 0; i < n; i++ )
1130         memset( x + i*ldx, 0, nb*sizeof(x[0]));
1131
1132     for( i = 0; i < nm; i++ )
1133         threshold += w[i];
1134     threshold *= 2*FLT_EPSILON;
1135
1136     /* vT * inv(w) * uT * b */
1137     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
1138     {
1139         double wi = w[i];
1140
1141         if( wi > threshold )
1142         {
1143             wi = 1./wi;
1144
1145             if( nb == 1 )
1146             {
1147                 double s = 0;
1148                 if( b )
1149                 {
1150                     if( ldb == 1 )
1151                     {
1152                         for( j = 0; j <= m - 4; j += 4 )
1153                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
1154                         for( ; j < m; j++ )
1155                             s += uT[j]*b[j];
1156                     }
1157                     else
1158                     {
1159                         for( j = 0; j < m; j++ )
1160                             s += uT[j]*b[j*ldb];
1161                     }
1162                 }
1163                 else
1164                     s = uT[0];
1165                 s *= wi;
1166
1167                 if( ldx == 1 )
1168                 {
1169                     for( j = 0; j <= n - 4; j += 4 )
1170                     {
1171                         double t0 = x[j] + s*vT[j];
1172                         double t1 = x[j+1] + s*vT[j+1];
1173                         x[j] = (float)t0;
1174                         x[j+1] = (float)t1;
1175                         t0 = x[j+2] + s*vT[j+2];
1176                         t1 = x[j+3] + s*vT[j+3];
1177                         x[j+2] = (float)t0;
1178                         x[j+3] = (float)t1;
1179                     }
1180
1181                     for( ; j < n; j++ )
1182                         x[j] = (float)(x[j] + s*vT[j]);
1183                 }
1184                 else
1185                 {
1186                     for( j = 0; j < n; j++ )
1187                         x[j*ldx] = (float)(x[j*ldx] + s*vT[j]);
1188                 }
1189             }
1190             else
1191             {
1192                 if( b )
1193                 {
1194                     memset( buffer, 0, nb*sizeof(buffer[0]));
1195                     icvMatrAXPY_32f( m, nb, b, ldb, uT, buffer, 0 );
1196                     for( j = 0; j < nb; j++ )
1197                         buffer[j] = (float)(buffer[j]*wi);
1198                 }
1199                 else
1200                 {
1201                     for( j = 0; j < nb; j++ )
1202                         buffer[j] = (float)(uT[j]*wi);
1203                 }
1204                 icvMatrAXPY_32f( n, nb, buffer, 0, vT, x, ldx );
1205             }
1206         }
1207     }
1208 }
1209
1210
1211 CV_IMPL  void
1212 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
1213 {
1214     uchar* buffer = 0;
1215     int local_alloc = 0;
1216
1217     CV_FUNCNAME( "cvSVD" );
1218
1219     __BEGIN__;
1220
1221     CvMat astub, *a = (CvMat*)aarr;
1222     CvMat wstub, *w = (CvMat*)warr;
1223     CvMat ustub, *u;
1224     CvMat vstub, *v;
1225     CvMat tmat;
1226     uchar* tw = 0;
1227     int type;
1228     int a_buf_offset = 0, u_buf_offset = 0, buf_size, pix_size;
1229     int temp_u = 0, /* temporary storage for U is needed */
1230         t_svd; /* special case: a->rows < a->cols */
1231     int m, n;
1232     int w_rows, w_cols;
1233     int u_rows = 0, u_cols = 0;
1234     int w_is_mat = 0;
1235
1236     if( !CV_IS_MAT( a ))
1237         CV_CALL( a = cvGetMat( a, &astub ));
1238
1239     if( !CV_IS_MAT( w ))
1240         CV_CALL( w = cvGetMat( w, &wstub ));
1241
1242     if( !CV_ARE_TYPES_EQ( a, w ))
1243         CV_ERROR( CV_StsUnmatchedFormats, "" );
1244
1245     if( a->rows >= a->cols )
1246     {
1247         m = a->rows;
1248         n = a->cols;
1249         w_rows = w->rows;
1250         w_cols = w->cols;
1251         t_svd = 0;
1252     }
1253     else
1254     {
1255         CvArr* t;
1256         CV_SWAP( uarr, varr, t );
1257
1258         flags = (flags & CV_SVD_U_T ? CV_SVD_V_T : 0)|
1259                 (flags & CV_SVD_V_T ? CV_SVD_U_T : 0);
1260         m = a->cols;
1261         n = a->rows;
1262         w_rows = w->cols;
1263         w_cols = w->rows;
1264         t_svd = 1;
1265     }
1266
1267     u = (CvMat*)uarr;
1268     v = (CvMat*)varr;
1269
1270     w_is_mat = w_cols > 1 && w_rows > 1;
1271
1272     if( !w_is_mat && CV_IS_MAT_CONT(w->type) && w_cols + w_rows - 1 == n )
1273         tw = w->data.ptr;
1274
1275     if( u )
1276     {
1277         if( !CV_IS_MAT( u ))
1278             CV_CALL( u = cvGetMat( u, &ustub ));
1279
1280         if( !(flags & CV_SVD_U_T) )
1281         {
1282             u_rows = u->rows;
1283             u_cols = u->cols;
1284         }
1285         else
1286         {
1287             u_rows = u->cols;
1288             u_cols = u->rows;
1289         }
1290
1291         if( !CV_ARE_TYPES_EQ( a, u ))
1292             CV_ERROR( CV_StsUnmatchedFormats, "" );
1293
1294         if( u_rows != m || (u_cols != m && u_cols != n))
1295             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U matrix has unappropriate size" :
1296                                                      "V matrix has unappropriate size" );
1297
1298         temp_u = (u_rows != u_cols && !(flags & CV_SVD_U_T)) || u->data.ptr==a->data.ptr;
1299
1300         if( w_is_mat && u_cols != w_rows )
1301             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U and W have incompatible sizes" :
1302                                                      "V and W have incompatible sizes" );
1303     }
1304     else
1305     {
1306         u = &ustub;
1307         u->data.ptr = 0;
1308         u->step = 0;
1309     }
1310
1311     if( v )
1312     {
1313         int v_rows, v_cols;
1314
1315         if( !CV_IS_MAT( v ))
1316             CV_CALL( v = cvGetMat( v, &vstub ));
1317
1318         if( !(flags & CV_SVD_V_T) )
1319         {
1320             v_rows = v->rows;
1321             v_cols = v->cols;
1322         }
1323         else
1324         {
1325             v_rows = v->cols;
1326             v_cols = v->rows;
1327         }
1328
1329         if( !CV_ARE_TYPES_EQ( a, v ))
1330             CV_ERROR( CV_StsUnmatchedFormats, "" );
1331
1332         if( v_rows != n || v_cols != n )
1333             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U matrix has unappropriate size" :
1334                                                     "V matrix has unappropriate size" );
1335
1336         if( w_is_mat && w_cols != v_cols )
1337             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U and W have incompatible sizes" :
1338                                                     "V and W have incompatible sizes" );
1339     }
1340     else
1341     {
1342         v = &vstub;
1343         v->data.ptr = 0;
1344         v->step = 0;
1345     }
1346
1347     type = CV_MAT_TYPE( a->type );
1348     pix_size = CV_ELEM_SIZE(type);
1349     buf_size = n*2 + m;
1350
1351     if( !(flags & CV_SVD_MODIFY_A) )
1352     {
1353         a_buf_offset = buf_size;
1354         buf_size += a->rows*a->cols;
1355     }
1356
1357     if( temp_u )
1358     {
1359         u_buf_offset = buf_size;
1360         buf_size += u->rows*u->cols;
1361     }
1362
1363     buf_size *= pix_size;
1364
1365     if( buf_size <= CV_MAX_LOCAL_SIZE )
1366     {
1367         buffer = (uchar*)cvStackAlloc( buf_size );
1368         local_alloc = 1;
1369     }
1370     else
1371     {
1372         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1373     }
1374
1375     if( !(flags & CV_SVD_MODIFY_A) )
1376     {
1377         cvInitMatHeader( &tmat, m, n, type,
1378                          buffer + a_buf_offset*pix_size );
1379         if( !t_svd )
1380             cvCopy( a, &tmat );
1381         else
1382             cvT( a, &tmat );
1383         a = &tmat;
1384     }
1385
1386     if( temp_u )
1387     {
1388         cvInitMatHeader( &ustub, u_cols, u_rows, type, buffer + u_buf_offset*pix_size );
1389         u = &ustub;
1390     }
1391
1392     if( !tw )
1393         tw = buffer + (n + m)*pix_size;
1394
1395     if( type == CV_32FC1 )
1396     {
1397         icvSVD_32f( a->data.fl, a->step/sizeof(float), a->rows, a->cols,
1398                    (float*)tw, u->data.fl, u->step/sizeof(float), u_cols,
1399                    v->data.fl, v->step/sizeof(float), (float*)buffer );
1400     }
1401     else if( type == CV_64FC1 )
1402     {
1403         icvSVD_64f( a->data.db, a->step/sizeof(double), a->rows, a->cols,
1404                     (double*)tw, u->data.db, u->step/sizeof(double), u_cols,
1405                     v->data.db, v->step/sizeof(double), (double*)buffer );
1406     }
1407     else
1408     {
1409         CV_ERROR( CV_StsUnsupportedFormat, "" );
1410     }
1411
1412     if( tw != w->data.ptr )
1413     {
1414         int shift = w->cols != 1;
1415         cvSetZero( w );
1416         if( type == CV_32FC1 )
1417             for( int i = 0; i < n; i++ )
1418                 ((float*)(w->data.ptr + i*w->step))[i*shift] = ((float*)tw)[i];
1419         else
1420             for( int i = 0; i < n; i++ )
1421                 ((double*)(w->data.ptr + i*w->step))[i*shift] = ((double*)tw)[i];
1422     }
1423
1424     if( uarr )
1425     {
1426         if( !(flags & CV_SVD_U_T))
1427             cvT( u, uarr );
1428         else if( temp_u )
1429             cvCopy( u, uarr );
1430         /*CV_CHECK_NANS( uarr );*/
1431     }
1432
1433     if( varr )
1434     {
1435         if( !(flags & CV_SVD_V_T))
1436             cvT( v, varr );
1437         /*CV_CHECK_NANS( varr );*/
1438     }
1439
1440     CV_CHECK_NANS( w );
1441
1442     __END__;
1443
1444     if( buffer && !local_alloc )
1445         cvFree( &buffer );
1446 }
1447
1448
1449 CV_IMPL void
1450 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
1451           const CvArr* varr, const CvArr* barr,
1452           CvArr* xarr, int flags )
1453 {
1454     uchar* buffer = 0;
1455     int local_alloc = 0;
1456
1457     CV_FUNCNAME( "cvSVBkSb" );
1458
1459     __BEGIN__;
1460
1461     CvMat wstub, *w = (CvMat*)warr;
1462     CvMat bstub, *b = (CvMat*)barr;
1463     CvMat xstub, *x = (CvMat*)xarr;
1464     CvMat ustub, ustub2, *u = (CvMat*)uarr;
1465     CvMat vstub, vstub2, *v = (CvMat*)varr;
1466     uchar* tw = 0;
1467     int type;
1468     int temp_u = 0, temp_v = 0;
1469     int u_buf_offset = 0, v_buf_offset = 0, w_buf_offset = 0, t_buf_offset = 0;
1470     int buf_size = 0, pix_size;
1471     int m, n, nm;
1472     int u_rows, u_cols;
1473     int v_rows, v_cols;
1474
1475     if( !CV_IS_MAT( w ))
1476         CV_CALL( w = cvGetMat( w, &wstub ));
1477
1478     if( !CV_IS_MAT( u ))
1479         CV_CALL( u = cvGetMat( u, &ustub ));
1480
1481     if( !CV_IS_MAT( v ))
1482         CV_CALL( v = cvGetMat( v, &vstub ));
1483
1484     if( !CV_IS_MAT( x ))
1485         CV_CALL( x = cvGetMat( x, &xstub ));
1486
1487     if( !CV_ARE_TYPES_EQ( w, u ) || !CV_ARE_TYPES_EQ( w, v ) || !CV_ARE_TYPES_EQ( w, x ))
1488         CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
1489
1490     type = CV_MAT_TYPE( w->type );
1491     pix_size = CV_ELEM_SIZE(type);
1492
1493     if( !(flags & CV_SVD_U_T) )
1494     {
1495         temp_u = 1;
1496         u_buf_offset = buf_size;
1497         buf_size += u->cols*u->rows*pix_size;
1498         u_rows = u->rows;
1499         u_cols = u->cols;
1500     }
1501     else
1502     {
1503         u_rows = u->cols;
1504         u_cols = u->rows;
1505     }
1506
1507     if( !(flags & CV_SVD_V_T) )
1508     {
1509         temp_v = 1;
1510         v_buf_offset = buf_size;
1511         buf_size += v->cols*v->rows*pix_size;
1512         v_rows = v->rows;
1513         v_cols = v->cols;
1514     }
1515     else
1516     {
1517         v_rows = v->cols;
1518         v_cols = v->rows;
1519     }
1520
1521     m = u_rows;
1522     n = v_rows;
1523     nm = MIN(n,m);
1524
1525     if( (u_rows != u_cols && v_rows != v_cols) || x->rows != v_rows )
1526         CV_ERROR( CV_StsBadSize, "V or U matrix must be square" );
1527
1528     if( (w->rows == 1 || w->cols == 1) && w->rows + w->cols - 1 == nm )
1529     {
1530         if( CV_IS_MAT_CONT(w->type) )
1531             tw = w->data.ptr;
1532         else
1533         {
1534             w_buf_offset = buf_size;
1535             buf_size += nm*pix_size;
1536         }
1537     }
1538     else
1539     {
1540         if( w->cols != v_cols || w->rows != u_cols )
1541             CV_ERROR( CV_StsBadSize, "W must be 1d array of MIN(m,n) elements or "
1542                                     "matrix which size matches to U and V" );
1543         w_buf_offset = buf_size;
1544         buf_size += nm*pix_size;
1545     }
1546
1547     if( b )
1548     {
1549         if( !CV_IS_MAT( b ))
1550             CV_CALL( b = cvGetMat( b, &bstub ));
1551         if( !CV_ARE_TYPES_EQ( w, b ))
1552             CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
1553         if( b->cols != x->cols || b->rows != m )
1554             CV_ERROR( CV_StsUnmatchedSizes, "b matrix must have (m x x->cols) size" );
1555     }
1556     else
1557     {
1558         b = &bstub;
1559         memset( b, 0, sizeof(*b));
1560     }
1561
1562     t_buf_offset = buf_size;
1563     buf_size += (MAX(m,n) + b->cols)*pix_size;
1564
1565     if( buf_size <= CV_MAX_LOCAL_SIZE )
1566     {
1567         buffer = (uchar*)cvStackAlloc( buf_size );
1568         local_alloc = 1;
1569     }
1570     else
1571         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1572
1573     if( temp_u )
1574     {
1575         cvInitMatHeader( &ustub2, u_cols, u_rows, type, buffer + u_buf_offset );
1576         cvT( u, &ustub2 );
1577         u = &ustub2;
1578     }
1579
1580     if( temp_v )
1581     {
1582         cvInitMatHeader( &vstub2, v_cols, v_rows, type, buffer + v_buf_offset );
1583         cvT( v, &vstub2 );
1584         v = &vstub2;
1585     }
1586
1587     if( !tw )
1588     {
1589         int i, shift = w->cols > 1 ? pix_size : 0;
1590         tw = buffer + w_buf_offset;
1591         for( i = 0; i < nm; i++ )
1592             memcpy( tw + i*pix_size, w->data.ptr + i*(w->step + shift), pix_size );
1593     }
1594
1595     if( type == CV_32FC1 )
1596     {
1597         icvSVBkSb_32f( m, n, (float*)tw, u->data.fl, u->step/sizeof(float),
1598                        v->data.fl, v->step/sizeof(float),
1599                        b->data.fl, b->step/sizeof(float), b->cols,
1600                        x->data.fl, x->step/sizeof(float),
1601                        (float*)(buffer + t_buf_offset) );
1602     }
1603     else if( type == CV_64FC1 )
1604     {
1605         icvSVBkSb_64f( m, n, (double*)tw, u->data.db, u->step/sizeof(double),
1606                        v->data.db, v->step/sizeof(double),
1607                        b->data.db, b->step/sizeof(double), b->cols,
1608                        x->data.db, x->step/sizeof(double),
1609                        (double*)(buffer + t_buf_offset) );
1610     }
1611     else
1612     {
1613         CV_ERROR( CV_StsUnsupportedFormat, "" );
1614     }
1615
1616     __END__;
1617
1618     if( buffer && !local_alloc )
1619         cvFree( &buffer );
1620 }
1621
1622 /* End of file. */