git.maemo.org Git - opencv/blob - cxcore/src/cxarithm.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 /* ////////////////////////////////////////////////////////////////////
  43 //
  44 //  CvMat arithmetic operations: +, - ...
  45 //
  46 // */
  47
  48 #include "_cxcore.h"
  49
  50 /****************************************************************************************\
  51 *                      Arithmetic operations (+, -) without mask                         *
  52 \****************************************************************************************/
  53
  54 #define ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype, cast_macro, len )\
  55 {                                                                   \
  56     int i;                                                          \
  57                                                                     \
  58     for( i = 0; i <= (len) - 4; i += 4 )                            \
  59     {                                                               \
  60         worktype t0 = __op__((src1)[i], (src2)[i]);                 \
  61         worktype t1 = __op__((src1)[i+1], (src2)[i+1]);             \
  62                                                                     \
  63         (dst)[i] = cast_macro( t0 );                                \
  64         (dst)[i+1] = cast_macro( t1 );                              \
  65                                                                     \
  66         t0 = __op__((src1)[i+2],(src2)[i+2]);                       \
  67         t1 = __op__((src1)[i+3],(src2)[i+3]);                       \
  68                                                                     \
  69         (dst)[i+2] = cast_macro( t0 );                              \
  70         (dst)[i+3] = cast_macro( t1 );                              \
  71     }                                                               \
  72                                                                     \
  73     for( ; i < (len); i++ )                                         \
  74     {                                                               \
  75         worktype t0 = __op__((src1)[i],(src2)[i]);                  \
  76         (dst)[i] = cast_macro( t0 );                                \
  77     }                                                               \
  78 }
  79
  80 #define ICV_DEF_BIN_ARI_OP_2D( __op__, name, type, worktype, cast_macro )   \
  81 IPCVAPI_IMPL( CvStatus, name,                                               \
  82     ( const type* src1, int step1, const type* src2, int step2,             \
  83       type* dst, int step, CvSize size ),                                   \
  84       (src1, step1, src2, step2, dst, step, size) )                         \
  85 {                                                                           \
  86     step1/=sizeof(src1[0]); step2/=sizeof(src2[0]); step/=sizeof(dst[0]);   \
  87                                                                             \
  88     if( size.width == 1 )                                                   \
  89     {                                                                       \
  90         for( ; size.height--; src1 += step1, src2 += step2, dst += step )   \
  91         {                                                                   \
  92             worktype t0 = __op__((src1)[0],(src2)[0]);                      \
  93             (dst)[0] = cast_macro( t0 );                                    \
  94         }                                                                   \
  95     }                                                                       \
  96     else                                                                    \
  97     {                                                                       \
  98         for( ; size.height--; src1 += step1, src2 += step2, dst += step )   \
  99         {                                                                   \
 100             ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype,                      \
 101                                      cast_macro, size.width );              \
 102         }                                                                   \
 103     }                                                                       \
 104                                                                             \
 105     return CV_OK;                                                           \
 106 }
 107
 108
 109 #define ICV_DEF_BIN_ARI_OP_2D_SFS(__op__, name, type, worktype, cast_macro) \
 110 IPCVAPI_IMPL( CvStatus, name,                                               \
 111     ( const type* src1, int step1, const type* src2, int step2,             \
 112       type* dst, int step, CvSize size, int /*scalefactor*/ ),              \
 113       (src1, step1, src2, step2, dst, step, size, 0) )                      \
 114 {                                                                           \
 115     step1/=sizeof(src1[0]); step2/=sizeof(src2[0]); step/=sizeof(dst[0]);   \
 116                                                                             \
 117     if( size.width == 1 )                                                   \
 118     {                                                                       \
 119         for( ; size.height--; src1 += step1, src2 += step2, dst += step )   \
 120         {                                                                   \
 121             worktype t0 = __op__((src1)[0],(src2)[0]);                      \
 122             (dst)[0] = cast_macro( t0 );                                    \
 123         }                                                                   \
 124     }                                                                       \
 125     else                                                                    \
 126     {                                                                       \
 127         for( ; size.height--; src1 += step1, src2 += step2, dst += step )   \
 128         {                                                                   \
 129             ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype,                      \
 130                                      cast_macro, size.width );              \
 131         }                                                                   \
 132     }                                                                       \
 133                                                                             \
 134     return CV_OK;                                                           \
 135 }
 136
 137
 138 #define ICV_DEF_UN_ARI_OP_CASE( __op__, worktype, cast_macro,               \
 139                                 src, scalar, dst, len )                     \
 140 {                                                                           \
 141     int i;                                                                  \
 142                                                                             \
 143     for( ; ((len) -= 12) >= 0; (dst) += 12, (src) += 12 )                   \
 144     {                                                                       \
 145         worktype t0 = __op__((scalar)[0], (src)[0]);                        \
 146         worktype t1 = __op__((scalar)[1], (src)[1]);                        \
 147                                                                             \
 148         (dst)[0] = cast_macro( t0 );                                        \
 149         (dst)[1] = cast_macro( t1 );                                        \
 150                                                                             \
 151         t0 = __op__((scalar)[2], (src)[2]);                                 \
 152         t1 = __op__((scalar)[3], (src)[3]);                                 \
 153                                                                             \
 154         (dst)[2] = cast_macro( t0 );                                        \
 155         (dst)[3] = cast_macro( t1 );                                        \
 156                                                                             \
 157         t0 = __op__((scalar)[4], (src)[4]);                                 \
 158         t1 = __op__((scalar)[5], (src)[5]);                                 \
 159                                                                             \
 160         (dst)[4] = cast_macro( t0 );                                        \
 161         (dst)[5] = cast_macro( t1 );                                        \
 162                                                                             \
 163         t0 = __op__((scalar)[6], (src)[6]);                                 \
 164         t1 = __op__((scalar)[7], (src)[7]);                                 \
 165                                                                             \
 166         (dst)[6] = cast_macro( t0 );                                        \
 167         (dst)[7] = cast_macro( t1 );                                        \
 168                                                                             \
 169         t0 = __op__((scalar)[8], (src)[8]);                                 \
 170         t1 = __op__((scalar)[9], (src)[9]);                                 \
 171                                                                             \
 172         (dst)[8] = cast_macro( t0 );                                        \
 173         (dst)[9] = cast_macro( t1 );                                        \
 174                                                                             \
 175         t0 = __op__((scalar)[10], (src)[10]);                               \
 176         t1 = __op__((scalar)[11], (src)[11]);                               \
 177                                                                             \
 178         (dst)[10] = cast_macro( t0 );                                       \
 179         (dst)[11] = cast_macro( t1 );                                       \
 180     }                                                                       \
 181                                                                             \
 182     for( (len) += 12, i = 0; i < (len); i++ )                               \
 183     {                                                                       \
 184         worktype t0 = __op__((scalar)[i],(src)[i]);                         \
 185         (dst)[i] = cast_macro( t0 );                                        \
 186     }                                                                       \
 187 }
 188
 189
 190 #define ICV_DEF_UN_ARI_OP_2D( __op__, name, type, worktype, cast_macro )    \
 191 static CvStatus CV_STDCALL name                                             \
 192     ( const type* src, int step1, type* dst, int step,                      \
 193       CvSize size, const worktype* scalar )                                 \
 194 {                                                                           \
 195     step1 /= sizeof(src[0]); step /= sizeof(dst[0]);                        \
 196                                                                             \
 197     if( size.width == 1 )                                                   \
 198     {                                                                       \
 199         for( ; size.height--; src += step1, dst += step )                   \
 200         {                                                                   \
 201             worktype t0 = __op__(*(scalar),*(src));                         \
 202             *(dst) = cast_macro( t0 );                                      \
 203         }                                                                   \
 204     }                                                                       \
 205     else                                                                    \
 206     {                                                                       \
 207         for( ; size.height--; src += step1, dst += step )                   \
 208         {                                                                   \
 209             const type *tsrc = src;                                         \
 210             type *tdst = dst;                                               \
 211             int width = size.width;                                         \
 212                                                                             \
 213             ICV_DEF_UN_ARI_OP_CASE( __op__, worktype, cast_macro,           \
 214                                     tsrc, scalar, tdst, width );            \
 215         }                                                                   \
 216     }                                                                       \
 217                                                                             \
 218     return CV_OK;                                                           \
 219 }
 220
 221
     /*
      * Instantiates the two-input kernels icv<name>_<depth>_C1R for one
      * operation: 8u, 16u and 16s through ICV_DEF_BIN_ARI_OP_2D_SFS (the variant
      * with the extra trailing int argument), 32s, 32f and 64f through
      * ICV_DEF_BIN_ARI_OP_2D. The 8u/16u/16s kernels compute in int, the
      * others in their own element type. `cast_8u` selects the conversion
      * used for the 8u kernel only; the other depths use the fixed
      * CV_CAST_* macros. No 8s kernel is generated.
      */
 222 #define ICV_DEF_BIN_ARI_ALL( __op__, name, cast_8u )                                \
 223 ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_8u_C1R, uchar, int, cast_8u )        \
 224 ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_16u_C1R, ushort, int, CV_CAST_16U )  \
 225 ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_16s_C1R, short, int, CV_CAST_16S )   \
 226 ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_32s_C1R, int, int, CV_CAST_32S )         \
 227 ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_32f_C1R, float, float, CV_CAST_32F )     \
 228 ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_64f_C1R, double, double, CV_CAST_64F )
 229
     /*
      * Instantiates the array-with-scalar kernels icv<name>_<depth>_C1R for
      * one operation, all through ICV_DEF_UN_ARI_OP_2D. The 8u, 16u, 16s and
      * 32s kernels take the scalar as int values; 32f takes float and 64f
      * takes double. No 8s kernel is generated.
      */
 230 #define ICV_DEF_UN_ARI_ALL( __op__, name )                                          \
 231 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_8u_C1R, uchar, int, CV_CAST_8U )          \
 232 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_16u_C1R, ushort, int, CV_CAST_16U )       \
 233 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_16s_C1R, short, int, CV_CAST_16S )        \
 234 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_32s_C1R, int, int, CV_CAST_32S )          \
 235 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_32f_C1R, float, float, CV_CAST_32F )      \
 236 ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_64f_C1R, double, double, CV_CAST_64F )
 237
     /*
      * CV_SUB_R is subtraction with the operands reversed: CV_SUB_R(a,b)
      * yields b - a. The binary kernels call the operation as
      * __op__(src1[i], src2[i]), so the icvSub_* kernels below compute
      * src2 - src1.
      */
 238 #undef CV_SUB_R
 239 #define CV_SUB_R(a,b) ((b) - (a))
 240
     /* two-input kernels: icvAdd_*_C1R and icvSub_*_C1R */
 241 ICV_DEF_BIN_ARI_ALL( CV_ADD, Add, CV_FAST_CAST_8U )
 242 ICV_DEF_BIN_ARI_ALL( CV_SUB_R, Sub, CV_FAST_CAST_8U )
 243
     /*
      * array-with-scalar kernels: icvAddC_*_C1R and icvSubRC_*_C1R. The scalar
      * is the first operand of the operation, so icvSubRC_* presumably
      * computes scalar - src (CV_SUB is defined outside this file -- confirm).
      */
 244 ICV_DEF_UN_ARI_ALL( CV_ADD, AddC )
 245 ICV_DEF_UN_ARI_ALL( CV_SUB, SubRC )
 246
 247 #define ICV_DEF_INIT_ARITHM_FUNC_TAB( FUNCNAME, FLAG )          \
 248 static  void  icvInit##FUNCNAME##FLAG##Table( CvFuncTable* tab )\
 249 {                                                               \
 250     tab->fn_2d[CV_8U] = (void*)icv##FUNCNAME##_8u_##FLAG;       \
 251     tab->fn_2d[CV_8S] = 0;                                      \
 252     tab->fn_2d[CV_16U] = (void*)icv##FUNCNAME##_16u_##FLAG;     \
 253     tab->fn_2d[CV_16S] = (void*)icv##FUNCNAME##_16s_##FLAG;     \
 254     tab->fn_2d[CV_32S] = (void*)icv##FUNCNAME##_32s_##FLAG;     \
 255     tab->fn_2d[CV_32F] = (void*)icv##FUNCNAME##_32f_##FLAG;     \
 256     tab->fn_2d[CV_64F] = (void*)icv##FUNCNAME##_64f_##FLAG;     \
 257 }
 258
     /*
      * Table initializers generated here: icvInitSubC1RTable,
      * icvInitSubRCC1RTable, icvInitAddC1RTable and icvInitAddCC1RTable.
      * Each one fills a CvFuncTable with the kernels of the matching family.
      */
 259 ICV_DEF_INIT_ARITHM_FUNC_TAB( Sub, C1R )
 260 ICV_DEF_INIT_ARITHM_FUNC_TAB( SubRC, C1R )
 261 ICV_DEF_INIT_ARITHM_FUNC_TAB( Add, C1R )
 262 ICV_DEF_INIT_ARITHM_FUNC_TAB( AddC, C1R )
 263
 264 /****************************************************************************************\
 265 *                       External Functions for Arithmetic Operations                     *
 266 \****************************************************************************************/
 267
 268 /*************************************** S U B ******************************************/
 269
 270 CV_IMPL void
 271 cvSub( const void* srcarr1, const void* srcarr2,
 272        void* dstarr, const void* maskarr )
 273 {
 274     static CvFuncTable sub_tab;
 275     static int inittab = 0;
 276     int local_alloc = 1;
 277     uchar* buffer = 0;
 278
 279     CV_FUNCNAME( "cvSub" );
 280
 281     __BEGIN__;
 282
 283     const CvArr* tmp;
 284     int y, dy, type, depth, cn, cont_flag = 0;
 285     int src1_step, src2_step, dst_step, tdst_step, mask_step;
 286     CvMat srcstub1, srcstub2, *src1, *src2;
 287     CvMat dststub,  *dst = (CvMat*)dstarr;
 288     CvMat maskstub, *mask = (CvMat*)maskarr;
 289     CvMat dstbuf, *tdst;
 290     CvFunc2D_3A func;
 291     CvFunc2D_3A1I func_sfs;
 292     CvCopyMaskFunc copym_func;
 293     CvSize size, tsize;
 294
 295     CV_SWAP( srcarr1, srcarr2, tmp ); // to comply with IPP
 296     src1 = (CvMat*)srcarr1;
 297     src2 = (CvMat*)srcarr2;
 298
 299     if( !CV_IS_MAT(src1) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
 300     {
 301         if( CV_IS_MATND(src1) || CV_IS_MATND(src2) || CV_IS_MATND(dst))
 302         {
 303             CvArr* arrs[] = { src1, src2, dst };
 304             CvMatND stubs[3];
 305             CvNArrayIterator iterator;
 306
 307             if( maskarr )
 308                 CV_ERROR( CV_StsBadMask,
 309                 "This operation on multi-dimensional arrays does not support mask" );
 310
 311             CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
 312
 313             type = iterator.hdr[0]->type;
 314             iterator.size.width *= CV_MAT_CN(type);
 315
 316             if( !inittab )
 317             {
 318                 icvInitSubC1RTable( &sub_tab );
 319                 inittab = 1;
 320             }
 321
 322             depth = CV_MAT_DEPTH(type);
 323             if( depth <= CV_16S )
 324             {
 325                 func_sfs = (CvFunc2D_3A1I)(sub_tab.fn_2d[depth]);
 326                 if( !func_sfs )
 327                     CV_ERROR( CV_StsUnsupportedFormat, "" );
 328
 329                 do
 330                 {
 331                     IPPI_CALL( func_sfs( iterator.ptr[0], CV_STUB_STEP,
 332                                          iterator.ptr[1], CV_STUB_STEP,
 333                                          iterator.ptr[2], CV_STUB_STEP,
 334                                          iterator.size, 0 ));
 335                 }
 336                 while( cvNextNArraySlice( &iterator ));
 337             }
 338             else
 339             {
 340                 func = (CvFunc2D_3A)(sub_tab.fn_2d[depth]);
 341                 if( !func )
 342                     CV_ERROR( CV_StsUnsupportedFormat, "" );
 343
 344                 do
 345                 {
 346                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
 347                                      iterator.ptr[1], CV_STUB_STEP,
 348                                      iterator.ptr[2], CV_STUB_STEP,
 349                                      iterator.size ));
 350                 }
 351                 while( cvNextNArraySlice( &iterator ));
 352             }
 353             EXIT;
 354         }
 355         else
 356         {
 357             int coi1 = 0, coi2 = 0, coi3 = 0;
 358
 359             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi1 ));
 360             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi2 ));
 361             CV_CALL( dst = cvGetMat( dst, &dststub, &coi3 ));
 362             if( coi1 + coi2 + coi3 != 0 )
 363                 CV_ERROR( CV_BadCOI, "" );
 364         }
 365     }
 366
 367     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
 368         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
 369
 370     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
 371         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
 372
 373     type = CV_MAT_TYPE(src1->type);
 374     size = cvGetMatSize( src1 );
 375     depth = CV_MAT_DEPTH(type);
 376     cn = CV_MAT_CN(type);
 377
 378     if( !mask )
 379     {
 380         if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
 381         {
 382             int len = size.width*size.height*cn;
 383
 384             if( len <= CV_MAX_INLINE_MAT_OP_SIZE*CV_MAX_INLINE_MAT_OP_SIZE )
 385             {
 386                 if( depth == CV_32F )
 387                 {
 388                     const float* src1data = (const float*)(src1->data.ptr);
 389                     const float* src2data = (const float*)(src2->data.ptr);
 390                     float* dstdata = (float*)(dst->data.ptr);
 391
 392                     do
 393                     {
 394                         dstdata[len-1] = (float)(src2data[len-1] - src1data[len-1]);
 395                     }
 396                     while( --len );
 397
 398                     EXIT;
 399                 }
 400
 401                 if( depth == CV_64F )
 402                 {
 403                     const double* src1data = (const double*)(src1->data.ptr);
 404                     const double* src2data = (const double*)(src2->data.ptr);
 405                     double* dstdata = (double*)(dst->data.ptr);
 406
 407                     do
 408                     {
 409                         dstdata[len-1] = src2data[len-1] - src1data[len-1];
 410                     }
 411                     while( --len );
 412
 413                     EXIT;
 414                 }
 415             }
 416             cont_flag = 1;
 417         }
 418
 419         dy = size.height;
 420         copym_func = 0;
 421         tdst = dst;
 422     }
 423     else
 424     {
 425         int buf_size, elem_size;
 426
 427         if( !CV_IS_MAT(mask) )
 428             CV_CALL( mask = cvGetMat( mask, &maskstub ));
 429
 430         if( !CV_IS_MASK_ARR(mask))
 431             CV_ERROR( CV_StsBadMask, "" );
 432
 433         if( !CV_ARE_SIZES_EQ( mask, dst ))
 434             CV_ERROR( CV_StsUnmatchedSizes, "" );
 435
 436         cont_flag = CV_IS_MAT_CONT( src1->type & src2->type & dst->type & mask->type );
 437         elem_size = CV_ELEM_SIZE(type);
 438
 439         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
 440         dy = MAX(dy,1);
 441         dy = MIN(dy,size.height);
 442         dstbuf = cvMat( dy, size.width, type );
 443         if( !cont_flag )
 444             dstbuf.step = cvAlign( dstbuf.step, 8 );
 445         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
 446         if( buf_size > CV_MAX_LOCAL_SIZE )
 447         {
 448             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
 449             local_alloc = 0;
 450         }
 451         else
 452             buffer = (uchar*)cvStackAlloc( buf_size );
 453         dstbuf.data.ptr = buffer;
 454         tdst = &dstbuf;
 455
 456         copym_func = icvGetCopyMaskFunc( elem_size );
 457     }
 458
 459     if( !inittab )
 460     {
 461         icvInitSubC1RTable( &sub_tab );
 462         inittab = 1;
 463     }
 464
 465     if( depth <= CV_16S )
 466     {
 467         func = 0;
 468         func_sfs = (CvFunc2D_3A1I)(sub_tab.fn_2d[depth]);
 469         if( !func_sfs )
 470             CV_ERROR( CV_StsUnsupportedFormat, "" );
 471     }
 472     else
 473     {
 474         func_sfs = 0;
 475         func = (CvFunc2D_3A)(sub_tab.fn_2d[depth]);
 476         if( !func )
 477             CV_ERROR( CV_StsUnsupportedFormat, "" );
 478     }
 479
 480     src1_step = src1->step;
 481     src2_step = src2->step;
 482     dst_step = dst->step;
 483     tdst_step = tdst->step;
 484     mask_step = mask ? mask->step : 0;
 485
 486     for( y = 0; y < size.height; y += dy )
 487     {
 488         tsize.width = size.width;
 489         tsize.height = dy;
 490         if( y + dy > size.height )
 491             tsize.height = size.height - y;
 492         if( cont_flag || tsize.height == 1 )
 493         {
 494             tsize.width *= tsize.height;
 495             tsize.height = 1;
 496             src1_step = src2_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
 497         }
 498
 499         IPPI_CALL( depth <= CV_16S ?
 500             func_sfs( src1->data.ptr + y*src1->step, src1_step,
 501                       src2->data.ptr + y*src2->step, src2_step,
 502                       tdst->data.ptr, tdst_step,
 503                       cvSize( tsize.width*cn, tsize.height ), 0 ) :
 504             func( src1->data.ptr + y*src1->step, src1_step,
 505                   src2->data.ptr + y*src2->step, src2_step,
 506                   tdst->data.ptr, tdst_step,
 507                   cvSize( tsize.width*cn, tsize.height )));
 508
 509         if( mask )
 510         {
 511             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
 512                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
 513         }
 514     }
 515
 516     __END__;
 517
 518     if( !local_alloc )
 519         cvFree( &buffer );
 520 }
 521
 522
 523 CV_IMPL void
 524 cvSubRS( const void* srcarr, CvScalar scalar, void* dstarr, const void* maskarr )
 525 {
 526     static CvFuncTable subr_tab;
 527     static int inittab = 0;
 528     int local_alloc = 1;
 529     uchar* buffer = 0;
 530
 531     CV_FUNCNAME( "cvSubRS" );
 532
 533     __BEGIN__;
 534
 535     int sctype, y, dy, type, depth, cn, coi = 0, cont_flag = 0;
 536     int src_step, dst_step, tdst_step, mask_step;
 537     CvMat srcstub, *src = (CvMat*)srcarr;
 538     CvMat dststub, *dst = (CvMat*)dstarr;
 539     CvMat maskstub, *mask = (CvMat*)maskarr;
 540     CvMat dstbuf, *tdst;
 541     CvFunc2D_2A1P func;
 542     CvCopyMaskFunc copym_func;
 543     double buf[12];
 544     int is_nd = 0;
 545     CvSize size, tsize;
 546
 547     if( !inittab )
 548     {
 549         icvInitSubRCC1RTable( &subr_tab );
 550         inittab = 1;
 551     }
 552
 553     if( !CV_IS_MAT(src) )
 554     {
 555         if( CV_IS_MATND(src) )
 556             is_nd = 1;
 557         else
 558         {
 559             CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
 560             if( coi != 0 )
 561                 CV_ERROR( CV_BadCOI, "" );
 562         }
 563     }
 564
 565     if( !CV_IS_MAT(dst) )
 566     {
 567         if( CV_IS_MATND(dst) )
 568             is_nd = 1;
 569         else
 570         {
 571             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
 572             if( coi != 0 )
 573                 CV_ERROR( CV_BadCOI, "" );
 574         }
 575     }
 576
 577     if( is_nd )
 578     {
 579         CvArr* arrs[] = { src, dst };
 580         CvMatND stubs[2];
 581         CvNArrayIterator iterator;
 582
 583         if( maskarr )
 584             CV_ERROR( CV_StsBadMask,
 585             "This operation on multi-dimensional arrays does not support mask" );
 586
 587         CV_CALL( cvInitNArrayIterator( 2, arrs, 0, stubs, &iterator ));
 588
 589         sctype = type = CV_MAT_TYPE(iterator.hdr[0]->type);
 590         if( CV_MAT_DEPTH(sctype) < CV_32S )
 591             sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
 592         iterator.size.width *= CV_MAT_CN(type);
 593
 594         func = (CvFunc2D_2A1P)(subr_tab.fn_2d[CV_MAT_DEPTH(type)]);
 595         if( !func )
 596             CV_ERROR( CV_StsUnsupportedFormat, "" );
 597
 598         CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
 599
 600         do
 601         {
 602             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
 603                              iterator.ptr[1], CV_STUB_STEP,
 604                              iterator.size, buf ));
 605         }
 606         while( cvNextNArraySlice( &iterator ));
 607         EXIT;
 608     }
 609
 610     if( !CV_ARE_TYPES_EQ( src, dst ))
 611         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
 612
 613     if( !CV_ARE_SIZES_EQ( src, dst ))
 614         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
 615
 616     sctype = type = CV_MAT_TYPE(src->type);
 617     depth = CV_MAT_DEPTH(type);
 618     cn = CV_MAT_CN(type);
 619     if( depth < CV_32S )
 620         sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
 621
 622     size = cvGetMatSize( src );
 623
 624     if( !maskarr )
 625     {
 626         if( CV_IS_MAT_CONT( src->type & dst->type ))
 627         {
 628             if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
 629             {
 630                 int len = size.width * size.height;
 631
 632                 if( type == CV_32FC1 )
 633                 {
 634                     const float* srcdata = (const float*)(src->data.ptr);
 635                     float* dstdata = (float*)(dst->data.ptr);
 636
 637                     do
 638                     {
 639                         dstdata[len-1] = (float)(scalar.val[0] - srcdata[len-1]);
 640                     }
 641                     while( --len );
 642
 643                     EXIT;
 644                 }
 645
 646                 if( type == CV_64FC1 )
 647                 {
 648                     const double* srcdata = (const double*)(src->data.ptr);
 649                     double* dstdata = (double*)(dst->data.ptr);
 650
 651                     do
 652                     {
 653                         dstdata[len-1] = scalar.val[0] - srcdata[len-1];
 654                     }
 655                     while( --len );
 656
 657                     EXIT;
 658                 }
 659             }
 660             cont_flag = 1;
 661         }
 662
 663         dy = size.height;
 664         copym_func = 0;
 665         tdst = dst;
 666     }
 667     else
 668     {
 669         int buf_size, elem_size;
 670
 671         if( !CV_IS_MAT(mask) )
 672             CV_CALL( mask = cvGetMat( mask, &maskstub ));
 673
 674         if( !CV_IS_MASK_ARR(mask))
 675             CV_ERROR( CV_StsBadMask, "" );
 676
 677         if( !CV_ARE_SIZES_EQ( mask, dst ))
 678             CV_ERROR( CV_StsUnmatchedSizes, "" );
 679
 680         cont_flag = CV_IS_MAT_CONT( src->type & dst->type & mask->type );
 681         elem_size = CV_ELEM_SIZE(type);
 682
 683         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
 684         dy = MAX(dy,1);
 685         dy = MIN(dy,size.height);
 686         dstbuf = cvMat( dy, size.width, type );
 687         if( !cont_flag )
 688             dstbuf.step = cvAlign( dstbuf.step, 8 );
 689         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
 690         if( buf_size > CV_MAX_LOCAL_SIZE )
 691         {
 692             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
 693             local_alloc = 0;
 694         }
 695         else
 696             buffer = (uchar*)cvStackAlloc( buf_size );
 697         dstbuf.data.ptr = buffer;
 698         tdst = &dstbuf;
 699
 700         copym_func = icvGetCopyMaskFunc( elem_size );
 701     }
 702
 703     func = (CvFunc2D_2A1P)(subr_tab.fn_2d[depth]);
 704     if( !func )
 705         CV_ERROR( CV_StsUnsupportedFormat, "" );
 706
 707     src_step = src->step;
 708     dst_step = dst->step;
 709     tdst_step = tdst->step;
 710     mask_step = mask ? mask->step : 0;
 711
 712     CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
 713
 714     for( y = 0; y < size.height; y += dy )
 715     {
 716         tsize.width = size.width;
 717         tsize.height = dy;
 718         if( y + dy > size.height )
 719             tsize.height = size.height - y;
 720         if( cont_flag || tsize.height == 1 )
 721         {
 722             tsize.width *= tsize.height;
 723             tsize.height = 1;
 724             src_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
 725         }
 726
 727         IPPI_CALL( func( src->data.ptr + y*src->step, src_step,
 728                          tdst->data.ptr, tdst_step,
 729                          cvSize( tsize.width*cn, tsize.height ), buf ));
 730         if( mask )
 731         {
 732             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
 733                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
 734         }
 735     }
 736
 737     __END__;
 738
 739     if( !local_alloc )
 740         cvFree( &buffer );
 741 }
 742
 743
 744 /******************************* A D D ********************************/
 745
 746 CV_IMPL void
 747 cvAdd( const void* srcarr1, const void* srcarr2,
 748        void* dstarr, const void* maskarr )
 749 {
 750     static CvFuncTable add_tab;
 751     static int inittab = 0;
 752     int local_alloc = 1;
 753     uchar* buffer = 0;
 754
 755     CV_FUNCNAME( "cvAdd" );
 756
 757     __BEGIN__;
 758
 759     int y, dy, type, depth, cn, cont_flag = 0;
 760     int src1_step, src2_step, dst_step, tdst_step, mask_step;
 761     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
 762     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
 763     CvMat dststub,  *dst = (CvMat*)dstarr;
 764     CvMat maskstub, *mask = (CvMat*)maskarr;
 765     CvMat dstbuf, *tdst;
 766     CvFunc2D_3A func;
 767     CvFunc2D_3A1I func_sfs;
 768     CvCopyMaskFunc copym_func;
 769     CvSize size, tsize;
 770
 771     if( !CV_IS_MAT(src1) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
 772     {
 773         if( CV_IS_MATND(src1) || CV_IS_MATND(src2) || CV_IS_MATND(dst))
 774         {
 775             CvArr* arrs[] = { src1, src2, dst };
 776             CvMatND stubs[3];
 777             CvNArrayIterator iterator;
 778
 779             if( maskarr )
 780                 CV_ERROR( CV_StsBadMask,
 781                 "This operation on multi-dimensional arrays does not support mask" );
 782
 783             CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
 784
 785             type = iterator.hdr[0]->type;
 786             iterator.size.width *= CV_MAT_CN(type);
 787
 788             if( !inittab )
 789             {
 790                 icvInitAddC1RTable( &add_tab );
 791                 inittab = 1;
 792             }
 793
 794             depth = CV_MAT_DEPTH(type);
 795             if( depth <= CV_16S )
 796             {
 797                 func_sfs = (CvFunc2D_3A1I)(add_tab.fn_2d[depth]);
 798                 if( !func_sfs )
 799                     CV_ERROR( CV_StsUnsupportedFormat, "" );
 800
 801                 do
 802                 {
 803                     IPPI_CALL( func_sfs( iterator.ptr[0], CV_STUB_STEP,
 804                                          iterator.ptr[1], CV_STUB_STEP,
 805                                          iterator.ptr[2], CV_STUB_STEP,
 806                                          iterator.size, 0 ));
 807                 }
 808                 while( cvNextNArraySlice( &iterator ));
 809             }
 810             else
 811             {
 812                 func = (CvFunc2D_3A)(add_tab.fn_2d[depth]);
 813                 if( !func )
 814                     CV_ERROR( CV_StsUnsupportedFormat, "" );
 815
 816                 do
 817                 {
 818                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
 819                                      iterator.ptr[1], CV_STUB_STEP,
 820                                      iterator.ptr[2], CV_STUB_STEP,
 821                                      iterator.size ));
 822                 }
 823                 while( cvNextNArraySlice( &iterator ));
 824             }
 825             EXIT;
 826         }
 827         else
 828         {
 829             int coi1 = 0, coi2 = 0, coi3 = 0;
 830
 831             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi1 ));
 832             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi2 ));
 833             CV_CALL( dst = cvGetMat( dst, &dststub, &coi3 ));
 834             if( coi1 + coi2 + coi3 != 0 )
 835                 CV_ERROR( CV_BadCOI, "" );
 836         }
 837     }
 838
 839     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
 840         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
 841
 842     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
 843         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
 844
 845     type = CV_MAT_TYPE(src1->type);
 846     size = cvGetMatSize( src1 );
 847     depth = CV_MAT_DEPTH(type);
 848     cn = CV_MAT_CN(type);
 849
 850     if( !mask )
 851     {
 852         if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
 853         {
 854             int len = size.width*size.height*cn;
 855
 856             if( len <= CV_MAX_INLINE_MAT_OP_SIZE*CV_MAX_INLINE_MAT_OP_SIZE )
 857             {
 858                 if( depth == CV_32F )
 859                 {
 860                     const float* src1data = (const float*)(src1->data.ptr);
 861                     const float* src2data = (const float*)(src2->data.ptr);
 862                     float* dstdata = (float*)(dst->data.ptr);
 863
 864                     do
 865                     {
 866                         dstdata[len-1] = (float)(src1data[len-1] + src2data[len-1]);
 867                     }
 868                     while( --len );
 869
 870                     EXIT;
 871                 }
 872
 873                 if( depth == CV_64F )
 874                 {
 875                     const double* src1data = (const double*)(src1->data.ptr);
 876                     const double* src2data = (const double*)(src2->data.ptr);
 877                     double* dstdata = (double*)(dst->data.ptr);
 878
 879                     do
 880                     {
 881                         dstdata[len-1] = src1data[len-1] + src2data[len-1];
 882                     }
 883                     while( --len );
 884
 885                     EXIT;
 886                 }
 887             }
 888             cont_flag = 1;
 889         }
 890
 891         dy = size.height;
 892         copym_func = 0;
 893         tdst = dst;
 894     }
 895     else
 896     {
 897         int buf_size, elem_size;
 898
 899         if( !CV_IS_MAT(mask) )
 900             CV_CALL( mask = cvGetMat( mask, &maskstub ));
 901
 902         if( !CV_IS_MASK_ARR(mask))
 903             CV_ERROR( CV_StsBadMask, "" );
 904
 905         if( !CV_ARE_SIZES_EQ( mask, dst ))
 906             CV_ERROR( CV_StsUnmatchedSizes, "" );
 907
 908         cont_flag = CV_IS_MAT_CONT( src1->type & src2->type & dst->type & mask->type );
 909         elem_size = CV_ELEM_SIZE(type);
 910
 911         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
 912         dy = MAX(dy,1);
 913         dy = MIN(dy,size.height);
 914         dstbuf = cvMat( dy, size.width, type );
 915         if( !cont_flag )
 916             dstbuf.step = cvAlign( dstbuf.step, 8 );
 917         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
 918         if( buf_size > CV_MAX_LOCAL_SIZE )
 919         {
 920             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
 921             local_alloc = 0;
 922         }
 923         else
 924             buffer = (uchar*)cvStackAlloc( buf_size );
 925         dstbuf.data.ptr = buffer;
 926         tdst = &dstbuf;
 927
 928         copym_func = icvGetCopyMaskFunc( elem_size );
 929     }
 930
 931     if( !inittab )
 932     {
 933         icvInitAddC1RTable( &add_tab );
 934         inittab = 1;
 935     }
 936
 937     if( depth <= CV_16S )
 938     {
 939         func = 0;
 940         func_sfs = (CvFunc2D_3A1I)(add_tab.fn_2d[depth]);
 941         if( !func_sfs )
 942             CV_ERROR( CV_StsUnsupportedFormat, "" );
 943     }
 944     else
 945     {
 946         func_sfs = 0;
 947         func = (CvFunc2D_3A)(add_tab.fn_2d[depth]);
 948         if( !func )
 949             CV_ERROR( CV_StsUnsupportedFormat, "" );
 950     }
 951
 952     src1_step = src1->step;
 953     src2_step = src2->step;
 954     dst_step = dst->step;
 955     tdst_step = tdst->step;
 956     mask_step = mask ? mask->step : 0;
 957
 958     for( y = 0; y < size.height; y += dy )
 959     {
 960         tsize.width = size.width;
 961         tsize.height = dy;
 962         if( y + dy > size.height )
 963             tsize.height = size.height - y;
 964         if( cont_flag || tsize.height == 1 )
 965         {
 966             tsize.width *= tsize.height;
 967             tsize.height = 1;
 968             src1_step = src2_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
 969         }
 970
 971         IPPI_CALL( depth <= CV_16S ?
 972             func_sfs( src1->data.ptr + y*src1->step, src1_step,
 973                       src2->data.ptr + y*src2->step, src2_step,
 974                       tdst->data.ptr, tdst_step,
 975                       cvSize( tsize.width*cn, tsize.height ), 0 ) :
 976             func( src1->data.ptr + y*src1->step, src1_step,
 977                   src2->data.ptr + y*src2->step, src2_step,
 978                   tdst->data.ptr, tdst_step,
 979                   cvSize( tsize.width*cn, tsize.height )));
 980
 981         if( mask )
 982         {
 983             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
 984                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
 985         }
 986     }
 987
 988     __END__;
 989
 990     if( !local_alloc )
 991         cvFree( &buffer );
 992 }
 993
 994
 995 CV_IMPL void
 996 cvAddS( const void* srcarr, CvScalar scalar, void* dstarr, const void* maskarr )
 997 {
 998     static CvFuncTable add_tab;
 999     static int inittab = 0;
1000     int local_alloc = 1;
1001     uchar* buffer = 0;
1002
1003     CV_FUNCNAME( "cvAddS" );
1004
1005     __BEGIN__;
1006
1007     int sctype, y, dy, type, depth, cn, coi = 0, cont_flag = 0;
1008     int src_step, dst_step, tdst_step, mask_step;
1009     CvMat srcstub, *src = (CvMat*)srcarr;
1010     CvMat dststub, *dst = (CvMat*)dstarr;
1011     CvMat maskstub, *mask = (CvMat*)maskarr;
1012     CvMat dstbuf, *tdst;
1013     CvFunc2D_2A1P func;
1014     CvCopyMaskFunc copym_func;
1015     double buf[12];
1016     int is_nd = 0;
1017     CvSize size, tsize;
1018
1019     if( !inittab )
1020     {
1021         icvInitAddCC1RTable( &add_tab );
1022         inittab = 1;
1023     }
1024
1025     if( !CV_IS_MAT(src) )
1026     {
1027         if( CV_IS_MATND(src) )
1028             is_nd = 1;
1029         else
1030         {
1031             CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
1032             if( coi != 0 )
1033                 CV_ERROR( CV_BadCOI, "" );
1034         }
1035     }
1036
1037     if( !CV_IS_MAT(dst) )
1038     {
1039         if( CV_IS_MATND(dst) )
1040             is_nd = 1;
1041         else
1042         {
1043             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1044             if( coi != 0 )
1045                 CV_ERROR( CV_BadCOI, "" );
1046         }
1047     }
1048
1049     if( is_nd )
1050     {
1051         CvArr* arrs[] = { src, dst };
1052         CvMatND stubs[2];
1053         CvNArrayIterator iterator;
1054
1055         if( maskarr )
1056             CV_ERROR( CV_StsBadMask,
1057             "This operation on multi-dimensional arrays does not support mask" );
1058
1059         CV_CALL( cvInitNArrayIterator( 2, arrs, 0, stubs, &iterator ));
1060
1061         sctype = type = CV_MAT_TYPE(iterator.hdr[0]->type);
1062         if( CV_MAT_DEPTH(sctype) < CV_32S )
1063             sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
1064         iterator.size.width *= CV_MAT_CN(type);
1065
1066         func = (CvFunc2D_2A1P)(add_tab.fn_2d[CV_MAT_DEPTH(type)]);
1067         if( !func )
1068             CV_ERROR( CV_StsUnsupportedFormat, "" );
1069
1070         CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
1071
1072         do
1073         {
1074             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
1075                              iterator.ptr[1], CV_STUB_STEP,
1076                              iterator.size, buf ));
1077         }
1078         while( cvNextNArraySlice( &iterator ));
1079         EXIT;
1080     }
1081
1082     if( !CV_ARE_TYPES_EQ( src, dst ))
1083         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1084
1085     if( !CV_ARE_SIZES_EQ( src, dst ))
1086         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1087
1088     sctype = type = CV_MAT_TYPE(src->type);
1089     depth = CV_MAT_DEPTH(type);
1090     cn = CV_MAT_CN(type);
1091     if( depth < CV_32S )
1092         sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
1093
1094     size = cvGetMatSize( src );
1095
1096     if( !maskarr )
1097     {
1098         if( CV_IS_MAT_CONT( src->type & dst->type ))
1099         {
1100             if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
1101             {
1102                 int len = size.width * size.height;
1103
1104                 if( type == CV_32FC1 )
1105                 {
1106                     const float* srcdata = (const float*)(src->data.ptr);
1107                     float* dstdata = (float*)(dst->data.ptr);
1108
1109                     do
1110                     {
1111                         dstdata[len-1] = (float)(scalar.val[0] + srcdata[len-1]);
1112                     }
1113                     while( --len );
1114
1115                     EXIT;
1116                 }
1117
1118                 if( type == CV_64FC1 )
1119                 {
1120                     const double* srcdata = (const double*)(src->data.ptr);
1121                     double* dstdata = (double*)(dst->data.ptr);
1122
1123                     do
1124                     {
1125                         dstdata[len-1] = scalar.val[0] + srcdata[len-1];
1126                     }
1127                     while( --len );
1128
1129                     EXIT;
1130                 }
1131             }
1132             cont_flag = 1;
1133         }
1134
1135         dy = size.height;
1136         copym_func = 0;
1137         tdst = dst;
1138     }
1139     else
1140     {
1141         int buf_size, elem_size;
1142
1143         if( !CV_IS_MAT(mask) )
1144             CV_CALL( mask = cvGetMat( mask, &maskstub ));
1145
1146         if( !CV_IS_MASK_ARR(mask))
1147             CV_ERROR( CV_StsBadMask, "" );
1148
1149         if( !CV_ARE_SIZES_EQ( mask, dst ))
1150             CV_ERROR( CV_StsUnmatchedSizes, "" );
1151
1152         cont_flag = CV_IS_MAT_CONT( src->type & dst->type & mask->type );
1153         elem_size = CV_ELEM_SIZE(type);
1154
1155         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
1156         dy = MAX(dy,1);
1157         dy = MIN(dy,size.height);
1158         dstbuf = cvMat( dy, size.width, type );
1159         if( !cont_flag )
1160             dstbuf.step = cvAlign( dstbuf.step, 8 );
1161         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
1162         if( buf_size > CV_MAX_LOCAL_SIZE )
1163         {
1164             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1165             local_alloc = 0;
1166         }
1167         else
1168             buffer = (uchar*)cvStackAlloc( buf_size );
1169         dstbuf.data.ptr = buffer;
1170         tdst = &dstbuf;
1171
1172         copym_func = icvGetCopyMaskFunc( elem_size );
1173     }
1174
1175     func = (CvFunc2D_2A1P)(add_tab.fn_2d[depth]);
1176     if( !func )
1177         CV_ERROR( CV_StsUnsupportedFormat, "" );
1178
1179     src_step = src->step;
1180     dst_step = dst->step;
1181     tdst_step = tdst->step;
1182     mask_step = mask ? mask->step : 0;
1183
1184     CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
1185
1186     for( y = 0; y < size.height; y += dy )
1187     {
1188         tsize.width = size.width;
1189         tsize.height = dy;
1190         if( y + dy > size.height )
1191             tsize.height = size.height - y;
1192         if( cont_flag || tsize.height == 1 )
1193         {
1194             tsize.width *= tsize.height;
1195             tsize.height = 1;
1196             src_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
1197         }
1198
1199         IPPI_CALL( func( src->data.ptr + y*src->step, src_step,
1200                          tdst->data.ptr, tdst_step,
1201                          cvSize( tsize.width*cn, tsize.height ), buf ));
1202         if( mask )
1203         {
1204             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
1205                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
1206         }
1207     }
1208
1209     __END__;
1210
1211     if( !local_alloc )
1212         cvFree( &buffer );
1213 }
1214
1215
1216 /***************************************** M U L ****************************************/
1217
1218 #define ICV_DEF_MUL_OP_CASE( flavor, arrtype, worktype, _cast_macro1_,                  \
1219                              _cast_macro2_, _cvt_macro_ )                               \
1220 static CvStatus CV_STDCALL                                                              \
1221     icvMul_##flavor##_C1R( const arrtype* src1, int step1,                              \
1222                            const arrtype* src2, int step2,                              \
1223                            arrtype* dst, int step,                                      \
1224                            CvSize size, double scale )                                  \
1225 {                                                                                       \
1226     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]);         \
1227                                                                                         \
1228     if( fabs(scale - 1.) < DBL_EPSILON )                                                \
1229     {                                                                                   \
1230         for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                     \
1231         {                                                                               \
1232             int i;                                                                      \
1233             for( i = 0; i <= size.width - 4; i += 4 )                                   \
1234             {                                                                           \
1235                 worktype t0 = src1[i] * src2[i];                                        \
1236                 worktype t1 = src1[i+1] * src2[i+1];                                    \
1237                                                                                         \
1238                 dst[i] = _cast_macro2_(t0);                                             \
1239                 dst[i+1] = _cast_macro2_(t1);                                           \
1240                                                                                         \
1241                 t0 = src1[i+2] * src2[i+2];                                             \
1242                 t1 = src1[i+3] * src2[i+3];                                             \
1243                                                                                         \
1244                 dst[i+2] = _cast_macro2_(t0);                                           \
1245                 dst[i+3] = _cast_macro2_(t1);                                           \
1246             }                                                                           \
1247                                                                                         \
1248             for( ; i < size.width; i++ )                                                \
1249             {                                                                           \
1250                 worktype t0 = src1[i] * src2[i];                                        \
1251                 dst[i] = _cast_macro2_(t0);                                             \
1252             }                                                                           \
1253         }                                                                               \
1254     }                                                                                   \
1255     else                                                                                \
1256     {                                                                                   \
1257         for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                     \
1258         {                                                                               \
1259             int i;                                                                      \
1260             for( i = 0; i <= size.width - 4; i += 4 )                                   \
1261             {                                                                           \
1262                 double ft0 = scale*_cvt_macro_(src1[i])*_cvt_macro_(src2[i]);           \
1263                 double ft1 = scale*_cvt_macro_(src1[i+1])*_cvt_macro_(src2[i+1]);       \
1264                 worktype t0 = _cast_macro1_(ft0);                                       \
1265                 worktype t1 = _cast_macro1_(ft1);                                       \
1266                                                                                         \
1267                 dst[i] = _cast_macro2_(t0);                                             \
1268                 dst[i+1] = _cast_macro2_(t1);                                           \
1269                                                                                         \
1270                 ft0 = scale*_cvt_macro_(src1[i+2])*_cvt_macro_(src2[i+2]);              \
1271                 ft1 = scale*_cvt_macro_(src1[i+3])*_cvt_macro_(src2[i+3]);              \
1272                 t0 = _cast_macro1_(ft0);                                                \
1273                 t1 = _cast_macro1_(ft1);                                                \
1274                                                                                         \
1275                 dst[i+2] = _cast_macro2_(t0);                                           \
1276                 dst[i+3] = _cast_macro2_(t1);                                           \
1277             }                                                                           \
1278                                                                                         \
1279             for( ; i < size.width; i++ )                                                \
1280             {                                                                           \
1281                 worktype t0;                                                            \
1282                 t0 = _cast_macro1_(scale*_cvt_macro_(src1[i])*_cvt_macro_(src2[i]));    \
1283                 dst[i] = _cast_macro2_(t0);                                             \
1284             }                                                                           \
1285         }                                                                               \
1286     }                                                                                   \
1287                                                                                         \
1288     return CV_OK;                                                                       \
1289 }
1290
1291
1292 ICV_DEF_MUL_OP_CASE( 8u, uchar, int, cvRound, CV_CAST_8U, CV_8TO32F )
1293 ICV_DEF_MUL_OP_CASE( 16u, ushort, int, cvRound, CV_CAST_16U, CV_NOP )
1294 ICV_DEF_MUL_OP_CASE( 16s, short, int, cvRound, CV_CAST_16S, CV_NOP )
1295 ICV_DEF_MUL_OP_CASE( 32s, int, int, cvRound, CV_CAST_32S, CV_NOP )
1296 ICV_DEF_MUL_OP_CASE( 32f, float, double, CV_NOP, CV_CAST_32F, CV_NOP )
1297 ICV_DEF_MUL_OP_CASE( 64f, double, double, CV_NOP, CV_CAST_64F, CV_NOP )
1298
1299
1300 ICV_DEF_INIT_ARITHM_FUNC_TAB( Mul, C1R )
1301
1302
1303 typedef CvStatus (CV_STDCALL * CvScaledElWiseFunc)( const void* src1, int step1,
1304                                                     const void* src2, int step2,
1305                                                     void* dst, int step,
1306                                                     CvSize size, double scale );
1307
1308 CV_IMPL void
1309 cvMul( const void* srcarr1, const void* srcarr2, void* dstarr, double scale )
1310 {
1311     static CvFuncTable mul_tab;
1312     static int inittab = 0;
1313
1314     CV_FUNCNAME( "cvMul" );
1315
1316     __BEGIN__;
1317
1318     int type, depth, coi = 0;
1319     int src1_step, src2_step, dst_step;
1320     int is_nd = 0;
1321     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
1322     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
1323     CvMat dststub,  *dst = (CvMat*)dstarr;
1324     CvSize size;
1325     CvScaledElWiseFunc func;
1326
1327     if( !inittab )
1328     {
1329         icvInitMulC1RTable( &mul_tab );
1330         inittab = 1;
1331     }
1332
1333     if( !CV_IS_MAT(src1) )
1334     {
1335         if( CV_IS_MATND(src1) )
1336             is_nd = 1;
1337         else
1338         {
1339             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi ));
1340             if( coi != 0 )
1341                 CV_ERROR( CV_BadCOI, "" );
1342         }
1343     }
1344
1345     if( !CV_IS_MAT(src2) )
1346     {
1347         if( CV_IS_MATND(src2) )
1348             is_nd = 1;
1349         else
1350         {
1351             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi ));
1352             if( coi != 0 )
1353                 CV_ERROR( CV_BadCOI, "" );
1354         }
1355     }
1356
1357     if( !CV_IS_MAT(dst) )
1358     {
1359         if( CV_IS_MATND(dst) )
1360             is_nd = 1;
1361         else
1362         {
1363             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1364             if( coi != 0 )
1365                 CV_ERROR( CV_BadCOI, "" );
1366         }
1367     }
1368
1369     if( is_nd )
1370     {
1371         CvArr* arrs[] = { src1, src2, dst };
1372         CvMatND stubs[3];
1373         CvNArrayIterator iterator;
1374
1375         CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
1376
1377         type = iterator.hdr[0]->type;
1378         iterator.size.width *= CV_MAT_CN(type);
1379
1380         func = (CvScaledElWiseFunc)(mul_tab.fn_2d[CV_MAT_DEPTH(type)]);
1381         if( !func )
1382             CV_ERROR( CV_StsUnsupportedFormat, "" );
1383
1384         do
1385         {
1386             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
1387                              iterator.ptr[1], CV_STUB_STEP,
1388                              iterator.ptr[2], CV_STUB_STEP,
1389                              iterator.size, scale ));
1390         }
1391         while( cvNextNArraySlice( &iterator ));
1392         EXIT;
1393     }
1394
1395     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
1396         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1397
1398     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
1399         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1400
1401     type = CV_MAT_TYPE(src1->type);
1402     size = cvGetMatSize( src1 );
1403
1404     depth = CV_MAT_DEPTH(type);
1405     size.width *= CV_MAT_CN( type );
1406
1407     if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
1408     {
1409         size.width *= size.height;
1410
1411         if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE && scale == 1 )
1412         {
1413             if( depth == CV_32F )
1414             {
1415                 const float* src1data = (const float*)(src1->data.ptr);
1416                 const float* src2data = (const float*)(src2->data.ptr);
1417                 float* dstdata = (float*)(dst->data.ptr);
1418
1419                 do
1420                 {
1421                     dstdata[size.width-1] = (float)
1422                         (src1data[size.width-1] * src2data[size.width-1]);
1423                 }
1424                 while( --size.width );
1425
1426                 EXIT;
1427             }
1428
1429             if( depth == CV_64F )
1430             {
1431                 const double* src1data = (const double*)(src1->data.ptr);
1432                 const double* src2data = (const double*)(src2->data.ptr);
1433                 double* dstdata = (double*)(dst->data.ptr);
1434
1435                 do
1436                 {
1437                     dstdata[size.width-1] =
1438                         src1data[size.width-1] * src2data[size.width-1];
1439                 }
1440                 while( --size.width );
1441
1442                 EXIT;
1443             }
1444         }
1445
1446         src1_step = src2_step = dst_step = CV_STUB_STEP;
1447         size.height = 1;
1448     }
1449     else
1450     {
1451         src1_step = src1->step;
1452         src2_step = src2->step;
1453         dst_step = dst->step;
1454     }
1455
1456     func = (CvScaledElWiseFunc)(mul_tab.fn_2d[CV_MAT_DEPTH(type)]);
1457
1458     if( !func )
1459         CV_ERROR( CV_StsUnsupportedFormat, "" );
1460
1461     IPPI_CALL( func( src1->data.ptr, src1_step, src2->data.ptr, src2_step,
1462                      dst->data.ptr, dst_step, size, scale ));
1463
1464     __END__;
1465 }
1466
1467
1468 /***************************************** D I V ****************************************/
1469
/*
 * ICV_DEF_DIV_OP_CASE generates icvDiv_<flavor>_C1R(), which stores
 *
 *     dst[i] = src1[i] * scale / src2[i]
 *
 * for every element of a size.height x size.width array, or 0 where
 * _check_macro_ rejects the divisor.  The function always returns CV_OK.
 *
 *   flavor            - suffix pasted into the generated function name
 *   arrtype           - element type of src1, src2 and dst
 *   worktype          - type of the intermediate result before the final cast
 *   checktype         - type forwarded to _start_row_macro_
 *   _start_row_macro_ - expanded at the top of every row; it has to declare
 *                       the column index `i` (and `isrc` when that is not
 *                       simply the divisor pointer)
 *   _cast_macro1_     - converts the floating-point quotient to worktype
 *   _cast_macro2_     - converts worktype to arrtype for the store
 *   _cvt_macro_       - applied to source elements before the arithmetic
 *   _check_macro_     - non-zero when isrc[i] may be used as a divisor
 *   isrc              - name of the array handed to _check_macro_
 *
 * step1, step2 and step are divided by the element size before use, i.e. they
 * are expected in bytes.
 *
 * Elements are processed four at a time.  When all four divisors pass the
 * check, one division d = scale/(s0*s1*s2*s3) is performed and every quotient
 * is rebuilt by multiplying with the other divisors of its pair:
 *     b*d == scale/(s0*s1),  so  s1 * src1[i] * (b*d) == src1[i]*scale/s0.
 * Otherwise, and for the remaining tail of the row, each element is divided
 * on its own.
 *
 * NOTE(review): the four-way product a*b is formed in double; for the 64f
 * flavor it looks like extreme divisors could overflow or underflow it and
 * distort the fast-path result - confirm whether callers can hit this.
 */
1470 #define ICV_DEF_DIV_OP_CASE( flavor, arrtype, worktype, checktype, _start_row_macro_,   \
1471     _cast_macro1_, _cast_macro2_, _cvt_macro_, _check_macro_, isrc )                    \
1472                                                                                         \
1473 static CvStatus CV_STDCALL                                                              \
1474 icvDiv_##flavor##_C1R( const arrtype* src1, int step1,                                  \
1475                        const arrtype* src2, int step2,                                  \
1476                        arrtype* dst, int step,                                          \
1477                        CvSize size, double scale )                                      \
1478 {                                                                                       \
1479     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]);         \
1480                                                                                         \
1481     for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                         \
1482     {                                                                                   \
1483         _start_row_macro_(checktype, src2);                                             \
1484         for( i = 0; i <= size.width - 4; i += 4 )                                       \
1485         {                                                                               \
1486             if( _check_macro_(isrc[i]) && _check_macro_(isrc[i+1]) &&                   \
1487                 _check_macro_(isrc[i+2]) && _check_macro_(isrc[i+3]))                   \
1488             {                                                                           \
1489                 double a = (double)_cvt_macro_(src2[i]) * _cvt_macro_(src2[i+1]);       \
1490                 double b = (double)_cvt_macro_(src2[i+2]) * _cvt_macro_(src2[i+3]);     \
1491                 double d = scale/(a * b);                                               \
1492                                                                                         \
1493                 b *= d;                                                                 \
1494                 a *= d;                                                                 \
1495                                                                                         \
1496                 worktype z0 = _cast_macro1_(src2[i+1] * _cvt_macro_(src1[i]) * b);      \
1497                 worktype z1 = _cast_macro1_(src2[i] * _cvt_macro_(src1[i+1]) * b);      \
1498                 worktype z2 = _cast_macro1_(src2[i+3] * _cvt_macro_(src1[i+2]) * a);    \
1499                 worktype z3 = _cast_macro1_(src2[i+2] * _cvt_macro_(src1[i+3]) * a);    \
1500                                                                                         \
1501                 dst[i] = _cast_macro2_(z0);                                             \
1502                 dst[i+1] = _cast_macro2_(z1);                                           \
1503                 dst[i+2] = _cast_macro2_(z2);                                           \
1504                 dst[i+3] = _cast_macro2_(z3);                                           \
1505             }                                                                           \
1506             else                                                                        \
1507             {                                                                           \
1508                 worktype z0 = _check_macro_(isrc[i]) ?                                  \
1509                    _cast_macro1_(_cvt_macro_(src1[i])*scale/_cvt_macro_(src2[i])) : 0;  \
1510                 worktype z1 = _check_macro_(isrc[i+1]) ?                                \
1511                    _cast_macro1_(_cvt_macro_(src1[i+1])*scale/_cvt_macro_(src2[i+1])):0;\
1512                 worktype z2 = _check_macro_(isrc[i+2]) ?                                \
1513                    _cast_macro1_(_cvt_macro_(src1[i+2])*scale/_cvt_macro_(src2[i+2])):0;\
1514                 worktype z3 = _check_macro_(isrc[i+3]) ?                                \
1515                    _cast_macro1_(_cvt_macro_(src1[i+3])*scale/_cvt_macro_(src2[i+3])):0;\
1516                                                                                         \
1517                 dst[i] = _cast_macro2_(z0);                                             \
1518                 dst[i+1] = _cast_macro2_(z1);                                           \
1519                 dst[i+2] = _cast_macro2_(z2);                                           \
1520                 dst[i+3] = _cast_macro2_(z3);                                           \
1521             }                                                                           \
1522         }                                                                               \
1523                                                                                         \
1524         for( ; i < size.width; i++ )                                                    \
1525         {                                                                               \
1526             worktype z0 = _check_macro_(isrc[i]) ?                                      \
1527                 _cast_macro1_(_cvt_macro_(src1[i])*scale/_cvt_macro_(src2[i])) : 0;     \
1528             dst[i] = _cast_macro2_(z0);                                                 \
1529         }                                                                               \
1530     }                                                                                   \
1531                                                                                         \
1532     return CV_OK;                                                                       \
1533 }
1534
1535
/*
 * ICV_DEF_RECIP_OP_CASE generates icvRecip_<flavor>_C1R(), which stores
 *
 *     dst[i] = scale / src[i]
 *
 * for every element of a size.height x size.width array, or 0 where
 * _check_macro_ rejects src[i].  The function always returns CV_OK.
 *
 * The macro parameters have the same roles as in ICV_DEF_DIV_OP_CASE:
 * flavor names the function, arrtype/worktype are the element and
 * intermediate types, _start_row_macro_(checktype, src) declares `i` (and
 * `isrc` if needed) per row, _cast_macro1_/_cast_macro2_ perform the two
 * result conversions, _cvt_macro_ converts a source element and
 * _check_macro_(isrc[i]) tells whether the element may be used as a divisor.
 *
 * step1 and step are divided by the element size before use, i.e. they are
 * expected in bytes.
 *
 * Four elements are handled per iteration.  If all four pass the check a
 * single division d = scale/(s0*s1*s2*s3) is shared: b*d == scale/(s0*s1),
 * hence s1*(b*d) == scale/s0 and so on.  Otherwise, and for the tail of the
 * row, each reciprocal is computed separately.
 *
 * NOTE(review): as in the division kernel, the four-way product is formed in
 * double and may leave its range for extreme 64f inputs - confirm.
 */
1536 #define ICV_DEF_RECIP_OP_CASE( flavor, arrtype, worktype, checktype,            \
1537     _start_row_macro_, _cast_macro1_, _cast_macro2_,                            \
1538     _cvt_macro_, _check_macro_, isrc )                                          \
1539                                                                                 \
1540 static CvStatus CV_STDCALL                                                      \
1541 icvRecip_##flavor##_C1R( const arrtype* src, int step1,                         \
1542                          arrtype* dst, int step,                                \
1543                          CvSize size, double scale )                            \
1544 {                                                                               \
1545     step1 /= sizeof(src[0]); step /= sizeof(dst[0]);                            \
1546                                                                                 \
1547     for( ; size.height--; src+=step1, dst+=step )                               \
1548     {                                                                           \
1549         _start_row_macro_(checktype, src);                                      \
1550         for( i = 0; i <= size.width - 4; i += 4 )                               \
1551         {                                                                       \
1552             if( _check_macro_(isrc[i]) && _check_macro_(isrc[i+1]) &&           \
1553                 _check_macro_(isrc[i+2]) && _check_macro_(isrc[i+3]))           \
1554             {                                                                   \
1555                 double a = (double)_cvt_macro_(src[i]) * _cvt_macro_(src[i+1]); \
1556                 double b = (double)_cvt_macro_(src[i+2]) * _cvt_macro_(src[i+3]);\
1557                 double d = scale/(a * b);                                       \
1558                                                                                 \
1559                 b *= d;                                                         \
1560                 a *= d;                                                         \
1561                                                                                 \
1562                 worktype z0 = _cast_macro1_(src[i+1] * b);                      \
1563                 worktype z1 = _cast_macro1_(src[i] * b);                        \
1564                 worktype z2 = _cast_macro1_(src[i+3] * a);                      \
1565                 worktype z3 = _cast_macro1_(src[i+2] * a);                      \
1566                                                                                 \
1567                 dst[i] = _cast_macro2_(z0);                                     \
1568                 dst[i+1] = _cast_macro2_(z1);                                   \
1569                 dst[i+2] = _cast_macro2_(z2);                                   \
1570                 dst[i+3] = _cast_macro2_(z3);                                   \
1571             }                                                                   \
1572             else                                                                \
1573             {                                                                   \
1574                 worktype z0 = _check_macro_(isrc[i]) ?                          \
1575                    _cast_macro1_(scale/_cvt_macro_(src[i])) : 0;                \
1576                 worktype z1 = _check_macro_(isrc[i+1]) ?                        \
1577                    _cast_macro1_(scale/_cvt_macro_(src[i+1])):0;                \
1578                 worktype z2 = _check_macro_(isrc[i+2]) ?                        \
1579                    _cast_macro1_(scale/_cvt_macro_(src[i+2])):0;                \
1580                 worktype z3 = _check_macro_(isrc[i+3]) ?                        \
1581                    _cast_macro1_(scale/_cvt_macro_(src[i+3])):0;                \
1582                                                                                 \
1583                 dst[i] = _cast_macro2_(z0);                                     \
1584                 dst[i+1] = _cast_macro2_(z1);                                   \
1585                 dst[i+2] = _cast_macro2_(z2);                                   \
1586                 dst[i+3] = _cast_macro2_(z3);                                   \
1587             }                                                                   \
1588         }                                                                       \
1589                                                                                 \
1590         for( ; i < size.width; i++ )                                            \
1591         {                                                                       \
1592             worktype z0 = _check_macro_(isrc[i]) ?                              \
1593                 _cast_macro1_(scale/_cvt_macro_(src[i])) : 0;                   \
1594             dst[i] = _cast_macro2_(z0);                                         \
1595         }                                                                       \
1596     }                                                                           \
1597                                                                                 \
1598     return CV_OK;                                                               \
1599 }
1600
1601
/* Row prologue for the integer kernels: only the column index is declared,
   because those kernels test the divisor array itself (they pass src/src2
   as `isrc`). */
1602 #define div_start_row_int(checktype, divisor) \
1603     int i
1604
/* Row prologue for the floating-point kernels: besides the column index it
   declares `isrc`, the divisor row viewed through a checktype pointer, so that
   the zero test can be done on the raw bits of each element. */
1605 #define div_start_row_flt(checktype, divisor) \
1606     const checktype* isrc = (const checktype*)divisor; int i
1607
/* Zero tests on the raw bits of a float / double: everything except the top
   bit is examined, so (assuming IEEE-754 layout) both +0 and -0 count as
   zero.  The result is non-zero when the value may be used as a divisor. */
1608 #define div_check_zero_flt(x)  (((x) & 0x7fffffff) != 0)
1609 #define div_check_zero_dbl(x)  (((x) & CV_BIG_INT(0x7fffffffffffffff)) != 0)
1610
/*
 * Instantiate the division and reciprocal kernels for every supported depth.
 * Integer flavors round the quotient with cvRound and test the divisor with
 * CV_NONZERO; the 32f/64f flavors keep the quotient in double and test the
 * divisor bits through `isrc` (int / int64 view of the row).
 *
 * The 8u division kernel is compiled with optimization switched off on
 * WIN64 + EM64T with MSVC (when not using CV_ICC); the reason is not recorded
 * in this file.
 */
1611 #if defined WIN64 && defined EM64T && defined _MSC_VER && !defined CV_ICC
1612 #pragma optimize("",off)
1613 #endif
1614
1615 ICV_DEF_DIV_OP_CASE( 8u, uchar, int, uchar, div_start_row_int,
1616                      cvRound, CV_CAST_8U, CV_8TO32F, CV_NONZERO, src2 )
1617
1618 #if defined WIN64 && defined EM64T && defined _MSC_VER && !defined CV_ICC
1619 #pragma optimize("",on)
1620 #endif
1621
1622
1623 ICV_DEF_DIV_OP_CASE( 16u, ushort, int, ushort, div_start_row_int,
1624                      cvRound, CV_CAST_16U, CV_CAST_64F, CV_NONZERO, src2 )
1625 ICV_DEF_DIV_OP_CASE( 16s, short, int, short, div_start_row_int,
1626                      cvRound, CV_CAST_16S, CV_NOP, CV_NONZERO, src2 )
1627 ICV_DEF_DIV_OP_CASE( 32s, int, int, int, div_start_row_int,
1628                      cvRound, CV_CAST_32S, CV_CAST_64F, CV_NONZERO, src2 )
1629 ICV_DEF_DIV_OP_CASE( 32f, float, double, int, div_start_row_flt,
1630                      CV_NOP, CV_CAST_32F, CV_NOP, div_check_zero_flt, isrc )
1631 ICV_DEF_DIV_OP_CASE( 64f, double, double, int64, div_start_row_flt,
1632                      CV_NOP, CV_CAST_64F, CV_NOP, div_check_zero_dbl, isrc )
1633
1634 ICV_DEF_RECIP_OP_CASE( 8u, uchar, int, uchar, div_start_row_int,
1635                        cvRound, CV_CAST_8U, CV_8TO32F, CV_NONZERO, src )
1636 ICV_DEF_RECIP_OP_CASE( 16u, ushort, int, ushort, div_start_row_int,
1637                        cvRound, CV_CAST_16U, CV_CAST_64F, CV_NONZERO, src )
1638 ICV_DEF_RECIP_OP_CASE( 16s, short, int, short, div_start_row_int,
1639                        cvRound, CV_CAST_16S, CV_NOP, CV_NONZERO, src )
1640 ICV_DEF_RECIP_OP_CASE( 32s, int, int, int, div_start_row_int,
1641                        cvRound, CV_CAST_32S, CV_CAST_64F, CV_NONZERO, src )
1642 ICV_DEF_RECIP_OP_CASE( 32f, float, double, int, div_start_row_flt,
1643                        CV_NOP, CV_CAST_32F, CV_NOP, div_check_zero_flt, isrc  )
1644 ICV_DEF_RECIP_OP_CASE( 64f, double, double, int64, div_start_row_flt,
1645                        CV_NOP, CV_CAST_64F, CV_NOP, div_check_zero_dbl, isrc )
1646
/* Presumably these expand to icvInitDivC1RTable() and icvInitRecipC1RTable(),
   the table initializers that cvDiv calls - the macro itself is defined
   outside this part of the file. */
1647 ICV_DEF_INIT_ARITHM_FUNC_TAB( Div, C1R )
1648 ICV_DEF_INIT_ARITHM_FUNC_TAB( Recip, C1R )
1649
/* Type-erased signature of the icvRecip_<flavor>_C1R kernels, used when a
   kernel is fetched from the reciprocal function table:
   (source, source step, destination, destination step, size, scale). */
1650 typedef CvStatus (CV_STDCALL * CvRecipFunc)( const void* src, int step1,
1651                                              void* dst, int step,
1652                                              CvSize size, double scale );
1653
/*
 * cvDiv - element-wise division.
 *
 *   srcarr1 != NULL :  dst = src1 * scale / src2
 *   srcarr1 == NULL :  dst = scale / src2
 *
 * The kernels defined above store 0 wherever the divisor is zero.
 *
 * srcarr1 - optional numerator array (may be NULL)
 * srcarr2 - divisor array
 * dstarr  - destination array
 * scale   - scalar factor applied to every quotient
 *
 * Each array may be a CvMat, a CvMatND, or anything cvGetMat() can convert.
 * A selected COI is rejected with CV_BadCOI.  For the 2D path the types and
 * sizes of src1/src2 and of src2/dst must match (CV_StsUnmatchedFormats /
 * CV_StsUnmatchedSizes); a depth without a kernel in the table yields
 * CV_StsUnsupportedFormat.
 */
1654 CV_IMPL void
1655 cvDiv( const void* srcarr1, const void* srcarr2, void* dstarr, double scale )
1656 {
1657     static CvFuncTable div_tab;
1658     static CvFuncTable recip_tab;
1659     static int inittab = 0;
1660
1661     CV_FUNCNAME( "cvDiv" );
1662
1663     __BEGIN__;
1664
1665     int type, coi = 0;
1666     int is_nd = 0;
1667     int src1_step, src2_step, dst_step;
         /* Defaults to "continuous" so that a missing src1 never prevents the
            single-row flattening further down. */
1668     int src1_cont_flag = CV_MAT_CONT_FLAG;
1669     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
1670     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
1671     CvMat dststub,  *dst = (CvMat*)dstarr;
1672     CvSize size;
1673
         /* Fill both dispatch tables on first use. */
1674     if( !inittab )
1675     {
1676         icvInitDivC1RTable( &div_tab );
1677         icvInitRecipC1RTable( &recip_tab );
1678         inittab = 1;
1679     }
1680
         /* Divisor: keep a CvMatND as is (handled by the ND path), otherwise
            obtain a CvMat header. */
1681     if( !CV_IS_MAT(src2) )
1682     {
1683         if( CV_IS_MATND(src2))
1684             is_nd = 1;
1685         else
1686         {
1687             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi ));
1688             if( coi != 0 )
1689                 CV_ERROR( CV_BadCOI, "" );
1690         }
1691     }
1692
         /* Optional numerator. */
1693     if( src1 )
1694     {
1695         if( CV_IS_MATND(src1))
1696             is_nd = 1;
1697         else
1698         {
1699             if( !CV_IS_MAT(src1) )
1700             {
1701                 CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi ));
1702                 if( coi != 0 )
1703                     CV_ERROR( CV_BadCOI, "" );
1704             }
1705
             /* NOTE(review): if src2 was left as a CvMatND above, the two
                checks below read it through a CvMat pointer - confirm that
                a CvMat numerator with a CvMatND divisor is handled as
                intended. */
1706             if( !CV_ARE_TYPES_EQ( src1, src2 ))
1707                 CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1708
1709             if( !CV_ARE_SIZES_EQ( src1, src2 ))
1710                 CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1711             src1_cont_flag = src1->type;
1712         }
1713     }
1714
         /* Destination: same treatment as the divisor. */
1715     if( !CV_IS_MAT(dst) )
1716     {
1717         if( CV_IS_MATND(dst))
1718             is_nd = 1;
1719         else
1720         {
1721             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1722             if( coi != 0 )
1723                 CV_ERROR( CV_BadCOI, "" );
1724         }
1725     }
1726
         /* N-dimensional path: iterate over slices and run the kernel on each
            one as a single row. */
1727     if( is_nd )
1728     {
             /* src1 is last so that it can be dropped by passing a count of 2. */
1729         CvArr* arrs[] = { dst, src2, src1 };
1730         CvMatND stubs[3];
1731         CvNArrayIterator iterator;
1732
1733         CV_CALL( cvInitNArrayIterator( 2 + (src1 != 0), arrs, 0, stubs, &iterator ));
1734
1735         type = iterator.hdr[0]->type;
             /* Channels are processed as independent elements. */
1736         iterator.size.width *= CV_MAT_CN(type);
1737
1738         if( src1 )
1739         {
1740             CvScaledElWiseFunc func =
1741                 (CvScaledElWiseFunc)(div_tab.fn_2d[CV_MAT_DEPTH(type)]);
1742             if( !func )
1743                 CV_ERROR( CV_StsUnsupportedFormat, "" );
1744
1745             do
1746             {
1747                 IPPI_CALL( func( iterator.ptr[2], CV_STUB_STEP,
1748                                  iterator.ptr[1], CV_STUB_STEP,
1749                                  iterator.ptr[0], CV_STUB_STEP,
1750                                  iterator.size, scale ));
1751             }
1752             while( cvNextNArraySlice( &iterator ));
1753         }
1754         else
1755         {
1756             CvRecipFunc func = (CvRecipFunc)(recip_tab.fn_2d[CV_MAT_DEPTH(type)]);
1757
1758             if( !func )
1759                 CV_ERROR( CV_StsUnsupportedFormat, "" );
1760
1761             do
1762             {
1763                 IPPI_CALL( func( iterator.ptr[1], CV_STUB_STEP,
1764                                  iterator.ptr[0], CV_STUB_STEP,
1765                                  iterator.size, scale ));
1766             }
1767             while( cvNextNArraySlice( &iterator ));
1768         }
1769         EXIT;
1770     }
1771
         /* 2D path: divisor and destination must agree in type and size. */
1772     if( !CV_ARE_TYPES_EQ( src2, dst ))
1773         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1774
1775     if( !CV_ARE_SIZES_EQ( src2, dst ))
1776         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1777
1778     type = CV_MAT_TYPE(src2->type);
1779     size = cvGetMatSize( src2 );
1780     size.width *= CV_MAT_CN( type );
1781
         /* If every array is continuous, treat the data as one long row; the
            step values are then placeholders. */
1782     if( CV_IS_MAT_CONT( src1_cont_flag & src2->type & dst->type ))
1783     {
1784         size.width *= size.height;
1785         src1_step = src2_step = dst_step = CV_STUB_STEP;
1786         size.height = 1;
1787     }
1788     else
1789     {
1790         src1_step = src1 ? src1->step : 0;
1791         src2_step = src2->step;
1792         dst_step = dst->step;
1793     }
1794
         /* Dispatch to the division or the reciprocal kernel for this depth. */
1795     if( src1 )
1796     {
1797         CvScaledElWiseFunc func = (CvScaledElWiseFunc)(div_tab.fn_2d[CV_MAT_DEPTH(type)]);
1798
1799         if( !func )
1800             CV_ERROR( CV_StsUnsupportedFormat, "" );
1801
1802         IPPI_CALL( func( src1->data.ptr, src1_step, src2->data.ptr, src2_step,
1803                          dst->data.ptr, dst_step, size, scale ));
1804     }
1805     else
1806     {
1807         CvRecipFunc func = (CvRecipFunc)(recip_tab.fn_2d[CV_MAT_DEPTH(type)]);
1808
1809         if( !func )
1810             CV_ERROR( CV_StsUnsupportedFormat, "" );
1811
1812         IPPI_CALL( func( src2->data.ptr, src2_step,
1813                          dst->data.ptr, dst_step, size, scale ));
1814     }
1815
1816     __END__;
1817 }
1818
1819 /******************************* A D D   W E I G H T E D ****************************/
1820
/*
 * ICV_DEF_ADD_WEIGHTED_OP generates icvAddWeighted_<flavor>_C1R(), which
 * stores
 *
 *     dst[i] = cast_macro2( cast_macro1( load_macro(src1[i])*alpha +
 *                                        load_macro(src2[i])*beta + gamma ))
 *
 * for every element of a size.height x size.width array and returns CV_OK.
 *
 *   flavor      - suffix pasted into the generated function name
 *   arrtype     - element type of src1, src2 and dst
 *   worktype    - type of the intermediate value between the two casts
 *   load_macro  - applied to each source element before the arithmetic
 *   cast_macro1 - converts the weighted sum to worktype
 *   cast_macro2 - converts worktype to arrtype for the store
 *
 * step1, step2 and step are divided by the element size before use, i.e. they
 * are expected in bytes.  The row loop is unrolled by four (two temporaries
 * reused twice) and followed by a scalar loop for the remaining elements.
 */
1821 #define ICV_DEF_ADD_WEIGHTED_OP(flavor, arrtype, worktype, load_macro,          \
1822                                      cast_macro1, cast_macro2)                  \
1823 static CvStatus CV_STDCALL                                                      \
1824 icvAddWeighted_##flavor##_C1R( const arrtype* src1, int step1, double alpha,    \
1825                                const arrtype* src2, int step2, double beta,     \
1826                                double gamma, arrtype* dst, int step, CvSize size )\
1827 {                                                                               \
1828     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]); \
1829                                                                                 \
1830     for( ; size.height--; src1 += step1, src2 += step2, dst += step )           \
1831     {                                                                           \
1832         int i;                                                                  \
1833                                                                                 \
1834         for( i = 0; i <= size.width - 4; i += 4 )                               \
1835         {                                                                       \
1836             worktype t0 = cast_macro1(load_macro((src1)[i])*alpha +             \
1837                                       load_macro((src2)[i])*beta + gamma);      \
1838             worktype t1 = cast_macro1(load_macro((src1)[i+1])*alpha +           \
1839                                       load_macro((src2)[i+1])*beta + gamma);    \
1840                                                                                 \
1841             (dst)[i] = cast_macro2( t0 );                                       \
1842             (dst)[i+1] = cast_macro2( t1 );                                     \
1843                                                                                 \
1844             t0 = cast_macro1(load_macro((src1)[i+2])*alpha +                    \
1845                              load_macro((src2)[i+2])*beta + gamma);             \
1846             t1 = cast_macro1(load_macro((src1)[i+3])*alpha +                    \
1847                              load_macro((src2)[i+3])*beta + gamma);             \
1848                                                                                 \
1849             (dst)[i+2] = cast_macro2( t0 );                                     \
1850             (dst)[i+3] = cast_macro2( t1 );                                     \
1851         }                                                                       \
1852                                                                                 \
1853         for( ; i < size.width; i++ )                                            \
1854         {                                                                       \
1855             worktype t0 = cast_macro1(load_macro((src1)[i])*alpha +             \
1856                                       load_macro((src2)[i])*beta + gamma);      \
1857             (dst)[i] = cast_macro2( t0 );                                       \
1858         }                                                                       \
1859     }                                                                           \
1860                                                                                 \
1861     return CV_OK;                                                               \
1862 }
1863
1864
/* Number of fractional bits of the fixed-point tables used by
   icvAddWeighted_8u_fast_C1R below; any earlier definition is discarded. */
1865 #undef shift
1866 #define shift 14
1867
/*
 * Fixed-point variant of the 8-bit weighted sum
 *     dst[i] = saturate( src1[i]*alpha + src2[i]*beta + gamma ).
 *
 * Two 256-entry tables are built with `shift` fractional bits:
 *     tab1[j] ~ j*alpha * 2^shift
 *     tab2[j] ~ (gamma + j*beta) * 2^shift + 2^(shift-1)   (rounding bias)
 * Both are filled by repeated addition, so each pixel costs two lookups, one
 * addition and one right shift.
 *
 * step1, step2 and step are added to the uchar pointers as they are.
 * The function always returns CV_OK.
 *
 * NOTE(review): tab1[a] + tab2[b] is a plain int addition.  The caller has to
 * keep (|alpha| + |beta|)*255 + |gamma| below 2^(31 - shift), otherwise the
 * sum can overflow - confirm that every call site guarantees this.
 */
1868 static  CvStatus CV_STDCALL
1869 icvAddWeighted_8u_fast_C1R( const uchar* src1, int step1, double alpha,
1870                             const uchar* src2, int step2, double beta,
1871                             double gamma, uchar* dst, int step, CvSize size )
1872 {
1873     int tab1[256], tab2[256];
1874     double t = 0;
1875     int j, t0, t1, t2, t3;
1876
         /* Move the coefficients to fixed point; gamma also absorbs the
            half-unit that turns the final shift into rounding. */
1877     alpha *= 1 << shift;
1878     gamma = gamma*(1 << shift) + (1 << (shift - 1));
1879     beta *= 1 << shift;
1880
1881     for( j = 0; j < 256; j++ )
1882     {
1883         tab1[j] = cvRound(t);
1884         tab2[j] = cvRound(gamma);
1885         t += alpha;
1886         gamma += beta;
1887     }
1888
         /* Results for the four combinations of extreme inputs (0 and 255);
            since both tables are monotonic these bound every possible sum. */
1889     t0 = (tab1[0] + tab2[0]) >> shift;
1890     t1 = (tab1[0] + tab2[255]) >> shift;
1891     t2 = (tab1[255] + tab2[0]) >> shift;
1892     t3 = (tab1[255] + tab2[255]) >> shift;
1893
         /* (unsigned)(t+256) < 768 holds exactly when -256 <= t <= 511. */
1894     if( (unsigned)(t0+256) < 768 && (unsigned)(t1+256) < 768 &&
1895         (unsigned)(t2+256) < 768 && (unsigned)(t3+256) < 768 )
1896     {
1897         // use faster table-based conversion back to 8u
1898         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
1899         {
1900             int i;
1901
1902             for( i = 0; i <= size.width - 4; i += 4 )
1903             {
1904                 t0 = CV_FAST_CAST_8U((tab1[src1[i]] + tab2[src2[i]]) >> shift);
1905                 t1 = CV_FAST_CAST_8U((tab1[src1[i+1]] + tab2[src2[i+1]]) >> shift);
1906
1907                 dst[i] = (uchar)t0;
1908                 dst[i+1] = (uchar)t1;
1909
1910                 t0 = CV_FAST_CAST_8U((tab1[src1[i+2]] + tab2[src2[i+2]]) >> shift);
1911                 t1 = CV_FAST_CAST_8U((tab1[src1[i+3]] + tab2[src2[i+3]]) >> shift);
1912
1913                 dst[i+2] = (uchar)t0;
1914                 dst[i+3] = (uchar)t1;
1915             }
1916
1917             for( ; i < size.width; i++ )
1918             {
1919                 t0 = CV_FAST_CAST_8U((tab1[src1[i]] + tab2[src2[i]]) >> shift);
1920                 dst[i] = (uchar)t0;
1921             }
1922         }
1923     }
1924     else
1925     {
1926         // use universal macro for conversion back to 8u
1927         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
1928         {
1929             int i;
1930
1931             for( i = 0; i <= size.width - 4; i += 4 )
1932             {
1933                 t0 = (tab1[src1[i]] + tab2[src2[i]]) >> shift;
1934                 t1 = (tab1[src1[i+1]] + tab2[src2[i+1]]) >> shift;
1935
1936                 dst[i] = CV_CAST_8U( t0 );
1937                 dst[i+1] = CV_CAST_8U( t1 );
1938
1939                 t0 = (tab1[src1[i+2]] + tab2[src2[i+2]]) >> shift;
1940                 t1 = (tab1[src1[i+3]] + tab2[src2[i+3]]) >> shift;
1941
1942                 dst[i+2] = CV_CAST_8U( t0 );
1943                 dst[i+3] = CV_CAST_8U( t1 );
1944             }
1945
1946             for( ; i < size.width; i++ )
1947             {
1948                 t0 = (tab1[src1[i]] + tab2[src2[i]]) >> shift;
1949                 dst[i] = CV_CAST_8U( t0 );
1950             }
1951         }
1952     }
1953
1954     return CV_OK;
1955 }
1956
1957
/* Generic weighted-sum kernels, one per depth.  Integer flavors round the sum
   with cvRound before the saturating cast; 32f/64f keep it in double. */
1958 ICV_DEF_ADD_WEIGHTED_OP( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U )
1959 ICV_DEF_ADD_WEIGHTED_OP( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
1960 ICV_DEF_ADD_WEIGHTED_OP( 16s, short, int, CV_NOP, cvRound, CV_CAST_16S )
1961 ICV_DEF_ADD_WEIGHTED_OP( 32s, int, int, CV_NOP, cvRound, CV_CAST_32S )
1962 ICV_DEF_ADD_WEIGHTED_OP( 32f, float, double, CV_NOP, CV_NOP, CV_CAST_32F )
1963 ICV_DEF_ADD_WEIGHTED_OP( 64f, double, double, CV_NOP, CV_NOP, CV_CAST_64F )
1964
1965
/* Presumably expands to icvInitAddWeightedC1RTable(), the initializer that
   cvAddWeighted calls - the macro is defined outside this part of the file. */
1966 ICV_DEF_INIT_ARITHM_FUNC_TAB( AddWeighted, C1R )
1967
/* Type-erased signature shared by all icvAddWeighted_* kernels, used when a
   kernel is fetched from the function table or the 8u fast path is chosen:
   (src1, step1, alpha, src2, step2, beta, gamma, dst, step, size). */
1968 typedef CvStatus (CV_STDCALL *CvAddWeightedFunc)( const void* src1, int step1, double alpha,
1969                                                   const void* src2, int step2, double beta,
1970                                                   double gamma, void* dst,
1971                                                   int step, CvSize size );
1972
/*
 * cvAddWeighted - per-element weighted sum
 *
 *     dst = srcA*alpha + srcB*beta + gamma
 *
 * srcAarr, srcBarr - input arrays
 * alpha, beta      - weights of the first and the second input
 * gamma            - constant added to every weighted sum
 * dstarr           - output array
 *
 * All three arrays must have the same type and size and must not have a COI
 * selected; violations are reported through CV_ERROR (CV_BadCOI,
 * CV_StsUnmatchedFormats, CV_StsUnmatchedSizes).  A depth without a kernel
 * in the table yields CV_StsUnsupportedFormat.
 *
 * CV_8UC1 arrays of at least 1024 elements with moderate coefficients are
 * processed by the fixed-point kernel icvAddWeighted_8u_fast_C1R; everything
 * else goes through the per-depth kernel from the function table.
 */
1973 CV_IMPL void
1974 cvAddWeighted( const CvArr* srcAarr, double alpha,
1975                const CvArr* srcBarr, double beta,
1976                double gamma, CvArr* dstarr )
1977 {
1978     static CvFuncTable addw_tab;
1979     static int inittab = 0;
1980
1981     CV_FUNCNAME( "cvAddWeighted" );
1982
1983     __BEGIN__;
1984
1985     CvMat  srcA_stub, *srcA = (CvMat*)srcAarr;
1986     CvMat  srcB_stub, *srcB = (CvMat*)srcBarr;
1987     CvMat  dst_stub, *dst = (CvMat*)dstarr;
1988     int  coi1, coi2, coi;
1989     int  srcA_step, srcB_step, dst_step;
1990     int  type;
1991     CvAddWeightedFunc func;
1992     CvSize size;
1993
         /* Fill the dispatch table on first use. */
1994     if( !inittab )
1995     {
1996         icvInitAddWeightedC1RTable( &addw_tab );
1997         inittab = 1;
1998     }
1999
2000     CV_CALL( srcA = cvGetMat( srcA, &srcA_stub, &coi1 ));
2001     CV_CALL( srcB = cvGetMat( srcB, &srcB_stub, &coi2 ));
2002     CV_CALL( dst = cvGetMat( dst, &dst_stub, &coi ));
2003
2004     if( coi1 || coi2 || coi )
2005         CV_ERROR( CV_BadCOI, "COI must not be set" );
2006
2007     if( !CV_ARE_TYPES_EQ( srcA, srcB ) ||
2008         !CV_ARE_TYPES_EQ( srcA, dst ))
2009         CV_ERROR( CV_StsUnmatchedFormats,
2010         "All input/output arrays should have the same type");
2011
2012     if( !CV_ARE_SIZES_EQ( srcA, srcB ) ||
2013         !CV_ARE_SIZES_EQ( srcA, dst ))
2014         CV_ERROR( CV_StsUnmatchedSizes,
2015         "All input/output arrays should have the same sizes");
2016
2017     size = cvGetMatSize( srcA );
2018     type = CV_MAT_TYPE( srcA->type );
         /* Channels are processed as independent elements. */
2019     size.width *= CV_MAT_CN( type );
2020     srcA_step = srcA->step;
2021     srcB_step = srcB->step;
2022     dst_step = dst->step;
2023
         /* Flatten to a single row when all three arrays are continuous.
            The test must use srcA->type: `type` went through CV_MAT_TYPE and
            no longer carries the continuity bit, which made this branch
            unreachable.  The step is a placeholder for a single row; use the
            same constant as the other flattened paths in this file. */
2024     if( CV_IS_MAT_CONT( srcA->type & srcB->type & dst->type ))
2025     {
2026         size.width *= size.height;
2027         size.height = 1;
2028         srcA_step = srcB_step = dst_step = CV_STUB_STEP;
2029     }
2030
         /* The fixed-point kernel adds two int table entries scaled by
            2^shift.  Besides the per-coefficient limits, the largest possible
            weighted sum (|alpha| + |beta|)*255 + |gamma| must stay below
            2^(31 - shift), with a margin of one for the rounding bias,
            otherwise that addition overflows and yields wrong pixels. */
2031     if( type == CV_8UC1 && size.width * size.height >= 1024 &&
2032         fabs(alpha) < 256 && fabs(beta) < 256 && fabs(gamma) < 256*256 &&
         (fabs(alpha) + fabs(beta))*255 + fabs(gamma) < (1 << (31 - shift)) - 1 )
2033     {
2034         func = (CvAddWeightedFunc)icvAddWeighted_8u_fast_C1R;
2035     }
2036     else
2037     {
2038         func = (CvAddWeightedFunc)addw_tab.fn_2d[CV_MAT_DEPTH(type)];
2039         if( !func )
2040             CV_ERROR( CV_StsUnsupportedFormat, "This array type is not supported" );
2041     }
2042
2043     IPPI_CALL( func( srcA->data.ptr, srcA_step, alpha, srcB->data.ptr, srcB_step,
2044                      beta, gamma, dst->data.ptr, dst_step, size ));
2045
2046     __END__;
2047 }
2048
2049
2050 /* End of file. */