Update the changelog
[opencv] / cxcore / src / cxarithm.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 /* ////////////////////////////////////////////////////////////////////
43 //
44 //  CvMat arithmetic operations: +, - ...
45 //
46 // */
47
48 #include "_cxcore.h"
49
50 /****************************************************************************************\
51 *                      Arithmetic operations (+, -) without mask                         *
52 \****************************************************************************************/
53
/*
   Row body shared by the binary arithmetic kernels.

   Applies __op__ to corresponding elements of src1 and src2 and stores
   cast_macro(result) into dst, for indices 0 .. len-1.  src1, src2 and dst
   are NOT macro parameters: they are picked up from the scope in which the
   macro is expanded.

   Elements are processed in groups of four; inside a group two elements are
   loaded and computed before the two results are stored.  Whatever is left
   after the last full group is processed one element at a time.
*/
#define ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype, cast_macro, len )\
{                                                                   \
    int k, j;                                                       \
                                                                    \
    /* full groups of four, handled as two pairs */                 \
    for( k = 0; k <= (len) - 4; k += 4 )                            \
        for( j = k; j < k + 4; j += 2 )                             \
        {                                                           \
            worktype v0 = __op__((src1)[j], (src2)[j]);             \
            worktype v1 = __op__((src1)[j+1], (src2)[j+1]);         \
                                                                    \
            (dst)[j] = cast_macro( v0 );                            \
            (dst)[j+1] = cast_macro( v1 );                          \
        }                                                           \
                                                                    \
    /* remaining 0..3 elements */                                   \
    for( ; k < (len); k++ )                                         \
    {                                                               \
        worktype v0 = __op__((src1)[k],(src2)[k]);                  \
        (dst)[k] = cast_macro( v0 );                                \
    }                                                               \
}
79
/*
   Generates the 2D kernel `name` for a binary element-wise operation:

       dst(y,x) = cast_macro( __op__( src1(y,x), src2(y,x) ) )

   step1, step2 and step are divided by the element size before being used
   as per-row pointer increments.  size.width elements are processed in each
   of size.height rows.  The kernel always returns CV_OK.
*/
#define ICV_DEF_BIN_ARI_OP_2D( __op__, name, type, worktype, cast_macro )   \
IPCVAPI_IMPL( CvStatus, name,                                               \
    ( const type* src1, int step1, const type* src2, int step2,             \
      type* dst, int step, CvSize size ),                                   \
      (src1, step1, src2, step2, dst, step, size) )                         \
{                                                                           \
    step1 /= sizeof(src1[0]);                                               \
    step2 /= sizeof(src2[0]);                                               \
    step /= sizeof(dst[0]);                                                 \
                                                                            \
    if( size.width != 1 )                                                   \
    {                                                                       \
        while( size.height-- )                                              \
        {                                                                   \
            ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype,                      \
                                     cast_macro, size.width );              \
            src1 += step1;                                                  \
            src2 += step2;                                                  \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* single-column input: exactly one element per row */              \
        while( size.height-- )                                              \
        {                                                                   \
            worktype v = __op__((src1)[0],(src2)[0]);                       \
            (dst)[0] = cast_macro( v );                                     \
            src1 += step1;                                                  \
            src2 += step2;                                                  \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}
107
108
/*
   Same kernel generator as ICV_DEF_BIN_ARI_OP_2D, except that the generated
   function takes one additional trailing int argument (the scale factor).
   That argument is unnamed and is never read by the generated code.

       dst(y,x) = cast_macro( __op__( src1(y,x), src2(y,x) ) )

   step1, step2 and step are divided by the element size before being used
   as per-row pointer increments.  The kernel always returns CV_OK.
*/
#define ICV_DEF_BIN_ARI_OP_2D_SFS(__op__, name, type, worktype, cast_macro) \
IPCVAPI_IMPL( CvStatus, name,                                               \
    ( const type* src1, int step1, const type* src2, int step2,             \
      type* dst, int step, CvSize size, int /*scalefactor*/ ),              \
      (src1, step1, src2, step2, dst, step, size, 0) )                      \
{                                                                           \
    step1 /= sizeof(src1[0]);                                               \
    step2 /= sizeof(src2[0]);                                               \
    step /= sizeof(dst[0]);                                                 \
                                                                            \
    if( size.width != 1 )                                                   \
    {                                                                       \
        while( size.height-- )                                              \
        {                                                                   \
            ICV_DEF_BIN_ARI_OP_CASE( __op__, worktype,                      \
                                     cast_macro, size.width );              \
            src1 += step1;                                                  \
            src2 += step2;                                                  \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* single-column input: exactly one element per row */              \
        while( size.height-- )                                              \
        {                                                                   \
            worktype v = __op__((src1)[0],(src2)[0]);                       \
            (dst)[0] = cast_macro( v );                                     \
            src1 += step1;                                                  \
            src2 += step2;                                                  \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}
136
137
/*
   Row body shared by the array-with-scalar kernels.

   For every element: dst[i] = cast_macro( __op__( scalar[i mod 12], src[i] ) ).
   The scalar is read through indices 0..11, so it must provide at least 12
   entries whenever len >= 12 (and at least len entries otherwise).

   Side effects on the arguments (they must be modifiable lvalues):
     - src and dst are advanced by 12 for every full group of 12 processed;
     - len is left holding the number of trailing elements (len mod 12).

   Inside a group two elements are loaded and computed before the two
   results are stored.
*/
#define ICV_DEF_UN_ARI_OP_CASE( __op__, worktype, cast_macro,               \
                                src, scalar, dst, len )                     \
{                                                                           \
    int k;                                                                  \
                                                                            \
    /* full groups of 12, handled as six pairs */                           \
    while( ((len) -= 12) >= 0 )                                             \
    {                                                                       \
        for( k = 0; k < 12; k += 2 )                                        \
        {                                                                   \
            worktype v0 = __op__((scalar)[k], (src)[k]);                    \
            worktype v1 = __op__((scalar)[k+1], (src)[k+1]);                \
                                                                            \
            (dst)[k] = cast_macro( v0 );                                    \
            (dst)[k+1] = cast_macro( v1 );                                  \
        }                                                                   \
                                                                            \
        (dst) += 12;                                                        \
        (src) += 12;                                                        \
    }                                                                       \
                                                                            \
    /* undo the last subtraction, then finish the 0..11 leftovers */        \
    (len) += 12;                                                            \
    for( k = 0; k < (len); k++ )                                            \
    {                                                                       \
        worktype v0 = __op__((scalar)[k],(src)[k]);                         \
        (dst)[k] = cast_macro( v0 );                                        \
    }                                                                       \
}
188
189
/*
   Generates the static 2D kernel `name` for an array-with-scalar operation:

       dst(y,x) = cast_macro( __op__( scalar[...], src(y,x) ) )

   step1 and step are divided by the element size before being used as
   per-row pointer increments.  scalar points to values of type worktype;
   the row body reads it through indices 0..11.  The kernel always returns
   CV_OK.
*/
#define ICV_DEF_UN_ARI_OP_2D( __op__, name, type, worktype, cast_macro )    \
static CvStatus CV_STDCALL name                                             \
    ( const type* src, int step1, type* dst, int step,                      \
      CvSize size, const worktype* scalar )                                 \
{                                                                           \
    step1 /= sizeof(src[0]);                                                \
    step /= sizeof(dst[0]);                                                 \
                                                                            \
    if( size.width != 1 )                                                   \
    {                                                                       \
        while( size.height-- )                                              \
        {                                                                   \
            /* the row body advances its pointers and consumes the */       \
            /* length, so hand it per-row copies                   */       \
            const type *row_src = src;                                      \
            type *row_dst = dst;                                            \
            int row_len = size.width;                                       \
                                                                            \
            ICV_DEF_UN_ARI_OP_CASE( __op__, worktype, cast_macro,           \
                                    row_src, scalar, row_dst, row_len );    \
            src += step1;                                                   \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* single-column input: exactly one element per row */              \
        while( size.height-- )                                              \
        {                                                                   \
            worktype v = __op__(*(scalar),*(src));                          \
            *(dst) = cast_macro( v );                                       \
            src += step1;                                                   \
            dst += step;                                                    \
        }                                                                   \
    }                                                                       \
                                                                            \
    return CV_OK;                                                           \
}
220
221
/*
   Instantiates the binary kernel icv<name>_<depth>_C1R for every supported
   depth.  The 8u/16u/16s kernels use the variant with the extra scale-factor
   argument; 32s/32f/64f use the plain variant.  cast_8u selects the cast
   used by the 8-bit unsigned kernel only.
*/
#define ICV_DEF_BIN_ARI_ALL( __op__, name, cast_8u )                        \
    /* integer depths narrower than 32 bits: computed in int */             \
    ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_8u_C1R,                  \
                               uchar, int, cast_8u )                        \
    ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_16u_C1R,                 \
                               ushort, int, CV_CAST_16U )                   \
    ICV_DEF_BIN_ARI_OP_2D_SFS( __op__, icv##name##_16s_C1R,                 \
                               short, int, CV_CAST_16S )                    \
    /* 32-bit integer and floating-point depths: computed in own type */    \
    ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_32s_C1R,                     \
                           int, int, CV_CAST_32S )                          \
    ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_32f_C1R,                     \
                           float, float, CV_CAST_32F )                      \
    ICV_DEF_BIN_ARI_OP_2D( __op__, icv##name##_64f_C1R,                     \
                           double, double, CV_CAST_64F )
229
/*
   Instantiates the array-with-scalar kernel icv<name>_<depth>_C1R for every
   supported depth.  For 8u/16u/16s/32s the scalar and the intermediate
   result are int; for 32f and 64f they have the element type.
*/
#define ICV_DEF_UN_ARI_ALL( __op__, name )                                  \
    /* integer depths: scalar passed as int */                              \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_8u_C1R,                       \
                          uchar, int, CV_CAST_8U )                          \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_16u_C1R,                      \
                          ushort, int, CV_CAST_16U )                        \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_16s_C1R,                      \
                          short, int, CV_CAST_16S )                         \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_32s_C1R,                      \
                          int, int, CV_CAST_32S )                           \
    /* floating-point depths: scalar passed in the element type */          \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_32f_C1R,                      \
                          float, float, CV_CAST_32F )                       \
    ICV_DEF_UN_ARI_OP_2D( __op__, icv##name##_64f_C1R,                      \
                          double, double, CV_CAST_64F )
237
238 #undef CV_SUB_R
239 #define CV_SUB_R(a,b) ((b) - (a))
240
241 ICV_DEF_BIN_ARI_ALL( CV_ADD, Add, CV_FAST_CAST_8U )
242 ICV_DEF_BIN_ARI_ALL( CV_SUB_R, Sub, CV_FAST_CAST_8U )
243
244 ICV_DEF_UN_ARI_ALL( CV_ADD, AddC )
245 ICV_DEF_UN_ARI_ALL( CV_SUB, SubRC )
246
/*
   Generates icvInit<FUNCNAME><FLAG>Table(), which fills tab->fn_2d with the
   kernels icv<FUNCNAME>_<depth>_<FLAG>, indexed by depth constant.  The
   CV_8S slot is set to 0, i.e. no kernel is registered for that depth.
*/
#define ICV_DEF_INIT_ARITHM_FUNC_TAB( FUNCNAME, FLAG )          \
static  void  icvInit##FUNCNAME##FLAG##Table( CvFuncTable* tab )\
{                                                               \
    /* depth without a kernel */                                \
    tab->fn_2d[CV_8S] = 0;                                      \
                                                                \
    /* integer depths */                                        \
    tab->fn_2d[CV_8U] = (void*)icv##FUNCNAME##_8u_##FLAG;       \
    tab->fn_2d[CV_16U] = (void*)icv##FUNCNAME##_16u_##FLAG;     \
    tab->fn_2d[CV_16S] = (void*)icv##FUNCNAME##_16s_##FLAG;     \
    tab->fn_2d[CV_32S] = (void*)icv##FUNCNAME##_32s_##FLAG;     \
                                                                \
    /* floating-point depths */                                 \
    tab->fn_2d[CV_32F] = (void*)icv##FUNCNAME##_32f_##FLAG;     \
    tab->fn_2d[CV_64F] = (void*)icv##FUNCNAME##_64f_##FLAG;     \
}
258
259 ICV_DEF_INIT_ARITHM_FUNC_TAB( Sub, C1R )
260 ICV_DEF_INIT_ARITHM_FUNC_TAB( SubRC, C1R )
261 ICV_DEF_INIT_ARITHM_FUNC_TAB( Add, C1R )
262 ICV_DEF_INIT_ARITHM_FUNC_TAB( AddC, C1R )
263
264 /****************************************************************************************\
265 *                       External Functions for Arithmetic Operations                     *
266 \****************************************************************************************/
267
268 /*************************************** S U B ******************************************/
269
270 CV_IMPL void
271 cvSub( const void* srcarr1, const void* srcarr2,
272        void* dstarr, const void* maskarr )
273 {
274     static CvFuncTable sub_tab;
275     static int inittab = 0;
276     int local_alloc = 1;
277     uchar* buffer = 0;
278
279     CV_FUNCNAME( "cvSub" );
280
281     __BEGIN__;
282
283     const CvArr* tmp;
284     int y, dy, type, depth, cn, cont_flag = 0;
285     int src1_step, src2_step, dst_step, tdst_step, mask_step;
286     CvMat srcstub1, srcstub2, *src1, *src2;
287     CvMat dststub,  *dst = (CvMat*)dstarr;
288     CvMat maskstub, *mask = (CvMat*)maskarr;
289     CvMat dstbuf, *tdst;
290     CvFunc2D_3A func;
291     CvFunc2D_3A1I func_sfs;
292     CvCopyMaskFunc copym_func;
293     CvSize size, tsize;
294
295     CV_SWAP( srcarr1, srcarr2, tmp ); // to comply with IPP
296     src1 = (CvMat*)srcarr1;
297     src2 = (CvMat*)srcarr2;
298
299     if( !CV_IS_MAT(src1) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
300     {
301         if( CV_IS_MATND(src1) || CV_IS_MATND(src2) || CV_IS_MATND(dst))
302         {
303             CvArr* arrs[] = { src1, src2, dst };
304             CvMatND stubs[3];
305             CvNArrayIterator iterator;
306
307             if( maskarr )
308                 CV_ERROR( CV_StsBadMask,
309                 "This operation on multi-dimensional arrays does not support mask" );
310
311             CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
312
313             type = iterator.hdr[0]->type;
314             iterator.size.width *= CV_MAT_CN(type);
315
316             if( !inittab )
317             {
318                 icvInitSubC1RTable( &sub_tab );
319                 inittab = 1;
320             }
321
322             depth = CV_MAT_DEPTH(type);
323             if( depth <= CV_16S )
324             {
325                 func_sfs = (CvFunc2D_3A1I)(sub_tab.fn_2d[depth]);
326                 if( !func_sfs )
327                     CV_ERROR( CV_StsUnsupportedFormat, "" );
328
329                 do
330                 {
331                     IPPI_CALL( func_sfs( iterator.ptr[0], CV_STUB_STEP,
332                                          iterator.ptr[1], CV_STUB_STEP,
333                                          iterator.ptr[2], CV_STUB_STEP,
334                                          iterator.size, 0 ));
335                 }
336                 while( cvNextNArraySlice( &iterator ));
337             }
338             else
339             {
340                 func = (CvFunc2D_3A)(sub_tab.fn_2d[depth]);
341                 if( !func )
342                     CV_ERROR( CV_StsUnsupportedFormat, "" );
343
344                 do
345                 {
346                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
347                                      iterator.ptr[1], CV_STUB_STEP,
348                                      iterator.ptr[2], CV_STUB_STEP,
349                                      iterator.size ));
350                 }
351                 while( cvNextNArraySlice( &iterator ));
352             }
353             EXIT;
354         }
355         else
356         {
357             int coi1 = 0, coi2 = 0, coi3 = 0;
358             
359             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi1 ));
360             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi2 ));
361             CV_CALL( dst = cvGetMat( dst, &dststub, &coi3 ));
362             if( coi1 + coi2 + coi3 != 0 )
363                 CV_ERROR( CV_BadCOI, "" );
364         }
365     }
366
367     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
368         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
369
370     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
371         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
372
373     type = CV_MAT_TYPE(src1->type);
374     size = cvGetMatSize( src1 );
375     depth = CV_MAT_DEPTH(type);
376     cn = CV_MAT_CN(type);
377
378     if( !mask )
379     {
380         if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
381         {
382             int len = size.width*size.height*cn;
383
384             if( len <= CV_MAX_INLINE_MAT_OP_SIZE*CV_MAX_INLINE_MAT_OP_SIZE )
385             {
386                 if( depth == CV_32F )
387                 {
388                     const float* src1data = (const float*)(src1->data.ptr);
389                     const float* src2data = (const float*)(src2->data.ptr);
390                     float* dstdata = (float*)(dst->data.ptr);
391
392                     do
393                     {
394                         dstdata[len-1] = (float)(src2data[len-1] - src1data[len-1]);
395                     }
396                     while( --len );
397
398                     EXIT;
399                 }
400
401                 if( depth == CV_64F )
402                 {
403                     const double* src1data = (const double*)(src1->data.ptr);
404                     const double* src2data = (const double*)(src2->data.ptr);
405                     double* dstdata = (double*)(dst->data.ptr);
406
407                     do
408                     {
409                         dstdata[len-1] = src2data[len-1] - src1data[len-1];
410                     }
411                     while( --len );
412
413                     EXIT;
414                 }
415             }
416             cont_flag = 1;
417         }
418
419         dy = size.height;
420         copym_func = 0;
421         tdst = dst;
422     }
423     else
424     {
425         int buf_size, elem_size;
426         
427         if( !CV_IS_MAT(mask) )
428             CV_CALL( mask = cvGetMat( mask, &maskstub ));
429
430         if( !CV_IS_MASK_ARR(mask))
431             CV_ERROR( CV_StsBadMask, "" );
432
433         if( !CV_ARE_SIZES_EQ( mask, dst ))
434             CV_ERROR( CV_StsUnmatchedSizes, "" );
435
436         cont_flag = CV_IS_MAT_CONT( src1->type & src2->type & dst->type & mask->type );
437         elem_size = CV_ELEM_SIZE(type);
438
439         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
440         dy = MAX(dy,1);
441         dy = MIN(dy,size.height);
442         dstbuf = cvMat( dy, size.width, type );
443         if( !cont_flag )
444             dstbuf.step = cvAlign( dstbuf.step, 8 );
445         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
446         if( buf_size > CV_MAX_LOCAL_SIZE )
447         {
448             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
449             local_alloc = 0;
450         }
451         else
452             buffer = (uchar*)cvStackAlloc( buf_size );
453         dstbuf.data.ptr = buffer;
454         tdst = &dstbuf;
455         
456         copym_func = icvGetCopyMaskFunc( elem_size );
457     }
458
459     if( !inittab )
460     {
461         icvInitSubC1RTable( &sub_tab );
462         inittab = 1;
463     }
464
465     if( depth <= CV_16S )
466     {
467         func = 0;
468         func_sfs = (CvFunc2D_3A1I)(sub_tab.fn_2d[depth]);
469         if( !func_sfs )
470             CV_ERROR( CV_StsUnsupportedFormat, "" );
471     }
472     else
473     {
474         func_sfs = 0;
475         func = (CvFunc2D_3A)(sub_tab.fn_2d[depth]);
476         if( !func )
477             CV_ERROR( CV_StsUnsupportedFormat, "" );
478     }
479
480     src1_step = src1->step;
481     src2_step = src2->step;
482     dst_step = dst->step;
483     tdst_step = tdst->step;
484     mask_step = mask ? mask->step : 0;
485
486     for( y = 0; y < size.height; y += dy )
487     {
488         tsize.width = size.width;
489         tsize.height = dy;
490         if( y + dy > size.height )
491             tsize.height = size.height - y;
492         if( cont_flag || tsize.height == 1 )
493         {
494             tsize.width *= tsize.height;
495             tsize.height = 1;
496             src1_step = src2_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
497         }
498
499         IPPI_CALL( depth <= CV_16S ?
500             func_sfs( src1->data.ptr + y*src1->step, src1_step,
501                       src2->data.ptr + y*src2->step, src2_step,
502                       tdst->data.ptr, tdst_step,
503                       cvSize( tsize.width*cn, tsize.height ), 0 ) :
504             func( src1->data.ptr + y*src1->step, src1_step,
505                   src2->data.ptr + y*src2->step, src2_step,
506                   tdst->data.ptr, tdst_step,
507                   cvSize( tsize.width*cn, tsize.height )));
508
509         if( mask )
510         {
511             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
512                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
513         }
514     }
515
516     __END__;
517
518     if( !local_alloc )
519         cvFree( &buffer );
520 }
521
522
523 CV_IMPL void
524 cvSubRS( const void* srcarr, CvScalar scalar, void* dstarr, const void* maskarr )
525 {
526     static CvFuncTable subr_tab;
527     static int inittab = 0;
528     int local_alloc = 1;
529     uchar* buffer = 0;
530
531     CV_FUNCNAME( "cvSubRS" );
532
533     __BEGIN__;
534
535     int sctype, y, dy, type, depth, cn, coi = 0, cont_flag = 0;
536     int src_step, dst_step, tdst_step, mask_step;
537     CvMat srcstub, *src = (CvMat*)srcarr;
538     CvMat dststub, *dst = (CvMat*)dstarr;
539     CvMat maskstub, *mask = (CvMat*)maskarr;
540     CvMat dstbuf, *tdst;
541     CvFunc2D_2A1P func;
542     CvCopyMaskFunc copym_func;
543     double buf[12];
544     int is_nd = 0;
545     CvSize size, tsize; 
546
547     if( !inittab )
548     {
549         icvInitSubRCC1RTable( &subr_tab );
550         inittab = 1;
551     }
552
553     if( !CV_IS_MAT(src) )
554     {
555         if( CV_IS_MATND(src) )
556             is_nd = 1;
557         else
558         {
559             CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
560             if( coi != 0 )
561                 CV_ERROR( CV_BadCOI, "" );
562         }
563     }
564
565     if( !CV_IS_MAT(dst) )
566     {
567         if( CV_IS_MATND(dst) )
568             is_nd = 1;
569         else
570         {
571             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
572             if( coi != 0 )
573                 CV_ERROR( CV_BadCOI, "" );
574         }
575     }
576
577     if( is_nd )
578     {
579         CvArr* arrs[] = { src, dst };
580         CvMatND stubs[2];
581         CvNArrayIterator iterator;
582
583         if( maskarr )
584             CV_ERROR( CV_StsBadMask,
585             "This operation on multi-dimensional arrays does not support mask" );
586
587         CV_CALL( cvInitNArrayIterator( 2, arrs, 0, stubs, &iterator ));
588
589         sctype = type = CV_MAT_TYPE(iterator.hdr[0]->type);
590         if( CV_MAT_DEPTH(sctype) < CV_32S )
591             sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
592         iterator.size.width *= CV_MAT_CN(type);
593
594         func = (CvFunc2D_2A1P)(subr_tab.fn_2d[CV_MAT_DEPTH(type)]);
595         if( !func )
596             CV_ERROR( CV_StsUnsupportedFormat, "" );
597        
598         CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
599
600         do
601         {
602             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
603                              iterator.ptr[1], CV_STUB_STEP,
604                              iterator.size, buf ));
605         }
606         while( cvNextNArraySlice( &iterator ));
607         EXIT;
608     }
609
610     if( !CV_ARE_TYPES_EQ( src, dst ))
611         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
612
613     if( !CV_ARE_SIZES_EQ( src, dst ))
614         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
615
616     sctype = type = CV_MAT_TYPE(src->type);
617     depth = CV_MAT_DEPTH(type);
618     cn = CV_MAT_CN(type);
619     if( depth < CV_32S )
620         sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
621
622     size = cvGetMatSize( src );
623
624     if( !maskarr )
625     {
626         if( CV_IS_MAT_CONT( src->type & dst->type ))
627         {
628             if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
629             {
630                 int len = size.width * size.height;
631
632                 if( type == CV_32FC1 )
633                 {
634                     const float* srcdata = (const float*)(src->data.ptr);
635                     float* dstdata = (float*)(dst->data.ptr);
636                 
637                     do
638                     {
639                         dstdata[len-1] = (float)(scalar.val[0] - srcdata[len-1]);
640                     }
641                     while( --len );
642
643                     EXIT;
644                 }
645
646                 if( type == CV_64FC1 )
647                 {
648                     const double* srcdata = (const double*)(src->data.ptr);
649                     double* dstdata = (double*)(dst->data.ptr);
650                 
651                     do
652                     {
653                         dstdata[len-1] = scalar.val[0] - srcdata[len-1];
654                     }
655                     while( --len );
656
657                     EXIT;
658                 }
659             }
660             cont_flag = 1;
661         }
662         
663         dy = size.height;
664         copym_func = 0;
665         tdst = dst;
666     }
667     else
668     {
669         int buf_size, elem_size;
670         
671         if( !CV_IS_MAT(mask) )
672             CV_CALL( mask = cvGetMat( mask, &maskstub ));
673
674         if( !CV_IS_MASK_ARR(mask))
675             CV_ERROR( CV_StsBadMask, "" );
676
677         if( !CV_ARE_SIZES_EQ( mask, dst ))
678             CV_ERROR( CV_StsUnmatchedSizes, "" );
679
680         cont_flag = CV_IS_MAT_CONT( src->type & dst->type & mask->type );
681         elem_size = CV_ELEM_SIZE(type);
682
683         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
684         dy = MAX(dy,1);
685         dy = MIN(dy,size.height);
686         dstbuf = cvMat( dy, size.width, type );
687         if( !cont_flag )
688             dstbuf.step = cvAlign( dstbuf.step, 8 );
689         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
690         if( buf_size > CV_MAX_LOCAL_SIZE )
691         {
692             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
693             local_alloc = 0;
694         }
695         else
696             buffer = (uchar*)cvStackAlloc( buf_size );
697         dstbuf.data.ptr = buffer;
698         tdst = &dstbuf;
699         
700         copym_func = icvGetCopyMaskFunc( elem_size );
701     }
702
703     func = (CvFunc2D_2A1P)(subr_tab.fn_2d[depth]);
704     if( !func )
705         CV_ERROR( CV_StsUnsupportedFormat, "" );
706
707     src_step = src->step;
708     dst_step = dst->step;
709     tdst_step = tdst->step;
710     mask_step = mask ? mask->step : 0;
711
712     CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
713
714     for( y = 0; y < size.height; y += dy )
715     {
716         tsize.width = size.width;
717         tsize.height = dy;
718         if( y + dy > size.height )
719             tsize.height = size.height - y;
720         if( cont_flag || tsize.height == 1 )
721         {
722             tsize.width *= tsize.height;
723             tsize.height = 1;
724             src_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
725         }
726
727         IPPI_CALL( func( src->data.ptr + y*src->step, src_step,
728                          tdst->data.ptr, tdst_step,
729                          cvSize( tsize.width*cn, tsize.height ), buf ));
730         if( mask )
731         {
732             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
733                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
734         }
735     }
736
737     __END__;
738
739     if( !local_alloc )
740         cvFree( &buffer );
741 }
742
743
744 /******************************* A D D ********************************/
745
746 CV_IMPL void
747 cvAdd( const void* srcarr1, const void* srcarr2,
748        void* dstarr, const void* maskarr )
749 {
750     static CvFuncTable add_tab;
751     static int inittab = 0;
752     int local_alloc = 1;
753     uchar* buffer = 0;
754
755     CV_FUNCNAME( "cvAdd" );
756
757     __BEGIN__;
758
759     int y, dy, type, depth, cn, cont_flag = 0;
760     int src1_step, src2_step, dst_step, tdst_step, mask_step;
761     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
762     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
763     CvMat dststub,  *dst = (CvMat*)dstarr;
764     CvMat maskstub, *mask = (CvMat*)maskarr;
765     CvMat dstbuf, *tdst;
766     CvFunc2D_3A func;
767     CvFunc2D_3A1I func_sfs;
768     CvCopyMaskFunc copym_func;
769     CvSize size, tsize;
770
771     if( !CV_IS_MAT(src1) || !CV_IS_MAT(src2) || !CV_IS_MAT(dst))
772     {
773         if( CV_IS_MATND(src1) || CV_IS_MATND(src2) || CV_IS_MATND(dst))
774         {
775             CvArr* arrs[] = { src1, src2, dst };
776             CvMatND stubs[3];
777             CvNArrayIterator iterator;
778
779             if( maskarr )
780                 CV_ERROR( CV_StsBadMask,
781                 "This operation on multi-dimensional arrays does not support mask" );
782
783             CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
784
785             type = iterator.hdr[0]->type;
786             iterator.size.width *= CV_MAT_CN(type);
787
788             if( !inittab )
789             {
790                 icvInitAddC1RTable( &add_tab );
791                 inittab = 1;
792             }
793
794             depth = CV_MAT_DEPTH(type);
795             if( depth <= CV_16S )
796             {
797                 func_sfs = (CvFunc2D_3A1I)(add_tab.fn_2d[depth]);
798                 if( !func_sfs )
799                     CV_ERROR( CV_StsUnsupportedFormat, "" );
800
801                 do
802                 {
803                     IPPI_CALL( func_sfs( iterator.ptr[0], CV_STUB_STEP,
804                                          iterator.ptr[1], CV_STUB_STEP,
805                                          iterator.ptr[2], CV_STUB_STEP,
806                                          iterator.size, 0 ));
807                 }
808                 while( cvNextNArraySlice( &iterator ));
809             }
810             else
811             {
812                 func = (CvFunc2D_3A)(add_tab.fn_2d[depth]);
813                 if( !func )
814                     CV_ERROR( CV_StsUnsupportedFormat, "" );
815
816                 do
817                 {
818                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
819                                      iterator.ptr[1], CV_STUB_STEP,
820                                      iterator.ptr[2], CV_STUB_STEP,
821                                      iterator.size ));
822                 }
823                 while( cvNextNArraySlice( &iterator ));
824             }
825             EXIT;
826         }
827         else
828         {
829             int coi1 = 0, coi2 = 0, coi3 = 0;
830             
831             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi1 ));
832             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi2 ));
833             CV_CALL( dst = cvGetMat( dst, &dststub, &coi3 ));
834             if( coi1 + coi2 + coi3 != 0 )
835                 CV_ERROR( CV_BadCOI, "" );
836         }
837     }
838
839     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
840         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
841
842     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
843         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
844
845     type = CV_MAT_TYPE(src1->type);
846     size = cvGetMatSize( src1 );
847     depth = CV_MAT_DEPTH(type);
848     cn = CV_MAT_CN(type);
849
850     if( !mask )
851     {
852         if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
853         {
854             int len = size.width*size.height*cn;
855
856             if( len <= CV_MAX_INLINE_MAT_OP_SIZE*CV_MAX_INLINE_MAT_OP_SIZE )
857             {
858                 if( depth == CV_32F )
859                 {
860                     const float* src1data = (const float*)(src1->data.ptr);
861                     const float* src2data = (const float*)(src2->data.ptr);
862                     float* dstdata = (float*)(dst->data.ptr);
863
864                     do
865                     {
866                         dstdata[len-1] = (float)(src1data[len-1] + src2data[len-1]);
867                     }
868                     while( --len );
869
870                     EXIT;
871                 }
872
873                 if( depth == CV_64F )
874                 {
875                     const double* src1data = (const double*)(src1->data.ptr);
876                     const double* src2data = (const double*)(src2->data.ptr);
877                     double* dstdata = (double*)(dst->data.ptr);
878
879                     do
880                     {
881                         dstdata[len-1] = src1data[len-1] + src2data[len-1];
882                     }
883                     while( --len );
884
885                     EXIT;
886                 }
887             }
888             cont_flag = 1;
889         }
890
891         dy = size.height;
892         copym_func = 0;
893         tdst = dst;
894     }
895     else
896     {
897         int buf_size, elem_size;
898         
899         if( !CV_IS_MAT(mask) )
900             CV_CALL( mask = cvGetMat( mask, &maskstub ));
901
902         if( !CV_IS_MASK_ARR(mask))
903             CV_ERROR( CV_StsBadMask, "" );
904
905         if( !CV_ARE_SIZES_EQ( mask, dst ))
906             CV_ERROR( CV_StsUnmatchedSizes, "" );
907
908         cont_flag = CV_IS_MAT_CONT( src1->type & src2->type & dst->type & mask->type );
909         elem_size = CV_ELEM_SIZE(type);
910
911         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
912         dy = MAX(dy,1);
913         dy = MIN(dy,size.height);
914         dstbuf = cvMat( dy, size.width, type );
915         if( !cont_flag )
916             dstbuf.step = cvAlign( dstbuf.step, 8 );
917         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
918         if( buf_size > CV_MAX_LOCAL_SIZE )
919         {
920             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
921             local_alloc = 0;
922         }
923         else
924             buffer = (uchar*)cvStackAlloc( buf_size );
925         dstbuf.data.ptr = buffer;
926         tdst = &dstbuf;
927         
928         copym_func = icvGetCopyMaskFunc( elem_size );
929     }
930
931     if( !inittab )
932     {
933         icvInitAddC1RTable( &add_tab );
934         inittab = 1;
935     }
936
937     if( depth <= CV_16S )
938     {
939         func = 0;
940         func_sfs = (CvFunc2D_3A1I)(add_tab.fn_2d[depth]);
941         if( !func_sfs )
942             CV_ERROR( CV_StsUnsupportedFormat, "" );
943     }
944     else
945     {
946         func_sfs = 0;
947         func = (CvFunc2D_3A)(add_tab.fn_2d[depth]);
948         if( !func )
949             CV_ERROR( CV_StsUnsupportedFormat, "" );
950     }
951
952     src1_step = src1->step;
953     src2_step = src2->step;
954     dst_step = dst->step;
955     tdst_step = tdst->step;
956     mask_step = mask ? mask->step : 0;
957
958     for( y = 0; y < size.height; y += dy )
959     {
960         tsize.width = size.width;
961         tsize.height = dy;
962         if( y + dy > size.height )
963             tsize.height = size.height - y;
964         if( cont_flag || tsize.height == 1 )
965         {
966             tsize.width *= tsize.height;
967             tsize.height = 1;
968             src1_step = src2_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
969         }
970
971         IPPI_CALL( depth <= CV_16S ?
972             func_sfs( src1->data.ptr + y*src1->step, src1_step,
973                       src2->data.ptr + y*src2->step, src2_step,
974                       tdst->data.ptr, tdst_step,
975                       cvSize( tsize.width*cn, tsize.height ), 0 ) :
976             func( src1->data.ptr + y*src1->step, src1_step,
977                   src2->data.ptr + y*src2->step, src2_step,
978                   tdst->data.ptr, tdst_step,
979                   cvSize( tsize.width*cn, tsize.height )));
980
981         if( mask )
982         {
983             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
984                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
985         }
986     }
987
988     __END__;
989
990     if( !local_alloc )
991         cvFree( &buffer );
992 }
993
994
995 CV_IMPL void
996 cvAddS( const void* srcarr, CvScalar scalar, void* dstarr, const void* maskarr )
997 {
998     static CvFuncTable add_tab;
999     static int inittab = 0;
1000     int local_alloc = 1;
1001     uchar* buffer = 0;
1002
1003     CV_FUNCNAME( "cvAddS" );
1004
1005     __BEGIN__;
1006
1007     int sctype, y, dy, type, depth, cn, coi = 0, cont_flag = 0;
1008     int src_step, dst_step, tdst_step, mask_step;
1009     CvMat srcstub, *src = (CvMat*)srcarr;
1010     CvMat dststub, *dst = (CvMat*)dstarr;
1011     CvMat maskstub, *mask = (CvMat*)maskarr;
1012     CvMat dstbuf, *tdst;
1013     CvFunc2D_2A1P func;
1014     CvCopyMaskFunc copym_func;
1015     double buf[12];
1016     int is_nd = 0;
1017     CvSize size, tsize; 
1018
1019     if( !inittab )
1020     {
1021         icvInitAddCC1RTable( &add_tab );
1022         inittab = 1;
1023     }
1024
1025     if( !CV_IS_MAT(src) )
1026     {
1027         if( CV_IS_MATND(src) )
1028             is_nd = 1;
1029         else
1030         {
1031             CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
1032             if( coi != 0 )
1033                 CV_ERROR( CV_BadCOI, "" );
1034         }
1035     }
1036
1037     if( !CV_IS_MAT(dst) )
1038     {
1039         if( CV_IS_MATND(dst) )
1040             is_nd = 1;
1041         else
1042         {
1043             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1044             if( coi != 0 )
1045                 CV_ERROR( CV_BadCOI, "" );
1046         }
1047     }
1048
1049     if( is_nd )
1050     {
1051         CvArr* arrs[] = { src, dst };
1052         CvMatND stubs[2];
1053         CvNArrayIterator iterator;
1054
1055         if( maskarr )
1056             CV_ERROR( CV_StsBadMask,
1057             "This operation on multi-dimensional arrays does not support mask" );
1058
1059         CV_CALL( cvInitNArrayIterator( 2, arrs, 0, stubs, &iterator ));
1060
1061         sctype = type = CV_MAT_TYPE(iterator.hdr[0]->type);
1062         if( CV_MAT_DEPTH(sctype) < CV_32S )
1063             sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
1064         iterator.size.width *= CV_MAT_CN(type);
1065
1066         func = (CvFunc2D_2A1P)(add_tab.fn_2d[CV_MAT_DEPTH(type)]);
1067         if( !func )
1068             CV_ERROR( CV_StsUnsupportedFormat, "" );
1069        
1070         CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
1071
1072         do
1073         {
1074             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
1075                              iterator.ptr[1], CV_STUB_STEP,
1076                              iterator.size, buf ));
1077         }
1078         while( cvNextNArraySlice( &iterator ));
1079         EXIT;
1080     }
1081
1082     if( !CV_ARE_TYPES_EQ( src, dst ))
1083         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1084
1085     if( !CV_ARE_SIZES_EQ( src, dst ))
1086         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1087
1088     sctype = type = CV_MAT_TYPE(src->type);
1089     depth = CV_MAT_DEPTH(type);
1090     cn = CV_MAT_CN(type);
1091     if( depth < CV_32S )
1092         sctype = (type & CV_MAT_CN_MASK) | CV_32SC1;
1093
1094     size = cvGetMatSize( src );
1095
1096     if( !maskarr )
1097     {
1098         if( CV_IS_MAT_CONT( src->type & dst->type ))
1099         {
1100             if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
1101             {
1102                 int len = size.width * size.height;
1103
1104                 if( type == CV_32FC1 )
1105                 {
1106                     const float* srcdata = (const float*)(src->data.ptr);
1107                     float* dstdata = (float*)(dst->data.ptr);
1108                 
1109                     do
1110                     {
1111                         dstdata[len-1] = (float)(scalar.val[0] + srcdata[len-1]);
1112                     }
1113                     while( --len );
1114
1115                     EXIT;
1116                 }
1117
1118                 if( type == CV_64FC1 )
1119                 {
1120                     const double* srcdata = (const double*)(src->data.ptr);
1121                     double* dstdata = (double*)(dst->data.ptr);
1122                 
1123                     do
1124                     {
1125                         dstdata[len-1] = scalar.val[0] + srcdata[len-1];
1126                     }
1127                     while( --len );
1128
1129                     EXIT;
1130                 }
1131             }
1132             cont_flag = 1;
1133         }
1134         
1135         dy = size.height;
1136         copym_func = 0;
1137         tdst = dst;
1138     }
1139     else
1140     {
1141         int buf_size, elem_size;
1142         
1143         if( !CV_IS_MAT(mask) )
1144             CV_CALL( mask = cvGetMat( mask, &maskstub ));
1145
1146         if( !CV_IS_MASK_ARR(mask))
1147             CV_ERROR( CV_StsBadMask, "" );
1148
1149         if( !CV_ARE_SIZES_EQ( mask, dst ))
1150             CV_ERROR( CV_StsUnmatchedSizes, "" );
1151
1152         cont_flag = CV_IS_MAT_CONT( src->type & dst->type & mask->type );
1153         elem_size = CV_ELEM_SIZE(type);
1154
1155         dy = CV_MAX_LOCAL_SIZE/(elem_size*size.height);
1156         dy = MAX(dy,1);
1157         dy = MIN(dy,size.height);
1158         dstbuf = cvMat( dy, size.width, type );
1159         if( !cont_flag )
1160             dstbuf.step = cvAlign( dstbuf.step, 8 );
1161         buf_size = dstbuf.step ? dstbuf.step*dy : size.width*elem_size;
1162         if( buf_size > CV_MAX_LOCAL_SIZE )
1163         {
1164             CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1165             local_alloc = 0;
1166         }
1167         else
1168             buffer = (uchar*)cvStackAlloc( buf_size );
1169         dstbuf.data.ptr = buffer;
1170         tdst = &dstbuf;
1171         
1172         copym_func = icvGetCopyMaskFunc( elem_size );
1173     }
1174
1175     func = (CvFunc2D_2A1P)(add_tab.fn_2d[depth]);
1176     if( !func )
1177         CV_ERROR( CV_StsUnsupportedFormat, "" );
1178
1179     src_step = src->step;
1180     dst_step = dst->step;
1181     tdst_step = tdst->step;
1182     mask_step = mask ? mask->step : 0;
1183
1184     CV_CALL( cvScalarToRawData( &scalar, buf, sctype, 1 ));
1185
1186     for( y = 0; y < size.height; y += dy )
1187     {
1188         tsize.width = size.width;
1189         tsize.height = dy;
1190         if( y + dy > size.height )
1191             tsize.height = size.height - y;
1192         if( cont_flag || tsize.height == 1 )
1193         {
1194             tsize.width *= tsize.height;
1195             tsize.height = 1;
1196             src_step = tdst_step = dst_step = mask_step = CV_STUB_STEP;
1197         }
1198
1199         IPPI_CALL( func( src->data.ptr + y*src->step, src_step,
1200                          tdst->data.ptr, tdst_step,
1201                          cvSize( tsize.width*cn, tsize.height ), buf ));
1202         if( mask )
1203         {
1204             IPPI_CALL( copym_func( tdst->data.ptr, tdst_step, dst->data.ptr + y*dst->step,
1205                                    dst_step, tsize, mask->data.ptr + y*mask->step, mask_step ));
1206         }
1207     }
1208
1209     __END__;
1210
1211     if( !local_alloc )
1212         cvFree( &buffer );
1213 }
1214
1215
1216 /***************************************** M U L ****************************************/
1217
1218 #define ICV_DEF_MUL_OP_CASE( flavor, arrtype, worktype, _cast_macro1_,                  \
1219                              _cast_macro2_, _cvt_macro_ )                               \
1220 static CvStatus CV_STDCALL                                                              \
1221     icvMul_##flavor##_C1R( const arrtype* src1, int step1,                              \
1222                            const arrtype* src2, int step2,                              \
1223                            arrtype* dst, int step,                                      \
1224                            CvSize size, double scale )                                  \
1225 {                                                                                       \
1226     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]);         \
1227                                                                                         \
1228     if( fabs(scale - 1.) < DBL_EPSILON )                                                \
1229     {                                                                                   \
1230         for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                     \
1231         {                                                                               \
1232             int i;                                                                      \
1233             for( i = 0; i <= size.width - 4; i += 4 )                                   \
1234             {                                                                           \
1235                 worktype t0 = src1[i] * src2[i];                                        \
1236                 worktype t1 = src1[i+1] * src2[i+1];                                    \
1237                                                                                         \
1238                 dst[i] = _cast_macro2_(t0);                                             \
1239                 dst[i+1] = _cast_macro2_(t1);                                           \
1240                                                                                         \
1241                 t0 = src1[i+2] * src2[i+2];                                             \
1242                 t1 = src1[i+3] * src2[i+3];                                             \
1243                                                                                         \
1244                 dst[i+2] = _cast_macro2_(t0);                                           \
1245                 dst[i+3] = _cast_macro2_(t1);                                           \
1246             }                                                                           \
1247                                                                                         \
1248             for( ; i < size.width; i++ )                                                \
1249             {                                                                           \
1250                 worktype t0 = src1[i] * src2[i];                                        \
1251                 dst[i] = _cast_macro2_(t0);                                             \
1252             }                                                                           \
1253         }                                                                               \
1254     }                                                                                   \
1255     else                                                                                \
1256     {                                                                                   \
1257         for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                     \
1258         {                                                                               \
1259             int i;                                                                      \
1260             for( i = 0; i <= size.width - 4; i += 4 )                                   \
1261             {                                                                           \
1262                 double ft0 = scale*_cvt_macro_(src1[i])*_cvt_macro_(src2[i]);           \
1263                 double ft1 = scale*_cvt_macro_(src1[i+1])*_cvt_macro_(src2[i+1]);       \
1264                 worktype t0 = _cast_macro1_(ft0);                                       \
1265                 worktype t1 = _cast_macro1_(ft1);                                       \
1266                                                                                         \
1267                 dst[i] = _cast_macro2_(t0);                                             \
1268                 dst[i+1] = _cast_macro2_(t1);                                           \
1269                                                                                         \
1270                 ft0 = scale*_cvt_macro_(src1[i+2])*_cvt_macro_(src2[i+2]);              \
1271                 ft1 = scale*_cvt_macro_(src1[i+3])*_cvt_macro_(src2[i+3]);              \
1272                 t0 = _cast_macro1_(ft0);                                                \
1273                 t1 = _cast_macro1_(ft1);                                                \
1274                                                                                         \
1275                 dst[i+2] = _cast_macro2_(t0);                                           \
1276                 dst[i+3] = _cast_macro2_(t1);                                           \
1277             }                                                                           \
1278                                                                                         \
1279             for( ; i < size.width; i++ )                                                \
1280             {                                                                           \
1281                 worktype t0;                                                            \
1282                 t0 = _cast_macro1_(scale*_cvt_macro_(src1[i])*_cvt_macro_(src2[i]));    \
1283                 dst[i] = _cast_macro2_(t0);                                             \
1284             }                                                                           \
1285         }                                                                               \
1286     }                                                                                   \
1287                                                                                         \
1288     return CV_OK;                                                                       \
1289 }
1290
1291
1292 ICV_DEF_MUL_OP_CASE( 8u, uchar, int, cvRound, CV_CAST_8U, CV_8TO32F )
1293 ICV_DEF_MUL_OP_CASE( 16u, ushort, int, cvRound, CV_CAST_16U, CV_NOP )
1294 ICV_DEF_MUL_OP_CASE( 16s, short, int, cvRound, CV_CAST_16S, CV_NOP )
1295 ICV_DEF_MUL_OP_CASE( 32s, int, int, cvRound, CV_CAST_32S, CV_NOP )
1296 ICV_DEF_MUL_OP_CASE( 32f, float, double, CV_NOP, CV_CAST_32F, CV_NOP )
1297 ICV_DEF_MUL_OP_CASE( 64f, double, double, CV_NOP, CV_CAST_64F, CV_NOP )
1298
1299
1300 ICV_DEF_INIT_ARITHM_FUNC_TAB( Mul, C1R )
1301
1302
1303 typedef CvStatus (CV_STDCALL * CvScaledElWiseFunc)( const void* src1, int step1,
1304                                                     const void* src2, int step2,
1305                                                     void* dst, int step,
1306                                                     CvSize size, double scale );
1307
1308 CV_IMPL void
1309 cvMul( const void* srcarr1, const void* srcarr2, void* dstarr, double scale )
1310 {
1311     static CvFuncTable mul_tab;
1312     static int inittab = 0;
1313
1314     CV_FUNCNAME( "cvMul" );
1315
1316     __BEGIN__;
1317
1318     int type, depth, coi = 0;
1319     int src1_step, src2_step, dst_step;
1320     int is_nd = 0;
1321     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
1322     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
1323     CvMat dststub,  *dst = (CvMat*)dstarr;
1324     CvSize size;
1325     CvScaledElWiseFunc func;
1326
1327     if( !inittab )
1328     {
1329         icvInitMulC1RTable( &mul_tab );
1330         inittab = 1;
1331     }
1332
1333     if( !CV_IS_MAT(src1) )
1334     {
1335         if( CV_IS_MATND(src1) )
1336             is_nd = 1;
1337         else
1338         {
1339             CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi ));
1340             if( coi != 0 )
1341                 CV_ERROR( CV_BadCOI, "" );
1342         }
1343     }
1344
1345     if( !CV_IS_MAT(src2) )
1346     {
1347         if( CV_IS_MATND(src2) )
1348             is_nd = 1;
1349         else
1350         {
1351             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi ));
1352             if( coi != 0 )
1353                 CV_ERROR( CV_BadCOI, "" );
1354         }
1355     }
1356
1357     if( !CV_IS_MAT(dst) )
1358     {
1359         if( CV_IS_MATND(dst) )
1360             is_nd = 1;
1361         else
1362         {
1363             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1364             if( coi != 0 )
1365                 CV_ERROR( CV_BadCOI, "" );
1366         }
1367     }
1368
1369     if( is_nd )
1370     {
1371         CvArr* arrs[] = { src1, src2, dst };
1372         CvMatND stubs[3];
1373         CvNArrayIterator iterator;
1374
1375         CV_CALL( cvInitNArrayIterator( 3, arrs, 0, stubs, &iterator ));
1376
1377         type = iterator.hdr[0]->type;
1378         iterator.size.width *= CV_MAT_CN(type);
1379
1380         func = (CvScaledElWiseFunc)(mul_tab.fn_2d[CV_MAT_DEPTH(type)]);
1381         if( !func )
1382             CV_ERROR( CV_StsUnsupportedFormat, "" );
1383
1384         do
1385         {
1386             IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
1387                              iterator.ptr[1], CV_STUB_STEP,
1388                              iterator.ptr[2], CV_STUB_STEP,
1389                              iterator.size, scale ));
1390         }
1391         while( cvNextNArraySlice( &iterator ));
1392         EXIT;
1393     }
1394
1395     if( !CV_ARE_TYPES_EQ( src1, src2 ) || !CV_ARE_TYPES_EQ( src1, dst ))
1396         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1397
1398     if( !CV_ARE_SIZES_EQ( src1, src2 ) || !CV_ARE_SIZES_EQ( src1, dst ))
1399         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1400
1401     type = CV_MAT_TYPE(src1->type);
1402     size = cvGetMatSize( src1 );
1403
1404     depth = CV_MAT_DEPTH(type);
1405     size.width *= CV_MAT_CN( type );
1406
1407     if( CV_IS_MAT_CONT( src1->type & src2->type & dst->type ))
1408     {
1409         size.width *= size.height;
1410
1411         if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE && scale == 1 )
1412         {
1413             if( depth == CV_32F )
1414             {
1415                 const float* src1data = (const float*)(src1->data.ptr);
1416                 const float* src2data = (const float*)(src2->data.ptr);
1417                 float* dstdata = (float*)(dst->data.ptr);
1418             
1419                 do
1420                 {
1421                     dstdata[size.width-1] = (float)
1422                         (src1data[size.width-1] * src2data[size.width-1]);
1423                 }
1424                 while( --size.width );
1425
1426                 EXIT;
1427             }
1428
1429             if( depth == CV_64F )
1430             {
1431                 const double* src1data = (const double*)(src1->data.ptr);
1432                 const double* src2data = (const double*)(src2->data.ptr);
1433                 double* dstdata = (double*)(dst->data.ptr);
1434             
1435                 do
1436                 {
1437                     dstdata[size.width-1] =
1438                         src1data[size.width-1] * src2data[size.width-1];
1439                 }
1440                 while( --size.width );
1441
1442                 EXIT;
1443             }
1444         }
1445
1446         src1_step = src2_step = dst_step = CV_STUB_STEP;
1447         size.height = 1;
1448     }
1449     else
1450     {
1451         src1_step = src1->step;
1452         src2_step = src2->step;
1453         dst_step = dst->step;
1454     }
1455
1456     func = (CvScaledElWiseFunc)(mul_tab.fn_2d[CV_MAT_DEPTH(type)]);
1457
1458     if( !func )
1459         CV_ERROR( CV_StsUnsupportedFormat, "" );
1460
1461     IPPI_CALL( func( src1->data.ptr, src1_step, src2->data.ptr, src2_step,
1462                      dst->data.ptr, dst_step, size, scale ));
1463
1464     __END__;
1465 }
1466
1467
1468 /***************************************** D I V ****************************************/
1469
/*
 * ICV_DEF_DIV_OP_CASE generates icvDiv_<flavor>_C1R, which computes
 *     dst[i] = src1[i]*scale/src2[i]
 * element by element over a size.width x size.height region. An element whose
 * divisor fails _check_macro_ is written as 0 instead of being divided.
 *
 * Macro parameters:
 *   flavor            - suffix of the generated function name
 *   arrtype           - element type of src1, src2 and dst
 *   worktype          - type of the intermediate result (output of _cast_macro1_)
 *   checktype         - type handed to _start_row_macro_
 *   _start_row_macro_ - expanded at the top of every row; it has to declare the
 *                       column index i (and the isrc pointer when one is used)
 *   _cast_macro1_     - converts the floating-point quotient to worktype
 *   _cast_macro2_     - converts worktype to arrtype for the store
 *   _cvt_macro_       - applied to operands before they enter the arithmetic
 *   _check_macro_     - non-zero when the divisor element may be used
 *   isrc              - name of the array that _check_macro_ inspects
 *
 * step1, step2 and step arrive in bytes and are converted to element units.
 * The generated function always returns CV_OK.
 *
 * Four elements are handled per iteration. When all four divisors pass the
 * check only one division is performed: with a = s0 x s1, b = s2 x s3 and
 * d = scale/(a x b), the updated b (= b x d) equals scale/(s0 x s1) and the
 * updated a (= a x d) equals scale/(s2 x s3), so multiplying by the
 * neighbouring divisor leaves scale/s_k for each element. Otherwise every
 * element of the group is divided (or zeroed) on its own, as in the tail loop.
 *
 * NOTE(review): the fast path forms the product of four divisors in double
 * precision. For 64f data of very large or very small magnitude that product
 * may overflow or underflow although each separate quotient is representable;
 * confirm whether callers can reach such inputs.
 */
1470 #define ICV_DEF_DIV_OP_CASE( flavor, arrtype, worktype, checktype, _start_row_macro_,   \
1471     _cast_macro1_, _cast_macro2_, _cvt_macro_, _check_macro_, isrc )                    \
1472                                                                                         \
1473 static CvStatus CV_STDCALL                                                              \
1474 icvDiv_##flavor##_C1R( const arrtype* src1, int step1,                                  \
1475                        const arrtype* src2, int step2,                                  \
1476                        arrtype* dst, int step,                                          \
1477                        CvSize size, double scale )                                      \
1478 {                                                                                       \
1479     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]);         \
1480                                                                                         \
1481     for( ; size.height--; src1+=step1, src2+=step2, dst+=step )                         \
1482     {                                                                                   \
1483         _start_row_macro_(checktype, src2);                                             \
1484         for( i = 0; i <= size.width - 4; i += 4 )                                       \
1485         {                                                                               \
1486             if( _check_macro_(isrc[i]) && _check_macro_(isrc[i+1]) &&                   \
1487                 _check_macro_(isrc[i+2]) && _check_macro_(isrc[i+3]))                   \
1488             {                                                                           \
1489                 double a = (double)_cvt_macro_(src2[i]) * _cvt_macro_(src2[i+1]);       \
1490                 double b = (double)_cvt_macro_(src2[i+2]) * _cvt_macro_(src2[i+3]);     \
1491                 double d = scale/(a * b);                                               \
1492                                                                                         \
1493                 b *= d;                                                                 \
1494                 a *= d;                                                                 \
1495                                                                                         \
1496                 worktype z0 = _cast_macro1_(src2[i+1] * _cvt_macro_(src1[i]) * b);      \
1497                 worktype z1 = _cast_macro1_(src2[i] * _cvt_macro_(src1[i+1]) * b);      \
1498                 worktype z2 = _cast_macro1_(src2[i+3] * _cvt_macro_(src1[i+2]) * a);    \
1499                 worktype z3 = _cast_macro1_(src2[i+2] * _cvt_macro_(src1[i+3]) * a);    \
1500                                                                                         \
1501                 dst[i] = _cast_macro2_(z0);                                             \
1502                 dst[i+1] = _cast_macro2_(z1);                                           \
1503                 dst[i+2] = _cast_macro2_(z2);                                           \
1504                 dst[i+3] = _cast_macro2_(z3);                                           \
1505             }                                                                           \
1506             else                                                                        \
1507             {                                                                           \
1508                 worktype z0 = _check_macro_(isrc[i]) ?                                  \
1509                    _cast_macro1_(_cvt_macro_(src1[i])*scale/_cvt_macro_(src2[i])) : 0;  \
1510                 worktype z1 = _check_macro_(isrc[i+1]) ?                                \
1511                    _cast_macro1_(_cvt_macro_(src1[i+1])*scale/_cvt_macro_(src2[i+1])):0;\
1512                 worktype z2 = _check_macro_(isrc[i+2]) ?                                \
1513                    _cast_macro1_(_cvt_macro_(src1[i+2])*scale/_cvt_macro_(src2[i+2])):0;\
1514                 worktype z3 = _check_macro_(isrc[i+3]) ?                                \
1515                    _cast_macro1_(_cvt_macro_(src1[i+3])*scale/_cvt_macro_(src2[i+3])):0;\
1516                                                                                         \
1517                 dst[i] = _cast_macro2_(z0);                                             \
1518                 dst[i+1] = _cast_macro2_(z1);                                           \
1519                 dst[i+2] = _cast_macro2_(z2);                                           \
1520                 dst[i+3] = _cast_macro2_(z3);                                           \
1521             }                                                                           \
1522         }                                                                               \
1523                                                                                         \
1524         for( ; i < size.width; i++ )                                                    \
1525         {                                                                               \
1526             worktype z0 = _check_macro_(isrc[i]) ?                                      \
1527                 _cast_macro1_(_cvt_macro_(src1[i])*scale/_cvt_macro_(src2[i])) : 0;     \
1528             dst[i] = _cast_macro2_(z0);                                                 \
1529         }                                                                               \
1530     }                                                                                   \
1531                                                                                         \
1532     return CV_OK;                                                                       \
1533 }
1534
1535
/*
 * ICV_DEF_RECIP_OP_CASE generates icvRecip_<flavor>_C1R, which computes
 *     dst[i] = scale/src[i]
 * element by element over a size.width x size.height region. An element that
 * fails _check_macro_ is written as 0 instead of being divided.
 *
 * Macro parameters:
 *   flavor            - suffix of the generated function name
 *   arrtype           - element type of src and dst
 *   worktype          - type of the intermediate result (output of _cast_macro1_)
 *   checktype         - type handed to _start_row_macro_
 *   _start_row_macro_ - expanded at the top of every row; it has to declare the
 *                       column index i (and the isrc pointer when one is used)
 *   _cast_macro1_     - converts the floating-point quotient to worktype
 *   _cast_macro2_     - converts worktype to arrtype for the store
 *   _cvt_macro_       - applied to source elements before the arithmetic
 *   _check_macro_     - non-zero when the element may be used as a divisor
 *   isrc              - name of the array that _check_macro_ inspects
 *
 * step1 and step arrive in bytes and are converted to element units.
 * The generated function always returns CV_OK.
 *
 * Four elements are handled per iteration. When all four pass the check only
 * one division is performed: with a = s0 x s1, b = s2 x s3 and
 * d = scale/(a x b), the updated b equals scale/(s0 x s1) and the updated a
 * equals scale/(s2 x s3); multiplying by the neighbouring element leaves
 * scale/s_k. Otherwise every element of the group is handled on its own, as
 * in the tail loop.
 *
 * NOTE(review): the fast path forms the product of four elements in double
 * precision. For 64f data of very large or very small magnitude that product
 * may overflow or underflow although each separate quotient is representable;
 * confirm whether callers can reach such inputs.
 */
1536 #define ICV_DEF_RECIP_OP_CASE( flavor, arrtype, worktype, checktype,            \
1537     _start_row_macro_, _cast_macro1_, _cast_macro2_,                            \
1538     _cvt_macro_, _check_macro_, isrc )                                          \
1539                                                                                 \
1540 static CvStatus CV_STDCALL                                                      \
1541 icvRecip_##flavor##_C1R( const arrtype* src, int step1,                         \
1542                          arrtype* dst, int step,                                \
1543                          CvSize size, double scale )                            \
1544 {                                                                               \
1545     step1 /= sizeof(src[0]); step /= sizeof(dst[0]);                            \
1546                                                                                 \
1547     for( ; size.height--; src+=step1, dst+=step )                               \
1548     {                                                                           \
1549         _start_row_macro_(checktype, src);                                      \
1550         for( i = 0; i <= size.width - 4; i += 4 )                               \
1551         {                                                                       \
1552             if( _check_macro_(isrc[i]) && _check_macro_(isrc[i+1]) &&           \
1553                 _check_macro_(isrc[i+2]) && _check_macro_(isrc[i+3]))           \
1554             {                                                                   \
1555                 double a = (double)_cvt_macro_(src[i]) * _cvt_macro_(src[i+1]); \
1556                 double b = (double)_cvt_macro_(src[i+2]) * _cvt_macro_(src[i+3]);\
1557                 double d = scale/(a * b);                                       \
1558                                                                                 \
1559                 b *= d;                                                         \
1560                 a *= d;                                                         \
1561                                                                                 \
1562                 worktype z0 = _cast_macro1_(src[i+1] * b);                      \
1563                 worktype z1 = _cast_macro1_(src[i] * b);                        \
1564                 worktype z2 = _cast_macro1_(src[i+3] * a);                      \
1565                 worktype z3 = _cast_macro1_(src[i+2] * a);                      \
1566                                                                                 \
1567                 dst[i] = _cast_macro2_(z0);                                     \
1568                 dst[i+1] = _cast_macro2_(z1);                                   \
1569                 dst[i+2] = _cast_macro2_(z2);                                   \
1570                 dst[i+3] = _cast_macro2_(z3);                                   \
1571             }                                                                   \
1572             else                                                                \
1573             {                                                                   \
1574                 worktype z0 = _check_macro_(isrc[i]) ?                          \
1575                    _cast_macro1_(scale/_cvt_macro_(src[i])) : 0;                \
1576                 worktype z1 = _check_macro_(isrc[i+1]) ?                        \
1577                    _cast_macro1_(scale/_cvt_macro_(src[i+1])):0;                \
1578                 worktype z2 = _check_macro_(isrc[i+2]) ?                        \
1579                    _cast_macro1_(scale/_cvt_macro_(src[i+2])):0;                \
1580                 worktype z3 = _check_macro_(isrc[i+3]) ?                        \
1581                    _cast_macro1_(scale/_cvt_macro_(src[i+3])):0;                \
1582                                                                                 \
1583                 dst[i] = _cast_macro2_(z0);                                     \
1584                 dst[i+1] = _cast_macro2_(z1);                                   \
1585                 dst[i+2] = _cast_macro2_(z2);                                   \
1586                 dst[i+3] = _cast_macro2_(z3);                                   \
1587             }                                                                   \
1588         }                                                                       \
1589                                                                                 \
1590         for( ; i < size.width; i++ )                                            \
1591         {                                                                       \
1592             worktype z0 = _check_macro_(isrc[i]) ?                              \
1593                 _cast_macro1_(scale/_cvt_macro_(src[i])) : 0;                   \
1594             dst[i] = _cast_macro2_(z0);                                         \
1595         }                                                                       \
1596     }                                                                           \
1597                                                                                 \
1598     return CV_OK;                                                               \
1599 }
1600
1601
/*
 * Row-start helpers passed as _start_row_macro_ to the Div/Recip generators.
 * The integer variant only declares the column index i and ignores both of
 * its arguments.
 */
1602 #define div_start_row_int(checktype, divisor) \
1603     int i
1604
/*
 * The floating-point variant additionally declares isrc, a view of the divisor
 * row through a pointer to the integer type checktype, so that the zero test
 * can be carried out on the raw bits of each element.
 * NOTE(review): this reads floating-point storage through an integer pointer;
 * confirm the build does not rely on strict-aliasing optimisations here.
 */
1605 #define div_start_row_flt(checktype, divisor) \
1606     const checktype* isrc = (const checktype*)divisor; int i
1607
/*
 * Zero tests applied to the integer view of a float / double divisor. The mask
 * clears the top bit (the sign bit in IEEE-754 layouts), so the result is 0
 * when all remaining bits are zero, whatever the top bit is, and non-zero
 * otherwise.
 */
1608 #define div_check_zero_flt(x)  (((x) & 0x7fffffff) != 0)
1609 #define div_check_zero_dbl(x)  (((x) & CV_BIG_INT(0x7fffffffffffffff)) != 0)
1610
/*
 * Instantiations of the Div and Recip kernels for each supported depth.
 * Argument order: flavor, element type, work type, check type, row-start
 * macro, quotient cast, store cast, operand conversion, divisor check and the
 * name of the array the check reads.
 * The integer flavors round the quotient with cvRound and test the divisor
 * itself with CV_NONZERO; the 32f/64f flavors keep the quotient in double and
 * test the integer view isrc with the div_check_zero_* macros.
 *
 * Compiler optimisation is switched off around the 8u Div kernel for 64-bit
 * MSVC builds that are not using ICC.
 * NOTE(review): presumably a workaround for a code-generation problem; the
 * reason is not recorded here.
 */
1611 #if defined WIN64 && defined EM64T && defined _MSC_VER && !defined CV_ICC
1612 #pragma optimize("",off)
1613 #endif
1614
1615 ICV_DEF_DIV_OP_CASE( 8u, uchar, int, uchar, div_start_row_int,
1616                      cvRound, CV_CAST_8U, CV_8TO32F, CV_NONZERO, src2 )
1617
1618 #if defined WIN64 && defined EM64T && defined _MSC_VER && !defined CV_ICC
1619 #pragma optimize("",on)
1620 #endif
1621
1622
1623 ICV_DEF_DIV_OP_CASE( 16u, ushort, int, ushort, div_start_row_int,
1624                      cvRound, CV_CAST_16U, CV_CAST_64F, CV_NONZERO, src2 )
1625 ICV_DEF_DIV_OP_CASE( 16s, short, int, short, div_start_row_int,
1626                      cvRound, CV_CAST_16S, CV_NOP, CV_NONZERO, src2 )
1627 ICV_DEF_DIV_OP_CASE( 32s, int, int, int, div_start_row_int,
1628                      cvRound, CV_CAST_32S, CV_CAST_64F, CV_NONZERO, src2 )
1629 ICV_DEF_DIV_OP_CASE( 32f, float, double, int, div_start_row_flt,
1630                      CV_NOP, CV_CAST_32F, CV_NOP, div_check_zero_flt, isrc )
1631 ICV_DEF_DIV_OP_CASE( 64f, double, double, int64, div_start_row_flt,
1632                      CV_NOP, CV_CAST_64F, CV_NOP, div_check_zero_dbl, isrc )
1633
     /* Reciprocal kernels: same flavors, with the single source array checked. */
1634 ICV_DEF_RECIP_OP_CASE( 8u, uchar, int, uchar, div_start_row_int,
1635                        cvRound, CV_CAST_8U, CV_8TO32F, CV_NONZERO, src )
1636 ICV_DEF_RECIP_OP_CASE( 16u, ushort, int, ushort, div_start_row_int,
1637                        cvRound, CV_CAST_16U, CV_CAST_64F, CV_NONZERO, src )
1638 ICV_DEF_RECIP_OP_CASE( 16s, short, int, short, div_start_row_int,
1639                        cvRound, CV_CAST_16S, CV_NOP, CV_NONZERO, src )
1640 ICV_DEF_RECIP_OP_CASE( 32s, int, int, int, div_start_row_int,
1641                        cvRound, CV_CAST_32S, CV_CAST_64F, CV_NONZERO, src )
1642 ICV_DEF_RECIP_OP_CASE( 32f, float, double, int, div_start_row_flt,
1643                        CV_NOP, CV_CAST_32F, CV_NOP, div_check_zero_flt, isrc  )
1644 ICV_DEF_RECIP_OP_CASE( 64f, double, double, int64, div_start_row_flt,
1645                        CV_NOP, CV_CAST_64F, CV_NOP, div_check_zero_dbl, isrc )
1646
     /* ICV_DEF_INIT_ARITHM_FUNC_TAB is defined outside this section; presumably it
        emits the icvInitDivC1RTable / icvInitRecipC1RTable functions that cvDiv
        calls to fill its dispatch tables - confirm against the macro definition. */
1647 ICV_DEF_INIT_ARITHM_FUNC_TAB( Div, C1R )
1648 ICV_DEF_INIT_ARITHM_FUNC_TAB( Recip, C1R )
1649
/*
 * Untyped signature through which cvDiv calls the icvRecip_<flavor>_C1R
 * kernels: one source array, one destination array, their steps, the region
 * size and the scale factor.
 */
1650 typedef CvStatus (CV_STDCALL * CvRecipFunc)( const void* src, int step1,
1651                                              void* dst, int step,
1652                                              CvSize size, double scale );
1653
1654 CV_IMPL void
1655 cvDiv( const void* srcarr1, const void* srcarr2, void* dstarr, double scale )
1656 {
1657     static CvFuncTable div_tab;
1658     static CvFuncTable recip_tab;
1659     static int inittab = 0;
1660
1661     CV_FUNCNAME( "cvDiv" );
1662
1663     __BEGIN__;
1664
1665     int type, coi = 0;
1666     int is_nd = 0;
1667     int src1_step, src2_step, dst_step;
1668     int src1_cont_flag = CV_MAT_CONT_FLAG;
1669     CvMat srcstub1, *src1 = (CvMat*)srcarr1;
1670     CvMat srcstub2, *src2 = (CvMat*)srcarr2;
1671     CvMat dststub,  *dst = (CvMat*)dstarr;
1672     CvSize size;
1673
1674     if( !inittab )
1675     {
1676         icvInitDivC1RTable( &div_tab );
1677         icvInitRecipC1RTable( &recip_tab );
1678         inittab = 1;
1679     }
1680
1681     if( !CV_IS_MAT(src2) )
1682     {
1683         if( CV_IS_MATND(src2))
1684             is_nd = 1;
1685         else
1686         {
1687             CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi ));
1688             if( coi != 0 )
1689                 CV_ERROR( CV_BadCOI, "" );
1690         }
1691     }
1692
1693     if( src1 )
1694     {
1695         if( CV_IS_MATND(src1))
1696             is_nd = 1;
1697         else
1698         {
1699             if( !CV_IS_MAT(src1) )
1700             {
1701                 CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi ));
1702                 if( coi != 0 )
1703                     CV_ERROR( CV_BadCOI, "" );
1704             }
1705
1706             if( !CV_ARE_TYPES_EQ( src1, src2 ))
1707                 CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1708
1709             if( !CV_ARE_SIZES_EQ( src1, src2 ))
1710                 CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1711             src1_cont_flag = src1->type;
1712         }
1713     }
1714
1715     if( !CV_IS_MAT(dst) )
1716     {
1717         if( CV_IS_MATND(dst))
1718             is_nd = 1;
1719         else
1720         {
1721             CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
1722             if( coi != 0 )
1723                 CV_ERROR( CV_BadCOI, "" );
1724         }
1725     }
1726
1727     if( is_nd )
1728     {
1729         CvArr* arrs[] = { dst, src2, src1 };
1730         CvMatND stubs[3];
1731         CvNArrayIterator iterator;
1732
1733         CV_CALL( cvInitNArrayIterator( 2 + (src1 != 0), arrs, 0, stubs, &iterator ));
1734
1735         type = iterator.hdr[0]->type;
1736         iterator.size.width *= CV_MAT_CN(type);
1737
1738         if( src1 )
1739         {
1740             CvScaledElWiseFunc func =
1741                 (CvScaledElWiseFunc)(div_tab.fn_2d[CV_MAT_DEPTH(type)]);
1742             if( !func )
1743                 CV_ERROR( CV_StsUnsupportedFormat, "" );
1744
1745             do
1746             {
1747                 IPPI_CALL( func( iterator.ptr[2], CV_STUB_STEP,
1748                                  iterator.ptr[1], CV_STUB_STEP,
1749                                  iterator.ptr[0], CV_STUB_STEP,
1750                                  iterator.size, scale ));
1751             }
1752             while( cvNextNArraySlice( &iterator ));
1753         }
1754         else
1755         {
1756             CvRecipFunc func = (CvRecipFunc)(recip_tab.fn_2d[CV_MAT_DEPTH(type)]);
1757
1758             if( !func )
1759                 CV_ERROR( CV_StsUnsupportedFormat, "" );
1760
1761             do
1762             {
1763                 IPPI_CALL( func( iterator.ptr[1], CV_STUB_STEP,
1764                                  iterator.ptr[0], CV_STUB_STEP,
1765                                  iterator.size, scale ));
1766             }
1767             while( cvNextNArraySlice( &iterator ));
1768         }
1769         EXIT;
1770     }
1771
1772     if( !CV_ARE_TYPES_EQ( src2, dst ))
1773         CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats );
1774
1775     if( !CV_ARE_SIZES_EQ( src2, dst ))
1776         CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes );
1777
1778     type = CV_MAT_TYPE(src2->type);
1779     size = cvGetMatSize( src2 );
1780     size.width *= CV_MAT_CN( type );
1781
1782     if( CV_IS_MAT_CONT( src1_cont_flag & src2->type & dst->type ))
1783     {
1784         size.width *= size.height;
1785         src1_step = src2_step = dst_step = CV_STUB_STEP;
1786         size.height = 1;
1787     }
1788     else
1789     {
1790         src1_step = src1 ? src1->step : 0;
1791         src2_step = src2->step;
1792         dst_step = dst->step;
1793     }
1794
1795     if( src1 )
1796     {
1797         CvScaledElWiseFunc func = (CvScaledElWiseFunc)(div_tab.fn_2d[CV_MAT_DEPTH(type)]);
1798
1799         if( !func )
1800             CV_ERROR( CV_StsUnsupportedFormat, "" );
1801
1802         IPPI_CALL( func( src1->data.ptr, src1_step, src2->data.ptr, src2_step,
1803                          dst->data.ptr, dst_step, size, scale ));
1804     }
1805     else
1806     {
1807         CvRecipFunc func = (CvRecipFunc)(recip_tab.fn_2d[CV_MAT_DEPTH(type)]);
1808
1809         if( !func )
1810             CV_ERROR( CV_StsUnsupportedFormat, "" );
1811
1812         IPPI_CALL( func( src2->data.ptr, src2_step,
1813                          dst->data.ptr, dst_step, size, scale ));
1814     }
1815
1816     __END__;
1817 }
1818
1819 /******************************* A D D   W E I G H T E D ****************************/
1820
/*
 * ICV_DEF_ADD_WEIGHTED_OP generates icvAddWeighted_<flavor>_C1R, which computes
 *     dst[i] = src1[i]*alpha + src2[i]*beta + gamma
 * element by element over a size.width x size.height region.
 *
 * Macro parameters:
 *   flavor      - suffix of the generated function name
 *   arrtype     - element type of src1, src2 and dst
 *   worktype    - type of the intermediate result (output of cast_macro1)
 *   load_macro  - applied to each source element before the arithmetic
 *   cast_macro1 - converts the floating-point sum to worktype
 *   cast_macro2 - converts worktype to arrtype for the store
 *
 * step1, step2 and step arrive in bytes and are converted to element units.
 * Each row is processed four elements at a time, followed by a scalar tail
 * loop for the remaining columns. The generated function always returns CV_OK.
 */
1821 #define ICV_DEF_ADD_WEIGHTED_OP(flavor, arrtype, worktype, load_macro,          \
1822                                      cast_macro1, cast_macro2)                  \
1823 static CvStatus CV_STDCALL                                                      \
1824 icvAddWeighted_##flavor##_C1R( const arrtype* src1, int step1, double alpha,    \
1825                                const arrtype* src2, int step2, double beta,     \
1826                                double gamma, arrtype* dst, int step, CvSize size )\
1827 {                                                                               \
1828     step1 /= sizeof(src1[0]); step2 /= sizeof(src2[0]); step /= sizeof(dst[0]); \
1829                                                                                 \
1830     for( ; size.height--; src1 += step1, src2 += step2, dst += step )           \
1831     {                                                                           \
1832         int i;                                                                  \
1833                                                                                 \
1834         for( i = 0; i <= size.width - 4; i += 4 )                               \
1835         {                                                                       \
1836             worktype t0 = cast_macro1(load_macro((src1)[i])*alpha +             \
1837                                       load_macro((src2)[i])*beta + gamma);      \
1838             worktype t1 = cast_macro1(load_macro((src1)[i+1])*alpha +           \
1839                                       load_macro((src2)[i+1])*beta + gamma);    \
1840                                                                                 \
1841             (dst)[i] = cast_macro2( t0 );                                       \
1842             (dst)[i+1] = cast_macro2( t1 );                                     \
1843                                                                                 \
1844             t0 = cast_macro1(load_macro((src1)[i+2])*alpha +                    \
1845                              load_macro((src2)[i+2])*beta + gamma);             \
1846             t1 = cast_macro1(load_macro((src1)[i+3])*alpha +                    \
1847                              load_macro((src2)[i+3])*beta + gamma);             \
1848                                                                                 \
1849             (dst)[i+2] = cast_macro2( t0 );                                     \
1850             (dst)[i+3] = cast_macro2( t1 );                                     \
1851         }                                                                       \
1852                                                                                 \
1853         for( ; i < size.width; i++ )                                            \
1854         {                                                                       \
1855             worktype t0 = cast_macro1(load_macro((src1)[i])*alpha +             \
1856                                       load_macro((src2)[i])*beta + gamma);      \
1857             (dst)[i] = cast_macro2( t0 );                                       \
1858         }                                                                       \
1859     }                                                                           \
1860                                                                                 \
1861     return CV_OK;                                                               \
1862 }
1863
1864
/* Number of fractional bits used by the fixed-point tables below. */
1865 #undef shift
1866 #define shift 14
1867
/*
 * Table-driven 8-bit weighted sum: dst = src1*alpha + src2*beta + gamma.
 *
 * Two 256-entry tables are built in fixed point with `shift` fractional bits:
 *   tab1[j] = round( j*alpha * 2^shift )
 *   tab2[j] = round( (gamma + j*beta) * 2^shift + 2^(shift-1) )
 * so that every output is (tab1[a] + tab2[b]) >> shift; the 2^(shift-1) term
 * folded into tab2 adds one half before the shift. The tables are filled by
 * repeated addition of the scaled alpha / beta rather than by multiplication.
 *
 * Steps are used as given (elements are one byte wide). Always returns CV_OK.
 *
 * NOTE(review): tab1[a] + tab2[b] is evaluated in int. The caller has to keep
 * alpha, beta and gamma small enough that this sum cannot overflow - verify
 * the bounds enforced at the call site.
 */
1868 static  CvStatus CV_STDCALL
1869 icvAddWeighted_8u_fast_C1R( const uchar* src1, int step1, double alpha,
1870                             const uchar* src2, int step2, double beta,
1871                             double gamma, uchar* dst, int step, CvSize size )
1872 {
1873     int tab1[256], tab2[256];
1874     double t = 0;
1875     int j, t0, t1, t2, t3;
1876
         // move the coefficients to fixed point; gamma also receives the rounding half
1877     alpha *= 1 << shift;
1878     gamma = gamma*(1 << shift) + (1 << (shift - 1));
1879     beta *= 1 << shift;
1880
1881     for( j = 0; j < 256; j++ )
1882     {
1883         tab1[j] = cvRound(t);
1884         tab2[j] = cvRound(gamma);
1885         t += alpha;
1886         gamma += beta;
1887     }
1888
         // Results for the four extreme input pairs (0,0), (0,255), (255,0), (255,255).
         // Both tables are monotonic in their index, so these bound every output.
1889     t0 = (tab1[0] + tab2[0]) >> shift;
1890     t1 = (tab1[0] + tab2[255]) >> shift;
1891     t2 = (tab1[255] + tab2[0]) >> shift;
1892     t3 = (tab1[255] + tab2[255]) >> shift;
1893
         // the unsigned comparison is true exactly when -256 <= t < 512
1894     if( (unsigned)(t0+256) < 768 && (unsigned)(t1+256) < 768 &&
1895         (unsigned)(t2+256) < 768 && (unsigned)(t3+256) < 768 )
1896     {
1897         // use faster table-based conversion back to 8u
1898         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
1899         {
1900             int i;
1901
1902             for( i = 0; i <= size.width - 4; i += 4 )
1903             {
1904                 t0 = CV_FAST_CAST_8U((tab1[src1[i]] + tab2[src2[i]]) >> shift);
1905                 t1 = CV_FAST_CAST_8U((tab1[src1[i+1]] + tab2[src2[i+1]]) >> shift);
1906
1907                 dst[i] = (uchar)t0;
1908                 dst[i+1] = (uchar)t1;
1909
1910                 t0 = CV_FAST_CAST_8U((tab1[src1[i+2]] + tab2[src2[i+2]]) >> shift);
1911                 t1 = CV_FAST_CAST_8U((tab1[src1[i+3]] + tab2[src2[i+3]]) >> shift);
1912
1913                 dst[i+2] = (uchar)t0;
1914                 dst[i+3] = (uchar)t1;
1915             }
1916
1917             for( ; i < size.width; i++ )
1918             {
1919                 t0 = CV_FAST_CAST_8U((tab1[src1[i]] + tab2[src2[i]]) >> shift);
1920                 dst[i] = (uchar)t0;
1921             }
1922         }
1923     }
1924     else
1925     {
1926         // use universal macro for conversion back to 8u
1927         for( ; size.height--; src1 += step1, src2 += step2, dst += step )
1928         {
1929             int i;
1930             
1931             for( i = 0; i <= size.width - 4; i += 4 )
1932             {
1933                 t0 = (tab1[src1[i]] + tab2[src2[i]]) >> shift;
1934                 t1 = (tab1[src1[i+1]] + tab2[src2[i+1]]) >> shift;
1935
1936                 dst[i] = CV_CAST_8U( t0 );
1937                 dst[i+1] = CV_CAST_8U( t1 );
1938
1939                 t0 = (tab1[src1[i+2]] + tab2[src2[i+2]]) >> shift;
1940                 t1 = (tab1[src1[i+3]] + tab2[src2[i+3]]) >> shift;
1941
1942                 dst[i+2] = CV_CAST_8U( t0 );
1943                 dst[i+3] = CV_CAST_8U( t1 );
1944             }
1945
1946             for( ; i < size.width; i++ )
1947             {
1948                 t0 = (tab1[src1[i]] + tab2[src2[i]]) >> shift;
1949                 dst[i] = CV_CAST_8U( t0 );
1950             }
1951         }
1952     }
1953
1954     return CV_OK;
1955 }
1956
1957
/*
 * Generic AddWeighted kernels for each supported depth. Argument order:
 * flavor, element type, work type, load macro, sum cast, store cast.
 * The integer flavors round the sum with cvRound before the store cast; the
 * 32f/64f flavors keep the sum in double.
 */
1958 ICV_DEF_ADD_WEIGHTED_OP( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U )
1959 ICV_DEF_ADD_WEIGHTED_OP( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U )
1960 ICV_DEF_ADD_WEIGHTED_OP( 16s, short, int, CV_NOP, cvRound, CV_CAST_16S )
1961 ICV_DEF_ADD_WEIGHTED_OP( 32s, int, int, CV_NOP, cvRound, CV_CAST_32S )
1962 ICV_DEF_ADD_WEIGHTED_OP( 32f, float, double, CV_NOP, CV_NOP, CV_CAST_32F )
1963 ICV_DEF_ADD_WEIGHTED_OP( 64f, double, double, CV_NOP, CV_NOP, CV_CAST_64F )
1964
1965
     /* ICV_DEF_INIT_ARITHM_FUNC_TAB is defined outside this section; presumably it
        emits the icvInitAddWeightedC1RTable function that cvAddWeighted calls to
        fill its dispatch table - confirm against the macro definition. */
1966 ICV_DEF_INIT_ARITHM_FUNC_TAB( AddWeighted, C1R )
1967
/*
 * Untyped signature through which cvAddWeighted calls the
 * icvAddWeighted_<flavor>_C1R kernels and the fast 8-bit variant.
 */
1968 typedef CvStatus (CV_STDCALL *CvAddWeightedFunc)( const void* src1, int step1, double alpha,
1969                                                   const void* src2, int step2, double beta,
1970                                                   double gamma, void* dst,
1971                                                   int step, CvSize size );
1972
1973 CV_IMPL void
1974 cvAddWeighted( const CvArr* srcAarr, double alpha,
1975                const CvArr* srcBarr, double beta,
1976                double gamma, CvArr* dstarr )
1977 {
1978     static CvFuncTable addw_tab;
1979     static int inittab = 0;
1980     
1981     CV_FUNCNAME( "cvAddWeighted" );
1982
1983     __BEGIN__;
1984
1985     CvMat  srcA_stub, *srcA = (CvMat*)srcAarr;
1986     CvMat  srcB_stub, *srcB = (CvMat*)srcBarr;
1987     CvMat  dst_stub, *dst = (CvMat*)dstarr;
1988     int  coi1, coi2, coi;
1989     int  srcA_step, srcB_step, dst_step;
1990     int  type;
1991     CvAddWeightedFunc func;
1992     CvSize size;
1993
1994     if( !inittab )
1995     {
1996         icvInitAddWeightedC1RTable( &addw_tab );
1997         inittab = 1;
1998     }
1999
2000     CV_CALL( srcA = cvGetMat( srcA, &srcA_stub, &coi1 ));
2001     CV_CALL( srcB = cvGetMat( srcB, &srcB_stub, &coi2 ));
2002     CV_CALL( dst = cvGetMat( dst, &dst_stub, &coi ));
2003
2004     if( coi1 || coi2 || coi )
2005         CV_ERROR( CV_BadCOI, "COI must not be set" );
2006
2007     if( !CV_ARE_TYPES_EQ( srcA, srcB ) ||
2008         !CV_ARE_TYPES_EQ( srcA, dst ))
2009         CV_ERROR( CV_StsUnmatchedFormats,
2010         "All input/output arrays should have the same type");
2011
2012     if( !CV_ARE_SIZES_EQ( srcA, srcB ) ||
2013         !CV_ARE_SIZES_EQ( srcA, dst ))
2014         CV_ERROR( CV_StsUnmatchedSizes,
2015         "All input/output arrays should have the same sizes");
2016
2017     size = cvGetMatSize( srcA );
2018     type = CV_MAT_TYPE( srcA->type );
2019     size.width *= CV_MAT_CN( type );
2020     srcA_step = srcA->step;
2021     srcB_step = srcB->step;
2022     dst_step = dst->step;
2023
2024     if( CV_IS_MAT_CONT( type & srcB->type & dst->type ))
2025     {
2026         size.width *= size.height;
2027         size.height = 1;
2028         srcA_step = srcB_step = dst_step = CV_AUTOSTEP;
2029     }
2030
2031     if( type == CV_8UC1 && size.width * size.height >= 1024 &&
2032         fabs(alpha) < 256 && fabs(beta) < 256 && fabs(gamma) < 256*256 )
2033     {
2034         func = (CvAddWeightedFunc)icvAddWeighted_8u_fast_C1R;
2035     }
2036     else
2037     {
2038         func = (CvAddWeightedFunc)addw_tab.fn_2d[CV_MAT_DEPTH(type)];
2039         if( !func )
2040             CV_ERROR( CV_StsUnsupportedFormat, "This array type is not supported" );
2041     }
2042
2043     IPPI_CALL( func( srcA->data.ptr, srcA_step, alpha, srcB->data.ptr, srcB_step,
2044                      beta, gamma, dst->data.ptr, dst_step, size ));
2045
2046     __END__;
2047 }
2048
2049
2050 /* End of file. */