Move the sources to trunk
[opencv] / cxcore / src / cxsumpixels.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "_cxcore.h"
43
44 /****************************************************************************************\
45 *                             Find sum of pixels in the ROI                              *
46 \****************************************************************************************/
47
/*
 * ICV_SUM_COI_CASE( __op__, len, cn )
 *
 * Adds __op__(src[x]) to the accumulator s0 for every cn-th element of `src`,
 * starting at the current value of `x` and stopping once x >= len.
 * `src`, `x` and `s0` must be declared in the enclosing scope; `x` is left
 * advanced past the processed range.
 *
 * The main loop is unrolled four elements at a time.  Each term is added to
 * s0 on its own so that every addition is carried out in the accumulator's
 * type.  Adding the four terms together first would evaluate the partial sum
 * in the (promoted) element type, which overflows for large `int` data before
 * the value ever reaches a wider accumulator.  The result is therefore the
 * same as a plain in-order accumulation, exactly like the tail loop.
 */
#define ICV_SUM_COI_CASE( __op__, len, cn )                 \
    for( ; x <= (len) - 4*(cn); x += 4*(cn) )               \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s0 += __op__(src[x+(cn)]);                          \
        s0 += __op__(src[x+(cn)*2]);                        \
        s0 += __op__(src[x+(cn)*3]);                        \
    }                                                       \
                                                            \
    for( ; x < (len); x += (cn) )                           \
        s0 += __op__(src[x]);


/* Single-channel case: every element belongs to the only channel (cn == 1). */
#define ICV_SUM_CASE_C1( __op__, len )                      \
    ICV_SUM_COI_CASE( __op__, len, 1 )
59
60
/*
 * ICV_SUM_CASE_C2( __op__, len )
 *
 * Two-channel interleaved sum: even-indexed elements of `src` go to s0,
 * odd-indexed ones to s1, from the current `x` up to (not including) `len`.
 * `src`, `x`, `s0` and `s1` come from the enclosing scope; `len` is counted
 * in elements (pixels * 2).
 *
 * The main loop handles four pixels per iteration.  Terms are added to the
 * accumulators one at a time so each addition happens in the accumulator's
 * type; summing four elements first would overflow in the element's promoted
 * type (e.g. `int`) before reaching a wider accumulator.
 */
#define ICV_SUM_CASE_C2( __op__, len )                      \
    for( ; x <= (len) - 8; x += 8 )                         \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
        s0 += __op__(src[x+2]);                             \
        s1 += __op__(src[x+3]);                             \
        s0 += __op__(src[x+4]);                             \
        s1 += __op__(src[x+5]);                             \
        s0 += __op__(src[x+6]);                             \
        s1 += __op__(src[x+7]);                             \
    }                                                       \
                                                            \
    for( ; x < (len); x += 2 )                              \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
    }
75
76
77
/*
 * ICV_SUM_CASE_C3( __op__, len )
 *
 * Three-channel interleaved sum: element k of `src` is added to
 * s0, s1 or s2 according to k % 3, from the current `x` up to `len`.
 * `src`, `x`, `s0`, `s1` and `s2` come from the enclosing scope; `len` is
 * counted in elements (pixels * 3).
 *
 * The main loop handles four pixels per iteration.  Terms are added to the
 * accumulators one at a time so each addition happens in the accumulator's
 * type; summing four elements first would overflow in the element's promoted
 * type (e.g. `int`) before reaching a wider accumulator.
 */
#define ICV_SUM_CASE_C3( __op__, len )                      \
    for( ; x <= (len) - 12; x += 12 )                       \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
        s2 += __op__(src[x+2]);                             \
        s0 += __op__(src[x+3]);                             \
        s1 += __op__(src[x+4]);                             \
        s2 += __op__(src[x+5]);                             \
        s0 += __op__(src[x+6]);                             \
        s1 += __op__(src[x+7]);                             \
        s2 += __op__(src[x+8]);                             \
        s0 += __op__(src[x+9]);                             \
        s1 += __op__(src[x+10]);                            \
        s2 += __op__(src[x+11]);                            \
    }                                                       \
                                                            \
    for( ; x < (len); x += 3 )                              \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
        s2 += __op__(src[x+2]);                             \
    }
95
96
/*
 * ICV_SUM_CASE_C4( __op__, len )
 *
 * Four-channel interleaved sum: element k of `src` is added to
 * s0..s3 according to k % 4, from the current `x` up to `len`.
 * `src`, `x` and `s0`..`s3` come from the enclosing scope; `len` is counted
 * in elements (pixels * 4).
 *
 * The main loop handles four pixels per iteration.  Terms are added to the
 * accumulators one at a time so each addition happens in the accumulator's
 * type; summing four elements first would overflow in the element's promoted
 * type (e.g. `int`) before reaching a wider accumulator.
 */
#define ICV_SUM_CASE_C4( __op__, len )                      \
    for( ; x <= (len) - 16; x += 16 )                       \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
        s2 += __op__(src[x+2]);                             \
        s3 += __op__(src[x+3]);                             \
        s0 += __op__(src[x+4]);                             \
        s1 += __op__(src[x+5]);                             \
        s2 += __op__(src[x+6]);                             \
        s3 += __op__(src[x+7]);                             \
        s0 += __op__(src[x+8]);                             \
        s1 += __op__(src[x+9]);                             \
        s2 += __op__(src[x+10]);                            \
        s3 += __op__(src[x+11]);                            \
        s0 += __op__(src[x+12]);                            \
        s1 += __op__(src[x+13]);                            \
        s2 += __op__(src[x+14]);                            \
        s3 += __op__(src[x+15]);                            \
    }                                                       \
                                                            \
    for( ; x < (len); x += 4 )                              \
    {                                                       \
        s0 += __op__(src[x]);                               \
        s1 += __op__(src[x+1]);                             \
        s2 += __op__(src[x+2]);                             \
        s3 += __op__(src[x+3]);                             \
    }
117
118
119 ////////////////////////////////////// entry macros //////////////////////////////////////
120
/* Shared prologue of the generated sum functions: converts the byte stride
   `step` into a stride counted in elements of `src`, so that the generated
   row loops can advance with `src += step`.  Expands in the scope of the
   generated function and relies on its `src` and `step` parameters. */
121 #define ICV_SUM_ENTRY_COMMON()          \
122     step /= sizeof(src[0])
123
/* ICV_SUM_ENTRY_C<n>( sumtype ): declares n zero-initialised accumulators
   s0..s<n-1> of type `sumtype` (one per channel) and then runs the shared
   prologue.  The variant is selected by token pasting (ICV_SUM_ENTRY_C##cn).
   The expansion deliberately has no trailing semicolon; the call site
   supplies it. */
124 #define ICV_SUM_ENTRY_C1( sumtype )     \
125     sumtype s0 = 0;                     \
126     ICV_SUM_ENTRY_COMMON()
127
128 #define ICV_SUM_ENTRY_C2( sumtype )     \
129     sumtype s0 = 0, s1 = 0;             \
130     ICV_SUM_ENTRY_COMMON()
131
132 #define ICV_SUM_ENTRY_C3( sumtype )     \
133     sumtype s0 = 0, s1 = 0, s2 = 0;     \
134     ICV_SUM_ENTRY_COMMON()
135
136 #define ICV_SUM_ENTRY_C4( sumtype )         \
137     sumtype s0 = 0, s1 = 0, s2 = 0, s3 = 0; \
138     ICV_SUM_ENTRY_COMMON()
139
140
/* Prologue shared by the block-wise sum functions: declares `remaining`, the
   number of elements that may still be added to the work accumulators before
   they have to be flushed, and then runs the common prologue. */
141 #define ICV_SUM_ENTRY_BLOCK_COMMON( block_size )    \
142     int remaining = block_size;                     \
143     ICV_SUM_ENTRY_COMMON()
144
/* ICV_SUM_ENTRY_BLOCK_C<n>( sumtype, worktype, block_size ): declares two sets
   of zero-initialised per-channel variables:
     sum0..sum<n-1> of type `sumtype`  - running totals,
     s0..s<n-1>     of type `worktype` - accumulators for the current block,
   followed by the block prologue above.
   NOTE(review): presumably `worktype` is the narrower/faster type and
   `block_size` is chosen so it cannot overflow within one block - confirm
   against the instantiations. */
145 #define ICV_SUM_ENTRY_BLOCK_C1( sumtype, worktype, block_size ) \
146     sumtype sum0 = 0;                                           \
147     worktype s0 = 0;                                            \
148     ICV_SUM_ENTRY_BLOCK_COMMON( block_size )
149
150 #define ICV_SUM_ENTRY_BLOCK_C2( sumtype, worktype, block_size ) \
151     sumtype sum0 = 0, sum1 = 0;                                 \
152     worktype s0 = 0, s1 = 0;                                    \
153     ICV_SUM_ENTRY_BLOCK_COMMON( block_size )
154
155 #define ICV_SUM_ENTRY_BLOCK_C3( sumtype, worktype, block_size ) \
156     sumtype sum0 = 0, sum1 = 0, sum2 = 0;                       \
157     worktype s0 = 0, s1 = 0, s2 = 0;                            \
158     ICV_SUM_ENTRY_BLOCK_COMMON( block_size )
159
160 #define ICV_SUM_ENTRY_BLOCK_C4( sumtype, worktype, block_size ) \
161     sumtype sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;             \
162     worktype s0 = 0, s1 = 0, s2 = 0, s3 = 0;                    \
163     ICV_SUM_ENTRY_BLOCK_COMMON( block_size )
164
165
166 /////////////////////////////////////// exit macros //////////////////////////////////////
167
/* ICV_SUM_EXIT_C<n>( tmp, sumtype ): stores the n per-channel results into the
   output array `sum`, casting each to `sumtype`.  `tmp` is the name prefix of
   the variables to read: tmp##0 .. tmp##<n-1> (e.g. `s` -> s0, s1, ...;
   `sum` -> sum0, sum1, ...).
   Note that the C1 variant expands without a trailing semicolon while the
   C2..C4 variants end with one; call sites append `;` in every case. */
168 #define ICV_SUM_EXIT_C1( tmp, sumtype )     \
169     sum[0] = (sumtype)tmp##0
170
171 #define ICV_SUM_EXIT_C2( tmp, sumtype )     \
172     sum[0] = (sumtype)tmp##0;               \
173     sum[1] = (sumtype)tmp##1;
174
175 #define ICV_SUM_EXIT_C3( tmp, sumtype )     \
176     sum[0] = (sumtype)tmp##0;               \
177     sum[1] = (sumtype)tmp##1;               \
178     sum[2] = (sumtype)tmp##2;
179
180 #define ICV_SUM_EXIT_C4( tmp, sumtype )     \
181     sum[0] = (sumtype)tmp##0;               \
182     sum[1] = (sumtype)tmp##1;               \
183     sum[2] = (sumtype)tmp##2;               \
184     sum[3] = (sumtype)tmp##3;
185
/* ICV_SUM_EXIT_BLOCK_C<n>( sumtype ): epilogue of the block-wise sum
   functions.  Adds whatever is left in the per-block accumulators s0..s<n-1>
   to the running totals sum0..sum<n-1>, then writes the totals to the output
   array through ICV_SUM_EXIT_C<n> (prefix `sum` selects sum0, sum1, ...). */
186 #define ICV_SUM_EXIT_BLOCK_C1( sumtype )    \
187     sum0 += s0;                             \
188     ICV_SUM_EXIT_C1( sum, sumtype )
189
190 #define ICV_SUM_EXIT_BLOCK_C2( sumtype )    \
191     sum0 += s0; sum1 += s1;                 \
192     ICV_SUM_EXIT_C2( sum, sumtype )
193
194 #define ICV_SUM_EXIT_BLOCK_C3( sumtype )    \
195     sum0 += s0; sum1 += s1;                 \
196     sum2 += s2;                             \
197     ICV_SUM_EXIT_C3( sum, sumtype )
198
199 #define ICV_SUM_EXIT_BLOCK_C4( sumtype )    \
200     sum0 += s0; sum1 += s1;                 \
201     sum2 += s2; sum3 += s3;                 \
202     ICV_SUM_EXIT_C4( sum, sumtype )
203
204 ////////////////////////////////////// update macros /////////////////////////////////////
205
/* Starts a new block: resets the `remaining` element budget. */
206 #define ICV_SUM_UPDATE_COMMON( block_size ) \
207     remaining = block_size
208
/* ICV_SUM_UPDATE_C<n>( block_size ): executed by the block-wise sum functions
   when the current block is exhausted (remaining == 0).  Resets the budget,
   flushes the per-block accumulators s0..s<n-1> into the running totals
   sum0..sum<n-1> and clears the accumulators for the next block.
   The expansion has no trailing semicolon; the call site supplies it. */
209 #define ICV_SUM_UPDATE_C1( block_size )     \
210     ICV_SUM_UPDATE_COMMON( block_size );    \
211     sum0 += s0;                             \
212     s0 = 0
213
214 #define ICV_SUM_UPDATE_C2( block_size )     \
215     ICV_SUM_UPDATE_COMMON( block_size );    \
216     sum0 += s0; sum1 += s1;                 \
217     s0 = s1 = 0
218
219 #define ICV_SUM_UPDATE_C3( block_size )     \
220     ICV_SUM_UPDATE_COMMON( block_size );    \
221     sum0 += s0; sum1 += s1; sum2 += s2;     \
222     s0 = s1 = s2 = 0
223
224 #define ICV_SUM_UPDATE_C4( block_size )     \
225     ICV_SUM_UPDATE_COMMON( block_size );    \
226     sum0 += s0; sum1 += s1;                 \
227     sum2 += s2; sum3 += s3;                 \
228     s0 = s1 = s2 = s3 = 0
229
230
/* ICV_DEF_SUM_NOHINT_BLOCK_FUNC_2D: generates icv<name>_<flavor>_C<cn>R, a
   block-wise per-channel sum over a 2D array of interleaved pixels.

   Macro parameters:
     name, flavor, cn - pasted into the function name; cn (1..4) also selects
                        the ENTRY/CASE/UPDATE/EXIT helper variants.
     __op__           - applied to every element before it is accumulated.
     arrtype          - element type of the source array.
     sumtype_final    - element type of the output array `sum`.
     sumtype          - type of the running totals.
     worktype         - type of the per-block accumulators.
     block_size       - number of pixels per block (scaled by cn to a number
                        of elements).

   Generated function parameters: `src` (first row), `step` (row stride in
   bytes; converted to elements by the entry macro), `size` (width/height in
   pixels), `sum` (receives cn results).  Always returns CV_OK.

   Each row is consumed in pieces of at most `remaining` elements; whenever
   the budget reaches zero the work accumulators are flushed into the totals.
   The budget carries over from one row to the next.
   NOTE(review): the wrapper IPCVAPI_IMPL is defined elsewhere; presumably it
   declares the function together with an optional accelerated-library hook -
   confirm in the cxcore headers. */
231 #define ICV_DEF_SUM_NOHINT_BLOCK_FUNC_2D( name, flavor, cn,     \
232     __op__, arrtype, sumtype_final, sumtype, worktype, block_size )\
233 IPCVAPI_IMPL(CvStatus, icv##name##_##flavor##_C##cn##R,(        \
234     const arrtype* src, int step, CvSize size,                  \
235     sumtype_final* sum ), (src, step, size, sum) )              \
236 {                                                               \
237     ICV_SUM_ENTRY_BLOCK_C##cn(sumtype,worktype,(block_size)*(cn)); \
238     size.width *= cn;                                           \
239                                                                 \
240     for( ; size.height--; src += step )                         \
241     {                                                           \
242         int x = 0;                                              \
243         while( x < size.width )                                 \
244         {                                                       \
245             int limit = MIN( remaining, size.width - x );       \
246             remaining -= limit;                                 \
247             limit += x;                                         \
248             ICV_SUM_CASE_C##cn( __op__, limit );                \
249             if( remaining == 0 )                                \
250             {                                                   \
251                 ICV_SUM_UPDATE_C##cn( (block_size)*(cn) );      \
252             }                                                   \
253         }                                                       \
254     }                                                           \
255                                                                 \
256     ICV_SUM_EXIT_BLOCK_C##cn( sumtype_final );                  \
257     return CV_OK;                                               \
258 }
259
260
/* ICV_DEF_SUM_NOHINT_FUNC_2D: generates icv<name>_<flavor>_C<cn>R, a direct
   (non-blocked) per-channel sum over a 2D array of interleaved pixels.

   The parameter list mirrors the block-wise generator so the two are
   interchangeable in ICV_DEF_SUM_ALL; `worktype` and `block_size` are
   accepted but not used in this body.  Accumulators s0..s<cn-1> have type
   `sumtype` and are written to `sum` cast to `sumtype_final`.

   Generated function parameters: `src` (first row), `step` (row stride in
   bytes), `size` (width/height in pixels), `sum` (receives cn results).
   Always returns CV_OK. */
261 #define ICV_DEF_SUM_NOHINT_FUNC_2D( name, flavor, cn,           \
262     __op__, arrtype, sumtype_final, sumtype, worktype, block_size )\
263 IPCVAPI_IMPL(CvStatus, icv##name##_##flavor##_C##cn##R,(        \
264     const arrtype* src, int step, CvSize size,                  \
265     sumtype_final* sum ), (src, step, size, sum) )              \
266 {                                                               \
267     ICV_SUM_ENTRY_C##cn( sumtype );                             \
268     size.width *= cn;                                           \
269                                                                 \
270     for( ; size.height--; src += step )                         \
271     {                                                           \
272         int x = 0;                                              \
273         ICV_SUM_CASE_C##cn( __op__, size.width );               \
274     }                                                           \
275                                                                 \
276     ICV_SUM_EXIT_C##cn( s, sumtype_final );                     \
277     return CV_OK;                                               \
278 }
279
280
/* ICV_DEF_SUM_HINT_FUNC_2D: same as ICV_DEF_SUM_NOHINT_FUNC_2D, except that
   the generated function takes one extra, unnamed CvHintAlgorithm parameter.
   The body never reads it; cvAlgHintAccurate appears in the argument list
   handed to IPCVAPI_IMPL.
   NOTE(review): presumably the extra parameter exists to match the signature
   of an external accelerated implementation - confirm against IPCVAPI_IMPL.
   `worktype` and `block_size` are accepted but not used in this body. */
281 #define ICV_DEF_SUM_HINT_FUNC_2D( name, flavor, cn,             \
282     __op__, arrtype, sumtype_final, sumtype, worktype, block_size )\
283 IPCVAPI_IMPL(CvStatus, icv##name##_##flavor##_C##cn##R,(        \
284     const arrtype* src, int step, CvSize size,                  \
285     sumtype_final* sum, CvHintAlgorithm /*hint*/ ),             \
286     (src, step, size, sum, cvAlgHintAccurate) )                 \
287 {                                                               \
288     ICV_SUM_ENTRY_C##cn( sumtype );                             \
289     size.width *= cn;                                           \
290                                                                 \
291     for( ; size.height--; src += step )                         \
292     {                                                           \
293         int x = 0;                                              \
294         ICV_SUM_CASE_C##cn( __op__, size.width );               \
295     }                                                           \
296                                                                 \
297     ICV_SUM_EXIT_C##cn( s, sumtype_final );                     \
298     return CV_OK;                                               \
299 }
300
301
/* ICV_DEF_SUM_NOHINT_BLOCK_FUNC_2D_COI: generates the static function
   icv<name>_<flavor>_CnCR, a block-wise sum of a single channel of interest
   inside a multi-channel array.

   Unlike the C<n>R generators, the channel count `cn` and the 1-based channel
   index `coi` are run-time parameters of the generated function.  `src` is
   offset by coi - 1 so that index 0 addresses the selected channel, and
   ICV_SUM_COI_CASE then visits every cn-th element.  Only one accumulator
   pair (s0 / sum0) is used, hence the C1 entry/update/exit helpers; the
   single result is stored in sum[0].  Always returns CV_OK. */
302 #define ICV_DEF_SUM_NOHINT_BLOCK_FUNC_2D_COI( name, flavor,     \
303     __op__, arrtype, sumtype_final, sumtype, worktype, block_size )\
304 static CvStatus CV_STDCALL icv##name##_##flavor##_CnCR(         \
305     const arrtype* src, int step, CvSize size, int cn,          \
306     int coi, sumtype_final* sum )                               \
307 {                                                               \
308     ICV_SUM_ENTRY_BLOCK_C1(sumtype,worktype,(block_size)*(cn)); \
309     size.width *= cn;                                           \
310     src += coi - 1;                                             \
311                                                                 \
312     for( ; size.height--; src += step )                         \
313     {                                                           \
314         int x = 0;                                              \
315         while( x < size.width )                                 \
316         {                                                       \
317             int limit = MIN( remaining, size.width - x );       \
318             remaining -= limit;                                 \
319             limit += x;                                         \
320             ICV_SUM_COI_CASE( __op__, limit, cn );              \
321             if( remaining == 0 )                                \
322             {                                                   \
323                 ICV_SUM_UPDATE_C1( (block_size)*(cn) );         \
324             }                                                   \
325         }                                                       \
326     }                                                           \
327                                                                 \
328     ICV_SUM_EXIT_BLOCK_C1( sumtype_final );                     \
329     return CV_OK;                                               \
330 }
331
332
/* ICV_DEF_SUM_NOHINT_FUNC_2D_COI: generates the static function
   icv<name>_<flavor>_CnCR, a direct (non-blocked) sum of a single channel of
   interest.  `cn` (channel count) and `coi` (1-based channel index) are
   run-time parameters; `src` is offset by coi - 1 and every cn-th element is
   accumulated into s0, which is stored in sum[0].  `worktype` and
   `block_size` are accepted for signature parity with the block-wise
   generator but are not used in this body.  Always returns CV_OK. */
333 #define ICV_DEF_SUM_NOHINT_FUNC_2D_COI( name, flavor,           \
334     __op__, arrtype, sumtype_final, sumtype, worktype, block_size )\
335 static CvStatus CV_STDCALL icv##name##_##flavor##_CnCR(         \
336     const arrtype* src, int step, CvSize size, int cn,          \
337     int coi, sumtype_final* sum )                               \
338 {                                                               \
339     ICV_SUM_ENTRY_C1( sumtype );                                \
340     size.width *= cn;                                           \
341     src += coi - 1;                                             \
342                                                                 \
343     for( ; size.height--; src += step )                         \
344     {                                                           \
345         int x = 0;                                              \
346         ICV_SUM_COI_CASE( __op__, size.width, cn );             \
347     }                                                           \
348                                                                 \
349     ICV_SUM_EXIT_C1( s, sumtype_final );                        \
350     return CV_OK;                                               \
351 }
352
353
/* ICV_DEF_SUM_ALL: instantiates the complete function family for one element
   type: the C1R, C2R, C3R and C4R variants plus the channel-of-interest
   (CnCR) variant.
     hintp_type  - selects the generator for the C<n>R functions
                   (pasted into ICV_DEF_SUM_<hintp_type>_FUNC_2D).
     nohint_type - selects the generator for the CnCR function
                   (pasted into ICV_DEF_SUM_<nohint_type>_FUNC_2D_COI).
   All remaining arguments are forwarded unchanged to the generators. */
354 #define ICV_DEF_SUM_ALL( name, flavor, __op__, arrtype, sumtype_final, sumtype, \
355                          worktype, hintp_type, nohint_type, block_size )        \
356     ICV_DEF_SUM_##hintp_type##_FUNC_2D( name, flavor, 1, __op__, arrtype,       \
357                          sumtype_final, sumtype, worktype, block_size )         \
358     ICV_DEF_SUM_##hintp_type##_FUNC_2D( name, flavor, 2, __op__, arrtype,       \
359                          sumtype_final, sumtype, worktype, block_size )         \
360     ICV_DEF_SUM_##hintp_type##_FUNC_2D( name, flavor, 3, __op__, arrtype,       \
361                          sumtype_final, sumtype, worktype, block_size )         \
362     ICV_DEF_SUM_##hintp_type##_FUNC_2D( name, flavor, 4, __op__, arrtype,       \
363                          sumtype_final, sumtype, worktype, block_size )         \
364     ICV_DEF_SUM_##nohint_type##_FUNC_2D_COI( name, flavor, __op__, arrtype,     \
365                          sumtype_final, sumtype, worktype, block_size )
366
/* Instantiate icvSum_<flavor>_C{1,2,3,4}R and icvSum_<flavor>_CnCR.
   All variants report their results as double.
   - 8u, 16u, 16s: block-wise accumulation in an (unsigned) int work type with
     int64 running totals; the last argument is the block size in pixels.
   - 32s, 32f, 64f: direct accumulation in double (block size unused).
   - 32f: the C<n>R functions use the HINT generator and therefore take an
     extra CvHintAlgorithm argument; the CnCR function does not. */
367 ICV_DEF_SUM_ALL( Sum, 8u, CV_NOP, uchar, double, int64, unsigned,
368                  NOHINT_BLOCK, NOHINT_BLOCK, 1 << 24 )
369 ICV_DEF_SUM_ALL( Sum, 16u, CV_NOP, ushort, double, int64, unsigned,
370                  NOHINT_BLOCK, NOHINT_BLOCK, 1 << 16 )
371 ICV_DEF_SUM_ALL( Sum, 16s, CV_NOP, short, double, int64, int,
372                  NOHINT_BLOCK, NOHINT_BLOCK, 1 << 16 )
373 ICV_DEF_SUM_ALL( Sum, 32s, CV_NOP, int, double, double, double, NOHINT, NOHINT, 0 )
374 ICV_DEF_SUM_ALL( Sum, 32f, CV_NOP, float, double, double, double, HINT, NOHINT, 0 )
375 ICV_DEF_SUM_ALL( Sum, 64f, CV_NOP, double, double, double, double, NOHINT, NOHINT, 0 )
376
/* No 8s implementations are generated; the names are defined as 0.
   NOTE(review): presumably the table initialisers below reference these names,
   leaving null entries that cvSum reports as an unsupported format - confirm
   against CV_DEF_INIT_*_FUNC_TAB_2D. */
377 #define icvSum_8s_C1R   0
378 #define icvSum_8s_C2R   0
379 #define icvSum_8s_C3R   0
380 #define icvSum_8s_C4R   0
381 #define icvSum_8s_CnCR  0
382
/* NOTE(review): these macros are defined elsewhere; presumably they emit
   icvInitSumRTable() and icvInitSumCnCRTable(), which cvSum calls to fill its
   dispatch tables - confirm. */
383 CV_DEF_INIT_BIG_FUNC_TAB_2D( Sum, R )
384 CV_DEF_INIT_FUNC_TAB_2D( Sum, CnCR )
385
/* cvSum: returns the per-channel sum of all elements of `arr`.

   arr - a CvMat, a CvMatND, or anything cvGetMat() can convert to a CvMat
         (in which case a channel of interest may be reported through `coi`).

   Returns a CvScalar whose components start at 0; component k receives the
   sum of channel k.  When a channel of interest is selected on a
   multi-channel array, only val[0] is filled (with that channel's sum).

   Errors are raised through CV_ERROR / CV_CALL / IPPI_CALL for arrays with
   more than 4 channels and for element types that have no entry in the
   dispatch tables.
   NOTE(review): those macros are defined elsewhere; presumably they record the
   error and jump to __END__, so `sum` as accumulated so far is returned.

   The dispatch tables are function-local statics filled on first use; no
   locking is visible around the `inittab` flag. */
386 CV_IMPL CvScalar
387 cvSum( const CvArr* arr )
388 {
389     static CvBigFuncTable sum_tab;
390     static CvFuncTable sumcoi_tab;
391     static int inittab = 0;
392
393     CvScalar sum = {{0,0,0,0}};
394
395     CV_FUNCNAME("cvSum");
396
397     __BEGIN__;
398
399     int type, coi = 0;
400     int mat_step;
401     CvSize size;
402     CvMat stub, *mat = (CvMat*)arr;
403
        // one-time initialisation of the dispatch tables
404     if( !inittab )
405     {
406         icvInitSumRTable( &sum_tab );
407         icvInitSumCnCRTable( &sumcoi_tab );
408         inittab = 1;
409     }
410
411     if( !CV_IS_MAT(mat) )
412     {
413         if( CV_IS_MATND(mat) )
414         {
                // n-dimensional array: sum it slice by slice and accumulate
                // the per-slice results; no channel of interest on this path
415             void* matnd = (void*)mat;
416             CvMatND nstub;
417             CvNArrayIterator iterator;
418             int pass_hint;
419
420             CV_CALL( cvInitNArrayIterator( 1, &matnd, 0, &nstub, &iterator ));
421
422             type = CV_MAT_TYPE(iterator.hdr[0]->type);
423             if( CV_MAT_CN(type) > 4 )
424                 CV_ERROR( CV_StsOutOfRange, "The input array must have at most 4 channels" );
425
                // the 32f functions take an additional hint argument, so they
                // must be called through a different function-pointer type
426             pass_hint = CV_MAT_DEPTH(type) == CV_32F;
427
428             if( !pass_hint )
429             {
430                 CvFunc2D_1A1P func = (CvFunc2D_1A1P)(sum_tab.fn_2d[type]);
431                 if( !func )
432                     CV_ERROR( CV_StsUnsupportedFormat, "" );
433        
434                 do
435                 {
436                     CvScalar temp = {{0,0,0,0}};
437                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
438                                      iterator.size, temp.val ));
439                     sum.val[0] += temp.val[0];
440                     sum.val[1] += temp.val[1];
441                     sum.val[2] += temp.val[2];
442                     sum.val[3] += temp.val[3];
443                 }
444                 while( cvNextNArraySlice( &iterator ));
445             }
446             else
447             {
448                 CvFunc2D_1A1P1I func = (CvFunc2D_1A1P1I)(sum_tab.fn_2d[type]);
449                 if( !func )
450                     CV_ERROR( CV_StsUnsupportedFormat, "" );
451        
452                 do
453                 {
454                     CvScalar temp = {{0,0,0,0}};
455                     IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
456                                      iterator.size, temp.val, cvAlgHintAccurate ));
457                     sum.val[0] += temp.val[0];
458                     sum.val[1] += temp.val[1];
459                     sum.val[2] += temp.val[2];
460                     sum.val[3] += temp.val[3];
461                 }
462                 while( cvNextNArraySlice( &iterator ));
463             }
464             EXIT;
465         }
466         else
                // any other array type: obtain a CvMat view; `coi` receives
                // the channel of interest, if any
467             CV_CALL( mat = cvGetMat( mat, &stub, &coi ));
468     }
469
470     type = CV_MAT_TYPE(mat->type);
471     size = cvGetMatSize( mat );
472
473     mat_step = mat->step;
474
475     if( CV_IS_MAT_CONT( mat->type ))
476     {
            // continuous data: treat the whole matrix as one long row
477         size.width *= size.height;
478         
479         if( size.width <= CV_MAX_INLINE_MAT_OP_SIZE )
480         {
                // small single-channel float/double matrices are summed right
                // here, from the last element to the first; size.width is
                // used as the countdown
481             if( type == CV_32FC1 )
482             {
483                 float* data = mat->data.fl;
484
485                 do
486                 {
487                     sum.val[0] += data[size.width - 1];
488                 }
489                 while( --size.width );
490
491                 EXIT;
492             }
493
494             if( type == CV_64FC1 )
495             {
496                 double* data = mat->data.db;
497
498                 do
499                 {
500                     sum.val[0] += data[size.width - 1];
501                 }
502                 while( --size.width );
503
504                 EXIT;
505             }
506         }
507         size.height = 1;
508         mat_step = CV_STUB_STEP;
509     }
510
511     if( CV_MAT_CN(type) == 1 || coi == 0 )
512     {
            // all channels are summed: dispatch on the full type (depth and
            // channel count)
513         int pass_hint = CV_MAT_DEPTH(type) == CV_32F;
514
515         if( CV_MAT_CN(type) > 4 )
516             CV_ERROR( CV_StsOutOfRange, "The input array must have at most 4 channels" );
517
518         if( !pass_hint )
519         {
520             CvFunc2D_1A1P func = (CvFunc2D_1A1P)(sum_tab.fn_2d[type]);
521
522             if( !func )
523                 CV_ERROR( CV_StsBadArg, cvUnsupportedFormat );
524
525             IPPI_CALL( func( mat->data.ptr, mat_step, size, sum.val ));
526         }
527         else
528         {
529             CvFunc2D_1A1P1I func = (CvFunc2D_1A1P1I)(sum_tab.fn_2d[type]);
530
531             if( !func )
532                 CV_ERROR( CV_StsBadArg, cvUnsupportedFormat );
533
534             IPPI_CALL( func( mat->data.ptr, mat_step, size, sum.val, cvAlgHintAccurate ));
535         }
536     }
537     else
538     {
            // a single channel of a multi-channel array: dispatch on depth
            // only and pass the channel count and channel index at run time
539         CvFunc2DnC_1A1P func = (CvFunc2DnC_1A1P)(sumcoi_tab.fn_2d[CV_MAT_DEPTH(type)]);
540
541         if( !func )
542             CV_ERROR( CV_StsBadArg, cvUnsupportedFormat );
543
544         IPPI_CALL( func( mat->data.ptr, mat_step, size,
545                          CV_MAT_CN(type), coi, sum.val ));
546     }
547
548     __END__;
549
550     return  sum;
551 }
552
553
/* ICV_DEF_NONZERO_ALL: generates icvCountNonZero_<flavor>_C1R and
   icvCountNonZero_<flavor>_CnCR by reusing the sum generators.  __op__ is the
   non-zero predicate applied to each element, and all accumulator types as
   well as the result type are int, so the "sum" is the number of elements
   for which the predicate is true. */
554 #define ICV_DEF_NONZERO_ALL( flavor, __op__, arrtype )              \
555     ICV_DEF_SUM_NOHINT_FUNC_2D( CountNonZero, flavor, 1, __op__,    \
556                                 arrtype, int, int, int, 0 )         \
557     ICV_DEF_SUM_NOHINT_FUNC_2D_COI( CountNonZero, flavor, __op__,   \
558                                     arrtype, int, int, int, 0 )
559
/* Local redefinition: an element (read as int64 by the 64f instantiation
   below) is non-zero if any bit other than the topmost one is set.
   NOTE(review): presumably this makes both +0.0 and -0.0 count as zero -
   confirm the intended floating-point representation. */
560 #undef  CV_NONZERO_DBL
561 #define CV_NONZERO_DBL(x) (((x) & CV_BIG_INT(0x7fffffffffffffff)) != 0)
562
/* The 16s flavour reads the data as ushort, and the 32f / 64f flavours read
   it as int / int64 and test the bit pattern via CV_NONZERO_FLT /
   CV_NONZERO_DBL (CV_NONZERO and CV_NONZERO_FLT are defined elsewhere). */
563 ICV_DEF_NONZERO_ALL( 8u, CV_NONZERO, uchar )
564 ICV_DEF_NONZERO_ALL( 16s, CV_NONZERO, ushort )
565 ICV_DEF_NONZERO_ALL( 32s, CV_NONZERO, int )
566 ICV_DEF_NONZERO_ALL( 32f, CV_NONZERO_FLT, int )
567 ICV_DEF_NONZERO_ALL( 64f, CV_NONZERO_DBL, int64 )
568
/* 8s and 16u reuse the functions of the same-sized 8u and 16s flavours. */
569 #define icvCountNonZero_8s_C1R icvCountNonZero_8u_C1R
570 #define icvCountNonZero_8s_CnCR icvCountNonZero_8u_CnCR
571 #define icvCountNonZero_16u_C1R icvCountNonZero_16s_C1R
572 #define icvCountNonZero_16u_CnCR icvCountNonZero_16s_CnCR
573
/* NOTE(review): presumably these emit icvInitCountNonZeroC1RTable() and
   icvInitCountNonZeroCnCRTable(), which cvCountNonZero calls - confirm. */
574 CV_DEF_INIT_FUNC_TAB_2D( CountNonZero, C1R )
575 CV_DEF_INIT_FUNC_TAB_2D( CountNonZero, CnCR )
576
/* cvCountNonZero: returns the number of non-zero elements in `arr`.

   arr - a CvMat, a CvMatND, or anything cvGetMat() can convert to a CvMat.
         The array must be single-channel, or multi-channel with a channel of
         interest reported by cvGetMat(); CvMatND input must be
         single-channel.

   Errors are raised through CV_ERROR / CV_CALL / IPPI_CALL when more than one
   channel would have to be processed or when the depth has no entry in the
   dispatch tables.
   NOTE(review): those macros are defined elsewhere; presumably they record the
   error and jump to __END__, so `count` as accumulated so far is returned.

   The dispatch tables are function-local statics filled on first use; no
   locking is visible around the `inittab` flag. */
577 CV_IMPL int
578 cvCountNonZero( const CvArr* arr )
579 {
580     static CvFuncTable nz_tab;
581     static CvFuncTable nzcoi_tab;
582     static int inittab = 0;
583
584     int count = 0;
585
586     CV_FUNCNAME("cvCountNonZero");
587
588     __BEGIN__;
589
590     int type, coi = 0;
591     int mat_step;
592     CvSize size;
593     CvMat stub, *mat = (CvMat*)arr;
594
        // one-time initialisation of the dispatch tables
595     if( !inittab )
596     {
597         icvInitCountNonZeroC1RTable( &nz_tab );
598         icvInitCountNonZeroCnCRTable( &nzcoi_tab );
599         inittab = 1;
600     }
601
602     if( !CV_IS_MAT(mat) )
603     {
604         if( CV_IS_MATND(mat) )
605         {
                // n-dimensional array: count slice by slice and add up
606             void* matnd = (void*)arr;
607             CvMatND nstub;
608             CvNArrayIterator iterator;
609             CvFunc2D_1A1P func;
610
611             CV_CALL( cvInitNArrayIterator( 1, &matnd, 0, &nstub, &iterator ));
612
613             type = CV_MAT_TYPE(iterator.hdr[0]->type);
614
615             if( CV_MAT_CN(type) != 1 )
616                 CV_ERROR( CV_BadNumChannels,
617                     "Only single-channel array are supported here" );
618
619             func = (CvFunc2D_1A1P)(nz_tab.fn_2d[CV_MAT_DEPTH(type)]);
620             if( !func )
621                 CV_ERROR( CV_StsUnsupportedFormat, "" );
622        
623             do
624             {
                    // written by func before it is read
625                 int temp;
626                 IPPI_CALL( func( iterator.ptr[0], CV_STUB_STEP,
627                                  iterator.size, &temp ));
628                 count += temp;
629             }
630             while( cvNextNArraySlice( &iterator ));
631             EXIT;
632         }
633         else
                // any other array type: obtain a CvMat view; `coi` receives
                // the channel of interest, if any
634             CV_CALL( mat = cvGetMat( mat, &stub, &coi ));
635     }
636
637     type = CV_MAT_TYPE(mat->type);
638     size = cvGetMatSize( mat );
639
640     mat_step = mat->step;
641
642     if( CV_IS_MAT_CONT( mat->type ))
643     {
            // continuous data: treat the whole matrix as one long row
644         size.width *= size.height;
645         size.height = 1;
646         mat_step = CV_STUB_STEP;
647     }
648
649     if( CV_MAT_CN(type) == 1 || coi == 0 )
650     {
            // no channel selection: only single-channel input is accepted
651         CvFunc2D_1A1P func = (CvFunc2D_1A1P)(nz_tab.fn_2d[CV_MAT_DEPTH(type)]);
652
653         if( CV_MAT_CN(type) != 1 )
654             CV_ERROR( CV_BadNumChannels,
655             "The function can handle only a single channel at a time (use COI)");
656
657         if( !func )
658             CV_ERROR( CV_StsBadArg, cvUnsupportedFormat );
659
660         IPPI_CALL( func( mat->data.ptr, mat_step, size, &count ));
661     }
662     else
663     {
            // count within the selected channel of a multi-channel array
664         CvFunc2DnC_1A1P func = (CvFunc2DnC_1A1P)(nzcoi_tab.fn_2d[CV_MAT_DEPTH(type)]);
665
666         if( !func )
667             CV_ERROR( CV_StsBadArg, cvUnsupportedFormat );
668
669         IPPI_CALL( func( mat->data.ptr, mat_step, size, CV_MAT_CN(type), coi, &count ));
670     }
671
672     __END__;
673
674     return  count;
675 }
676
677
678 /****************************************************************************************\
679 *                                Reduce Matrix to Vector                                 *
680 \****************************************************************************************/
681
/* ICV_ACC_ROWS_FUNC: generates the static function
   icv<name>Rows_<flavor>_C1R, which folds all rows of a 2D array into a
   single row: dst[i] = row0[i] __op__ row1[i] __op__ ... for every column i.

   Macro parameters:
     name, flavor - pasted into the function name.
     arrtype      - element type of the source rows.
     acctype      - element type of the destination row.
     __op__       - binary combiner, invoked as __op__(accumulated, loaded).
     load_macro   - applied to each source element before it is combined.

   Generated function parameters: `src` (first row), `srcstep` (row stride in
   bytes, converted to elements on entry), `dst` (receives size.width
   results), `size` (width/height in elements).  Always returns CV_OK.

   The destination is first initialised from row 0; each following row is then
   combined into it, four columns per iteration with a scalar tail loop.
   NOTE(review): the row loop tests `--size.height`, so it relies on
   size.height >= 1; with a height of 0 the counter would become negative and
   the loop would keep running.  Callers presumably guarantee a non-empty
   array - confirm. */
682 #define ICV_ACC_ROWS_FUNC( name, flavor, arrtype, acctype,      \
683                            __op__, load_macro )                 \
684 static CvStatus CV_STDCALL                                      \
685 icv##name##Rows_##flavor##_C1R( const arrtype* src, int srcstep,\
686                            acctype* dst, CvSize size )          \
687 {                                                               \
688     int i, width = size.width;                                  \
689     srcstep /= sizeof(src[0]);                                  \
690                                                                 \
691     for( i = 0; i < width; i++ )                                \
692         dst[i] = load_macro(src[i]);                            \
693                                                                 \
694     for( ; --size.height;  )                                    \
695     {                                                           \
696         src += srcstep;                                         \
697         for( i = 0; i <= width - 4; i += 4 )                    \
698         {                                                       \
699             acctype s0 = load_macro(src[i]);                    \
700             acctype s1 = load_macro(src[i+1]);                  \
701             acctype a0 = dst[i], a1 = dst[i+1];                 \
702             a0 = (acctype)__op__(a0,s0); a1 = (acctype)__op__(a1,s1); \
703             dst[i] = a0; dst[i+1] = a1;                         \
704                                                                 \
705             s0 = load_macro(src[i+2]);                          \
706             s1 = load_macro(src[i+3]);                          \
707             a0 = dst[i+2]; a1 = dst[i+3];                       \
708             a0 = (acctype)__op__(a0,s0); a1 = (acctype)__op__(a1,s1);  \
709             dst[i+2] = a0; dst[i+3] = a1;                       \
710         }                                                       \
711                                                                 \
712         for( ; i < width; i++ )                                 \
713         {                                                       \
714             acctype s0 = load_macro(src[i]), a0 = dst[i];       \
715             a0 = (acctype)__op__(a0,s0);                        \
716             dst[i] = a0;                                        \
717         }                                                       \
718     }                                                           \
719                                                                 \
720     return CV_OK;                                               \
721 }
722
723
/*
   ICV_ACC_COLS_FUNC_C1( name, flavor, arrtype, worktype, acctype, __op__ )

   Defines the static single-channel kernel icv<name>Cols_<flavor>_C1R.
   For every source row it folds all elements of the row into one value
   with __op__ and stores that value in dst[0] of the matching
   destination row.

   src, srcstep - source rows of arrtype elements. srcstep is divided by
                  sizeof(src[0]) before it is used as a pointer increment,
                  i.e. it is passed in bytes.
   dst, dststep - destination rows of acctype elements; dststep is divided
                  by sizeof(dst[0]) in the same way.
   size         - size.width elements are folded per row and size.height
                  rows are processed.
   Returns CV_OK unconditionally.

   Intermediate values live in worktype and are cast to acctype only when
   stored. A row of width 1 is copied as is. Otherwise two accumulators are
   seeded from src[0] and src[1]; the loop unrolled by four feeds src[i] and
   src[i+2] into a0 and src[i+1] and src[i+3] into a1, the tail loop folds
   the remaining elements into a0, and a0 and a1 are combined at the end.
   The combination order is therefore not strictly left to right, which can
   matter for floating-point sums.

   There is no guard for width < 1: any width other than 1 reads src[0]
   and src[1].
*/
724 #define ICV_ACC_COLS_FUNC_C1( name, flavor, arrtype, worktype, acctype, __op__ )\
725 static CvStatus CV_STDCALL                                              \
726 icv##name##Cols_##flavor##_C1R( const arrtype* src, int srcstep,        \
727                                 acctype* dst, int dststep, CvSize size )\
728 {                                                                       \
729     int i, width = size.width;                                          \
730     srcstep /= sizeof(src[0]);                                          \
731     dststep /= sizeof(dst[0]);                                          \
732                                                                         \
733     for( ; size.height--; src += srcstep, dst += dststep )              \
734     {                                                                   \
735         if( width == 1 )                                                \
736             dst[0] = (acctype)src[0];                                   \
737         else                                                            \
738         {                                                               \
739             worktype a0 = src[0], a1 = src[1];                          \
740             for( i = 2; i <= width - 4; i += 4 )                        \
741             {                                                           \
742                 worktype s0 = src[i], s1 = src[i+1];                    \
743                 a0 = __op__(a0, s0);                                    \
744                 a1 = __op__(a1, s1);                                    \
745                 s0 = src[i+2]; s1 = src[i+3];                           \
746                 a0 = __op__(a0, s0);                                    \
747                 a1 = __op__(a1, s1);                                    \
748             }                                                           \
749                                                                         \
750             for( ; i < width; i++ )                                     \
751             {                                                           \
752                 worktype s0 = src[i];                                   \
753                 a0 = __op__(a0, s0);                                    \
754             }                                                           \
755             a0 = __op__(a0, a1);                                        \
756             dst[0] = (acctype)a0;                                       \
757         }                                                               \
758     }                                                                   \
759                                                                         \
760     return CV_OK;                                                       \
761 }
762
763
/*
   ICV_ACC_COLS_FUNC_C3( name, flavor, arrtype, worktype, acctype, __op__ )

   Defines the static three-channel kernel icv<name>Cols_<flavor>_C3R.
   Each source row is treated as size.width interleaved triples; the three
   channels are folded independently with __op__ and the three results are
   written to dst[0], dst[1] and dst[2] of the matching destination row.

   src, srcstep - source rows of arrtype elements; srcstep is divided by
                  sizeof(src[0]) before use, i.e. it is passed in bytes.
   dst, dststep - destination rows of acctype elements; dststep is divided
                  by sizeof(dst[0]) in the same way.
   size         - size.width pixels (3*size.width elements) per row,
                  size.height rows.
   Returns CV_OK unconditionally.

   The accumulators are seeded from the first pixel and kept in worktype;
   the cast to acctype happens only at the store. The first pixel is read
   without checking the width, so the row must hold at least one pixel.
*/
764 #define ICV_ACC_COLS_FUNC_C3( name, flavor, arrtype, worktype, acctype, __op__ ) \
765 static CvStatus CV_STDCALL                                              \
766 icv##name##Cols_##flavor##_C3R( const arrtype* src, int srcstep,        \
767                                 acctype* dst, int dststep, CvSize size )\
768 {                                                                       \
769     int i, width = size.width*3;                                        \
770     srcstep /= sizeof(src[0]);                                          \
771     dststep /= sizeof(dst[0]);                                          \
772                                                                         \
773     for( ; size.height--; src += srcstep, dst += dststep )              \
774     {                                                                   \
775         worktype a0 = src[0], a1 = src[1], a2 = src[2];                 \
776         for( i = 3; i < width; i += 3 )                                 \
777         {                                                               \
778             worktype s0 = src[i], s1 = src[i+1], s2 = src[i+2];         \
779             a0 = __op__(a0, s0);                                        \
780             a1 = __op__(a1, s1);                                        \
781             a2 = __op__(a2, s2);                                        \
782         }                                                               \
783                                                                         \
784         dst[0] = (acctype)a0;                                           \
785         dst[1] = (acctype)a1;                                           \
786         dst[2] = (acctype)a2;                                           \
787     }                                                                   \
788                                                                         \
789     return CV_OK;                                                       \
790 }
791
792
/*
   ICV_ACC_COLS_FUNC_C4( name, flavor, arrtype, worktype, acctype, __op__ )

   Defines the static four-channel kernel icv<name>Cols_<flavor>_C4R.
   Each source row is treated as size.width interleaved quadruples; the four
   channels are folded independently with __op__ and the four results are
   written to dst[0] .. dst[3] of the matching destination row.

   src, srcstep - source rows of arrtype elements; srcstep is divided by
                  sizeof(src[0]) before use, i.e. it is passed in bytes.
   dst, dststep - destination rows of acctype elements; dststep is divided
                  by sizeof(dst[0]) in the same way.
   size         - size.width pixels (4*size.width elements) per row,
                  size.height rows.
   Returns CV_OK unconditionally.

   The accumulators a0 .. a3 are seeded from the first pixel and kept in
   worktype; s0 and s1 are reused as scratch for the second pair of
   channels. The first pixel is read without checking the width, so the
   row must hold at least one pixel.
*/
793 #define ICV_ACC_COLS_FUNC_C4( name, flavor, arrtype, worktype, acctype, __op__ ) \
794 static CvStatus CV_STDCALL                                              \
795 icv##name##Cols_##flavor##_C4R( const arrtype* src, int srcstep,        \
796                                 acctype* dst, int dststep, CvSize size )\
797 {                                                                       \
798     int i, width = size.width*4;                                        \
799     srcstep /= sizeof(src[0]);                                          \
800     dststep /= sizeof(dst[0]);                                          \
801                                                                         \
802     for( ; size.height--; src += srcstep, dst += dststep )              \
803     {                                                                   \
804         worktype a0 = src[0], a1 = src[1], a2 = src[2], a3 = src[3];    \
805         for( i = 4; i < width; i += 4 )                                 \
806         {                                                               \
807             worktype s0 = src[i], s1 = src[i+1];                        \
808             a0 = __op__(a0, s0);                                        \
809             a1 = __op__(a1, s1);                                        \
810             s0 = src[i+2]; s1 = src[i+3];                               \
811             a2 = __op__(a2, s0);                                        \
812             a3 = __op__(a3, s1);                                        \
813         }                                                               \
814                                                                         \
815         dst[0] = (acctype)a0;                                           \
816         dst[1] = (acctype)a1;                                           \
817         dst[2] = (acctype)a2;                                           \
818         dst[3] = (acctype)a3;                                           \
819     }                                                                   \
820                                                                         \
821     return CV_OK;                                                       \
822 }
823
824
/* Kernels that collapse all rows into a single row. cvReduce refers to the
   generated functions as icv<Name>Rows_<flavor>_C1R. The macro's parameter
   list is declared earlier in the file; the arguments appear to be
   ( name, flavor, source type, accumulator type, combining op, load macro )
   - TODO confirm against the macro header. */
/* Sum: the flavor names the source depth followed by the accumulator depth. */
825 ICV_ACC_ROWS_FUNC( Sum, 8u32s, uchar, int, CV_ADD, CV_NOP )
826 ICV_ACC_ROWS_FUNC( Sum, 8u32f, uchar, float, CV_ADD, CV_8TO32F )
827 ICV_ACC_ROWS_FUNC( Sum, 16u32f, ushort, float, CV_ADD, CV_NOP )
828 ICV_ACC_ROWS_FUNC( Sum, 16u64f, ushort, double, CV_ADD, CV_NOP )
829 ICV_ACC_ROWS_FUNC( Sum, 16s32f, short, float, CV_ADD, CV_NOP )
830 ICV_ACC_ROWS_FUNC( Sum, 16s64f, short, double, CV_ADD, CV_NOP )
831 ICV_ACC_ROWS_FUNC( Sum, 32f, float, float, CV_ADD, CV_NOP )
832 ICV_ACC_ROWS_FUNC( Sum, 32f64f, float, double, CV_ADD, CV_NOP )
833 ICV_ACC_ROWS_FUNC( Sum, 64f, double, double, CV_ADD, CV_NOP )
834
/* Max and Min keep the source depth (8u, 32f and 64f only). */
835 ICV_ACC_ROWS_FUNC( Max, 8u, uchar, uchar, CV_MAX_8U, CV_NOP )
836 ICV_ACC_ROWS_FUNC( Max, 32f, float, float, MAX, CV_NOP )
837 ICV_ACC_ROWS_FUNC( Max, 64f, double, double, MAX, CV_NOP )
838
839 ICV_ACC_ROWS_FUNC( Min, 8u, uchar, uchar, CV_MIN_8U, CV_NOP )
840 ICV_ACC_ROWS_FUNC( Min, 32f, float, float, MIN, CV_NOP )
841 ICV_ACC_ROWS_FUNC( Min, 64f, double, double, MIN, CV_NOP )
842
/* Kernels that collapse all columns into a single column:
   ( name, flavor, arrtype, worktype, acctype, __op__ ), producing
   icv<name>Cols_<flavor>_C<n>R. For the 8u flavors the intermediate
   worktype is int even when the stored acctype is float or uchar. */
/* Single-channel sums for every supported depth pair. */
843 ICV_ACC_COLS_FUNC_C1( Sum, 8u32s, uchar, int, int, CV_ADD )
844 ICV_ACC_COLS_FUNC_C1( Sum, 8u32f, uchar, int, float, CV_ADD )
845 ICV_ACC_COLS_FUNC_C1( Sum, 16u32f, ushort, float, float, CV_ADD )
846 ICV_ACC_COLS_FUNC_C1( Sum, 16u64f, ushort, double, double, CV_ADD )
847 ICV_ACC_COLS_FUNC_C1( Sum, 16s32f, short, float, float, CV_ADD )
848 ICV_ACC_COLS_FUNC_C1( Sum, 16s64f, short, double, double, CV_ADD )
849
850 ICV_ACC_COLS_FUNC_C1( Sum, 32f, float, float, float, CV_ADD )
851 ICV_ACC_COLS_FUNC_C1( Sum, 32f64f, float, double, double, CV_ADD )
852 ICV_ACC_COLS_FUNC_C1( Sum, 64f, double, double, double, CV_ADD )
/* Three- and four-channel sums exist only for 8u32s, 8u32f, 32f and 64f;
   cvReduce rejects the other depth pairs when cn != 1. */
853 ICV_ACC_COLS_FUNC_C3( Sum, 8u32s, uchar, int, int, CV_ADD )
854 ICV_ACC_COLS_FUNC_C3( Sum, 8u32f, uchar, int, float, CV_ADD )
855 ICV_ACC_COLS_FUNC_C3( Sum, 32f, float, float, float, CV_ADD )
856 ICV_ACC_COLS_FUNC_C3( Sum, 64f, double, double, double, CV_ADD )
857 ICV_ACC_COLS_FUNC_C4( Sum, 8u32s, uchar, int, int, CV_ADD )
858 ICV_ACC_COLS_FUNC_C4( Sum, 8u32f, uchar, int, float, CV_ADD )
859 ICV_ACC_COLS_FUNC_C4( Sum, 32f, float, float, float, CV_ADD )
860 ICV_ACC_COLS_FUNC_C4( Sum, 64f, double, double, double, CV_ADD )
861
/* Column-wise Max and Min are instantiated for one channel only, which is
   why cvReduce requires cn == 1 for these operations when dim == 1. */
862 ICV_ACC_COLS_FUNC_C1( Max, 8u, uchar, int, uchar, CV_MAX_8U )
863 ICV_ACC_COLS_FUNC_C1( Max, 32f, float, float, float, MAX )
864 ICV_ACC_COLS_FUNC_C1( Max, 64f, double, double, double, MAX )
865
866 ICV_ACC_COLS_FUNC_C1( Min, 8u, uchar, int, uchar, CV_MIN_8U )
867 ICV_ACC_COLS_FUNC_C1( Min, 32f, float, float, float, MIN )
868 ICV_ACC_COLS_FUNC_C1( Min, 64f, double, double, double, MIN )
869
/* Type-erased signature through which cvReduce calls the row-collapsing
   kernels (icv<Name>Rows_<flavor>_C1R): source pointer and step, destination
   pointer, source size. No destination step is passed. */
870 typedef CvStatus (CV_STDCALL * CvReduceToRowFunc)
871     ( const void* src, int srcstep, void* dst, CvSize size );
872
/* Type-erased signature through which cvReduce calls the column-collapsing
   kernels (icv<name>Cols_<flavor>_C<n>R): source pointer and step,
   destination pointer and step, source size. */
873 typedef CvStatus (CV_STDCALL * CvReduceToColFunc)
874     ( const void* src, int srcstep, void* dst, int dststep, CvSize size );
875
876
/*
   cvReduce: collapses a matrix into a single row or a single column.

   srcarr - input array; converted with cvGetMat when it is not a CvMat.
   dstarr - output array with the same number of channels as the input.
            It must be 1 x src->cols when dim == 0 and src->rows x 1 when
            dim == 1.
   dim    - 0 combines all rows into one row, 1 combines all columns into
            one column; a negative value makes the function deduce the
            dimension from the array sizes.
   op     - CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX or CV_REDUCE_MIN.
            The average is computed as a sum followed by a scaling step.

   Errors reported through CV_ERROR:
     CV_StsUnmatchedFormats  - channel counts differ
     CV_StsOutOfRange        - dim is greater than 1
     CV_StsBadSize           - the output size does not fit dim
     CV_StsBadArg            - op is not one of the values listed above
     CV_StsUnsupportedFormat - no kernel exists for the combination of
                               source depth, destination depth, channel
                               count and operation
*/
877 CV_IMPL void
878 cvReduce( const CvArr* srcarr, CvArr* dstarr, int dim, int op )
879 {
        // Declared ahead of __BEGIN__ so that it is still visible to the release
        // code that follows __END__. Presumably CV_CALL and CV_ERROR transfer
        // control to __END__ on failure - confirm against the macro definitions.
880     CvMat* temp = 0;
881     
882     CV_FUNCNAME( "cvReduce" );
883
884     __BEGIN__;
885
886     CvMat sstub, *src = (CvMat*)srcarr;
887     CvMat dstub, *dst = (CvMat*)dstarr, *dst0;
888     int sdepth, ddepth, cn, op0 = op;
889     CvSize size;
890
        // Accept any array type by obtaining a CvMat header for it.
891     if( !CV_IS_MAT(src) )
892         CV_CALL( src = cvGetMat( src, &sstub ));
893
894     if( !CV_IS_MAT(dst) )
895         CV_CALL( dst = cvGetMat( dst, &dstub ));
896
897     if( !CV_ARE_CNS_EQ(src, dst) )
898         CV_ERROR( CV_StsUnmatchedFormats, "Input and output arrays must have the same number of channels" );
899
900     sdepth = CV_MAT_DEPTH(src->type);
901     ddepth = CV_MAT_DEPTH(dst->type);
902     cn = CV_MAT_CN(src->type);
        // Remember the caller's destination; dst may be redirected to a
        // temporary accumulator below.
903     dst0 = dst;
904
905     size = cvGetMatSize(src);
906
        // Automatic dimension: 0 when the source has more rows than the
        // destination, otherwise 1 when it has more columns, otherwise 1 only
        // if the destination is a single column.
907     if( dim < 0 )
908         dim = src->rows > dst->rows ? 0 : src->cols > dst->cols ? 1 : dst->cols == 1;
909
910     if( dim > 1 )
911         CV_ERROR( CV_StsOutOfRange, "The reduced dimensionality index is out of range" );
912
        // && binds tighter than ||, so this is (dim == 0 && ...) || (dim == 1 && ...).
913     if( dim == 0 && (dst->cols != src->cols || dst->rows != 1) ||
914         dim == 1 && (dst->rows != src->rows || dst->cols != 1) )
915         CV_ERROR( CV_StsBadSize, "The output array size is incorrect" );
916
        // Averaging is a sum followed by scaling. For 8-bit sources the sum is
        // accumulated in a temporary 32-bit integer matrix; ddepth is updated
        // so that the kernel lookup below sees the accumulator depth.
        // NOTE(review): ttype is built with CV_MAKETYPE while dst->type is the raw
        // header field; if that field carries bits beyond depth and channels,
        // the comparison is always true for 8-bit sources and a temporary is
        // allocated even when dst is already 32-bit integer - confirm.
917     if( op == CV_REDUCE_AVG )
918     {
919         int ttype = sdepth == CV_8U ? CV_MAKETYPE(CV_32S,cn) : dst->type;
920         if( ttype != dst->type )
921             CV_CALL( dst = temp = cvCreateMat( dst->rows, dst->cols, ttype ));
922         op = CV_REDUCE_SUM;
923         ddepth = CV_MAT_DEPTH(ttype);
924     }
925
926     if( op != CV_REDUCE_SUM && op != CV_REDUCE_MAX && op != CV_REDUCE_MIN )
927         CV_ERROR( CV_StsBadArg, "Unknown reduce operation index, must be one of CV_REDUCE_*" );
928
929     if( dim == 0 )
930     {
            // Collapse rows: choose the kernel by operation and by the
            // (source depth, accumulator depth) pair; 0 marks an unsupported pair.
            // The last group needs no explicit test because op was validated
            // above, so it can only be CV_REDUCE_MIN there.
931         CvReduceToRowFunc rfunc =
932             op == CV_REDUCE_SUM ?
933             (sdepth == CV_8U && ddepth == CV_32S ? (CvReduceToRowFunc)icvSumRows_8u32s_C1R :
934              sdepth == CV_8U && ddepth == CV_32F ? (CvReduceToRowFunc)icvSumRows_8u32f_C1R :
935              sdepth == CV_16U && ddepth == CV_32F ? (CvReduceToRowFunc)icvSumRows_16u32f_C1R :
936              sdepth == CV_16U && ddepth == CV_64F ? (CvReduceToRowFunc)icvSumRows_16u64f_C1R :
937              sdepth == CV_16S && ddepth == CV_32F ? (CvReduceToRowFunc)icvSumRows_16s32f_C1R :
938              sdepth == CV_16S && ddepth == CV_64F ? (CvReduceToRowFunc)icvSumRows_16s64f_C1R :
939              sdepth == CV_32F && ddepth == CV_32F ? (CvReduceToRowFunc)icvSumRows_32f_C1R :
940              sdepth == CV_32F && ddepth == CV_64F ? (CvReduceToRowFunc)icvSumRows_32f64f_C1R :        
941              sdepth == CV_64F && ddepth == CV_64F ? (CvReduceToRowFunc)icvSumRows_64f_C1R : 0) :
942             op == CV_REDUCE_MAX ?
943             (sdepth == CV_8U && ddepth == CV_8U ? (CvReduceToRowFunc)icvMaxRows_8u_C1R :
944              sdepth == CV_32F && ddepth == CV_32F ? (CvReduceToRowFunc)icvMaxRows_32f_C1R :
945              sdepth == CV_64F && ddepth == CV_64F ? (CvReduceToRowFunc)icvMaxRows_64f_C1R : 0) :
946
947             (sdepth == CV_8U && ddepth == CV_8U ? (CvReduceToRowFunc)icvMinRows_8u_C1R :
948              sdepth == CV_32F && ddepth == CV_32F ? (CvReduceToRowFunc)icvMinRows_32f_C1R :
949              sdepth == CV_64F && ddepth == CV_64F ? (CvReduceToRowFunc)icvMinRows_64f_C1R : 0);
950
951         if( !rfunc )
952             CV_ERROR( CV_StsUnsupportedFormat,
953             "Unsupported combination of input and output array formats" );
954
            // The row kernels are single-channel, so a multi-channel row is
            // processed as width*cn independent elements.
955         size.width *= cn;
            // A zero step is replaced by CV_STUB_STEP before the call
            // (presumably the step of a single-row matrix may be 0 - confirm).
956         IPPI_CALL( rfunc( src->data.ptr, src->step ? src->step : CV_STUB_STEP,
957                           dst->data.ptr, size ));
958     }
959     else
960     {
            // Collapse columns: the kernel additionally depends on the channel
            // count. Sums support 1, 3 and 4 channels for the 8u->32s, 8u->32f,
            // 32f and 64f pairs and 1 channel for the remaining pairs; max and
            // min support 1 channel only. Everything else yields 0.
961         CvReduceToColFunc cfunc =
962             op == CV_REDUCE_SUM ?
963             (sdepth == CV_8U && ddepth == CV_32S ?
964             (CvReduceToColFunc)(cn == 1 ? icvSumCols_8u32s_C1R :
965                                 cn == 3 ? icvSumCols_8u32s_C3R :
966                                 cn == 4 ? icvSumCols_8u32s_C4R : 0) :
967              sdepth == CV_8U && ddepth == CV_32F ?
968             (CvReduceToColFunc)(cn == 1 ? icvSumCols_8u32f_C1R :
969                                 cn == 3 ? icvSumCols_8u32f_C3R :
970                                 cn == 4 ? icvSumCols_8u32f_C4R : 0) :
971              sdepth == CV_16U && ddepth == CV_32F ?
972             (CvReduceToColFunc)(cn == 1 ? icvSumCols_16u32f_C1R : 0) :
973              sdepth == CV_16U && ddepth == CV_64F ?
974             (CvReduceToColFunc)(cn == 1 ? icvSumCols_16u64f_C1R : 0) :
975              sdepth == CV_16S && ddepth == CV_32F ?
976             (CvReduceToColFunc)(cn == 1 ? icvSumCols_16s32f_C1R : 0) :
977              sdepth == CV_16S && ddepth == CV_64F ?
978             (CvReduceToColFunc)(cn == 1 ? icvSumCols_16s64f_C1R : 0) :
979              sdepth == CV_32F && ddepth == CV_32F ?
980             (CvReduceToColFunc)(cn == 1 ? icvSumCols_32f_C1R :
981                                 cn == 3 ? icvSumCols_32f_C3R :
982                                 cn == 4 ? icvSumCols_32f_C4R : 0) :
983              sdepth == CV_32F && ddepth == CV_64F ?
984             (CvReduceToColFunc)(cn == 1 ? icvSumCols_32f64f_C1R : 0) :
985              sdepth == CV_64F && ddepth == CV_64F ?
986             (CvReduceToColFunc)(cn == 1 ? icvSumCols_64f_C1R :
987                                 cn == 3 ? icvSumCols_64f_C3R :
988                                 cn == 4 ? icvSumCols_64f_C4R : 0) : 0) :
989              op == CV_REDUCE_MAX && cn == 1 ?
990              (sdepth == CV_8U && ddepth == CV_8U ? (CvReduceToColFunc)icvMaxCols_8u_C1R :
991               sdepth == CV_32F && ddepth == CV_32F ? (CvReduceToColFunc)icvMaxCols_32f_C1R :
992               sdepth == CV_64F && ddepth == CV_64F ? (CvReduceToColFunc)icvMaxCols_64f_C1R : 0) :
993              op == CV_REDUCE_MIN && cn == 1 ?
994              (sdepth == CV_8U && ddepth == CV_8U ? (CvReduceToColFunc)icvMinCols_8u_C1R :
995               sdepth == CV_32F && ddepth == CV_32F ? (CvReduceToColFunc)icvMinCols_32f_C1R :
996               sdepth == CV_64F && ddepth == CV_64F ? (CvReduceToColFunc)icvMinCols_64f_C1R : 0) : 0;
997
998         if( !cfunc )
999             CV_ERROR( CV_StsUnsupportedFormat,
1000             "Unsupported combination of input and output array formats" );
1001
            // size stays in pixels here; the C3 and C4 kernels multiply the
            // width by the channel count themselves.
1002         IPPI_CALL( cfunc( src->data.ptr, src->step ? src->step : CV_STUB_STEP,
1003                           dst->data.ptr, dst->step ? dst->step : CV_STUB_STEP, size ));
1004     }
1005
        // Turn the accumulated sum into an average by dividing by the number of
        // combined elements, writing (and converting) into the caller's array.
1006     if( op0 == CV_REDUCE_AVG )
1007         cvScale( dst, dst0, 1./(dim == 0 ? src->rows : src->cols) );
1008
1009     __END__;
1010
        // Release the temporary accumulator if one was created for the average.
1011     if( temp )
1012         cvReleaseMat( &temp );
1013 }
1014
1015 /* End of file. */