1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
45 * This file includes the code, contributed by Simon Perreault
46 * (the function icvMedianBlur_8u_CnR_O1)
48 * Constant-time median filtering -- http://nomis80.org/ctmf.html
49 * Copyright (C) 2006 Simon Perreault
52 * Laboratoire de vision et systemes numeriques
53 * Pavillon Adrien-Pouliot
55 * Sainte-Foy, Quebec, Canada
58 * perreaul@gel.ulaval.ca
61 // uncomment the line below to force SSE2 mode
64 /****************************************************************************************\
66 \****************************************************************************************/
68 static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
69 static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
70 static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
71 int count, void* params );
72 static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
73 int count, void* params );
74 static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
75 int count, void* params );
76 static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
77 int count, void* params );
79 CvBoxFilter::CvBoxFilter()
88 CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
89 bool _normalized, CvSize _ksize,
90 CvPoint _anchor, int _border_mode,
91 CvScalar _border_value )
97 init( _max_width, _src_type, _dst_type, _normalized,
98 _ksize, _anchor, _border_mode, _border_value );
102 CvBoxFilter::~CvBoxFilter()
108 void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
109 bool _normalized, CvSize _ksize,
110 CvPoint _anchor, int _border_mode,
111 CvScalar _border_value )
113 CV_FUNCNAME( "CvBoxFilter::init" );
118 normalized = _normalized;
120 if( normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type) ||
121 !normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type))
122 CV_ERROR( CV_StsUnmatchedFormats,
123 "In case of normalized box filter input and output must have the same type.\n"
124 "In case of unnormalized box filter the number of input and output channels must be the same" );
126 min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
128 CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
129 _anchor, _border_mode, _border_value );
131 scale = normalized ? 1./(ksize.width*ksize.height) : 1;
133 if( CV_MAT_DEPTH(src_type) == CV_8U )
134 x_func = (CvRowFilterFunc)icvSumRow_8u32s;
135 else if( CV_MAT_DEPTH(src_type) == CV_32F )
136 x_func = (CvRowFilterFunc)icvSumRow_32f64f;
138 CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
140 if( CV_MAT_DEPTH(dst_type) == CV_8U )
143 CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
144 y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
146 else if( CV_MAT_DEPTH(dst_type) == CV_16S )
148 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
149 CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
150 y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
152 else if( CV_MAT_DEPTH(dst_type) == CV_32S )
154 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
155 CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
157 y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
159 else if( CV_MAT_DEPTH(dst_type) == CV_32F )
161 if( CV_MAT_DEPTH(src_type) != CV_32F )
162 CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
163 y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
166 CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
173 void CvBoxFilter::start_process( CvSlice x_range, int width )
175 CvBaseImageFilter::start_process( x_range, width );
176 int i, psz = CV_ELEM_SIZE(work_type);
180 assert( buf_max_count >= max_ky*2 + 1 );
181 s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
185 for( i = 0; i < width; i++ )
191 icvSumRow_8u32s( const uchar* src, int* dst, void* params )
193 const CvBoxFilter* state = (const CvBoxFilter*)params;
194 int ksize = state->get_kernel_size().width;
195 int width = state->get_width();
196 int cn = CV_MAT_CN(state->get_src_type());
199 width = (width - 1)*cn; ksize *= cn;
201 for( k = 0; k < cn; k++, src++, dst++ )
204 for( i = 0; i < ksize; i += cn )
207 for( i = 0; i < width; i += cn )
209 s += src[i+ksize] - src[i];
217 icvSumRow_32f64f( const float* src, double* dst, void* params )
219 const CvBoxFilter* state = (const CvBoxFilter*)params;
220 int ksize = state->get_kernel_size().width;
221 int width = state->get_width();
222 int cn = CV_MAT_CN(state->get_src_type());
225 width = (width - 1)*cn; ksize *= cn;
227 for( k = 0; k < cn; k++, src++, dst++ )
230 for( i = 0; i < ksize; i += cn )
233 for( i = 0; i < width; i += cn )
235 s += (double)src[i+ksize] - src[i];
243 icvSumCol_32s8u( const int** src, uchar* dst,
244 int dst_step, int count, void* params )
246 #define BLUR_SHIFT 24
247 CvBoxFilter* state = (CvBoxFilter*)params;
248 int ksize = state->get_kernel_size().height;
249 int i, width = state->get_width();
250 int cn = CV_MAT_CN(state->get_src_type());
251 double scale = state->get_scale();
252 int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
253 int* sum = (int*)state->get_sum_buf();
254 int* _sum_count = state->get_sum_count_ptr();
255 int sum_count = *_sum_count;
259 count += ksize - 1 - sum_count;
261 for( ; count--; src++ )
263 const int* sp = src[0];
264 if( sum_count+1 < ksize )
266 for( i = 0; i <= width - 2; i += 2 )
268 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
269 sum[i] = s0; sum[i+1] = s1;
272 for( ; i < width; i++ )
279 const int* sm = src[-ksize+1];
280 for( i = 0; i <= width - 2; i += 2 )
282 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
283 int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
284 s0 -= sm[i]; s1 -= sm[i+1];
285 sum[i] = s0; sum[i+1] = s1;
286 dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
289 for( ; i < width; i++ )
291 int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
292 sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
298 *_sum_count = sum_count;
304 icvSumCol_32s16s( const int** src, short* dst,
305 int dst_step, int count, void* params )
307 CvBoxFilter* state = (CvBoxFilter*)params;
308 int ksize = state->get_kernel_size().height;
309 int ktotal = ksize*state->get_kernel_size().width;
310 int i, width = state->get_width();
311 int cn = CV_MAT_CN(state->get_src_type());
312 int* sum = (int*)state->get_sum_buf();
313 int* _sum_count = state->get_sum_count_ptr();
314 int sum_count = *_sum_count;
316 dst_step /= sizeof(dst[0]);
319 count += ksize - 1 - sum_count;
321 for( ; count--; src++ )
323 const int* sp = src[0];
324 if( sum_count+1 < ksize )
326 for( i = 0; i <= width - 2; i += 2 )
328 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
329 sum[i] = s0; sum[i+1] = s1;
332 for( ; i < width; i++ )
337 else if( ktotal < 128 )
339 const int* sm = src[-ksize+1];
340 for( i = 0; i <= width - 2; i += 2 )
342 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
343 dst[i] = (short)s0; dst[i+1] = (short)s1;
344 s0 -= sm[i]; s1 -= sm[i+1];
345 sum[i] = s0; sum[i+1] = s1;
348 for( ; i < width; i++ )
350 int s0 = sum[i] + sp[i];
358 const int* sm = src[-ksize+1];
359 for( i = 0; i <= width - 2; i += 2 )
361 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
362 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
363 s0 -= sm[i]; s1 -= sm[i+1];
364 sum[i] = s0; sum[i+1] = s1;
367 for( ; i < width; i++ )
369 int s0 = sum[i] + sp[i];
370 dst[i] = CV_CAST_16S(s0);
377 *_sum_count = sum_count;
381 icvSumCol_32s32s( const int** src, int * dst,
382 int dst_step, int count, void* params )
384 CvBoxFilter* state = (CvBoxFilter*)params;
385 int ksize = state->get_kernel_size().height;
386 int i, width = state->get_width();
387 int cn = CV_MAT_CN(state->get_src_type());
388 int* sum = (int*)state->get_sum_buf();
389 int* _sum_count = state->get_sum_count_ptr();
390 int sum_count = *_sum_count;
392 dst_step /= sizeof(dst[0]);
395 count += ksize - 1 - sum_count;
397 for( ; count--; src++ )
399 const int* sp = src[0];
400 if( sum_count+1 < ksize )
402 for( i = 0; i <= width - 2; i += 2 )
404 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
405 sum[i] = s0; sum[i+1] = s1;
408 for( ; i < width; i++ )
415 const int* sm = src[-ksize+1];
416 for( i = 0; i <= width - 2; i += 2 )
418 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
419 dst[i] = s0; dst[i+1] = s1;
420 s0 -= sm[i]; s1 -= sm[i+1];
421 sum[i] = s0; sum[i+1] = s1;
424 for( ; i < width; i++ )
426 int s0 = sum[i] + sp[i];
434 *_sum_count = sum_count;
439 icvSumCol_64f32f( const double** src, float* dst,
440 int dst_step, int count, void* params )
442 CvBoxFilter* state = (CvBoxFilter*)params;
443 int ksize = state->get_kernel_size().height;
444 int i, width = state->get_width();
445 int cn = CV_MAT_CN(state->get_src_type());
446 double scale = state->get_scale();
447 bool normalized = state->is_normalized();
448 double* sum = (double*)state->get_sum_buf();
449 int* _sum_count = state->get_sum_count_ptr();
450 int sum_count = *_sum_count;
452 dst_step /= sizeof(dst[0]);
455 count += ksize - 1 - sum_count;
457 for( ; count--; src++ )
459 const double* sp = src[0];
460 if( sum_count+1 < ksize )
462 for( i = 0; i <= width - 2; i += 2 )
464 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
465 sum[i] = s0; sum[i+1] = s1;
468 for( ; i < width; i++ )
475 const double* sm = src[-ksize+1];
477 for( i = 0; i <= width - 2; i += 2 )
479 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
480 double t0 = s0*scale, t1 = s1*scale;
481 s0 -= sm[i]; s1 -= sm[i+1];
482 dst[i] = (float)t0; dst[i+1] = (float)t1;
483 sum[i] = s0; sum[i+1] = s1;
486 for( i = 0; i <= width - 2; i += 2 )
488 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
489 dst[i] = (float)s0; dst[i+1] = (float)s1;
490 s0 -= sm[i]; s1 -= sm[i+1];
491 sum[i] = s0; sum[i+1] = s1;
494 for( ; i < width; i++ )
496 double s0 = sum[i] + sp[i], t0 = s0*scale;
497 sum[i] = s0 - sm[i]; dst[i] = (float)t0;
503 *_sum_count = sum_count;
507 /****************************************************************************************\
509 \****************************************************************************************/
511 #define CV_MINMAX_8U(a,b) \
512 (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
514 #if CV_SSE2 && !defined __SSE2__
516 #include "emmintrin.h"
519 #if defined(__VEC__) || defined(__ALTIVEC__)
524 #if defined(__GNUC__)
525 #define align(x) __attribute__ ((aligned (x)))
526 #elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
527 #define align(x) __declspec(align(x))
533 #pragma warning( disable: 4244 )
537 * This structure represents a two-tier histogram. The first tier (known as the
538 * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
539 * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
540 * coarse bucket designated by the 4 MSBs of the fine bucket value.
542 * The structure is aligned on 16 bits, which is a prerequisite for SIMD
543 * instructions. Each bucket is 16 bit wide, which means that extra care must be
544 * taken to prevent overflow.
546 typedef struct align(16)
553 * HOP is short for Histogram OPeration. This macro makes an operation \a op on
554 * histogram \a h for pixel value \a x. It takes care of handling both levels.
556 #define HOP(h,x,op) \
558 *((ushort*) h.fine + x) op;
560 #define COP(c,j,x,op) \
561 h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
562 h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
564 #if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
565 #define MEDIAN_HAVE_SIMD 1
567 #define MEDIAN_HAVE_SIMD 0
571 * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
572 * SSE2, MMX or Altivec, if available.
574 #if defined(__SSE2__)
575 static inline void histogram_add( const ushort x[16], ushort y[16] )
577 _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
578 _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
579 _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
580 _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
582 #elif defined(__MMX__)
583 static inline void histogram_add( const ushort x[16], ushort y[16] )
585 *(__m64*) &y[0] = _mm_add_pi16( *(__m64*) &y[0], *(__m64*) &x[0] );
586 *(__m64*) &y[4] = _mm_add_pi16( *(__m64*) &y[4], *(__m64*) &x[4] );
587 *(__m64*) &y[8] = _mm_add_pi16( *(__m64*) &y[8], *(__m64*) &x[8] );
588 *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
590 #elif defined(__ALTIVEC__)
591 static inline void histogram_add( const ushort x[16], ushort y[16] )
593 *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
594 *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
597 static inline void histogram_add( const ushort x[16], ushort y[16] )
600 for( i = 0; i < 16; ++i )
601 y[i] = (ushort)(y[i] + x[i]);
606 * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
607 * of SSE2, MMX or Altivec, if available.
609 #if defined(__SSE2__)
610 static inline void histogram_sub( const ushort x[16], ushort y[16] )
612 _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
613 _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
614 _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
615 _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
617 #elif defined(__MMX__)
618 static inline void histogram_sub( const ushort x[16], ushort y[16] )
620 *(__m64*) &y[0] = _mm_sub_pi16( *(__m64*) &y[0], *(__m64*) &x[0] );
621 *(__m64*) &y[4] = _mm_sub_pi16( *(__m64*) &y[4], *(__m64*) &x[4] );
622 *(__m64*) &y[8] = _mm_sub_pi16( *(__m64*) &y[8], *(__m64*) &x[8] );
623 *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
625 #elif defined(__ALTIVEC__)
626 static inline void histogram_sub( const ushort x[16], ushort y[16] )
628 *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
629 *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
632 static inline void histogram_sub( const ushort x[16], ushort y[16] )
635 for( i = 0; i < 16; ++i )
636 y[i] = (ushort)(y[i] - x[i]);
640 static inline void histogram_muladd( int a, const ushort x[16],
644 for ( i = 0; i < 16; ++i )
645 y[i] = (ushort)(y[i] + a * x[i]);
648 static CvStatus CV_STDCALL
649 icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
650 CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
652 int r = (kernel_size-1)/2;
653 const int m = size.height, n = size.width;
655 const unsigned char *p, *q;
657 ushort *h_coarse, *h_fine, luc[4][16];
659 if( size.height < r || size.width < r )
660 return CV_BADSIZE_ERR;
665 assert( size.width >= 2*r+1 );
666 assert( size.height >= 2*r+1 );
667 assert( src_step != 0 );
668 assert( dst_step != 0 );
670 h_coarse = (ushort*) cvAlloc( 1 * 16 * n * cn * sizeof(ushort) );
671 h_fine = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
672 memset( h_coarse, 0, 1 * 16 * n * cn * sizeof(ushort) );
673 memset( h_fine, 0, 16 * 16 * n * cn * sizeof(ushort) );
675 /* First row initialization */
676 for ( j = 0; j < n; ++j ) {
677 for ( c = 0; c < cn; ++c ) {
678 COP( c, j, src[cn*j+c], += r+1 );
681 for ( i = 0; i < r; ++i ) {
682 for ( j = 0; j < n; ++j ) {
683 for ( c = 0; c < cn; ++c ) {
684 COP( c, j, src[src_step*i+cn*j+c], ++ );
689 for ( i = 0; i < m; ++i ) {
691 /* Update column histograms for entire row. */
692 p = src + src_step * MAX( 0, i-r-1 );
694 for ( j = 0; p != q; ++j ) {
695 for ( c = 0; c < cn; ++c, ++p ) {
700 p = src + src_step * MIN( m-1, i+r );
702 for ( j = 0; p != q; ++j ) {
703 for ( c = 0; c < cn; ++c, ++p ) {
708 /* First column initialization */
709 memset( H, 0, cn*sizeof(H[0]) );
710 memset( luc, 0, cn*sizeof(luc[0]) );
712 for ( c = 0; c < cn; ++c ) {
713 histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
716 for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
717 for ( c = 0; c < cn; ++c ) {
718 histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
721 for ( c = 0; c < cn; ++c ) {
722 for ( k = 0; k < 16; ++k ) {
723 histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
727 for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
728 for ( c = 0; c < cn; ++c ) {
729 int t = 2*r*r + 2*r, b, sum = 0;
732 histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
734 /* Find median at coarse level */
735 for ( k = 0; k < 16 ; ++k ) {
736 sum += H[c].coarse[k];
738 sum -= H[c].coarse[k];
744 /* Update corresponding histogram segment */
745 if ( luc[c][k] <= j-r ) {
746 memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
747 for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
748 histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
750 if ( luc[c][k] < j+r+1 ) {
751 histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
752 luc[c][k] = (ushort)(j+r+1);
756 for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
757 histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
758 histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
762 histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
764 /* Find median in segment */
765 segment = H[c].fine[k];
766 for ( b = 0; b < 16 ; ++b ) {
769 dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
792 #pragma warning( default: 4244 )
796 static CvStatus CV_STDCALL
797 icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
798 CvSize size, int m, int cn )
805 int nx = (m + 1)/2 - 1;
806 uchar* src_max = src + size.height*src_step;
807 uchar* src_right = src + size.width*cn;
809 #define UPDATE_ACC01( pix, cn, op ) \
813 zone0[cn][p >> 4] op; \
816 if( size.height < nx || size.width < nx )
817 return CV_BADSIZE_ERR;
823 for( y = 0; y < size.height; y++, dst += dst_step )
825 const uchar* src0 = src + src_step*(y-1);
826 const uchar* src1 = src0 + src_step;
827 const uchar* src2 = src1 + src_step;
830 else if( y == size.height - 1 )
833 for( x = 0; x < 2*cn; x++ )
835 int x0 = x < cn ? x : size.width - 3*cn + x;
836 int x2 = x < cn ? x + cn : size.width - 2*cn + x;
837 int x1 = x < cn ? x0 : x2, t;
839 int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
840 int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
841 int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
843 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
844 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
845 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
846 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
847 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
848 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
849 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
850 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
851 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
852 CV_MINMAX_8U(p4, p2);
856 for( x = cn; x < size.width - cn; x++ )
858 int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
859 int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
860 int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
863 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
864 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
865 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
866 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
867 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
868 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
869 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
870 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
871 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
872 CV_MINMAX_8U(p4, p2);
881 for( x = 0; x < size.width; x++, dst += cn )
883 uchar* dst_cur = dst;
884 uchar* src_top = src;
885 uchar* src_bottom = src;
888 int src_step1 = src_step, dst_step1 = dst_step;
892 src_bottom = src_top += src_step*(size.height-1);
893 dst_cur += dst_step*(size.height-1);
894 src_step1 = -src_step1;
895 dst_step1 = -dst_step1;
902 x0 = x < m/2 ? 0 : (nx-1)*cn;
905 memset( zone0, 0, sizeof(zone0[0])*cn );
906 memset( zone1, 0, sizeof(zone1[0])*cn );
908 for( y = 0; y <= m/2; y++ )
910 for( c = 0; c < cn; c++ )
915 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
916 for( k = 0; k < nx*cn; k += cn )
917 UPDATE_ACC01( src_bottom[k+c], c, ++ );
922 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
923 for( k = 0; k < nx*cn; k += cn )
924 UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
928 if( src_step1 > 0 && y < size.height-1 ||
929 src_step1 < 0 && size.height-y-1 > 0 )
930 src_bottom += src_step1;
933 for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
936 for( c = 0; c < cn; c++ )
941 int t = s + zone0[c][k];
952 dst_cur[c] = (uchar)k;
955 if( y+1 == size.height )
960 for( k = 0; k < nx; k++ )
963 int q = src_bottom[k];
972 for( k = 0; k < nx*3; k += 3 )
974 UPDATE_ACC01( src_top[k], 0, -- );
975 UPDATE_ACC01( src_top[k+1], 1, -- );
976 UPDATE_ACC01( src_top[k+2], 2, -- );
978 UPDATE_ACC01( src_bottom[k], 0, ++ );
979 UPDATE_ACC01( src_bottom[k+1], 1, ++ );
980 UPDATE_ACC01( src_bottom[k+2], 2, ++ );
986 for( k = 0; k < nx*4; k += 4 )
988 UPDATE_ACC01( src_top[k], 0, -- );
989 UPDATE_ACC01( src_top[k+1], 1, -- );
990 UPDATE_ACC01( src_top[k+2], 2, -- );
991 UPDATE_ACC01( src_top[k+3], 3, -- );
993 UPDATE_ACC01( src_bottom[k], 0, ++ );
994 UPDATE_ACC01( src_bottom[k+1], 1, ++ );
995 UPDATE_ACC01( src_bottom[k+2], 2, ++ );
996 UPDATE_ACC01( src_bottom[k+3], 3, ++ );
1002 for( c = 0; c < cn; c++ )
1004 UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005 UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1009 if( src_step1 > 0 && src_bottom + src_step1 < src_max ||
1010 src_step1 < 0 && src_bottom + src_step1 >= src )
1011 src_bottom += src_step1;
1014 src_top += src_step1;
1019 if( src + nx*cn > src_right ) nx--;
1027 /****************************************************************************************\
1029 \****************************************************************************************/
1032 icvBilateralFiltering( const CvMat* src, CvMat* dst,
1033 double sigma_color, double sigma_space )
1036 float* color_weight = 0;
1037 float* space_weight = 0;
1040 CV_FUNCNAME( "icvBilateralFiltering" );
1044 double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045 double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046 int cn = CV_MAT_CN(src->type);
1047 int i, j, k, maxk, radius, d;
1048 CvSize size = cvGetMatSize(src);
1050 if( CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051 CV_MAT_TYPE(src->type) != CV_8UC3 ||
1052 !CV_ARE_TYPES_EQ(src, dst) )
1053 CV_ERROR( CV_StsUnsupportedFormat,
1054 "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1056 if( sigma_color <= 0 )
1058 if( sigma_space <= 0 )
1061 radius = cvRound(sigma_space*1.5);
1062 radius = MAX(radius, 1);
1065 CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1066 src->cols + radius*2, src->type ));
1067 CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1068 CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1069 CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1070 CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1072 // initialize color-related bilateral filter coefficients
1073 for( i = 0; i < 256*cn; i++ )
1074 color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1076 // initialize space-related bilateral filter coefficients
1077 for( i = -radius, maxk = 0; i <= radius; i++ )
1078 for( j = -radius; j <= radius; j++ )
1080 double r = sqrt((double)i*i + (double)j*j);
1083 space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1084 space_ofs[maxk++] = i*temp->step + j*cn;
1087 for( i = 0; i < size.height; i++ )
1089 const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1090 uchar* dptr = dst->data.ptr + i*dst->step;
1094 for( j = 0; j < size.width; j++ )
1096 float sum = 0, wsum = 0;
1098 for( k = 0; k < maxk; k++ )
1100 int val = sptr[j + space_ofs[k]];
1101 float w = space_weight[k]*color_weight[abs(val - val0)];
1105 // overflow is not possible here => there is no need to use CV_CAST_8U
1106 dptr[j] = (uchar)cvRound(sum/wsum);
1112 for( j = 0; j < size.width*3; j += 3 )
1114 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1115 int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1116 for( k = 0; k < maxk; k++ )
1118 const uchar* sptr_k = sptr + j + space_ofs[k];
1119 int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1120 float w = space_weight[k]*color_weight[abs(b - b0) +
1121 abs(g - g0) + abs(r - r0)];
1122 sum_b += b*w; sum_g += g*w; sum_r += r*w;
1126 b0 = cvRound(sum_b*wsum);
1127 g0 = cvRound(sum_g*wsum);
1128 r0 = cvRound(sum_r*wsum);
1129 dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1136 cvReleaseMat( &temp );
1137 cvFree( &color_weight );
1138 cvFree( &space_weight );
1139 cvFree( &space_ofs );
1142 //////////////////////////////// IPP smoothing functions /////////////////////////////////
1144 icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1145 icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1146 icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1148 icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1149 icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1150 icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1151 icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1152 icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1153 icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1155 typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1156 ( const void* src, int srcstep, void* dst, int dststep,
1157 CvSize size, CvSize ksize, CvPoint anchor );
1159 //////////////////////////////////////////////////////////////////////////////////////////
1162 cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1163 int param1, int param2, double param3, double param4 )
1165 CvBoxFilter box_filter;
1166 CvSepFilter gaussian_filter;
1170 CV_FUNCNAME( "cvSmooth" );
1174 int coi1 = 0, coi2 = 0;
1175 CvMat srcstub, *src = (CvMat*)srcarr;
1176 CvMat dststub, *dst = (CvMat*)dstarr;
1178 int src_type, dst_type, depth, cn;
1179 double sigma1 = 0, sigma2 = 0;
1180 bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1182 CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1183 CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1185 if( coi1 != 0 || coi2 != 0 )
1186 CV_ERROR( CV_BadCOI, "" );
1188 src_type = CV_MAT_TYPE( src->type );
1189 dst_type = CV_MAT_TYPE( dst->type );
1190 depth = CV_MAT_DEPTH(src_type);
1191 cn = CV_MAT_CN(src_type);
1192 size = cvGetMatSize(src);
1194 if( !CV_ARE_SIZES_EQ( src, dst ))
1195 CV_ERROR( CV_StsUnmatchedSizes, "" );
1197 if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1198 CV_ERROR( CV_StsUnmatchedFormats,
1199 "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1201 if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1202 smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1204 // automatic detection of kernel size from sigma
1205 if( smooth_type == CV_GAUSSIAN )
1208 sigma2 = param4 ? param4 : param3;
1210 if( param1 == 0 && sigma1 > 0 )
1211 param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1212 if( param2 == 0 && sigma2 > 0 )
1213 param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1217 param2 = size.height == 1 ? 1 : param1;
1218 if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1219 CV_ERROR( CV_StsOutOfRange,
1220 "Both mask width and height must be >=1 and odd" );
1222 if( param1 == 1 && param2 == 1 )
1224 cvConvert( src, dst );
1229 if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1230 size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1232 CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1234 if( smooth_type == CV_BLUR )
1236 ipp_median_box_func =
1237 src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1238 src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1239 src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1240 src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1241 src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1242 src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1244 else if( smooth_type == CV_MEDIAN )
1246 ipp_median_box_func =
1247 src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1248 src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1249 src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1252 if( ipp_median_box_func )
1254 CvSize el_size = { param1, param2 };
1255 CvPoint el_anchor = { param1/2, param2/2 };
1256 int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1257 // overhead of the current IPP code etc.
1258 const uchar* shifted_ptr;
1260 int temp_step, dst_step = dst->step;
1262 CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1264 shifted_ptr = temp->data.ptr +
1265 el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1266 temp_step = temp->step ? temp->step : CV_STUB_STEP;
1268 for( y = 0; y < src->rows; y += dy )
1270 dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1271 IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1272 dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1273 el_size, el_anchor ));
1279 if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1281 CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1282 smooth_type == CV_BLUR, cvSize(param1, param2) ));
1283 CV_CALL( box_filter.process( src, dst ));
1285 else if( smooth_type == CV_MEDIAN )
1287 int img_size_mp = size.width*size.height;
1288 img_size_mp = (img_size_mp + (1<<19)) >> 20;
1290 if( depth != CV_8U || cn != 1 && cn != 3 && cn != 4 )
1291 CV_ERROR( CV_StsUnsupportedFormat,
1292 "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1294 if( size.width < param1*2 || size.height < param1*2 ||
1295 param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1297 // Special case optimized for 3x3
1298 IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1299 dst->data.ptr, dst->step, size, param1, cn ));
1303 const int r = (param1 - 1) / 2;
1304 const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn ); // assume a 256 kB cache size
1305 const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1306 (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1307 const int STRIPE_SIZE = (int) cvCeil(
1308 (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1310 for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1312 int stripe = STRIPE_SIZE;
1313 // Make sure that the filter kernel fits into one stripe.
1314 if( i + STRIPE_SIZE - 2*r >= size.width ||
1315 size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1316 stripe = size.width - i;
1318 IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1319 dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1320 param1, cn, i == 0, stripe == size.width - i ));
1322 if( stripe == size.width - i )
1327 else if( smooth_type == CV_GAUSSIAN )
1329 CvSize ksize = { param1, param2 };
1330 float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1331 float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1332 CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1333 CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1335 CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1336 if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1337 CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1341 if( have_ipp && size.width >= param1*3 &&
1342 size.height >= param2 && param1 > 1 && param2 > 1 )
1345 CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1346 cvPoint(ksize.width/2,ksize.height/2)));
1351 CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1352 CV_CALL( gaussian_filter.process( src, dst ));
1354 else if( smooth_type == CV_BILATERAL )
1355 CV_CALL( icvBilateralFiltering( src, dst, param1, param2 ));
1359 cvReleaseMat( &temp );