git.maemo.org Git - opencv/blob - cv/src/cvsmooth.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "_cv.h"
  43
  44 /*
  45  * This file includes the code, contributed by Simon Perreault
  46  * (the function icvMedianBlur_8u_CnR_O1)
  47  *
  48  * Constant-time median filtering -- http://nomis80.org/ctmf.html
  49  * Copyright (C) 2006 Simon Perreault
  50  *
  51  * Contact:
  52  *  Laboratoire de vision et systemes numeriques
  53  *  Pavillon Adrien-Pouliot
  54  *  Universite Laval
  55  *  Sainte-Foy, Quebec, Canada
  56  *  G1K 7P4
  57  *
  58  *  perreaul@gel.ulaval.ca
  59  */
  60
  61 // uncomment the line below to force SSE2 mode
  62 //#define CV_SSE2 1
  63
  64 /****************************************************************************************\
  65                                          Box Filter
  66 \****************************************************************************************/
  67
  68 static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
  69 static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
  70 static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
  71                              int count, void* params );
  72 static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
  73                              int count, void* params );
  74 static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
  75                              int count, void* params );
  76 static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
  77                               int count, void* params );
  78
  79 CvBoxFilter::CvBoxFilter()
  80 {
  81     min_depth = CV_32S;
  82     sum = 0;
  83     sum_count = 0;
  84     normalized = false;
  85 }
  86
  87
  88 CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
  89                           bool _normalized, CvSize _ksize,
  90                           CvPoint _anchor, int _border_mode,
  91                           CvScalar _border_value )
  92 {
  93     min_depth = CV_32S;
  94     sum = 0;
  95     sum_count = 0;
  96     normalized = false;
  97     init( _max_width, _src_type, _dst_type, _normalized,
  98           _ksize, _anchor, _border_mode, _border_value );
  99 }
 100
 101
 102 CvBoxFilter::~CvBoxFilter()
 103 {
 104     clear();
 105 }
 106
 107
 108 void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
 109                         bool _normalized, CvSize _ksize,
 110                         CvPoint _anchor, int _border_mode,
 111                         CvScalar _border_value )
 112 {
 113     CV_FUNCNAME( "CvBoxFilter::init" );
 114
 115     __BEGIN__;
 116
 117     sum = 0;
 118     normalized = _normalized;
 119
 120     if( normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type) ||
 121         !normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type))
 122         CV_ERROR( CV_StsUnmatchedFormats,
 123         "In case of normalized box filter input and output must have the same type.\n"
 124         "In case of unnormalized box filter the number of input and output channels must be the same" );
 125
 126     min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
 127
 128     CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
 129                              _anchor, _border_mode, _border_value );
 130
 131     scale = normalized ? 1./(ksize.width*ksize.height) : 1;
 132
 133     if( CV_MAT_DEPTH(src_type) == CV_8U )
 134         x_func = (CvRowFilterFunc)icvSumRow_8u32s;
 135     else if( CV_MAT_DEPTH(src_type) == CV_32F )
 136         x_func = (CvRowFilterFunc)icvSumRow_32f64f;
 137     else
 138         CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
 139
 140     if( CV_MAT_DEPTH(dst_type) == CV_8U )
 141     {
 142         if( !normalized )
 143             CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
 144         y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
 145     }
 146     else if( CV_MAT_DEPTH(dst_type) == CV_16S )
 147     {
 148         if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
 149             CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
 150         y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
 151     }
 152         else if( CV_MAT_DEPTH(dst_type) == CV_32S )
 153         {
 154                 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
 155                         CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
 156
 157                 y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
 158         }
 159     else if( CV_MAT_DEPTH(dst_type) == CV_32F )
 160     {
 161         if( CV_MAT_DEPTH(src_type) != CV_32F )
 162             CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
 163         y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
 164     }
 165         else{
 166                 CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
 167         }
 168
 169     __END__;
 170 }
 171
 172
 173 void CvBoxFilter::start_process( CvSlice x_range, int width )
 174 {
 175     CvBaseImageFilter::start_process( x_range, width );
 176     int i, psz = CV_ELEM_SIZE(work_type);
 177     uchar* s;
 178     buf_end -= buf_step;
 179     buf_max_count--;
 180     assert( buf_max_count >= max_ky*2 + 1 );
 181     s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
 182     sum_count = 0;
 183
 184     width *= psz;
 185     for( i = 0; i < width; i++ )
 186         s[i] = (uchar)0;
 187 }
 188
 189
 190 static void
 191 icvSumRow_8u32s( const uchar* src, int* dst, void* params )
 192 {
 193     const CvBoxFilter* state = (const CvBoxFilter*)params;
 194     int ksize = state->get_kernel_size().width;
 195     int width = state->get_width();
 196     int cn = CV_MAT_CN(state->get_src_type());
 197     int i, k;
 198
 199     width = (width - 1)*cn; ksize *= cn;
 200
 201     for( k = 0; k < cn; k++, src++, dst++ )
 202     {
 203         int s = 0;
 204         for( i = 0; i < ksize; i += cn )
 205             s += src[i];
 206         dst[0] = s;
 207         for( i = 0; i < width; i += cn )
 208         {
 209             s += src[i+ksize] - src[i];
 210             dst[i+cn] = s;
 211         }
 212     }
 213 }
 214
 215
 216 static void
 217 icvSumRow_32f64f( const float* src, double* dst, void* params )
 218 {
 219     const CvBoxFilter* state = (const CvBoxFilter*)params;
 220     int ksize = state->get_kernel_size().width;
 221     int width = state->get_width();
 222     int cn = CV_MAT_CN(state->get_src_type());
 223     int i, k;
 224
 225     width = (width - 1)*cn; ksize *= cn;
 226
 227     for( k = 0; k < cn; k++, src++, dst++ )
 228     {
 229         double s = 0;
 230         for( i = 0; i < ksize; i += cn )
 231             s += src[i];
 232         dst[0] = s;
 233         for( i = 0; i < width; i += cn )
 234         {
 235             s += (double)src[i+ksize] - src[i];
 236             dst[i+cn] = s;
 237         }
 238     }
 239 }
 240
 241
 242 static void
 243 icvSumCol_32s8u( const int** src, uchar* dst,
 244                  int dst_step, int count, void* params )
 245 {
 246 #define BLUR_SHIFT 24
 247     CvBoxFilter* state = (CvBoxFilter*)params;
 248     int ksize = state->get_kernel_size().height;
 249     int i, width = state->get_width();
 250     int cn = CV_MAT_CN(state->get_src_type());
 251     double scale = state->get_scale();
 252     int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
 253     int* sum = (int*)state->get_sum_buf();
 254     int* _sum_count = state->get_sum_count_ptr();
 255     int sum_count = *_sum_count;
 256
 257     width *= cn;
 258     src += sum_count;
 259     count += ksize - 1 - sum_count;
 260
 261     for( ; count--; src++ )
 262     {
 263         const int* sp = src[0];
 264         if( sum_count+1 < ksize )
 265         {
 266             for( i = 0; i <= width - 2; i += 2 )
 267             {
 268                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 269                 sum[i] = s0; sum[i+1] = s1;
 270             }
 271
 272             for( ; i < width; i++ )
 273                 sum[i] += sp[i];
 274
 275             sum_count++;
 276         }
 277         else
 278         {
 279             const int* sm = src[-ksize+1];
 280             for( i = 0; i <= width - 2; i += 2 )
 281             {
 282                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 283                 int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
 284                 s0 -= sm[i]; s1 -= sm[i+1];
 285                 sum[i] = s0; sum[i+1] = s1;
 286                 dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
 287             }
 288
 289             for( ; i < width; i++ )
 290             {
 291                 int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
 292                 sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
 293             }
 294             dst += dst_step;
 295         }
 296     }
 297
 298     *_sum_count = sum_count;
 299 #undef BLUR_SHIFT
 300 }
 301
 302
 303 static void
 304 icvSumCol_32s16s( const int** src, short* dst,
 305                   int dst_step, int count, void* params )
 306 {
 307     CvBoxFilter* state = (CvBoxFilter*)params;
 308     int ksize = state->get_kernel_size().height;
 309     int ktotal = ksize*state->get_kernel_size().width;
 310     int i, width = state->get_width();
 311     int cn = CV_MAT_CN(state->get_src_type());
 312     int* sum = (int*)state->get_sum_buf();
 313     int* _sum_count = state->get_sum_count_ptr();
 314     int sum_count = *_sum_count;
 315
 316     dst_step /= sizeof(dst[0]);
 317     width *= cn;
 318     src += sum_count;
 319     count += ksize - 1 - sum_count;
 320
 321     for( ; count--; src++ )
 322     {
 323         const int* sp = src[0];
 324         if( sum_count+1 < ksize )
 325         {
 326             for( i = 0; i <= width - 2; i += 2 )
 327             {
 328                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 329                 sum[i] = s0; sum[i+1] = s1;
 330             }
 331
 332             for( ; i < width; i++ )
 333                 sum[i] += sp[i];
 334
 335             sum_count++;
 336         }
 337         else if( ktotal < 128 )
 338         {
 339             const int* sm = src[-ksize+1];
 340             for( i = 0; i <= width - 2; i += 2 )
 341             {
 342                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 343                 dst[i] = (short)s0; dst[i+1] = (short)s1;
 344                 s0 -= sm[i]; s1 -= sm[i+1];
 345                 sum[i] = s0; sum[i+1] = s1;
 346             }
 347
 348             for( ; i < width; i++ )
 349             {
 350                 int s0 = sum[i] + sp[i];
 351                 dst[i] = (short)s0;
 352                 sum[i] = s0 - sm[i];
 353             }
 354             dst += dst_step;
 355         }
 356         else
 357         {
 358             const int* sm = src[-ksize+1];
 359             for( i = 0; i <= width - 2; i += 2 )
 360             {
 361                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 362                 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
 363                 s0 -= sm[i]; s1 -= sm[i+1];
 364                 sum[i] = s0; sum[i+1] = s1;
 365             }
 366
 367             for( ; i < width; i++ )
 368             {
 369                 int s0 = sum[i] + sp[i];
 370                 dst[i] = CV_CAST_16S(s0);
 371                 sum[i] = s0 - sm[i];
 372             }
 373             dst += dst_step;
 374         }
 375     }
 376
 377     *_sum_count = sum_count;
 378 }
 379
 380 static void
 381 icvSumCol_32s32s( const int** src, int * dst,
 382                   int dst_step, int count, void* params )
 383 {
 384     CvBoxFilter* state = (CvBoxFilter*)params;
 385     int ksize = state->get_kernel_size().height;
 386     int i, width = state->get_width();
 387     int cn = CV_MAT_CN(state->get_src_type());
 388     int* sum = (int*)state->get_sum_buf();
 389     int* _sum_count = state->get_sum_count_ptr();
 390     int sum_count = *_sum_count;
 391
 392     dst_step /= sizeof(dst[0]);
 393     width *= cn;
 394     src += sum_count;
 395     count += ksize - 1 - sum_count;
 396
 397     for( ; count--; src++ )
 398     {
 399         const int* sp = src[0];
 400         if( sum_count+1 < ksize )
 401         {
 402             for( i = 0; i <= width - 2; i += 2 )
 403             {
 404                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 405                 sum[i] = s0; sum[i+1] = s1;
 406             }
 407
 408             for( ; i < width; i++ )
 409                 sum[i] += sp[i];
 410
 411             sum_count++;
 412         }
 413         else
 414         {
 415             const int* sm = src[-ksize+1];
 416             for( i = 0; i <= width - 2; i += 2 )
 417             {
 418                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 419                 dst[i] = s0; dst[i+1] = s1;
 420                 s0 -= sm[i]; s1 -= sm[i+1];
 421                 sum[i] = s0; sum[i+1] = s1;
 422             }
 423
 424             for( ; i < width; i++ )
 425             {
 426                 int s0 = sum[i] + sp[i];
 427                 dst[i] = s0;
 428                 sum[i] = s0 - sm[i];
 429             }
 430             dst += dst_step;
 431         }
 432     }
 433
 434     *_sum_count = sum_count;
 435 }
 436
 437
 438 static void
 439 icvSumCol_64f32f( const double** src, float* dst,
 440                   int dst_step, int count, void* params )
 441 {
 442     CvBoxFilter* state = (CvBoxFilter*)params;
 443     int ksize = state->get_kernel_size().height;
 444     int i, width = state->get_width();
 445     int cn = CV_MAT_CN(state->get_src_type());
 446     double scale = state->get_scale();
 447     bool normalized = state->is_normalized();
 448     double* sum = (double*)state->get_sum_buf();
 449     int* _sum_count = state->get_sum_count_ptr();
 450     int sum_count = *_sum_count;
 451
 452     dst_step /= sizeof(dst[0]);
 453     width *= cn;
 454     src += sum_count;
 455     count += ksize - 1 - sum_count;
 456
 457     for( ; count--; src++ )
 458     {
 459         const double* sp = src[0];
 460         if( sum_count+1 < ksize )
 461         {
 462             for( i = 0; i <= width - 2; i += 2 )
 463             {
 464                 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 465                 sum[i] = s0; sum[i+1] = s1;
 466             }
 467
 468             for( ; i < width; i++ )
 469                 sum[i] += sp[i];
 470
 471             sum_count++;
 472         }
 473         else
 474         {
 475             const double* sm = src[-ksize+1];
 476             if( normalized )
 477                 for( i = 0; i <= width - 2; i += 2 )
 478                 {
 479                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 480                     double t0 = s0*scale, t1 = s1*scale;
 481                     s0 -= sm[i]; s1 -= sm[i+1];
 482                     dst[i] = (float)t0; dst[i+1] = (float)t1;
 483                     sum[i] = s0; sum[i+1] = s1;
 484                 }
 485             else
 486                 for( i = 0; i <= width - 2; i += 2 )
 487                 {
 488                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 489                     dst[i] = (float)s0; dst[i+1] = (float)s1;
 490                     s0 -= sm[i]; s1 -= sm[i+1];
 491                     sum[i] = s0; sum[i+1] = s1;
 492                 }
 493
 494             for( ; i < width; i++ )
 495             {
 496                 double s0 = sum[i] + sp[i], t0 = s0*scale;
 497                 sum[i] = s0 - sm[i]; dst[i] = (float)t0;
 498             }
 499             dst += dst_step;
 500         }
 501     }
 502
 503     *_sum_count = sum_count;
 504 }
 505
 506
 507 /****************************************************************************************\
 508                                       Median Filter
 509 \****************************************************************************************/
 510
 511 #define CV_MINMAX_8U(a,b) \
 512     (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
 513
 514 #if CV_SSE2 && !defined __SSE2__
 515 #define __SSE2__ 1
 516 #include "emmintrin.h"
 517 #endif
 518
 519 #if defined(__VEC__) || defined(__ALTIVEC__)
 520 #include <altivec.h>
 521 #undef bool
 522 #endif
 523
 524 #if defined(__GNUC__)
 525 #define align(x) __attribute__ ((aligned (x)))
 526 #elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
 527 #define align(x) __declspec(align(x))
 528 #else
 529 #define align(x)
 530 #endif
 531
 532 #if _MSC_VER >= 1200
 533 #pragma warning( disable: 4244 )
 534 #endif
 535
 536 /**
 537  * This structure represents a two-tier histogram. The first tier (known as the
 538  * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
 539  * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
 540  * coarse bucket designated by the 4 MSBs of the fine bucket value.
 541  *
 542  * The structure is aligned on 16 bits, which is a prerequisite for SIMD
 543  * instructions. Each bucket is 16 bit wide, which means that extra care must be
 544  * taken to prevent overflow.
 545  */
 546 typedef struct align(16)
 547 {
 548     ushort coarse[16];
 549     ushort fine[16][16];
 550 } Histogram;
 551
 552 /**
 553  * HOP is short for Histogram OPeration. This macro makes an operation \a op on
 554  * histogram \a h for pixel value \a x. It takes care of handling both levels.
 555  */
 556 #define HOP(h,x,op) \
 557     h.coarse[x>>4] op; \
 558     *((ushort*) h.fine + x) op;
 559
 560 #define COP(c,j,x,op) \
 561     h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
 562     h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
 563
 564 #if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
 565 #define MEDIAN_HAVE_SIMD 1
 566 #else
 567 #define MEDIAN_HAVE_SIMD 0
 568 #endif
 569
 570 /**
 571  * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
 572  * SSE2, MMX or Altivec, if available.
 573  */
 574 #if defined(__SSE2__)
 575 static inline void histogram_add( const ushort x[16], ushort y[16] )
 576 {
 577     _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
 578         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
 579     _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
 580         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
 581 }
 582 #elif defined(__MMX__)
 583 static inline void histogram_add( const ushort x[16], ushort y[16] )
 584 {
 585     *(__m64*) &y[0]  = _mm_add_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
 586     *(__m64*) &y[4]  = _mm_add_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
 587     *(__m64*) &y[8]  = _mm_add_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
 588     *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
 589 }
 590 #elif defined(__ALTIVEC__)
 591 static inline void histogram_add( const ushort x[16], ushort y[16] )
 592 {
 593     *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
 594     *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
 595 }
 596 #else
 597 static inline void histogram_add( const ushort x[16], ushort y[16] )
 598 {
 599     int i;
 600     for( i = 0; i < 16; ++i )
 601         y[i] = (ushort)(y[i] + x[i]);
 602 }
 603 #endif
 604
 605 /**
 606  * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
 607  * of SSE2, MMX or Altivec, if available.
 608  */
 609 #if defined(__SSE2__)
 610 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 611 {
 612     _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
 613         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
 614     _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
 615         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
 616 }
 617 #elif defined(__MMX__)
 618 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 619 {
 620     *(__m64*) &y[0]  = _mm_sub_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
 621     *(__m64*) &y[4]  = _mm_sub_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
 622     *(__m64*) &y[8]  = _mm_sub_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
 623     *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
 624 }
 625 #elif defined(__ALTIVEC__)
 626 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 627 {
 628     *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
 629     *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
 630 }
 631 #else
 632 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 633 {
 634     int i;
 635     for( i = 0; i < 16; ++i )
 636         y[i] = (ushort)(y[i] - x[i]);
 637 }
 638 #endif
 639
 640 static inline void histogram_muladd( int a, const ushort x[16],
 641         ushort y[16] )
 642 {
 643     int i;
 644     for ( i = 0; i < 16; ++i )
 645         y[i] = (ushort)(y[i] + a * x[i]);
 646 }
 647
 648 static CvStatus CV_STDCALL
 649 icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
 650                          CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
 651 {
 652     int r = (kernel_size-1)/2;
 653     const int m = size.height, n = size.width;
 654     int i, j, k, c;
 655     const unsigned char *p, *q;
 656     Histogram H[4];
 657     ushort *h_coarse, *h_fine, luc[4][16];
 658
 659     if( size.height < r || size.width < r )
 660         return CV_BADSIZE_ERR;
 661
 662     assert( src );
 663     assert( dst );
 664     assert( r >= 0 );
 665     assert( size.width >= 2*r+1 );
 666     assert( size.height >= 2*r+1 );
 667     assert( src_step != 0 );
 668     assert( dst_step != 0 );
 669
 670     h_coarse = (ushort*) cvAlloc(  1 * 16 * n * cn * sizeof(ushort) );
 671     h_fine   = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
 672     memset( h_coarse, 0,  1 * 16 * n * cn * sizeof(ushort) );
 673     memset( h_fine,   0, 16 * 16 * n * cn * sizeof(ushort) );
 674
 675     /* First row initialization */
 676     for ( j = 0; j < n; ++j ) {
 677         for ( c = 0; c < cn; ++c ) {
 678             COP( c, j, src[cn*j+c], += r+1 );
 679         }
 680     }
 681     for ( i = 0; i < r; ++i ) {
 682         for ( j = 0; j < n; ++j ) {
 683             for ( c = 0; c < cn; ++c ) {
 684                 COP( c, j, src[src_step*i+cn*j+c], ++ );
 685             }
 686         }
 687     }
 688
 689     for ( i = 0; i < m; ++i ) {
 690
 691         /* Update column histograms for entire row. */
 692         p = src + src_step * MAX( 0, i-r-1 );
 693         q = p + cn * n;
 694         for ( j = 0; p != q; ++j ) {
 695             for ( c = 0; c < cn; ++c, ++p ) {
 696                 COP( c, j, *p, -- );
 697             }
 698         }
 699
 700         p = src + src_step * MIN( m-1, i+r );
 701         q = p + cn * n;
 702         for ( j = 0; p != q; ++j ) {
 703             for ( c = 0; c < cn; ++c, ++p ) {
 704                 COP( c, j, *p, ++ );
 705             }
 706         }
 707
 708         /* First column initialization */
 709         memset( H, 0, cn*sizeof(H[0]) );
 710         memset( luc, 0, cn*sizeof(luc[0]) );
 711         if ( pad_left ) {
 712             for ( c = 0; c < cn; ++c ) {
 713                 histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
 714             }
 715         }
 716         for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
 717             for ( c = 0; c < cn; ++c ) {
 718                 histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
 719             }
 720         }
 721         for ( c = 0; c < cn; ++c ) {
 722             for ( k = 0; k < 16; ++k ) {
 723                 histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
 724             }
 725         }
 726
 727         for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
 728             for ( c = 0; c < cn; ++c ) {
 729                 int t = 2*r*r + 2*r, b, sum = 0;
 730                 ushort* segment;
 731
 732                 histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
 733
 734                 /* Find median at coarse level */
 735                 for ( k = 0; k < 16 ; ++k ) {
 736                     sum += H[c].coarse[k];
 737                     if ( sum > t ) {
 738                         sum -= H[c].coarse[k];
 739                         break;
 740                     }
 741                 }
 742                 assert( k < 16 );
 743
 744                 /* Update corresponding histogram segment */
 745                 if ( luc[c][k] <= j-r ) {
 746                     memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
 747                     for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
 748                         histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
 749                     }
 750                     if ( luc[c][k] < j+r+1 ) {
 751                         histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
 752                         luc[c][k] = (ushort)(j+r+1);
 753                     }
 754                 }
 755                 else {
 756                     for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
 757                         histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
 758                         histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
 759                     }
 760                 }
 761
 762                 histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
 763
 764                 /* Find median in segment */
 765                 segment = H[c].fine[k];
 766                 for ( b = 0; b < 16 ; ++b ) {
 767                     sum += segment[b];
 768                     if ( sum > t ) {
 769                         dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
 770                         break;
 771                     }
 772                 }
 773                 assert( b < 16 );
 774             }
 775         }
 776     }
 777
 778 #if defined(__MMX__)
 779     _mm_empty();
 780 #endif
 781
 782     cvFree(&h_coarse);
 783     cvFree(&h_fine);
 784
 785 #undef HOP
 786 #undef COP
 787     return CV_OK;
 788 }
 789
 790
 791 #if _MSC_VER >= 1200
 792 #pragma warning( default: 4244 )
 793 #endif
 794
 795
 796 static CvStatus CV_STDCALL
 797 icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
 798                          CvSize size, int m, int cn )
 799 {
 800     #define N  16
 801     int     zone0[4][N];
 802     int     zone1[4][N*N];
 803     int     x, y;
 804     int     n2 = m*m/2;
 805     int     nx = (m + 1)/2 - 1;
 806     uchar*  src_max = src + size.height*src_step;
 807     uchar*  src_right = src + size.width*cn;
 808
 809     #define UPDATE_ACC01( pix, cn, op ) \
 810     {                                   \
 811         int p = (pix);                  \
 812         zone1[cn][p] op;                \
 813         zone0[cn][p >> 4] op;           \
 814     }
 815
 816     if( size.height < nx || size.width < nx )
 817         return CV_BADSIZE_ERR;
 818
 819     if( m == 3 )
 820     {
 821         size.width *= cn;
 822
 823         for( y = 0; y < size.height; y++, dst += dst_step )
 824         {
 825             const uchar* src0 = src + src_step*(y-1);
 826             const uchar* src1 = src0 + src_step;
 827             const uchar* src2 = src1 + src_step;
 828             if( y == 0 )
 829                 src0 = src1;
 830             else if( y == size.height - 1 )
 831                 src2 = src1;
 832
 833             for( x = 0; x < 2*cn; x++ )
 834             {
 835                 int x0 = x < cn ? x : size.width - 3*cn + x;
 836                 int x2 = x < cn ? x + cn : size.width - 2*cn + x;
 837                 int x1 = x < cn ? x0 : x2, t;
 838
 839                 int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
 840                 int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
 841                 int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
 842
 843                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 844                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
 845                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
 846                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 847                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
 848                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
 849                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
 850                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
 851                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
 852                 CV_MINMAX_8U(p4, p2);
 853                 dst[x1] = (uchar)p4;
 854             }
 855
 856             for( x = cn; x < size.width - cn; x++ )
 857             {
 858                 int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
 859                 int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
 860                 int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
 861                 int t;
 862
 863                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 864                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
 865                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
 866                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 867                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
 868                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
 869                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
 870                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
 871                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
 872                 CV_MINMAX_8U(p4, p2);
 873
 874                 dst[x] = (uchar)p4;
 875             }
 876         }
 877
 878         return CV_OK;
 879     }
 880
 881     for( x = 0; x < size.width; x++, dst += cn )
 882     {
 883         uchar* dst_cur = dst;
 884         uchar* src_top = src;
 885         uchar* src_bottom = src;
 886         int    k, c;
 887         int    x0 = -1;
 888         int    src_step1 = src_step, dst_step1 = dst_step;
 889
 890         if( x % 2 != 0 )
 891         {
 892             src_bottom = src_top += src_step*(size.height-1);
 893             dst_cur += dst_step*(size.height-1);
 894             src_step1 = -src_step1;
 895             dst_step1 = -dst_step1;
 896         }
 897
 898         if( x <= m/2 )
 899             nx++;
 900
 901         if( nx < m )
 902             x0 = x < m/2 ? 0 : (nx-1)*cn;
 903
 904         // init accumulator
 905         memset( zone0, 0, sizeof(zone0[0])*cn );
 906         memset( zone1, 0, sizeof(zone1[0])*cn );
 907
 908         for( y = 0; y <= m/2; y++ )
 909         {
 910             for( c = 0; c < cn; c++ )
 911             {
 912                 if( y > 0 )
 913                 {
 914                     if( x0 >= 0 )
 915                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
 916                     for( k = 0; k < nx*cn; k += cn )
 917                         UPDATE_ACC01( src_bottom[k+c], c, ++ );
 918                 }
 919                 else
 920                 {
 921                     if( x0 >= 0 )
 922                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
 923                     for( k = 0; k < nx*cn; k += cn )
 924                         UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
 925                 }
 926             }
 927
 928             if( src_step1 > 0 && y < size.height-1 ||
 929                 src_step1 < 0 && size.height-y-1 > 0 )
 930                 src_bottom += src_step1;
 931         }
 932
 933         for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
 934         {
 935             // find median
 936             for( c = 0; c < cn; c++ )
 937             {
 938                 int s = 0;
 939                 for( k = 0; ; k++ )
 940                 {
 941                     int t = s + zone0[c][k];
 942                     if( t > n2 ) break;
 943                     s = t;
 944                 }
 945
 946                 for( k *= N; ;k++ )
 947                 {
 948                     s += zone1[c][k];
 949                     if( s > n2 ) break;
 950                 }
 951
 952                 dst_cur[c] = (uchar)k;
 953             }
 954
 955             if( y+1 == size.height )
 956                 break;
 957
 958             if( cn == 1 )
 959             {
 960                 for( k = 0; k < nx; k++ )
 961                 {
 962                     int p = src_top[k];
 963                     int q = src_bottom[k];
 964                     zone1[0][p]--;
 965                     zone0[0][p>>4]--;
 966                     zone1[0][q]++;
 967                     zone0[0][q>>4]++;
 968                 }
 969             }
 970             else if( cn == 3 )
 971             {
 972                 for( k = 0; k < nx*3; k += 3 )
 973                 {
 974                     UPDATE_ACC01( src_top[k], 0, -- );
 975                     UPDATE_ACC01( src_top[k+1], 1, -- );
 976                     UPDATE_ACC01( src_top[k+2], 2, -- );
 977
 978                     UPDATE_ACC01( src_bottom[k], 0, ++ );
 979                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
 980                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
 981                 }
 982             }
 983             else
 984             {
 985                 assert( cn == 4 );
 986                 for( k = 0; k < nx*4; k += 4 )
 987                 {
 988                     UPDATE_ACC01( src_top[k], 0, -- );
 989                     UPDATE_ACC01( src_top[k+1], 1, -- );
 990                     UPDATE_ACC01( src_top[k+2], 2, -- );
 991                     UPDATE_ACC01( src_top[k+3], 3, -- );
 992
 993                     UPDATE_ACC01( src_bottom[k], 0, ++ );
 994                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
 995                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
 996                     UPDATE_ACC01( src_bottom[k+3], 3, ++ );
 997                 }
 998             }
 999
1000             if( x0 >= 0 )
1001             {
1002                 for( c = 0; c < cn; c++ )
1003                 {
1004                     UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005                     UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1006                 }
1007             }
1008
1009             if( src_step1 > 0 && src_bottom + src_step1 < src_max ||
1010                 src_step1 < 0 && src_bottom + src_step1 >= src )
1011                 src_bottom += src_step1;
1012
1013             if( y >= m/2 )
1014                 src_top += src_step1;
1015         }
1016
1017         if( x >= m/2 )
1018             src += cn;
1019         if( src + nx*cn > src_right ) nx--;
1020     }
1021 #undef N
1022 #undef UPDATE_ACC
1023     return CV_OK;
1024 }
1025
1026
1027 /****************************************************************************************\
1028                                    Bilateral Filtering
1029 \****************************************************************************************/
1030
1031 static void
1032 icvBilateralFiltering( const CvMat* src, CvMat* dst,
1033                        double sigma_color, double sigma_space )
1034 {
1035     CvMat* temp = 0;
1036     float* color_weight = 0;
1037     float* space_weight = 0;
1038     int* space_ofs = 0;
1039
1040     CV_FUNCNAME( "icvBilateralFiltering" );
1041
1042     __BEGIN__;
1043
1044     double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045     double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046     int cn = CV_MAT_CN(src->type);
1047     int i, j, k, maxk, radius, d;
1048     CvSize size = cvGetMatSize(src);
1049
1050     if( CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051         CV_MAT_TYPE(src->type) != CV_8UC3 ||
1052         !CV_ARE_TYPES_EQ(src, dst) )
1053         CV_ERROR( CV_StsUnsupportedFormat,
1054         "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1055
1056     if( sigma_color <= 0 )
1057         sigma_color = 1;
1058     if( sigma_space <= 0 )
1059         sigma_space = 1;
1060
1061     radius = cvRound(sigma_space*1.5);
1062     radius = MAX(radius, 1);
1063     d = radius*2 + 1;
1064
1065     CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1066         src->cols + radius*2, src->type ));
1067     CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1068     CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1069     CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1070     CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1071
1072     // initialize color-related bilateral filter coefficients
1073     for( i = 0; i < 256*cn; i++ )
1074         color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1075
1076     // initialize space-related bilateral filter coefficients
1077     for( i = -radius, maxk = 0; i <= radius; i++ )
1078         for( j = -radius; j <= radius; j++ )
1079         {
1080             double r = sqrt((double)i*i + (double)j*j);
1081             if( r > radius )
1082                 continue;
1083             space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1084             space_ofs[maxk++] = i*temp->step + j*cn;
1085         }
1086
1087     for( i = 0; i < size.height; i++ )
1088     {
1089         const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1090         uchar* dptr = dst->data.ptr + i*dst->step;
1091
1092         if( cn == 1 )
1093         {
1094             for( j = 0; j < size.width; j++ )
1095             {
1096                 float sum = 0, wsum = 0;
1097                 int val0 = sptr[j];
1098                 for( k = 0; k < maxk; k++ )
1099                 {
1100                     int val = sptr[j + space_ofs[k]];
1101                     float w = space_weight[k]*color_weight[abs(val - val0)];
1102                     sum += val*w;
1103                     wsum += w;
1104                 }
1105                 // overflow is not possible here => there is no need to use CV_CAST_8U
1106                 dptr[j] = (uchar)cvRound(sum/wsum);
1107             }
1108         }
1109         else
1110         {
1111             assert( cn == 3 );
1112             for( j = 0; j < size.width*3; j += 3 )
1113             {
1114                 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1115                 int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1116                 for( k = 0; k < maxk; k++ )
1117                 {
1118                     const uchar* sptr_k = sptr + j + space_ofs[k];
1119                     int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1120                     float w = space_weight[k]*color_weight[abs(b - b0) +
1121                         abs(g - g0) + abs(r - r0)];
1122                     sum_b += b*w; sum_g += g*w; sum_r += r*w;
1123                     wsum += w;
1124                 }
1125                 wsum = 1.f/wsum;
1126                 b0 = cvRound(sum_b*wsum);
1127                 g0 = cvRound(sum_g*wsum);
1128                 r0 = cvRound(sum_r*wsum);
1129                 dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1130             }
1131         }
1132     }
1133
1134     __END__;
1135
1136     cvReleaseMat( &temp );
1137     cvFree( &color_weight );
1138     cvFree( &space_weight );
1139     cvFree( &space_ofs );
1140 }
1141
1142 //////////////////////////////// IPP smoothing functions /////////////////////////////////
1143
1144 icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1145 icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1146 icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1147
1148 icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1149 icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1150 icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1151 icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1152 icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1153 icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1154
1155 typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1156 ( const void* src, int srcstep, void* dst, int dststep,
1157   CvSize size, CvSize ksize, CvPoint anchor );
1158
1159 //////////////////////////////////////////////////////////////////////////////////////////
1160
1161 CV_IMPL void
1162 cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1163           int param1, int param2, double param3, double param4 )
1164 {
1165     CvBoxFilter box_filter;
1166     CvSepFilter gaussian_filter;
1167
1168     CvMat* temp = 0;
1169
1170     CV_FUNCNAME( "cvSmooth" );
1171
1172     __BEGIN__;
1173
1174     int coi1 = 0, coi2 = 0;
1175     CvMat srcstub, *src = (CvMat*)srcarr;
1176     CvMat dststub, *dst = (CvMat*)dstarr;
1177     CvSize size;
1178     int src_type, dst_type, depth, cn;
1179     double sigma1 = 0, sigma2 = 0;
1180     bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1181
1182     CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1183     CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1184
1185     if( coi1 != 0 || coi2 != 0 )
1186         CV_ERROR( CV_BadCOI, "" );
1187
1188     src_type = CV_MAT_TYPE( src->type );
1189     dst_type = CV_MAT_TYPE( dst->type );
1190     depth = CV_MAT_DEPTH(src_type);
1191     cn = CV_MAT_CN(src_type);
1192     size = cvGetMatSize(src);
1193
1194     if( !CV_ARE_SIZES_EQ( src, dst ))
1195         CV_ERROR( CV_StsUnmatchedSizes, "" );
1196
1197     if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1198         CV_ERROR( CV_StsUnmatchedFormats,
1199         "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1200
1201     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1202         smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1203     {
1204         // automatic detection of kernel size from sigma
1205         if( smooth_type == CV_GAUSSIAN )
1206         {
1207             sigma1 = param3;
1208             sigma2 = param4 ? param4 : param3;
1209
1210             if( param1 == 0 && sigma1 > 0 )
1211                 param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1212             if( param2 == 0 && sigma2 > 0 )
1213                 param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1214         }
1215
1216         if( param2 == 0 )
1217             param2 = size.height == 1 ? 1 : param1;
1218         if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1219             CV_ERROR( CV_StsOutOfRange,
1220                 "Both mask width and height must be >=1 and odd" );
1221
1222         if( param1 == 1 && param2 == 1 )
1223         {
1224             cvConvert( src, dst );
1225             EXIT;
1226         }
1227     }
1228
1229     if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1230         size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1231     {
1232         CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1233
1234         if( smooth_type == CV_BLUR )
1235         {
1236             ipp_median_box_func =
1237                 src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1238                 src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1239                 src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1240                 src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1241                 src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1242                 src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1243         }
1244         else if( smooth_type == CV_MEDIAN )
1245         {
1246             ipp_median_box_func =
1247                 src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1248                 src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1249                 src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1250         }
1251
1252         if( ipp_median_box_func )
1253         {
1254             CvSize el_size = { param1, param2 };
1255             CvPoint el_anchor = { param1/2, param2/2 };
1256             int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1257                                        // overhead of the current IPP code etc.
1258             const uchar* shifted_ptr;
1259             int y, dy = 0;
1260             int temp_step, dst_step = dst->step;
1261
1262             CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1263
1264             shifted_ptr = temp->data.ptr +
1265                 el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1266             temp_step = temp->step ? temp->step : CV_STUB_STEP;
1267
1268             for( y = 0; y < src->rows; y += dy )
1269             {
1270                 dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1271                 IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1272                     dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1273                     el_size, el_anchor ));
1274             }
1275             EXIT;
1276         }
1277     }
1278
1279     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1280     {
1281         CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1282             smooth_type == CV_BLUR, cvSize(param1, param2) ));
1283         CV_CALL( box_filter.process( src, dst ));
1284     }
1285     else if( smooth_type == CV_MEDIAN )
1286     {
1287         int img_size_mp = size.width*size.height;
1288         img_size_mp = (img_size_mp + (1<<19)) >> 20;
1289
1290         if( depth != CV_8U || cn != 1 && cn != 3 && cn != 4 )
1291             CV_ERROR( CV_StsUnsupportedFormat,
1292             "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1293
1294         if( size.width < param1*2 || size.height < param1*2 ||
1295             param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1296         {
1297             // Special case optimized for 3x3
1298             IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1299                 dst->data.ptr, dst->step, size, param1, cn ));
1300         }
1301         else
1302         {
1303             const int r = (param1 - 1) / 2;
1304             const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn );  // assume a 256 kB cache size
1305             const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1306                     (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1307             const int STRIPE_SIZE = (int) cvCeil(
1308                     (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1309
1310             for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1311             {
1312                 int stripe = STRIPE_SIZE;
1313                 // Make sure that the filter kernel fits into one stripe.
1314                 if( i + STRIPE_SIZE - 2*r >= size.width ||
1315                     size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1316                     stripe = size.width - i;
1317
1318                 IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1319                     dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1320                     param1, cn, i == 0, stripe == size.width - i ));
1321
1322                 if( stripe == size.width - i )
1323                     break;
1324             }
1325         }
1326     }
1327     else if( smooth_type == CV_GAUSSIAN )
1328     {
1329         CvSize ksize = { param1, param2 };
1330         float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1331         float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1332         CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1333         CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1334
1335         CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1336         if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1337             CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1338         else
1339             KY.data.fl = kx;
1340
1341         if( have_ipp && size.width >= param1*3 &&
1342             size.height >= param2 && param1 > 1 && param2 > 1 )
1343         {
1344             int done;
1345             CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1346                         cvPoint(ksize.width/2,ksize.height/2)));
1347             if( done )
1348                 EXIT;
1349         }
1350
1351         CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1352         CV_CALL( gaussian_filter.process( src, dst ));
1353     }
1354     else if( smooth_type == CV_BILATERAL )
1355         CV_CALL( icvBilateralFiltering( src, dst, param1, param2 ));
1356
1357     __END__;
1358
1359     cvReleaseMat( &temp );
1360 }
1361
1362 /* End of file. */