1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
10 // Intel License Agreement
12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
13 // Third party copyrights are property of their respective owners.
15 // Redistribution and use in source and binary forms, with or without modification,
16 // are permitted provided that the following conditions are met:
18 // * Redistribution's of source code must retain the above copyright notice,
19 // this list of conditions and the following disclaimer.
21 // * Redistribution's in binary form must reproduce the above copyright notice,
22 // this list of conditions and the following disclaimer in the documentation
23 // and/or other materials provided with the distribution.
25 // * The name of Intel Corporation may not be used to endorse or promote products
26 // derived from this software without specific prior written permission.
28 // This software is provided by the copyright holders and contributors "as is" and
29 // any express or implied warranties, including, but not limited to, the implied
30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
31 // In no event shall the Intel Corporation or contributors be liable for any direct,
32 // indirect, incidental, special, exemplary, or consequential damages
33 // (including, but not limited to, procurement of substitute goods or services;
34 // loss of use, data, or profits; or business interruption) however caused
35 // and on any theory of liability, whether in contract, strict liability,
36 // or tort (including negligence or otherwise) arising in any way out of
37 // the use of this software, even if advised of the possibility of such damage.
44 /****************************************************************************************\
45 * Auxiliary function declarations *
46 \****************************************************************************************/
47 /*---------------------- functions for the CNN classifier ------------------------------*/
/* Forward declarations of the file-local (static) callbacks that are defined
   or referenced later in this file: the classifier predict/update/release
   callbacks, the network add-layer/release callbacks, and the per-layer
   release/forward/backward callbacks. */
48 static float icvCNNModelPredict(
49 const CvStatModel* cnn_model,
51 CvMat* probs CV_DEFAULT(0) );
53 static void icvCNNModelUpdate(
54 CvStatModel* cnn_model, const CvMat* images, int tflag,
55 const CvMat* responses, const CvStatModelParams* params,
56 const CvMat* CV_DEFAULT(0), const CvMat* sample_idx CV_DEFAULT(0),
57 const CvMat* CV_DEFAULT(0), const CvMat* CV_DEFAULT(0));
59 static void icvCNNModelRelease( CvStatModel** cnn_model );
/* NOTE(review): the icvTrainCNNetwork declaration below shows only part of its
   parameter list and no terminator in this view - confirm against the
   definition further down. */
61 static void icvTrainCNNetwork( CvCNNetwork* network,
63 const CvMat* responses,
69 /*------------------------- functions for the CNN network ------------------------------*/
70 static void icvCNNetworkAddLayer( CvCNNetwork* network, CvCNNLayer* layer );
71 static void icvCNNetworkRelease( CvCNNetwork** network );
73 /* In all layer functions we denote input by X and output by Y, where
74 X and Y are column-vectors, so that
75 length(X)==<n_input_planes>*<input_height>*<input_width>,
76 length(Y)==<n_output_planes>*<output_height>*<output_width>.
78 /*------------------------ functions for convolutional layer ---------------------------*/
79 static void icvCNNConvolutionRelease( CvCNNLayer** p_layer );
81 static void icvCNNConvolutionForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );
/* Backward callbacks take the iteration counter <t>, the layer input <X>, the
   incoming gradient <dE_dY> and the gradient to produce, <dE_dX>. */
83 static void icvCNNConvolutionBackward( CvCNNLayer* layer, int t,
84 const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX );
86 /*------------------------ functions for sub-sampling layer ----------------------------*/
87 static void icvCNNSubSamplingRelease( CvCNNLayer** p_layer );
89 static void icvCNNSubSamplingForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );
91 static void icvCNNSubSamplingBackward( CvCNNLayer* layer, int t,
92 const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX );
94 /*------------------------ functions for full connected layer --------------------------*/
95 static void icvCNNFullConnectRelease( CvCNNLayer** p_layer );
97 static void icvCNNFullConnectForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );
99 static void icvCNNFullConnectBackward( CvCNNLayer* layer, int,
100 const CvMat*, const CvMat* dE_dY, CvMat* dE_dX );
102 /****************************************************************************************\
103 * Function implementations *
104 \****************************************************************************************/
/* Validates a CvCNNetwork before it is used for training.
   - Reports CV_StsNullPtr for a null <network> pointer.
   - Walks the layer list through next_layer, requiring ICV_IS_CNN_LAYER on
     every node, and requires that the number of linked layers equals
     network->n_layers, that the list is non-empty, and that the first layer
     has no prev_layer.
   - The first layer must have exactly one input plane and an input area
     (input_height*input_width) equal to img_size.
   - The last layer's total output size must equal params->etalons->cols.
   The macro relies on <img_size> and <params> being in scope where it is
   expanded and reports every failure through CV_ERROR. */
106 #define ICV_CHECK_CNN_NETWORK(network) \
108 CvCNNLayer* first_layer, *layer, *last_layer; \
111 CV_ERROR( CV_StsNullPtr, \
112 "Null <network> pointer. Network must be created by user." ); \
113 n_layers = network->n_layers; \
114 first_layer = last_layer = network->layers; \
115 for( i = 0, layer = first_layer; i < n_layers && layer; i++ ) \
117 if( !ICV_IS_CNN_LAYER(layer) ) \
118 CV_ERROR( CV_StsNullPtr, "Invalid network" ); \
119 last_layer = layer; \
120 layer = layer->next_layer; \
123 if( i == 0 || i != n_layers || first_layer->prev_layer || layer ) \
124 CV_ERROR( CV_StsNullPtr, "Invalid network" ); \
126 if( first_layer->n_input_planes != 1 ) \
127 CV_ERROR( CV_StsBadArg, "First layer must contain only one input plane" ); \
129 if( img_size != first_layer->input_height*first_layer->input_width ) \
130 CV_ERROR( CV_StsBadArg, "Invalid input sizes of the first layer" ); \
132 if( params->etalons->cols != last_layer->n_output_planes* \
133 last_layer->output_height*last_layer->output_width ) \
134 CV_ERROR( CV_StsBadArg, "Invalid output sizes of the last layer" ); \
/* Validates a CvCNNStatModelParams structure.
   - Reports CV_StsNullPtr for a null <params> pointer.
   - params->etalons must be a CV_32FC1 matrix whose row count equals the
     number of class labels (cnn_model->cls_labels->cols).
   - params->grad_estim_type must be CV_CNN_GRAD_ESTIM_RANDOM or
     CV_CNN_GRAD_ESTIM_BY_WORST_IMG.
   - params->start_iter must be non-negative.
   - params->max_iter below 1 is silently raised to 1, i.e. the macro writes
     to <params>.
   The macro relies on <cnn_model> being in scope where it is expanded. */
137 #define ICV_CHECK_CNN_MODEL_PARAMS(params) \
140 CV_ERROR( CV_StsNullPtr, "Null <params> pointer" ); \
142 if( !ICV_IS_MAT_OF_TYPE(params->etalons, CV_32FC1) ) \
143 CV_ERROR( CV_StsBadArg, "<etalons> must be CV_32FC1 type" ); \
144 if( params->etalons->rows != cnn_model->cls_labels->cols ) \
145 CV_ERROR( CV_StsBadArg, "Invalid <etalons> size" ); \
147 if( params->grad_estim_type != CV_CNN_GRAD_ESTIM_RANDOM && \
148 params->grad_estim_type != CV_CNN_GRAD_ESTIM_BY_WORST_IMG ) \
149 CV_ERROR( CV_StsBadArg, "Invalid <grad_estim_type>" ); \
151 if( params->start_iter < 0 ) \
152 CV_ERROR( CV_StsBadArg, "Parameter <start_iter> must be positive or zero" ); \
154 if( params->max_iter < 1 ) \
155 params->max_iter = 1; \
158 /****************************************************************************************\
159 * Classifier functions *
160 \****************************************************************************************/
/* Trains a new CNN classifier and returns it as a CvStatModel*.
   Steps: create a CvCNNStatModel wired to the release/predict/update
   callbacks of this file, convert the training data with cvPrepareTrainData
   (responses are treated as categorical), validate <_params> and the network
   it carries, then run icvTrainCNNetwork.
   _params is cast to CvCNNStatModelParams*; the unnamed CvMat* parameters are
   not used. */
162 cvTrainCNNClassifier( const CvMat* _train_data, int tflag,
163 const CvMat* _responses,
164 const CvStatModelParams* _params,
165 const CvMat*, const CvMat* _sample_idx, const CvMat*, const CvMat* )
167 CvCNNStatModel* cnn_model = 0;
168 const float** out_train_data = 0;
169 CvMat* responses = 0;
171 CV_FUNCNAME("cvTrainCNNClassifier");
176 CvCNNStatModelParams* params = (CvCNNStatModelParams*)_params;
178 CV_CALL(cnn_model = (CvCNNStatModel*)cvCreateStatModel(
179 CV_STAT_MODEL_MAGIC_VAL|CV_CNN_MAGIC_VAL, sizeof(CvCNNStatModel),
180 icvCNNModelRelease, icvCNNModelPredict, icvCNNModelUpdate ));
/* The class labels found in the responses are stored directly in the model. */
182 CV_CALL(cvPrepareTrainData( "cvTrainCNNClassifier",
183 _train_data, tflag, _responses, CV_VAR_CATEGORICAL,
184 0, _sample_idx, false, &out_train_data,
185 &n_images, &img_size, &img_size, &responses,
186 &cnn_model->cls_labels, 0 ));
188 ICV_CHECK_CNN_MODEL_PARAMS(params);
189 ICV_CHECK_CNN_NETWORK(params->network);
/* The model keeps the caller's network pointer (no copy is made) and a
   private clone of the etalons matrix. */
191 cnn_model->network = params->network;
192 CV_CALL(cnn_model->etalons = (CvMat*)cvClone( params->etalons ));
194 CV_CALL( icvTrainCNNetwork( cnn_model->network, out_train_data, responses,
195 cnn_model->etalons, params->grad_estim_type, params->max_iter,
196 params->start_iter ));
/* On failure the partially built model is destroyed through its own release
   callback. */
200 if( cvGetErrStatus() < 0 && cnn_model )
202 cnn_model->release( (CvStatModel**)&cnn_model );
/* Temporary buffers produced by cvPrepareTrainData. */
204 cvFree( &out_train_data );
205 cvReleaseMat( &responses );
207 return (CvStatModel*)cnn_model;
210 /****************************************************************************************/
/* Trains <network> by gradient descent, one training image per iteration,
   for iterations n = 1..max_iter.
   Each iteration picks one image (the image with the largest loss when
   grad_estim_type == CV_CNN_GRAD_ESTIM_BY_WORST_IMG, otherwise a random one),
   runs the forward pass on it, forms (network output - etalon of the true
   class) and back-propagates that through the layers from last to first.
   X[k] is the input of layer k as a column vector (X[n_layers] is the network
   output); dE_dX[k] is the matching gradient as a row vector.
   <responses> supplies, per image, the row index into <etalons>. */
211 static void icvTrainCNNetwork( CvCNNetwork* network,
212 const float** images,
213 const CvMat* responses,
214 const CvMat* etalons,
221 const int n_layers = network->n_layers;
224 CV_FUNCNAME("icvTrainCNNetwork");
227 CvCNNLayer* first_layer = network->layers;
228 const int img_height = first_layer->input_height;
229 const int img_width = first_layer->input_width;
230 const int img_size = img_width*img_height;
231 const int n_images = responses->cols;
/* Header only: its data pointer is re-targeted at each training image. */
232 CvMat image = cvMat( 1, img_size, CV_32FC1 );
235 CvRNG rng = cvRNG(-1);
/* n_layers+1 slots: one per layer input plus the final network output. */
237 CV_CALL(X = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
238 CV_CALL(dE_dX = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
239 memset( X, 0, (n_layers+1)*sizeof(CvMat*) );
240 memset( dE_dX, 0, (n_layers+1)*sizeof(CvMat*) );
242 CV_CALL(X[0] = cvCreateMat( img_height*img_width,1,CV_32FC1 ));
243 CV_CALL(dE_dX[0] = cvCreateMat( 1, X[0]->rows, CV_32FC1 ));
244 for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
246 CV_CALL(X[k+1] = cvCreateMat( layer->n_output_planes*layer->output_height*
247 layer->output_width, 1, CV_32FC1 ));
248 CV_CALL(dE_dX[k+1] = cvCreateMat( 1, X[k+1]->rows, CV_32FC1 ));
251 for( n = 1; n <= max_iter; n++ )
253 float loss, max_loss = 0;
255 int worst_img_idx = -1;
256 int* right_etal_idx = responses->data.i;
259 // Find the worst image (which produces the greatest loss) or use the random image
260 if( grad_estim_type == CV_CNN_GRAD_ESTIM_BY_WORST_IMG )
262 for( i = 0; i < n_images; i++, right_etal_idx++ )
264 image.data.fl = (float*)images[i];
265 cvTranspose( &image, X[0] );
267 for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
268 CV_CALL(layer->forward( layer, X[k], X[k+1] ));
/* dE_dX[n_layers] is used here as scratch space for the transposed output;
   the loss is the cvNorm distance to the etalon row of the true class. */
270 cvTranspose( X[n_layers], dE_dX[n_layers] );
271 cvGetRow( etalons, &etalon, *right_etal_idx );
272 loss = (float)cvNorm( dE_dX[n_layers], &etalon );
273 if( loss > max_loss )
281 worst_img_idx = cvRandInt(&rng) % n_images;
283 // Train network on the worst image
284 // 1) Compute the network output on the <image>
285 image.data.fl = (float*)images[worst_img_idx];
286 CV_CALL(cvTranspose( &image, X[0] ));
/* The loop stops one layer early and the last layer is run separately, so
   that <layer> is left pointing at the last layer: the backward loop below
   starts from <layer> without re-initializing it. */
288 for( k = 0, layer = first_layer; k < n_layers - 1; k++, layer = layer->next_layer )
289 CV_CALL(layer->forward( layer, X[k], X[k+1] ));
290 CV_CALL(layer->forward( layer, X[k], X[k+1] ));
292 // 2) Compute the gradient
293 cvTranspose( X[n_layers], dE_dX[n_layers] );
294 cvGetRow( etalons, &etalon, responses->data.i[worst_img_idx] );
295 cvSub( dE_dX[n_layers], &etalon, dE_dX[n_layers] );
297 // 3) Update weights by the gradient descent
/* n + start_iter is handed to each layer as its iteration counter <t>. */
298 for( k = n_layers; k > 0; k--, layer = layer->prev_layer )
299 CV_CALL(layer->backward( layer, n + start_iter, X[k-1], dE_dX[k], dE_dX[k-1] ));
/* Release the per-layer activation and gradient buffers. */
304 for( k = 0; k <= n_layers; k++ )
306 cvReleaseMat( &X[k] );
307 cvReleaseMat( &dE_dX[k] );
313 /****************************************************************************************/
/* Classifies one image with a trained CNN model.
   Runs the forward pass of the model's network on the image and measures the
   cvNorm distance between the network output and every etalon (one etalon
   per class, stored as consecutive rows of cnn_model->etalons).
   When a <probs> matrix is supplied, -loss is written for each class and the
   values are then exponentiated and divided by their sum so that they add up
   to 1.
   Returns cls_labels[best_etal_idx] converted to float; best_etal_idx is
   presumably the index of the etalon with the smallest loss - the update of
   that index is not visible in this view. */
314 static float icvCNNModelPredict( const CvStatModel* model,
321 int best_etal_idx = -1;
324 CV_FUNCNAME("icvCNNModelPredict");
327 CvCNNStatModel* cnn_model = (CvCNNStatModel*)model;
328 CvCNNLayer* first_layer, *layer = 0;
329 int img_height, img_width, img_size;
331 float loss, min_loss = FLT_MAX;
335 if( !CV_IS_CNN(model) )
336 CV_ERROR( CV_StsBadArg, "Invalid model" );
338 nclasses = cnn_model->cls_labels->cols;
339 n_layers = cnn_model->network->n_layers;
340 first_layer = cnn_model->network->layers;
341 img_height = first_layer->input_height;
342 img_width = first_layer->input_width;
343 img_size = img_height*img_width;
345 cvPreparePredictData( _image, img_size, 0, nclasses, probs, &img_data );
/* X[k] is the input of layer k; X[n_layers] receives the network output. */
347 CV_CALL(X = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
348 memset( X, 0, (n_layers+1)*sizeof(CvMat*) );
350 CV_CALL(X[0] = cvCreateMat( img_size,1,CV_32FC1 ));
351 for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
353 CV_CALL(X[k+1] = cvCreateMat( layer->n_output_planes*layer->output_height*
354 layer->output_width, 1, CV_32FC1 ));
/* The image arrives as a row vector; layers expect a column vector. */
357 image = cvMat( 1, img_size, CV_32FC1, img_data );
358 cvTranspose( &image, X[0] );
359 for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
360 CV_CALL(layer->forward( layer, X[k], X[k+1] ));
362 probs_data = probs ? probs->data.fl : 0;
/* <etalon> is a column-vector header laid over one row of the etalons matrix;
   advancing its data pointer by <cols> floats moves it to the next row. */
363 etalon = cvMat( cnn_model->etalons->cols, 1, CV_32FC1, cnn_model->etalons->data.fl );
364 for( i = 0; i < nclasses; i++, etalon.data.fl += cnn_model->etalons->cols )
366 loss = (float)cvNorm( X[n_layers], &etalon );
367 if( loss < min_loss )
373 *probs_data++ = -loss;
/* Turn the stored -loss values into weights that sum to 1. */
378 cvExp( probs, probs );
379 CvScalar sum = cvSum( probs );
380 cvConvertScale( probs, probs, 1./sum.val[0] );
385 for( k = 0; k <= n_layers; k++ )
386 cvReleaseMat( &X[k] );
/* img_data differs from the caller's buffer only if cvPreparePredictData
   handed back a different pointer. */
388 if( img_data != _image->data.fl )
391 return ((float) ((CvCNNStatModel*)model)->cls_labels->data.i[best_etal_idx]);
394 /****************************************************************************************/
395 static void icvCNNModelUpdate(
396 CvStatModel* _cnn_model, const CvMat* _train_data, int tflag,
397 const CvMat* _responses, const CvStatModelParams* _params,
398 const CvMat*, const CvMat* _sample_idx,
399 const CvMat*, const CvMat* )
401 const float** out_train_data = 0;
402 CvMat* responses = 0;
403 CvMat* cls_labels = 0;
405 CV_FUNCNAME("icvCNNModelUpdate");
408 int n_images, img_size, i;
409 CvCNNStatModelParams* params = (CvCNNStatModelParams*)_params;
410 CvCNNStatModel* cnn_model = (CvCNNStatModel*)_cnn_model;
412 if( !CV_IS_CNN(cnn_model) )
413 CV_ERROR( CV_StsBadArg, "Invalid model" );
415 CV_CALL(cvPrepareTrainData( "cvTrainCNNClassifier",
416 _train_data, tflag, _responses, CV_VAR_CATEGORICAL,
417 0, _sample_idx, false, &out_train_data,
418 &n_images, &img_size, &img_size, &responses,
419 &cls_labels, 0, 0 ));
421 ICV_CHECK_CNN_MODEL_PARAMS(params);
423 // Number of classes must be the same as when classifiers was created
424 if( !CV_ARE_SIZES_EQ(cls_labels, cnn_model->cls_labels) )
425 CV_ERROR( CV_StsBadArg, "Number of classes must be left unchanged" );
426 for( i = 0; i < cls_labels->cols; i++ )
428 if( cls_labels->data.i[i] != cnn_model->cls_labels->data.i[i] )
429 CV_ERROR( CV_StsBadArg, "Number of classes must be left unchanged" );
432 CV_CALL( icvTrainCNNetwork( cnn_model->network, out_train_data, responses,
433 cnn_model->etalons, params->grad_estim_type, params->max_iter,
434 params->start_iter ));
438 cvFree( &out_train_data );
439 cvReleaseMat( &responses );
442 /****************************************************************************************/
/* Release callback of the CNN model.
   Releases the model's class-label matrix, its etalons matrix and its network
   (through the network's own release callback).
   Reports CV_StsNullPtr with "Null double pointer" - presumably when
   <cnn_model> itself is NULL; the condition is not visible in this view. */
443 static void icvCNNModelRelease( CvStatModel** cnn_model )
445 CV_FUNCNAME("icvCNNModelRelease");
450 CV_ERROR( CV_StsNullPtr, "Null double pointer" );
452 cnn = *(CvCNNStatModel**)cnn_model;
454 cvReleaseMat( &cnn->cls_labels );
455 cvReleaseMat( &cnn->etalons );
456 cnn->network->release( &cnn->network );
464 /****************************************************************************************\
465 * Network functions *
466 \****************************************************************************************/
/* Creates a network that initially consists of the single layer <first_layer>.
   The layer is rejected with CV_StsBadArg unless ICV_IS_CNN_LAYER accepts it.
   The CvCNNetwork structure is allocated zero-filled, <first_layer> becomes
   the head of the layer list with n_layers == 1, and the release / add_layer
   callbacks are installed. The layer pointer is stored as-is, not copied. */
467 ML_IMPL CvCNNetwork* cvCreateCNNetwork( CvCNNLayer* first_layer )
469 CvCNNetwork* network = 0;
471 CV_FUNCNAME( "cvCreateCNNetwork" );
474 if( !ICV_IS_CNN_LAYER(first_layer) )
475 CV_ERROR( CV_StsBadArg, "Invalid layer" );
477 CV_CALL(network = (CvCNNetwork*)cvAlloc( sizeof(CvCNNetwork) ));
478 memset( network, 0, sizeof(CvCNNetwork) );
480 network->layers = first_layer;
481 network->n_layers = 1;
482 network->release = icvCNNetworkRelease;
483 network->add_layer = icvCNNetworkAddLayer;
/* Error path: entered only when an error was raised after allocation. */
487 if( cvGetErrStatus() < 0 && network )
494 /****************************************************************************************/
/* add_layer callback: appends <layer> after the current last layer of
   <network>, after checking that its input matches the previous output.
   - Full-connect layer: n_input_planes must equal the previous layer's total
     output size (width*height*planes), and the layer's own input and output
     planes must be 1x1.
   - Convolution or sub-sampling layer: input plane count, height and width
     must equal the previous layer's output plane count, height and width.
   - Any other layer type is reported as "Invalid layer".
   On success the two layers are linked through prev_layer / next_layer. */
495 static void icvCNNetworkAddLayer( CvCNNetwork* network, CvCNNLayer* layer )
497 CV_FUNCNAME( "icvCNNetworkAddLayer" );
500 CvCNNLayer* prev_layer;
502 if( network == NULL )
503 CV_ERROR( CV_StsNullPtr, "Null <network> pointer" );
/* Find the current tail of the layer list. */
505 prev_layer = network->layers;
506 while( prev_layer->next_layer )
507 prev_layer = prev_layer->next_layer;
509 if( ICV_IS_CNN_FULLCONNECT_LAYER(layer) )
511 if( layer->n_input_planes != prev_layer->output_width*prev_layer->output_height*
512 prev_layer->n_output_planes )
513 CV_ERROR( CV_StsBadArg, "Unmatched size of the new layer" );
514 if( layer->input_height != 1 || layer->output_height != 1 ||
515 layer->input_width != 1 || layer->output_width != 1 )
516 CV_ERROR( CV_StsBadArg, "Invalid size of the new layer" );
518 else if( ICV_IS_CNN_CONVOLUTION_LAYER(layer) || ICV_IS_CNN_SUBSAMPLING_LAYER(layer) )
520 if( prev_layer->n_output_planes != layer->n_input_planes ||
521 prev_layer->output_height != layer->input_height ||
522 prev_layer->output_width != layer->input_width )
523 CV_ERROR( CV_StsBadArg, "Unmatched size of the new layer" );
526 CV_ERROR( CV_StsBadArg, "Invalid layer" );
528 layer->prev_layer = prev_layer;
529 prev_layer->next_layer = layer;
535 /****************************************************************************************/
/* release callback of the network: destroys every layer through the layer's
   own release callback, following next_layer links.
   Errors are reported for a NULL <network_pptr>, for a network without any
   layer, and when the number of linked layers disagrees with n_layers.
   NOTE(review): the name given to CV_FUNCNAME ("icvReleaseCNNetwork") differs
   from the actual function name, so error reports carry the former. */
536 static void icvCNNetworkRelease( CvCNNetwork** network_pptr )
538 CV_FUNCNAME( "icvReleaseCNNetwork" );
541 CvCNNetwork* network = 0;
542 CvCNNLayer* layer = 0, *next_layer = 0;
545 if( network_pptr == NULL )
546 CV_ERROR( CV_StsBadArg, "Null double pointer" );
547 if( *network_pptr == NULL )
550 network = *network_pptr;
551 layer = network->layers;
553 CV_ERROR( CV_StsBadArg, "CNN is empty (does not contain any layer)" );
555 // k is the number of the layer to be deleted
556 for( k = 0; k < network->n_layers && layer; k++ )
/* Remember the successor before the layer itself is destroyed. */
558 next_layer = layer->next_layer;
559 layer->release( &layer );
563 if( k != network->n_layers || layer)
564 CV_ERROR( CV_StsBadArg, "Invalid network" );
571 /****************************************************************************************\
573 \****************************************************************************************/
/* Allocates and fills the CvCNNLayer header that all layer types share.
   layer_type   - type bits OR-ed with ICV_CNN_LAYER into layer->flags.
   header_size  - size of the concrete layer structure; must be at least
                  sizeof(CvCNNLayer). The whole block is zero-filled, so the
                  type-specific fields start out as zero.
   n_*_planes, *_height, *_width - geometry; every value must be >= 1 and the
                  output height/width may not exceed the input height/width.
   init_learn_rate - must be at least FLT_EPSILON.
   learn_rate_decrease_type - one of CV_CNN_LEARN_RATE_DECREASE_HYPERBOLICALLY,
                  CV_CNN_LEARN_RATE_DECREASE_SQRT_INV or
                  CV_CNN_LEARN_RATE_DECREASE_LOG_INV.
   release, forward, backward - layer callbacks; all three are required. */
574 static CvCNNLayer* icvCreateCNNLayer( int layer_type, int header_size,
575 int n_input_planes, int input_height, int input_width,
576 int n_output_planes, int output_height, int output_width,
577 float init_learn_rate, int learn_rate_decrease_type,
578 CvCNNLayerRelease release, CvCNNLayerForward forward, CvCNNLayerBackward backward )
580 CvCNNLayer* layer = 0;
582 CV_FUNCNAME("icvCreateCNNLayer");
585 CV_ASSERT( release && forward && backward )
586 CV_ASSERT( header_size >= sizeof(CvCNNLayer) )
588 if( n_input_planes < 1 || n_output_planes < 1 ||
589 input_height < 1 || input_width < 1 ||
590 output_height < 1 || output_width < 1 ||
591 input_height < output_height ||
592 input_width < output_width )
593 CV_ERROR( CV_StsBadArg, "Incorrect input or output parameters" );
594 if( init_learn_rate < FLT_EPSILON )
595 CV_ERROR( CV_StsBadArg, "Initial learning rate must be positive" );
596 if( learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_HYPERBOLICALLY &&
597 learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_SQRT_INV &&
598 learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
599 CV_ERROR( CV_StsBadArg, "Invalid type of learning rate dynamics" );
601 CV_CALL(layer = (CvCNNLayer*)cvAlloc( header_size ));
602 memset( layer, 0, header_size );
604 layer->flags = ICV_CNN_LAYER|layer_type;
605 CV_ASSERT( ICV_IS_CNN_LAYER(layer) )
607 layer->n_input_planes = n_input_planes;
608 layer->input_height = input_height;
609 layer->input_width = input_width;
611 layer->n_output_planes = n_output_planes;
612 layer->output_height = output_height;
613 layer->output_width = output_width;
615 layer->init_learn_rate = init_learn_rate;
616 layer->learn_rate_decrease_type = learn_rate_decrease_type;
618 layer->release = release;
619 layer->forward = forward;
620 layer->backward = backward;
/* Error path: entered only when an error was raised after allocation. */
624 if( cvGetErrStatus() < 0 && layer)
630 /****************************************************************************************/
/* Creates a convolution layer with a K x K kernel.
   The output planes are (input_height-K+1) x (input_width-K+1).
   K               - kernel side, must be >= 1.
   init_learn_rate - must be > 0.
   connect_mask    - optional n_output_planes x n_input_planes CV_8UC1 matrix;
                     it is validated and copied into the layer. The layer's
                     mask is also filled with ones by cvSet - presumably only
                     when no mask is supplied (the branch is not visible in
                     this view).
   weights         - optional n_output_planes x (K*K+1) CV_32FC1 matrix; it is
                     validated and copied into the layer. The layer's weights
                     are also filled from a uniform distribution bounded by
                     -1 and 1 - presumably only when no weights are supplied.
   Returns the new layer cast to CvCNNLayer*. On error the layer's matrices
   are released. */
631 ML_IMPL CvCNNLayer* cvCreateCNNConvolutionLayer(
632 int n_input_planes, int input_height, int input_width,
633 int n_output_planes, int K,
634 float init_learn_rate, int learn_rate_decrease_type,
635 CvMat* connect_mask, CvMat* weights )
638 CvCNNConvolutionLayer* layer = 0;
640 CV_FUNCNAME("cvCreateCNNConvolutionLayer");
643 const int output_height = input_height - K + 1;
644 const int output_width = input_width - K + 1;
646 if( K < 1 || init_learn_rate <= 0 )
647 CV_ERROR( CV_StsBadArg, "Incorrect parameters" );
649 CV_CALL(layer = (CvCNNConvolutionLayer*)icvCreateCNNLayer( ICV_CNN_CONVOLUTION_LAYER,
650 sizeof(CvCNNConvolutionLayer), n_input_planes, input_height, input_width,
651 n_output_planes, output_height, output_width,
652 init_learn_rate, learn_rate_decrease_type,
653 icvCNNConvolutionRelease, icvCNNConvolutionForward, icvCNNConvolutionBackward ));
/* One weight row per output plane: K*K kernel coefficients plus one extra
   column. */
656 CV_CALL(layer->weights = cvCreateMat( n_output_planes, K*K+1, CV_32FC1 ));
657 CV_CALL(layer->connect_mask = cvCreateMat( n_output_planes, n_input_planes, CV_8UC1));
661 if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
662 CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
663 if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
664 CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
665 CV_CALL(cvCopy( weights, layer->weights ));
669 CvRNG rng = cvRNG( 0xFFFFFFFF );
670 cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
/* The mask is an 8-bit matrix; the message names the type that is actually
   required by the check. */
675 if( !ICV_IS_MAT_OF_TYPE( connect_mask, CV_8UC1 ) )
676 CV_ERROR( CV_StsBadSize, "Type of connection matrix must be CV_8UC1" );
677 if( !CV_ARE_SIZES_EQ( connect_mask, layer->connect_mask ) )
678 CV_ERROR( CV_StsBadSize, "Invalid size of connection matrix" );
679 CV_CALL(cvCopy( connect_mask, layer->connect_mask ));
682 CV_CALL(cvSet( layer->connect_mask, cvRealScalar(1) ));
686 if( cvGetErrStatus() < 0 && layer )
688 cvReleaseMat( &layer->weights );
689 cvReleaseMat( &layer->connect_mask );
693 return (CvCNNLayer*)layer;
696 /****************************************************************************************/
697 ML_IMPL CvCNNLayer* cvCreateCNNSubSamplingLayer(
698 int n_input_planes, int input_height, int input_width,
699 int sub_samp_scale, float a, float s,
700 float init_learn_rate, int learn_rate_decrease_type, CvMat* weights )
703 CvCNNSubSamplingLayer* layer = 0;
705 CV_FUNCNAME("cvCreateCNNSubSamplingLayer");
708 const int output_height = input_height/sub_samp_scale;
709 const int output_width = input_width/sub_samp_scale;
710 const int n_output_planes = n_input_planes;
712 if( sub_samp_scale < 1 || a <= 0 || s <= 0)
713 CV_ERROR( CV_StsBadArg, "Incorrect parameters" );
715 CV_CALL(layer = (CvCNNSubSamplingLayer*)icvCreateCNNLayer( ICV_CNN_SUBSAMPLING_LAYER,
716 sizeof(CvCNNSubSamplingLayer), n_input_planes, input_height, input_width,
717 n_output_planes, output_height, output_width,
718 init_learn_rate, learn_rate_decrease_type,
719 icvCNNSubSamplingRelease, icvCNNSubSamplingForward, icvCNNSubSamplingBackward ));
721 layer->sub_samp_scale = sub_samp_scale;
725 CV_CALL(layer->sumX =
726 cvCreateMat( n_output_planes*output_width*output_height, 1, CV_32FC1 ));
727 CV_CALL(layer->exp2ssumWX =
728 cvCreateMat( n_output_planes*output_width*output_height, 1, CV_32FC1 ));
730 cvZero( layer->sumX );
731 cvZero( layer->exp2ssumWX );
733 CV_CALL(layer->weights = cvCreateMat( n_output_planes, 2, CV_32FC1 ));
736 if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
737 CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
738 if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
739 CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
740 CV_CALL(cvCopy( weights, layer->weights ));
744 CvRNG rng = cvRNG( 0xFFFFFFFF );
745 cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
750 if( cvGetErrStatus() < 0 && layer )
752 cvReleaseMat( &layer->exp2ssumWX );
756 return (CvCNNLayer*)layer;
759 /****************************************************************************************/
/* Creates a fully connected layer with n_inputs inputs and n_outputs outputs.
   The layer is registered with 1x1 input and output planes, n_inputs input
   planes and n_outputs output planes.
   a, s, init_learn_rate - must all be > 0.
   weights - optional n_outputs x (n_inputs+1) CV_32FC1 matrix; it is validated
             and copied into the layer. The layer's weights are also filled
             from a uniform distribution bounded by -1 and 1 - presumably only
             when no weights are supplied (the branch is not visible in this
             view).
   Returns the new layer cast to CvCNNLayer*. On error the layer's matrices
   are released. */
760 ML_IMPL CvCNNLayer* cvCreateCNNFullConnectLayer(
761 int n_inputs, int n_outputs, float a, float s,
762 float init_learn_rate, int learn_rate_decrease_type, CvMat* weights )
764 CvCNNFullConnectLayer* layer = 0;
766 CV_FUNCNAME("cvCreateCNNFullConnectLayer");
769 if( a <= 0 || s <= 0 || init_learn_rate <= 0)
770 CV_ERROR( CV_StsBadArg, "Incorrect parameters" );
772 CV_CALL(layer = (CvCNNFullConnectLayer*)icvCreateCNNLayer( ICV_CNN_FULLCONNECT_LAYER,
773 sizeof(CvCNNFullConnectLayer), n_inputs, 1, 1, n_outputs, 1, 1,
774 init_learn_rate, learn_rate_decrease_type,
775 icvCNNFullConnectRelease, icvCNNFullConnectForward, icvCNNFullConnectBackward ));
/* One cached value per output; the forward pass overwrites it on every call. */
780 CV_CALL(layer->exp2ssumWX = cvCreateMat( n_outputs, 1, CV_32FC1 ));
781 cvZero( layer->exp2ssumWX );
/* One row per output: n_inputs weights plus a final column that the forward
   pass uses as the bias. */
783 CV_CALL(layer->weights = cvCreateMat( n_outputs, n_inputs+1, CV_32FC1 ));
786 if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
787 CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
788 if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
789 CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
790 CV_CALL(cvCopy( weights, layer->weights ));
794 CvRNG rng = cvRNG( 0xFFFFFFFF );
795 cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
800 if( cvGetErrStatus() < 0 && layer )
802 cvReleaseMat( &layer->exp2ssumWX );
803 cvReleaseMat( &layer->weights );
807 return (CvCNNLayer*)layer;
811 /****************************************************************************************\
812 * Layer FORWARD functions *
813 \****************************************************************************************/
/* Forward pass of a convolution layer.
   X must be a column vector with nXplanes*Xsize rows and Y a column vector
   with nYplanes*Ysize rows.
   For every output plane <no> and every input plane <ni> whose connect-mask
   entry is non-zero, the K x K kernel of that output plane is slid over the
   input plane; for each position the weighted sum WX plus w[K*K] (the extra
   weight stored after the K*K kernel coefficients) is added to the matching
   output element.
   The connect mask is read sequentially, one entry per (no, ni) pair with
   <ni> varying fastest. */
814 static void icvCNNConvolutionForward( CvCNNLayer* _layer,
818 CV_FUNCNAME("icvCNNConvolutionForward");
820 if( !ICV_IS_CNN_CONVOLUTION_LAYER(_layer) )
821 CV_ERROR( CV_StsBadArg, "Invalid layer" );
825 const CvCNNConvolutionLayer* layer = (CvCNNConvolutionLayer*) _layer;
827 const int K = layer->K;
828 const int n_weights_for_Yplane = K*K + 1;
830 const int nXplanes = layer->n_input_planes;
831 const int Xheight = layer->input_height;
832 const int Xwidth = layer->input_width ;
833 const int Xsize = Xwidth*Xheight;
835 const int nYplanes = layer->n_output_planes;
836 const int Yheight = layer->output_height;
837 const int Ywidth = layer->output_width;
838 const int Ysize = Ywidth*Yheight;
840 int xx, yy, ni, no, kx, ky;
841 float *Yplane = 0, *Xplane = 0, *w = 0;
842 uchar* connect_mask_data = 0;
844 CV_ASSERT( X->rows == nXplanes*Xsize && X->cols == 1 );
845 CV_ASSERT( Y->rows == nYplanes*Ysize && Y->cols == 1 );
850 connect_mask_data = layer->connect_mask->data.ptr;
851 w = layer->weights->data.fl;
/* Each output plane has its own block of K*K+1 weights. */
852 for( no = 0; no < nYplanes; no++, Yplane += Ysize, w += n_weights_for_Yplane )
855 for( ni = 0; ni < nXplanes; ni++, Xplane += Xsize, connect_mask_data++ )
857 if( *connect_mask_data )
859 float* Yelem = Yplane;
861 // Xheight-K+1 == Yheight && Xwidth-K+1 == Ywidth
862 for( yy = 0; yy < Xheight-K+1; yy++ )
864 for( xx = 0; xx < Xwidth-K+1; xx++, Yelem++ )
/* Top-left corner of the K x K window for output element (yy, xx). */
866 float* templ = Xplane+yy*Xwidth+xx;
/* After K steps along a row, Xwidth-K more steps reach the start of the
   window's next row. */
868 for( ky = 0; ky < K; ky++, templ += Xwidth-K )
870 for( kx = 0; kx < K; kx++, templ++ )
872 WX += *templ*w[ky*K+kx];
875 *Yelem += WX + w[K*K];
884 /****************************************************************************************/
/* Forward pass of a sub-sampling layer.
   X must be a column vector with nplanes*Xsize rows.
   1. sumX accumulates, for every output element, input samples taken at the
      sub_sampl_scale x sub_sampl_scale offsets (ky, kx).
   2. Per plane, exp2ssumWX = exp( 2*s*( w[0]*sumX + w[1] ) ), limited to at
      most FLT_MAX; it is stored in the layer for use by back-propagation.
   3. Y = a - 2a/(exp2ssumWX + 1), which equals a*tanh( s*(w[0]*sumX + w[1]) ). */
885 static void icvCNNSubSamplingForward( CvCNNLayer* _layer,
889 CV_FUNCNAME("icvCNNSubSamplingForward");
891 if( !ICV_IS_CNN_SUBSAMPLING_LAYER(_layer) )
892 CV_ERROR( CV_StsBadArg, "Invalid layer" );
896 const CvCNNSubSamplingLayer* layer = (CvCNNSubSamplingLayer*) _layer;
898 const int sub_sampl_scale = layer->sub_samp_scale;
899 const int nplanes = layer->n_input_planes;
901 const int Xheight = layer->input_height;
902 const int Xwidth = layer->input_width ;
903 const int Xsize = Xwidth*Xheight;
905 const int Yheight = layer->output_height;
906 const int Ywidth = layer->output_width;
907 const int Ysize = Ywidth*Yheight;
909 int xx, yy, ni, kx, ky;
910 float* sumX_data = 0, *w = 0;
911 CvMat sumX_sub_col, exp2ssumWX_sub_col;
913 CV_ASSERT(X->rows == nplanes*Xsize && X->cols == 1);
914 CV_ASSERT(layer->exp2ssumWX->cols == 1 && layer->exp2ssumWX->rows == nplanes*Ysize);
916 // update inner variable layer->exp2ssumWX, which will be used in back-propagation
917 cvZero( layer->sumX );
918 cvZero( layer->exp2ssumWX );
920 for( ky = 0; ky < sub_sampl_scale; ky++ )
921 for( kx = 0; kx < sub_sampl_scale; kx++ )
923 float* Xplane = X->data.fl;
924 sumX_data = layer->sumX->data.fl;
925 for( ni = 0; ni < nplanes; ni++, Xplane += Xsize )
927 for( yy = 0; yy < Yheight; yy++ )
928 for( xx = 0; xx < Ywidth; xx++, sumX_data++ )
/* NOTE(review): the source index is (yy+ky, xx+kx), i.e. output element
   (yy, xx) sums a window starting at input (yy, xx). Summing disjoint
   scale x scale blocks would read (yy*scale+ky, xx*scale+kx) instead -
   confirm which is intended and that it agrees with the backward pass. */
929 *sumX_data += Xplane[((yy+ky)*Xwidth+(xx+kx))];
933 w = layer->weights->data.fl;
/* Headers over the first plane's Ysize rows; their data pointers are then
   advanced plane by plane. */
934 cvGetRows( layer->sumX, &sumX_sub_col, 0, Ysize );
935 cvGetRows( layer->exp2ssumWX, &exp2ssumWX_sub_col, 0, Ysize );
936 for( ni = 0; ni < nplanes; ni++, w += 2 )
/* exp2ssumWX_plane = w[0]*sumX_plane + w[1] */
938 CV_CALL(cvConvertScale( &sumX_sub_col, &exp2ssumWX_sub_col, w[0], w[1] ));
939 sumX_sub_col.data.fl += Ysize;
940 exp2ssumWX_sub_col.data.fl += Ysize;
943 CV_CALL(cvScale( layer->exp2ssumWX, layer->exp2ssumWX, 2.0*layer->s ));
944 CV_CALL(cvExp( layer->exp2ssumWX, layer->exp2ssumWX ));
945 CV_CALL(cvMinS( layer->exp2ssumWX, FLT_MAX, layer->exp2ssumWX ));
/* Scan for elements that hit the FLT_MAX limit (exponent overflow). */
948 float* exp2ssumWX_data = layer->exp2ssumWX->data.fl;
949 for( ni = 0; ni < layer->exp2ssumWX->rows; ni++, exp2ssumWX_data++ )
951 if( *exp2ssumWX_data == FLT_MAX )
956 // compute the output variable Y == ( a - 2a/(layer->exp2ssumWX + 1))
957 CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), Y ));
958 CV_CALL(cvDiv( 0, Y, Y, -2.0*layer->a ));
959 CV_CALL(cvAddS( Y, cvRealScalar(layer->a), Y ));
964 /****************************************************************************************/
/* Forward pass of a fully connected layer.
   X must be a column vector with n_input_planes rows and Y a column vector
   with n_output_planes rows.
   The weights matrix is split into its first cols-1 columns (W) and its last
   column (bias). The layer caches
       exp2ssumWX = exp( 2*s*(W*X + bias) ), limited to at most FLT_MAX,
   for use by back-propagation, and outputs
       Y = a - 2a/(exp2ssumWX + 1), which equals a*tanh( s*(W*X + bias) ). */
965 static void icvCNNFullConnectForward( CvCNNLayer* _layer, const CvMat* X, CvMat* Y )
967 CV_FUNCNAME("icvCNNFullConnectForward");
969 if( !ICV_IS_CNN_FULLCONNECT_LAYER(_layer) )
970 CV_ERROR( CV_StsBadArg, "Invalid layer" );
974 const CvCNNFullConnectLayer* layer = (CvCNNFullConnectLayer*)_layer;
975 CvMat* weights = layer->weights;
976 CvMat sub_weights, bias;
978 CV_ASSERT(X->cols == 1 && X->rows == layer->n_input_planes);
979 CV_ASSERT(Y->cols == 1 && Y->rows == layer->n_output_planes);
/* sub_weights and bias are headers over the weights matrix, not copies. */
981 CV_CALL(cvGetSubRect( weights, &sub_weights,
982 cvRect(0, 0, weights->cols-1, weights->rows )));
983 CV_CALL(cvGetCol( weights, &bias, weights->cols-1));
985 // update inner variable layer->exp2ssumWX, which will be used in Back-Propagation
/* exp2ssumWX = 2s*(sub_weights*X) + 2s*bias, then exponentiated below. */
986 CV_CALL(cvGEMM( &sub_weights, X, 2*layer->s, &bias, 2*layer->s, layer->exp2ssumWX ));
987 CV_CALL(cvExp( layer->exp2ssumWX, layer->exp2ssumWX ));
988 CV_CALL(cvMinS( layer->exp2ssumWX, FLT_MAX, layer->exp2ssumWX ));
/* Scan for elements that hit the FLT_MAX limit (exponent overflow). */
991 float* exp2ssumWX_data = layer->exp2ssumWX->data.fl;
993 for( i = 0; i < layer->exp2ssumWX->rows; i++, exp2ssumWX_data++ )
995 if( *exp2ssumWX_data == FLT_MAX )
1000 // compute the output variable Y == ( a - 2a/(layer->exp2ssumWX + 1))
1001 CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), Y ));
1002 CV_CALL(cvDiv( 0, Y, Y, -2.0*layer->a ));
1003 CV_CALL(cvAddS( Y, cvRealScalar(layer->a), Y ));
1008 /****************************************************************************************\
1009 * Layer BACKWARD functions *
1010 \****************************************************************************************/
1012 /* <dE_dY>, <dE_dX> should be row-vectors.
1013 Function computes partial derivatives <dE_dX>
1014 of the loss function with respect to the planes components
1015 of the previous layer (X).
1016 It is a basic function for back propagation method.
1017 Input parameter <dE_dY> is the partial derivative of the
1018 loss function with respect to the planes components
1019 of the current layer. */
// Backward pass of a convolution layer for training iteration <t>.
// Visible steps: validate the layer type, allocate <dY_dX>, <dY_dW> and <dE_dW>,
// fill <dY_dX> from the kernel weights and <dY_dW> from the input planes, form
// <dE_dW> = <dE_dY>*<dY_dW> and <dE_dX> = <dE_dY>*<dY_dX>, add eta*<dE_dW> to
// layer->weights, and release the three temporaries.
// NOTE(review): the declarations of dY_dX, dY_dW, dE_dW, dE_dW_mat and eta, and the
// block delimiters, are not among the lines shown here - confirm nesting and the
// initial contents of the temporaries against the full file.
1020 static void icvCNNConvolutionBackward(
1021 CvCNNLayer* _layer, int t, const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX )
1027 CV_FUNCNAME("icvCNNConvolutionBackward");
// reject anything that is not tagged as a convolution layer
1029 if( !ICV_IS_CNN_CONVOLUTION_LAYER(_layer) )
1030 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1034 const CvCNNConvolutionLayer* layer = (CvCNNConvolutionLayer*) _layer;
// the kernel window is K x K (see the ky/kx loops below)
1036 const int K = layer->K;
// geometry of the input planes (X)
1038 const int n_X_planes = layer->n_input_planes;
1039 const int X_plane_height = layer->input_height;
1040 const int X_plane_width = layer->input_width;
1041 const int X_plane_size = X_plane_height*X_plane_width;
// geometry of the output planes (Y)
1043 const int n_Y_planes = layer->n_output_planes;
1044 const int Y_plane_height = layer->output_height;
1045 const int Y_plane_width = layer->output_width;
1046 const int Y_plane_size = Y_plane_height*Y_plane_width;
1048 int no, ni, yy, xx, ky, kx;
// Y_idx: row offset (in dY_dX and dY_dW) of the current output plane;
// X_idx: column offset into dY_dX of the current input plane
1049 int X_idx = 0, Y_idx = 0;
1051 float *X_plane = 0, *w = 0;
1053 CvMat* weights = layer->weights;
// t is used as a divisor in the learning-rate schedule below
1055 CV_ASSERT( t >= 1 );
1056 CV_ASSERT( n_Y_planes == weights->rows );
// dY_dX: (number of output elements) x (rows of X)
// dY_dW: (number of output elements) x (number of weights)
// dE_dW: 1 x (number of weights)
1058 dY_dX = cvCreateMat( n_Y_planes*Y_plane_size, X->rows, CV_32FC1 );
1059 dY_dW = cvCreateMat( dY_dX->rows, weights->cols*weights->rows, CV_32FC1 );
1060 dE_dW = cvCreateMat( 1, dY_dW->cols, CV_32FC1 );
1065 // compute gradient of the loss function with respect to X and W
1066 for( no = 0; no < n_Y_planes; no++, Y_idx += Y_plane_size )
// weights of output plane <no> start at offset no*(K*K+1): K*K kernel values + bias
1068 w = weights->data.fl + no*(K*K+1);
1070 X_plane = X->data.fl;
1071 for( ni = 0; ni < n_X_planes; ni++, X_plane += X_plane_size )
// a non-zero mask byte marks input plane <ni> as connected to output plane <no>
1073 if( layer->connect_mask->data.ptr[ni*n_Y_planes+no] )
// (yy, xx) walk the output positions, (ky, kx) walk the K x K window
1075 for( yy = 0; yy < X_plane_height - K + 1; yy++ )
1077 for( xx = 0; xx < X_plane_width - K + 1; xx++ )
1079 for( ky = 0; ky < K; ky++ )
1081 for( kx = 0; kx < K; kx++ )
// derivative of this output element w.r.t. the input element is the kernel value
1083 CV_MAT_ELEM(*dY_dX, float, Y_idx+yy*Y_plane_width+xx,
1084 X_idx+(yy+ky)*X_plane_width+(xx+kx)) = w[ky*K+kx];
1086 // dY_dWi, i=1,...,K*K
1087 CV_MAT_ELEM(*dY_dW, float, Y_idx+yy*Y_plane_width+xx,
1088 no*(K*K+1)+ky*K+kx) +=
1089 X_plane[(yy+ky)*X_plane_width+(xx+kx)];
1092 // dY_dW(K*K+1)==1 because W(K*K+1) is bias
1093 CV_MAT_ELEM(*dY_dW, float, Y_idx+yy*Y_plane_width+xx,
1094 no*(K*K+1)+K*K) += 1;
// move the dY_dX column offset on by one input plane
// NOTE(review): no reset of X_idx per output plane is visible in this excerpt - confirm
1098 X_idx += X_plane_size;
// chain rule: the row-vector <dE_dY> times each Jacobian
1102 CV_CALL(cvMatMul( dE_dY, dY_dW, dE_dW ));
1103 CV_CALL(cvMatMul( dE_dY, dY_dX, dE_dX ));
// learning-rate schedule: eta = -init_learn_rate / f(t), f chosen by the decrease type
1109 if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
1110 eta = -layer->init_learn_rate/logf(1+(float)t);
1111 else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
1112 eta = -layer->init_learn_rate/sqrtf((float)t);
// remaining schedule, f(t) = t (the line selecting it is not visible here)
1114 eta = -layer->init_learn_rate/(float)t;
// view <dE_dW> with weights->rows rows, then weights = eta*dE_dW_mat + weights
1115 cvReshape( dE_dW, &dE_dW_mat, 0, weights->rows );
1116 cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
// free the temporaries allocated above
1121 cvReleaseMat( &dY_dX );
1122 cvReleaseMat( &dY_dW );
1123 cvReleaseMat( &dE_dW );
1126 /****************************************************************************************/
// Backward pass of a sub-sampling layer for training iteration <t>; the third
// parameter is an unnamed const CvMat* and is therefore not referenced.
// Visible steps: validate the layer type, compute the activation derivative from
// layer->exp2ssumWX, derive <dE_dW> (two values per output plane) from <dE_dY>,
// layer->sumX and that derivative, fill <dE_dX> by replicating each weighted
// derivative over a scale x scale block, add eta*<dE_dW> to layer->weights, and
// release the temporaries.
// NOTE(review): the declarations of dE_dW, eta and the counters i, j, k, m, and the
// block delimiters, are not among the lines shown here.
1127 static void icvCNNSubSamplingBackward(
1128 CvCNNLayer* _layer, int t, const CvMat*, const CvMat* dE_dY, CvMat* dE_dX )
1130 // derivative of activation function
1131 CvMat* dY_dX_elems = 0; // elements of matrix dY_dX
1132 CvMat* dY_dW_elems = 0; // elements of matrix dY_dW
1135 CV_FUNCNAME("icvCNNSubSamplingBackward");
// reject anything that is not tagged as a sub-sampling layer
1137 if( !ICV_IS_CNN_SUBSAMPLING_LAYER(_layer) )
1138 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1142 const CvCNNSubSamplingLayer* layer = (CvCNNSubSamplingLayer*) _layer;
1144 const int Xwidth = layer->input_width;
1145 const int Ywidth = layer->output_width;
1146 const int Yheight = layer->output_height;
// number of elements in one output plane
1147 const int Ysize = Ywidth * Yheight;
1148 const int scale = layer->sub_samp_scale;
// total number of output rows over all output planes
1149 const int k_max = layer->n_output_planes * Yheight;
1152 float* dY_dX_current_elem = 0, *dE_dX_start = 0, *dE_dW_data = 0, *w = 0;
1153 CvMat dy_dw0, dy_dw1;
1154 CvMat activ_func_der, sumX_row;
1155 CvMat dE_dY_sub_row, dY_dX_sub_col, dy_dw0_sub_row, dy_dw1_sub_row;
// dY_dX_elems: column with one entry per row of layer->sumX
// dY_dW_elems: two rows of that length
// dE_dW: row-vector with two entries per output plane
1157 CV_CALL(dY_dX_elems = cvCreateMat( layer->sumX->rows, 1, CV_32FC1 ));
1158 CV_CALL(dY_dW_elems = cvCreateMat( 2, layer->sumX->rows, CV_32FC1 ));
1159 CV_CALL(dE_dW = cvCreateMat( 1, 2*layer->n_output_planes, CV_32FC1 ));
1161 // compute derivative of activ.func.
1162 // ==<dY_dX_elems> = 4as*(layer->exp2ssumWX)/(layer->exp2ssumWX + 1)^2
// done in three steps: (e+1), then ^-2, then multiplied by e with scale 4*a*s
1163 CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), dY_dX_elems ));
1164 CV_CALL(cvPow( dY_dX_elems, dY_dX_elems, -2.0 ));
1165 CV_CALL(cvMul( dY_dX_elems, layer->exp2ssumWX, dY_dX_elems, 4.0*layer->a*layer->s ));
1168 // a) compute <dY_dW_elems>
// activ_func_der is a one-row view of dY_dX_elems; both rows of dY_dW_elems start
// as a copy of it
1169 cvReshape( dY_dX_elems, &activ_func_der, 0, 1 );
1170 cvGetRow( dY_dW_elems, &dy_dw0, 0 );
1171 cvGetRow( dY_dW_elems, &dy_dw1, 1 );
1172 CV_CALL(cvCopy( &activ_func_der, &dy_dw0 ));
1173 CV_CALL(cvCopy( &activ_func_der, &dy_dw1 ));
// row 0 is additionally multiplied element-wise by sumX; row 1 is left as copied
1175 cvReshape( layer->sumX, &sumX_row, 0, 1 );
1176 cvMul( &dy_dw0, &sumX_row, &dy_dw0 );
1178 // b) compute <dE_dW> = <dE_dY>*<dY_dW_elems>
// headers over the first Ysize columns, i.e. the slice of the first output plane
1179 cvGetCols( dE_dY, &dE_dY_sub_row, 0, Ysize );
1180 cvGetCols( &dy_dw0, &dy_dw0_sub_row, 0, Ysize );
1181 cvGetCols( &dy_dw1, &dy_dw1_sub_row, 0, Ysize );
1182 dE_dW_data = dE_dW->data.fl;
1183 for( i = 0; i < layer->n_output_planes; i++ )
// two dot products per plane are stored consecutively in dE_dW
1185 *dE_dW_data++ = (float)cvDotProduct( &dE_dY_sub_row, &dy_dw0_sub_row );
1186 *dE_dW_data++ = (float)cvDotProduct( &dE_dY_sub_row, &dy_dw1_sub_row );
// slide the three headers to the next plane's slice
1188 dE_dY_sub_row.data.fl += Ysize;
1189 dy_dw0_sub_row.data.fl += Ysize;
1190 dy_dw1_sub_row.data.fl += Ysize;
1193 // compute <dY_dX> = layer->weights*<dY_dX>
// each Ysize-long slice of dY_dX_elems is scaled by the float currently under <w>
// NOTE(review): <w> advances one float per plane, while <dE_dW> above (later added to
// the weights element-wise) holds two values per plane - looks like the stride here
// may need to be 2; confirm against the weights layout
1194 w = layer->weights->data.fl;
1195 cvGetRows( dY_dX_elems, &dY_dX_sub_col, 0, Ysize );
1196 for( i = 0; i < layer->n_input_planes; i++, w++, dY_dX_sub_col.data.fl += Ysize )
1197 CV_CALL(cvConvertScale( &dY_dX_sub_col, &dY_dX_sub_col, (float)*w ));
// turn dY_dX_elems into a single row and multiply it element-wise by <dE_dY>
1200 CV_CALL(cvReshape( dY_dX_elems, dY_dX_elems, 0, 1 ));
1201 CV_CALL(cvMul( dY_dX_elems, dE_dY, dY_dX_elems ));
// spread every element of dY_dX_elems over a scale x scale block of <dE_dX>
1203 dY_dX_current_elem = dY_dX_elems->data.fl;
1204 dE_dX_start = dE_dX->data.fl;
1205 for( k = 0; k < k_max; k++ )
1207 for( i = 0; i < Ywidth; i++, dY_dX_current_elem++ )
1209 float* dE_dX_current_elem = dE_dX_start;
// after <scale> writes in a row, skip Xwidth - scale floats to reach the next input row
1210 for( j = 0; j < scale; j++, dE_dX_current_elem += Xwidth - scale )
1212 for( m = 0; m < scale; m++, dE_dX_current_elem++ )
1213 *dE_dX_current_elem = *dY_dX_current_elem;
// next block starts <scale> columns to the right
1215 dE_dX_start += scale;
// one output row consumed: skip the remaining scale-1 input rows
1217 dE_dX_start += Xwidth * (scale - 1);
1222 CvMat dE_dW_mat, *weights = layer->weights;
// learning-rate schedule: eta = -init_learn_rate / f(t), f chosen by the decrease type
1224 if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
1225 eta = -layer->init_learn_rate/logf(1+(float)t);
1226 else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
1227 eta = -layer->init_learn_rate/sqrtf((float)t);
// remaining schedule, f(t) = t (the line selecting it is not visible here)
1229 eta = -layer->init_learn_rate/(float)t;
// view <dE_dW> with weights->rows rows, then weights = eta*dE_dW_mat + weights
1230 cvReshape( dE_dW, &dE_dW_mat, 0, weights->rows );
1231 cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
// free the temporaries allocated above
1236 cvReleaseMat( &dY_dX_elems );
1237 cvReleaseMat( &dY_dW_elems );
1238 cvReleaseMat( &dE_dW );
1241 /****************************************************************************************/
1242 /* <dE_dY>, <dE_dX> should be row-vectors.
1243 Function computes partial derivatives <dE_dX>, <dE_dW>
1244 of the loss function with respect to the planes components
1245 of the previous layer (X) and the weights of the current layer (W)
1246 and updates weights of the current layer by using <dE_dW>.
1247 It is a basic function for back propagation method.
1248 Input parameter <dE_dY> is the partial derivative of the
1249 loss function with respect to the planes components
1250 of the current layer. */
// Backward pass of a fully connected layer.
// Visible steps: validate the layer type and the vector shapes, compute
// <dE_dY> times the activation derivative, multiply it by the weights without
// their last column to obtain <dE_dX>, assemble <dE_dW>, add eta*<dE_dW> to
// layer->weights, and release the two temporaries.
// NOTE(review): only the first line of the parameter list is shown here; the body
// also uses <t>, <X>, <dE_dY> and <dE_dX>. Declarations of dE_dW, dE_dW_mat, eta and
// i, and the block delimiters, are likewise not among the visible lines.
1251 static void icvCNNFullConnectBackward( CvCNNLayer* _layer,
1257 CvMat* dE_dY_activ_func_der = 0;
1260 CV_FUNCNAME( "icvCNNFullConnectBackward" );
// reject anything that is not tagged as a fully connected layer
1262 if( !ICV_IS_CNN_FULLCONNECT_LAYER(_layer) )
1263 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1267 const CvCNNFullConnectLayer* layer = (CvCNNFullConnectLayer*)_layer;
1268 const int n_outputs = layer->n_output_planes;
1269 const int n_inputs = layer->n_input_planes;
1272 float* dE_dY_activ_func_der_data;
1273 CvMat* weights = layer->weights;
1274 CvMat sub_weights, Xtemplate, Xrow, exp2ssumWXrow;
// X must be a column of n_inputs values; dE_dY and dE_dX must be rows of
// n_outputs and n_inputs values respectively
1276 CV_ASSERT(X->cols == 1 && X->rows == n_inputs);
1277 CV_ASSERT(dE_dY->rows == 1 && dE_dY->cols == n_outputs );
1278 CV_ASSERT(dE_dX->rows == 1 && dE_dX->cols == n_inputs );
1280 // we violate the convention about vector's orientation because
1281 // here it is more convenient to make this parameter a row-vector
1282 CV_CALL(dE_dY_activ_func_der = cvCreateMat( 1, n_outputs, CV_32FC1 ));
// dE_dW: row-vector with one entry per element of <weights>
1283 CV_CALL(dE_dW = cvCreateMat( 1, weights->rows*weights->cols, CV_32FC1 ));
1285 // 1) compute gradients dE_dX and dE_dW
1286 // activ_func_der == 4as*(layer->exp2ssumWX)/(layer->exp2ssumWX + 1)^2
// exp2ssumWXrow is a header over exp2ssumWX reshaped to exp2ssumWX->cols rows
1287 CV_CALL(cvReshape( layer->exp2ssumWX, &exp2ssumWXrow, 0, layer->exp2ssumWX->cols ));
// three steps: (e+1), then ^-2, then multiplied by e with scale 4*a*s
1288 CV_CALL(cvAddS( &exp2ssumWXrow, cvRealScalar(1), dE_dY_activ_func_der ));
1289 CV_CALL(cvPow( dE_dY_activ_func_der, dE_dY_activ_func_der, -2.0 ));
1290 CV_CALL(cvMul( dE_dY_activ_func_der, &exp2ssumWXrow, dE_dY_activ_func_der,
1291 4.0*layer->a*layer->s ));
// fold <dE_dY> in element-wise, so the buffer now holds dE_dY * activ_func_der
1292 CV_CALL(cvMul( dE_dY, dE_dY_activ_func_der, dE_dY_activ_func_der ));
1294 // sub_weights = d(W*(X|1))/dX
// all columns of <weights> except the last one
1295 CV_CALL(cvGetSubRect( weights, &sub_weights,
1296 cvRect(0, 0, weights->cols-1, weights->rows) ));
1297 CV_CALL(cvMatMul( dE_dY_activ_func_der, &sub_weights, dE_dX ));
// fill <dE_dW> in chunks of n_inputs+1 floats, one chunk per output:
// n_inputs values of X scaled by that output's derivative, then the derivative itself
1299 cvReshape( X, &Xrow, 0, 1 );
1300 dE_dY_activ_func_der_data = dE_dY_activ_func_der->data.fl;
// Xtemplate is a 1 x n_inputs header that slides over the data of <dE_dW>
1301 Xtemplate = cvMat( 1, n_inputs, CV_32FC1, dE_dW->data.fl );
1302 for( i = 0; i < n_outputs; i++, Xtemplate.data.fl += n_inputs + 1 )
1304 CV_CALL(cvConvertScale( &Xrow, &Xtemplate, *dE_dY_activ_func_der_data ));
// the float just past the header receives the derivative, then the pointer advances
1305 Xtemplate.data.fl[n_inputs] = *dE_dY_activ_func_der_data++;
1308 // 2) update weights
// learning-rate schedule: eta = -init_learn_rate / f(t), f chosen by the decrease type
1312 if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
1313 eta = -layer->init_learn_rate/logf(1+(float)t);
1314 else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
1315 eta = -layer->init_learn_rate/sqrtf((float)t);
// remaining schedule, f(t) = t (the line selecting it is not visible here)
1317 eta = -layer->init_learn_rate/(float)t;
// view <dE_dW> with n_outputs rows, then weights = eta*dE_dW_mat + weights
1318 cvReshape( dE_dW, &dE_dW_mat, 0, n_outputs );
1319 cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
// free the temporaries allocated above
1324 cvReleaseMat( &dE_dY_activ_func_der );
1325 cvReleaseMat( &dE_dW );
1328 /****************************************************************************************\
1329 * Layer RELEASE functions *
1330 \****************************************************************************************/
// Releases the matrices held by a convolution layer reached through <p_layer>.
// Visible behaviour: raises CV_StsNullPtr ("Null double pointer") under a condition
// that is not among the lines shown, raises CV_StsBadArg if the pointed-to object is
// not a convolution layer, then releases layer->weights and layer->connect_mask.
// NOTE(review): freeing of the layer structure itself is not visible in this excerpt.
1331 static void icvCNNConvolutionRelease( CvCNNLayer** p_layer )
1333 CV_FUNCNAME("icvCNNConvolutionRelease");
1336 CvCNNConvolutionLayer* layer = 0;
1339 CV_ERROR( CV_StsNullPtr, "Null double pointer" );
// fetch the layer pointer stored at *p_layer
1341 layer = *(CvCNNConvolutionLayer**)p_layer;
1345 if( !ICV_IS_CNN_CONVOLUTION_LAYER(layer) )
1346 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1348 cvReleaseMat( &layer->weights );
1349 cvReleaseMat( &layer->connect_mask );
1355 /****************************************************************************************/
// Releases the matrices held by a sub-sampling layer reached through <p_layer>.
// Visible behaviour: raises CV_StsNullPtr ("Null double pointer") under a condition
// that is not among the lines shown, raises CV_StsBadArg if the pointed-to object is
// not a sub-sampling layer, then releases layer->exp2ssumWX and layer->weights.
// NOTE(review): layer->sumX is read by the backward pass but no release of it is
// visible here - confirm it is freed on a line outside this excerpt.
1356 static void icvCNNSubSamplingRelease( CvCNNLayer** p_layer )
1358 CV_FUNCNAME("icvCNNSubSamplingRelease");
1361 CvCNNSubSamplingLayer* layer = 0;
1364 CV_ERROR( CV_StsNullPtr, "Null double pointer" );
// fetch the layer pointer stored at *p_layer
1366 layer = *(CvCNNSubSamplingLayer**)p_layer;
1370 if( !ICV_IS_CNN_SUBSAMPLING_LAYER(layer) )
1371 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1373 cvReleaseMat( &layer->exp2ssumWX );
1374 cvReleaseMat( &layer->weights );
1380 /****************************************************************************************/
// Releases the matrices held by a fully connected layer reached through <p_layer>.
// Visible behaviour: raises CV_StsNullPtr ("Null double pointer") under a condition
// that is not among the lines shown, raises CV_StsBadArg if the pointed-to object is
// not a fully connected layer, then releases layer->exp2ssumWX and layer->weights.
// NOTE(review): freeing of the layer structure itself is not visible in this excerpt.
1381 static void icvCNNFullConnectRelease( CvCNNLayer** p_layer )
1383 CV_FUNCNAME("icvCNNFullConnectRelease");
1386 CvCNNFullConnectLayer* layer = 0;
1389 CV_ERROR( CV_StsNullPtr, "Null double pointer" );
// fetch the layer pointer stored at *p_layer
1391 layer = *(CvCNNFullConnectLayer**)p_layer;
1395 if( !ICV_IS_CNN_FULLCONNECT_LAYER(layer) )
1396 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1398 cvReleaseMat( &layer->exp2ssumWX );
1399 cvReleaseMat( &layer->weights );
1405 /****************************************************************************************\
1406 * Read/Write CNN classifier *
1407 \****************************************************************************************/
// Predicate installed as the <is_instance> callback of the CNN type record:
// returns the value of CV_IS_CNN(ptr).
1408 static int icvIsCNNModel( const void* ptr )
1410 return CV_IS_CNN(ptr);
1413 /****************************************************************************************/
// Release callback installed in the CNN type record.
// Visible behaviour: raises CV_StsNullPtr ("NULL double pointer") under a condition
// that is not among the lines shown, asserts that *ptr is a CNN model, then
// forwards to icvCNNModelRelease.
1414 static void icvReleaseCNNModel( void** ptr )
1416 CV_FUNCNAME("icvReleaseCNNModel");
1420 CV_ERROR( CV_StsNullPtr, "NULL double pointer" );
1421 CV_ASSERT(CV_IS_CNN(*ptr));
1423 icvCNNModelRelease( (CvStatModel**)ptr );
1428 /****************************************************************************************/
// Reads one layer description from the file-storage node <node> and creates the
// matching layer object (convolution, sub-sampling or fully connected).
// Visible behaviour: every scalar field is read with -1 as the "missing" default;
// a negative value or a missing <weights> matrix raises CV_StsParseError; an
// unrecognised <layer_type> raises CV_StsBadArg ("Invalid <layer_type>"). On
// error a partly created layer is released; the <weights> and <connect_mask>
// matrices read here are released before the function ends.
// NOTE(review): the declarations of weights, a and s, the return statement and the
// block delimiters are not among the lines shown here.
1429 static CvCNNLayer* icvReadCNNLayer( CvFileStorage* fs, CvFileNode* node )
1431 CvCNNLayer* layer = 0;
1433 CvMat* connect_mask = 0;
1435 CV_FUNCNAME("icvReadCNNLayer");
1438 int n_input_planes, input_height, input_width;
1439 int n_output_planes, output_height, output_width;
1440 int learn_type, layer_type;
1441 float init_learn_rate;
// fields common to all layer kinds; -1 marks "not present in the file"
1443 CV_CALL(n_input_planes = cvReadIntByName( fs, node, "n_input_planes", -1 ));
1444 CV_CALL(input_height = cvReadIntByName( fs, node, "input_height", -1 ));
1445 CV_CALL(input_width = cvReadIntByName( fs, node, "input_width", -1 ));
1446 CV_CALL(n_output_planes = cvReadIntByName( fs, node, "n_output_planes", -1 ));
1447 CV_CALL(output_height = cvReadIntByName( fs, node, "output_height", -1 ));
1448 CV_CALL(output_width = cvReadIntByName( fs, node, "output_width", -1 ));
1449 CV_CALL(layer_type = cvReadIntByName( fs, node, "layer_type", -1 ));
1451 CV_CALL(init_learn_rate = (float)cvReadRealByName( fs, node, "init_learn_rate", -1 ));
1452 CV_CALL(learn_type = cvReadIntByName( fs, node, "learn_rate_decrease_type", -1 ));
1453 CV_CALL(weights = (CvMat*)cvReadByName( fs, node, "weights" ));
// any negative field or absent weights is treated as a parse error
1455 if( n_input_planes < 0 || input_height < 0 || input_width < 0 ||
1456 n_output_planes < 0 || output_height < 0 || output_width < 0 ||
1457 init_learn_rate < 0 || learn_type < 0 || layer_type < 0 || !weights )
1458 CV_ERROR( CV_StsParseError, "" );
1460 if( layer_type == ICV_CNN_CONVOLUTION_LAYER )
// kernel size is recovered from the size difference and must agree in both directions
1462 const int K = input_height - output_height + 1;
1463 if( K <= 0 || K != input_width - output_width + 1 )
1464 CV_ERROR( CV_StsBadArg, "Invalid <K>" );
1466 CV_CALL(connect_mask = (CvMat*)cvReadByName( fs, node, "connect_mask" ));
1468 CV_ERROR( CV_StsParseError, "Missing <connect mask>" );
1470 CV_CALL(layer = cvCreateCNNConvolutionLayer(
1471 n_input_planes, input_height, input_width, n_output_planes, K,
1472 init_learn_rate, learn_type, connect_mask, weights ));
1474 else if( layer_type == ICV_CNN_SUBSAMPLING_LAYER )
// scale is recovered by integer division and must agree in both directions
// NOTE(review): an output_height or output_width of 0 passes the "< 0" test above
// and would be a zero divisor here - confirm whether that case is excluded elsewhere
1477 const int sub_samp_scale = input_height/output_height;
1479 if( sub_samp_scale <= 0 || sub_samp_scale != input_width/output_width )
1480 CV_ERROR( CV_StsBadArg, "Invalid <sub_samp_scale>" );
// activation parameters; -1 marks "not present in the file"
1482 CV_CALL(a = (float)cvReadRealByName( fs, node, "a", -1 ));
1483 CV_CALL(s = (float)cvReadRealByName( fs, node, "s", -1 ));
1484 if( a < 0 || s < 0 )
1485 CV_ERROR( CV_StsParseError, "Missing <a> or <s>" );
1487 CV_CALL(layer = cvCreateCNNSubSamplingLayer(
1488 n_input_planes, input_height, input_width, sub_samp_scale,
1489 a, s, init_learn_rate, learn_type, weights ));
1491 else if( layer_type == ICV_CNN_FULLCONNECT_LAYER )
1494 CV_CALL(a = (float)cvReadRealByName( fs, node, "a", -1 ));
1495 CV_CALL(s = (float)cvReadRealByName( fs, node, "s", -1 ));
1496 if( a < 0 || s < 0 )
1497 CV_ERROR( CV_StsParseError, "" );
// a fully connected layer is only accepted with 1 x 1 input and output planes
1498 if( input_height != 1 || input_width != 1 ||
1499 output_height != 1 || output_width != 1 )
1500 CV_ERROR( CV_StsBadArg, "" );
1502 CV_CALL(layer = cvCreateCNNFullConnectLayer( n_input_planes, n_output_planes,
1503 a, s, init_learn_rate, learn_type, weights ));
1506 CV_ERROR( CV_StsBadArg, "Invalid <layer_type>" );
// on failure, drop a layer that was already created
1510 if( cvGetErrStatus() < 0 && layer )
1511 layer->release( &layer );
// the matrices read from the file are released here
// NOTE(review): presumably the create functions copy them - verify
1513 cvReleaseMat( &weights );
1514 cvReleaseMat( &connect_mask );
1519 /****************************************************************************************/
// Writes one layer to <fs> as a map node tagged "opencv-ml-cnn-layer".
// Visible behaviour: raises CV_StsBadArg ("Invalid layer") if <layer> is not a CNN
// layer or is none of the three known kinds; otherwise writes the common fields,
// then "layer_type" plus the kind-specific fields (connect_mask, or a and s).
1520 static void icvWriteCNNLayer( CvFileStorage* fs, CvCNNLayer* layer )
1522 CV_FUNCNAME ("icvWriteCNNLayer");
1525 if( !ICV_IS_CNN_LAYER(layer) )
1526 CV_ERROR( CV_StsBadArg, "Invalid layer" );
// unnamed map node (NULL name)
1528 CV_CALL( cvStartWriteStruct( fs, NULL, CV_NODE_MAP, "opencv-ml-cnn-layer" ));
// fields shared by every layer kind
1530 CV_CALL(cvWriteInt( fs, "n_input_planes", layer->n_input_planes ));
1531 CV_CALL(cvWriteInt( fs, "input_height", layer->input_height ));
1532 CV_CALL(cvWriteInt( fs, "input_width", layer->input_width ));
1533 CV_CALL(cvWriteInt( fs, "n_output_planes", layer->n_output_planes ));
1534 CV_CALL(cvWriteInt( fs, "output_height", layer->output_height ));
1535 CV_CALL(cvWriteInt( fs, "output_width", layer->output_width ));
1536 CV_CALL(cvWriteInt( fs, "learn_rate_decrease_type", layer->learn_rate_decrease_type));
1537 CV_CALL(cvWriteReal( fs, "init_learn_rate", layer->init_learn_rate ));
1538 CV_CALL(cvWrite( fs, "weights", layer->weights ));
// kind-specific part: convolution layers store their connection mask
1540 if( ICV_IS_CNN_CONVOLUTION_LAYER( layer ))
1542 CvCNNConvolutionLayer* l = (CvCNNConvolutionLayer*)layer;
1543 CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_CONVOLUTION_LAYER ));
1544 CV_CALL(cvWrite( fs, "connect_mask", l->connect_mask ));
// sub-sampling layers store the activation parameters a and s
1546 else if( ICV_IS_CNN_SUBSAMPLING_LAYER( layer ) )
1548 CvCNNSubSamplingLayer* l = (CvCNNSubSamplingLayer*)layer;
1549 CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_SUBSAMPLING_LAYER ));
1550 CV_CALL(cvWriteReal( fs, "a", l->a ));
1551 CV_CALL(cvWriteReal( fs, "s", l->s ));
// fully connected layers store the activation parameters a and s
1553 else if( ICV_IS_CNN_FULLCONNECT_LAYER( layer ) )
1555 CvCNNFullConnectLayer* l = (CvCNNFullConnectLayer*)layer;
1556 CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_FULLCONNECT_LAYER ));
1557 CV_CALL(cvWriteReal( fs, "a", l->a ));
1558 CV_CALL(cvWriteReal( fs, "s", l->s ));
1561 CV_ERROR( CV_StsBadArg, "Invalid layer" );
1563 CV_CALL( cvEndWriteStruct( fs )); //"opencv-ml-cnn-layer"
1568 /****************************************************************************************/
// Read callback installed in the CNN type record: rebuilds a CvCNNStatModel from
// <root_node>.
// Visible steps: create the model shell, read "etalons" and "cls_labels" (both
// required, otherwise CV_StsParseError), locate the "network" sequence, read its
// first element to create the network and add each further element as a layer.
// If an error status is set afterwards, the model and the last layer are released.
// NOTE(review): the declarations of node, seq, reader and i, the return statement
// and the block delimiters are not among the lines shown here.
1569 static void* icvReadCNNModel( CvFileStorage* fs, CvFileNode* root_node )
1571 CvCNNStatModel* cnn = 0;
1572 CvCNNLayer* layer = 0;
1574 CV_FUNCNAME("icvReadCNNModel");
// model shell wired to the CNN release / predict / update callbacks
1582 CV_CALL(cnn = (CvCNNStatModel*)cvCreateStatModel(
1583 CV_STAT_MODEL_MAGIC_VAL|CV_CNN_MAGIC_VAL, sizeof(CvCNNStatModel),
1584 icvCNNModelRelease, icvCNNModelPredict, icvCNNModelUpdate ));
1586 CV_CALL(cnn->etalons = (CvMat*)cvReadByName( fs, root_node, "etalons" ));
1587 CV_CALL(cnn->cls_labels = (CvMat*)cvReadByName( fs, root_node, "cls_labels" ));
1589 if( !cnn->etalons || !cnn->cls_labels )
1590 CV_ERROR( CV_StsParseError, "No <etalons> or <cls_labels> in CNN model" );
// NOTE(review): <node> is dereferenced on the very next line, before the sequence
// check and without a NULL test - a file with no "network" entry looks like it would
// crash here if the lookup returns NULL; confirm and guard
1592 CV_CALL( node = cvGetFileNodeByName( fs, root_node, "network" ));
1593 seq = node->data.seq;
1594 if( !CV_NODE_IS_SEQ(node->tag) )
1595 CV_ERROR( CV_StsBadArg, "" );
// the first sequence element creates the network
1597 CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
1598 CV_CALL(layer = icvReadCNNLayer( fs, (CvFileNode*)reader.ptr ));
1599 CV_CALL(cnn->network = cvCreateCNNetwork( layer ));
// remaining elements are appended one by one
1601 for( i = 1; i < seq->total; i++ )
1603 CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
1604 CV_CALL(layer = icvReadCNNLayer( fs, (CvFileNode*)reader.ptr ));
1605 CV_CALL(cnn->network->add_layer( cnn->network, layer ));
// error path
// NOTE(review): if <layer> was already handed to the network, releasing both the
// model and the layer may release it twice - verify ownership
1610 if( cvGetErrStatus() < 0 )
1612 if( cnn ) cnn->release( (CvStatModel**)&cnn );
1613 if( layer ) layer->release( &layer );
1618 /****************************************************************************************/
// Write callback installed in the CNN type record: stores the model <struct_ptr>
// under <name> as a map tagged CV_TYPE_NAME_ML_CNN, containing "etalons",
// "cls_labels" and a "network" sequence with one entry per layer. The last
// parameter (an unnamed CvAttrList) is not referenced.
// Raises CV_StsBadArg if <struct_ptr> is not a CNN model ("Invalid pointer") or if
// the layer list does not contain exactly n_layers entries ("Invalid network").
// NOTE(review): the return type and the declarations of n_layers, i and layer are
// not among the lines shown here.
1620 icvWriteCNNModel( CvFileStorage* fs, const char* name,
1621 const void* struct_ptr, CvAttrList )
1624 CV_FUNCNAME ("icvWriteCNNModel");
1627 CvCNNStatModel* cnn = (CvCNNStatModel*)struct_ptr;
1631 if( !CV_IS_CNN(cnn) )
1632 CV_ERROR( CV_StsBadArg, "Invalid pointer" );
1634 n_layers = cnn->network->n_layers;
1636 CV_CALL( cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_CNN ));
1638 CV_CALL(cvWrite( fs, "etalons", cnn->etalons ));
1639 CV_CALL(cvWrite( fs, "cls_labels", cnn->cls_labels ));
1641 CV_CALL( cvStartWriteStruct( fs, "network", CV_NODE_SEQ ));
// walk the linked list of layers, stopping at n_layers entries or at the list end
1643 layer = cnn->network->layers;
1644 for( i = 0; i < n_layers && layer; i++, layer = layer->next_layer )
1645 CV_CALL(icvWriteCNNLayer( fs, layer ));
// the list ended early, or has more entries than n_layers
1646 if( i < n_layers || layer )
1647 CV_ERROR( CV_StsBadArg, "Invalid network" );
1649 CV_CALL( cvEndWriteStruct( fs )); //"network"
1650 CV_CALL( cvEndWriteStruct( fs )); //"opencv-ml-cnn"
// Fills a type record with the CNN callbacks defined in this file (is_instance,
// release, read, write) and the type name CV_TYPE_NAME_ML_CNN, then passes it to
// cvRegisterType.
// NOTE(review): the declaration of <info> and the return statement are not among
// the lines shown here.
1655 static int icvRegisterCNNStatModelType()
1659 info.header_size = sizeof( info );
1660 info.is_instance = icvIsCNNModel;
1661 info.release = icvReleaseCNNModel;
1662 info.read = icvReadCNNModel;
1663 info.write = icvWriteCNNModel;
1665 info.type_name = CV_TYPE_NAME_ML_CNN;
1666 cvRegisterType( &info );
1669 } // End of icvRegisterCNNStatModelType
// File-scope variable whose initialiser calls icvRegisterCNNStatModelType(), so the
// registration runs when this file's static objects are initialised.
1671 static int cnn = icvRegisterCNNStatModelType();