+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// Intel License Agreement
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "_ml.h"
-
-CvNormalBayesClassifier::CvNormalBayesClassifier()
-{
- var_count = var_all = 0;
- var_idx = 0;
- cls_labels = 0;
- count = 0;
- sum = 0;
- productsum = 0;
- avg = 0;
- inv_eigen_values = 0;
- cov_rotate_mats = 0;
- c = 0;
- default_model_name = "my_nb";
-}
-
-
-void CvNormalBayesClassifier::clear()
-{
- if( cls_labels )
- {
- for( int cls = 0; cls < cls_labels->cols; cls++ )
- {
- cvReleaseMat( &count[cls] );
- cvReleaseMat( &sum[cls] );
- cvReleaseMat( &productsum[cls] );
- cvReleaseMat( &avg[cls] );
- cvReleaseMat( &inv_eigen_values[cls] );
- cvReleaseMat( &cov_rotate_mats[cls] );
- }
- }
-
- cvReleaseMat( &cls_labels );
- cvReleaseMat( &var_idx );
- cvReleaseMat( &c );
- cvFree( &count );
-}
-
-
-CvNormalBayesClassifier::~CvNormalBayesClassifier()
-{
- clear();
-}
-
-
-CvNormalBayesClassifier::CvNormalBayesClassifier(
- const CvMat* _train_data, const CvMat* _responses,
- const CvMat* _var_idx, const CvMat* _sample_idx )
-{
- var_count = var_all = 0;
- var_idx = 0;
- cls_labels = 0;
- count = 0;
- sum = 0;
- productsum = 0;
- avg = 0;
- inv_eigen_values = 0;
- cov_rotate_mats = 0;
- c = 0;
- default_model_name = "my_nb";
-
- train( _train_data, _responses, _var_idx, _sample_idx );
-}
-
-
-bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _responses,
- const CvMat* _var_idx, const CvMat* _sample_idx, bool update )
-{
- const float min_variation = FLT_EPSILON;
- bool result = false;
- CvMat* responses = 0;
- const float** train_data = 0;
- CvMat* __cls_labels = 0;
- CvMat* __var_idx = 0;
- CvMat* cov = 0;
-
- CV_FUNCNAME( "CvNormalBayesClassifier::train" );
-
- __BEGIN__;
-
- int cls, nsamples = 0, _var_count = 0, _var_all = 0, nclasses = 0;
- int s, c1, c2;
- const int* responses_data;
-
- CV_CALL( cvPrepareTrainData( 0,
- _train_data, CV_ROW_SAMPLE, _responses, CV_VAR_CATEGORICAL,
- _var_idx, _sample_idx, false, &train_data,
- &nsamples, &_var_count, &_var_all, &responses,
- &__cls_labels, &__var_idx ));
-
- if( !update )
- {
- const size_t mat_size = sizeof(CvMat*);
- size_t data_size;
-
- clear();
-
- var_idx = __var_idx;
- cls_labels = __cls_labels;
- __var_idx = __cls_labels = 0;
- var_count = _var_count;
- var_all = _var_all;
-
- nclasses = cls_labels->cols;
- data_size = nclasses*6*mat_size;
-
- CV_CALL( count = (CvMat**)cvAlloc( data_size ));
- memset( count, 0, data_size );
-
- sum = count + nclasses;
- productsum = sum + nclasses;
- avg = productsum + nclasses;
- inv_eigen_values= avg + nclasses;
- cov_rotate_mats = inv_eigen_values + nclasses;
-
- CV_CALL( c = cvCreateMat( 1, nclasses, CV_64FC1 ));
-
- for( cls = 0; cls < nclasses; cls++ )
- {
- CV_CALL(count[cls] = cvCreateMat( 1, var_count, CV_32SC1 ));
- CV_CALL(sum[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
- CV_CALL(productsum[cls] = cvCreateMat( var_count, var_count, CV_64FC1 ));
- CV_CALL(avg[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
- CV_CALL(inv_eigen_values[cls] = cvCreateMat( 1, var_count, CV_64FC1 ));
- CV_CALL(cov_rotate_mats[cls] = cvCreateMat( var_count, var_count, CV_64FC1 ));
- CV_CALL(cvZero( count[cls] ));
- CV_CALL(cvZero( sum[cls] ));
- CV_CALL(cvZero( productsum[cls] ));
- CV_CALL(cvZero( avg[cls] ));
- CV_CALL(cvZero( inv_eigen_values[cls] ));
- CV_CALL(cvZero( cov_rotate_mats[cls] ));
- }
- }
- else
- {
- // check that the new training data has the same dimensionality etc.
- if( _var_count != var_count || _var_all != var_all || !(!_var_idx && !var_idx ||
- _var_idx && var_idx && cvNorm(_var_idx,var_idx,CV_C) < DBL_EPSILON) )
- CV_ERROR( CV_StsBadArg,
- "The new training data is inconsistent with the original training data" );
-
- if( cls_labels->cols != __cls_labels->cols ||
- cvNorm(cls_labels, __cls_labels, CV_C) > DBL_EPSILON )
- CV_ERROR( CV_StsNotImplemented,
- "In the current implementation the new training data must have absolutely "
- "the same set of class labels as used in the original training data" );
-
- nclasses = cls_labels->cols;
- }
-
- responses_data = responses->data.i;
- CV_CALL( cov = cvCreateMat( _var_count, _var_count, CV_64FC1 ));
-
- /* process train data (count, sum , productsum) */
- for( s = 0; s < nsamples; s++ )
- {
- cls = responses_data[s];
- int* count_data = count[cls]->data.i;
- double* sum_data = sum[cls]->data.db;
- double* prod_data = productsum[cls]->data.db;
- const float* train_vec = train_data[s];
-
- for( c1 = 0; c1 < _var_count; c1++, prod_data += _var_count )
- {
- double val1 = train_vec[c1];
- sum_data[c1] += val1;
- count_data[c1]++;
- for( c2 = c1; c2 < _var_count; c2++ )
- prod_data[c2] += train_vec[c2]*val1;
- }
- }
-
- /* calculate avg, covariance matrix, c */
- for( cls = 0; cls < nclasses; cls++ )
- {
- double det = 1;
- int i, j;
- CvMat* w = inv_eigen_values[cls];
- int* count_data = count[cls]->data.i;
- double* avg_data = avg[cls]->data.db;
- double* sum1 = sum[cls]->data.db;
-
- cvCompleteSymm( productsum[cls], 0 );
-
- for( j = 0; j < _var_count; j++ )
- {
- int n = count_data[j];
- avg_data[j] = n ? sum1[j] / n : 0.;
- }
-
- count_data = count[cls]->data.i;
- avg_data = avg[cls]->data.db;
- sum1 = sum[cls]->data.db;
-
- for( i = 0; i < _var_count; i++ )
- {
- double* avg2_data = avg[cls]->data.db;
- double* sum2 = sum[cls]->data.db;
- double* prod_data = productsum[cls]->data.db + i*_var_count;
- double* cov_data = cov->data.db + i*_var_count;
- double s1val = sum1[j];
- double avg1 = avg_data[i];
- int count = count_data[i];
-
- for( j = 0; j <= i; j++ )
- {
- double avg2 = avg2_data[j];
- double cov_val = prod_data[j] - avg1 * sum2[j] - avg2 * s1val + avg1 * avg2 * count;
- cov_val = (count > 1) ? cov_val / (count - 1) : cov_val;
- cov_data[j] = cov_val;
- }
- }
-
- CV_CALL( cvCompleteSymm( cov, 1 ));
- CV_CALL( cvSVD( cov, w, cov_rotate_mats[cls], 0, CV_SVD_U_T ));
- CV_CALL( cvMaxS( w, min_variation, w ));
- for( j = 0; j < _var_count; j++ )
- det *= w->data.db[j];
-
- CV_CALL( cvDiv( NULL, w, w ));
- c->data.db[cls] = log( det );
- }
-
- result = true;
-
- __END__;
-
- if( !result || cvGetErrStatus() < 0 )
- clear();
-
- cvReleaseMat( &cov );
- cvReleaseMat( &__cls_labels );
- cvReleaseMat( &__var_idx );
- cvFree( &train_data );
-
- return result;
-}
-
-
-float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) const
-{
- float value = 0;
- void* buffer = 0;
- int allocated_buffer = 0;
-
- CV_FUNCNAME( "CvNormalBayesClassifier::predict" );
-
- __BEGIN__;
-
- int i, j, k, cls = -1, _var_count, nclasses;
- double opt = FLT_MAX;
- CvMat diff;
- int rtype = 0, rstep = 0, size;
- const int* vidx = 0;
-
- nclasses = cls_labels->cols;
- _var_count = avg[0]->cols;
-
- if( !CV_IS_MAT(samples) || CV_MAT_TYPE(samples->type) != CV_32FC1 || samples->cols != var_all )
- CV_ERROR( CV_StsBadArg,
- "The input samples must be 32f matrix with the number of columns = var_all" );
-
- if( samples->rows > 1 && !results )
- CV_ERROR( CV_StsNullPtr,
- "When the number of input samples is >1, the output vector of results must be passed" );
-
- if( results )
- {
- if( !CV_IS_MAT(results) || CV_MAT_TYPE(results->type) != CV_32FC1 &&
- CV_MAT_TYPE(results->type) != CV_32SC1 ||
- results->cols != 1 && results->rows != 1 ||
- results->cols + results->rows - 1 != samples->rows )
- CV_ERROR( CV_StsBadArg, "The output array must be integer or floating-point vector "
- "with the number of elements = number of rows in the input matrix" );
-
- rtype = CV_MAT_TYPE(results->type);
- rstep = CV_IS_MAT_CONT(results->type) ? 1 : results->step/CV_ELEM_SIZE(rtype);
- }
-
- if( var_idx )
- vidx = var_idx->data.i;
-
-// allocate memory and initializing headers for calculating
- size = sizeof(double) * (nclasses + var_count);
- if( size <= CV_MAX_LOCAL_SIZE )
- buffer = cvStackAlloc( size );
- else
- {
- CV_CALL( buffer = cvAlloc( size ));
- allocated_buffer = 1;
- }
-
- diff = cvMat( 1, var_count, CV_64FC1, buffer );
-
- for( k = 0; k < samples->rows; k++ )
- {
- int ival;
-
- for( i = 0; i < nclasses; i++ )
- {
- double cur = c->data.db[i];
- CvMat* u = cov_rotate_mats[i];
- CvMat* w = inv_eigen_values[i];
- const double* avg_data = avg[i]->data.db;
- const float* x = (const float*)(samples->data.ptr + samples->step*k);
-
- // cov = u w u' --> cov^(-1) = u w^(-1) u'
- for( j = 0; j < _var_count; j++ )
- diff.data.db[j] = avg_data[j] - x[vidx ? vidx[j] : j];
-
- CV_CALL(cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T ));
- for( j = 0; j < _var_count; j++ )
- {
- double d = diff.data.db[j];
- cur += d*d*w->data.db[j];
- }
-
- if( cur < opt )
- {
- cls = i;
- opt = cur;
- }
- /* probability = exp( -0.5 * cur ) */
- }
-
- ival = cls_labels->data.i[cls];
- if( results )
- {
- if( rtype == CV_32SC1 )
- results->data.i[k*rstep] = ival;
- else
- results->data.fl[k*rstep] = (float)ival;
- }
- if( k == 0 )
- value = (float)ival;
-
- /*if( _probs )
- {
- CV_CALL( cvConvertScale( &expo, &expo, -0.5 ));
- CV_CALL( cvExp( &expo, &expo ));
- if( _probs->cols == 1 )
- CV_CALL( cvReshape( &expo, &expo, 1, nclasses ));
- CV_CALL( cvConvertScale( &expo, _probs, 1./cvSum( &expo ).val[0] ));
- }*/
- }
-
- __END__;
-
- if( allocated_buffer )
- cvFree( &buffer );
-
- return value;
-}
-
-
-void CvNormalBayesClassifier::write( CvFileStorage* fs, const char* name )
-{
- CV_FUNCNAME( "CvNormalBayesClassifier::write" );
-
- __BEGIN__;
-
- int nclasses, i;
-
- nclasses = cls_labels->cols;
-
- cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_NBAYES );
-
- CV_CALL( cvWriteInt( fs, "var_count", var_count ));
- CV_CALL( cvWriteInt( fs, "var_all", var_all ));
-
- if( var_idx )
- CV_CALL( cvWrite( fs, "var_idx", var_idx ));
- CV_CALL( cvWrite( fs, "cls_labels", cls_labels ));
-
- CV_CALL( cvStartWriteStruct( fs, "count", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, count[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "sum", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, sum[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "productsum", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, productsum[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "avg", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, avg[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "inv_eigen_values", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, inv_eigen_values[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "cov_rotate_mats", CV_NODE_SEQ ));
- for( i = 0; i < nclasses; i++ )
- CV_CALL( cvWrite( fs, NULL, cov_rotate_mats[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvWrite( fs, "c", c ));
-
- cvEndWriteStruct( fs );
-
- __END__;
-}
-
-
-void CvNormalBayesClassifier::read( CvFileStorage* fs, CvFileNode* root_node )
-{
- bool ok = false;
- CV_FUNCNAME( "CvNormalBayesClassifier::read" );
-
- __BEGIN__;
-
- int nclasses, i;
- size_t data_size;
- CvFileNode* node;
- CvSeq* seq;
- CvSeqReader reader;
-
- clear();
-
- CV_CALL( var_count = cvReadIntByName( fs, root_node, "var_count", -1 ));
- CV_CALL( var_all = cvReadIntByName( fs, root_node, "var_all", -1 ));
- CV_CALL( var_idx = (CvMat*)cvReadByName( fs, root_node, "var_idx" ));
- CV_CALL( cls_labels = (CvMat*)cvReadByName( fs, root_node, "cls_labels" ));
- if( !cls_labels )
- CV_ERROR( CV_StsParseError, "No \"cls_labels\" in NBayes classifier" );
- if( cls_labels->cols < 1 )
- CV_ERROR( CV_StsBadArg, "Number of classes is less 1" );
- if( var_count <= 0 )
- CV_ERROR( CV_StsParseError,
- "The field \"var_count\" of NBayes classifier is missing" );
- nclasses = cls_labels->cols;
-
- data_size = nclasses*6*sizeof(CvMat*);
- CV_CALL( count = (CvMat**)cvAlloc( data_size ));
- memset( count, 0, data_size );
-
- sum = count + nclasses;
- productsum = sum + nclasses;
- avg = productsum + nclasses;
- inv_eigen_values = avg + nclasses;
- cov_rotate_mats = inv_eigen_values + nclasses;
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "count" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( count[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "sum" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( sum[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "productsum" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( productsum[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "avg" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( avg[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "inv_eigen_values" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( inv_eigen_values[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( node = cvGetFileNodeByName( fs, root_node, "cov_rotate_mats" ));
- seq = node->data.seq;
- if( !CV_NODE_IS_SEQ(node->tag) || seq->total != nclasses)
- CV_ERROR( CV_StsBadArg, "" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( i = 0; i < nclasses; i++ )
- {
- CV_CALL( cov_rotate_mats[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
-
- CV_CALL( c = (CvMat*)cvReadByName( fs, root_node, "c" ));
-
- ok = true;
-
- __END__;
-
- if( !ok )
- clear();
-}
-
-/* End of file. */
-