/* Repository viewer residue: "Move the sources to trunk"
   [opencv] / ml / src / _ml.h */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

41 #ifndef __ML_INTERNAL_H__
42 #define __ML_INTERNAL_H__
43
44 #if _MSC_VER >= 1200
45 #pragma warning( disable: 4514 4710 4711 4710 )
46 #endif
47
48 #include "ml.h"
49 #include "cxmisc.h"
50
51 #include <assert.h>
52 #include <float.h>
53 #include <limits.h>
54 #include <math.h>
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <time.h>
59
60 #ifndef FALSE
61 #define FALSE 0
62 #endif
63 #ifndef TRUE
64 #define TRUE 1
65 #endif
66
67 #define ML_IMPL CV_IMPL
68
/* Accesses one element of <mat> with the index order selected by <tflag>:
   when <tflag> equals CV_ROW_SAMPLE the element is CV_MAT_ELEM( mat, type, comp, vect ),
   for any other value the last two indices are swapped.
   <tflag> is parenthesized so that a compound expression (e.g. "flags & 3") is
   compared against CV_ROW_SAMPLE as a whole. */
#define CV_MAT_ELEM_FLAG( mat, type, comp, vect, tflag )    \
    (( (tflag) == CV_ROW_SAMPLE )                           \
    ? (CV_MAT_ELEM( mat, type, comp, vect ))                \
    : (CV_MAT_ELEM( mat, type, vect, comp )))

/* Convert matrix to vector.
   <mat> is a matrix object (not a pointer) that must have exactly one row or one
   column; otherwise CV_ERROR( CV_StsBadArg, "" ) is invoked.
   Outputs (all are lvalues supplied by the caller):
     <vdata> - mat.data.ptr;
     <vstep> - distance between consecutive elements in bytes: the element size for a
               single-row matrix, mat.step otherwise;
     <num>   - number of elements: mat.cols for a single-row matrix, mat.rows otherwise.
   The body is a single compound statement, so the macro can be used safely as the
   body of an if/for without extra braces. */
#define ICV_MAT2VEC( mat, vdata, vstep, num )      \
{                                                  \
    if( MIN( (mat).rows, (mat).cols ) != 1 )       \
        CV_ERROR( CV_StsBadArg, "" );              \
    (vdata) = ((mat).data.ptr);                    \
    if( (mat).rows == 1 )                          \
    {                                              \
        (vstep) = CV_ELEM_SIZE( (mat).type );      \
        (num) = (mat).cols;                        \
    }                                              \
    else                                           \
    {                                              \
        (vstep) = (mat).step;                      \
        (num) = (mat).rows;                        \
    }                                              \
}

/* Get raw data.
   <mat> is a matrix object (not a pointer). Outputs (caller-supplied lvalues):
     <rdata> - mat.data.ptr;
     <sstep>, <cstep> - byte steps between samples and between components;
     <m>, <n> - number of samples and number of components.
   When CV_IS_ROW_SAMPLE( flags ) holds, samples are rows (sstep = mat.step,
   cstep = element size, m = rows, n = cols); otherwise the roles of rows and
   columns are exchanged.
   The body is a single compound statement, so the macro can be used safely as the
   body of an if/for without extra braces. */
#define ICV_RAWDATA( mat, flags, rdata, sstep, cstep, m, n )         \
{                                                                    \
    (rdata) = (mat).data.ptr;                                        \
    if( CV_IS_ROW_SAMPLE( flags ) )                                  \
    {                                                                \
        (sstep) = (mat).step;                                        \
        (cstep) = CV_ELEM_SIZE( (mat).type );                        \
        (m) = (mat).rows;                                            \
        (n) = (mat).cols;                                            \
    }                                                                \
    else                                                             \
    {                                                                \
        (cstep) = (mat).step;                                        \
        (sstep) = CV_ELEM_SIZE( (mat).type );                        \
        (n) = (mat).rows;                                            \
        (m) = (mat).cols;                                            \
    }                                                                \
}

/* Evaluates to non-zero when <mat> (a pointer expression) passes CV_IS_MAT, its
   type field equals <mat_type> after CV_MAT_TYPE masking, and both dimensions are
   positive. <mat> is parenthesized at every use so expressions such as "&m" work. */
#define ICV_IS_MAT_OF_TYPE( mat, mat_type) \
    (CV_IS_MAT( mat ) && CV_MAT_TYPE( (mat)->type ) == (mat_type) &&   \
    (mat)->cols > 0 && (mat)->rows > 0)

/*
    Local variables shared by the ICV_*_REQUIRED / ICV_*_OPTIONAL macros below
    (the names on the right are the arguments they are filled from):

    uchar* data; int sstep, cstep;      - trainData->data
    uchar* classes; int clstep; int ncl;- trainClasses
    uchar* tmask; int tmstep; int ntm;  - typeMask
    uchar* missed;int msstep, mcstep;   - missedMeasurements...
    int mm, mn;                         == m,n == size,dim
    uchar* sidx;int sistep;             - sampleIdx
    uchar* cidx;int cistep;             - compIdx
    int k, l;                           == n,m == dim,size (length of cidx, sidx)
    int m, n;                           == size,dim

    The macro declares all of them and then assigns NULL / 0 in separate statements
    (declaration and initialization are deliberately kept apart, as in the original).
*/
#define ICV_DECLARE_TRAIN_ARGS()                                                    \
    uchar* data;                                                                    \
    int sstep, cstep;                                                               \
    uchar* classes;                                                                 \
    int clstep;                                                                     \
    int ncl;                                                                        \
    uchar* tmask;                                                                   \
    int tmstep;                                                                     \
    int ntm;                                                                        \
    uchar* missed;                                                                  \
    int msstep, mcstep;                                                             \
    int mm, mn;                                                                     \
    uchar* sidx;                                                                    \
    int sistep;                                                                     \
    uchar* cidx;                                                                    \
    int cistep;                                                                     \
    int k, l;                                                                       \
    int m, n;                                                                       \
                                                                                    \
    data = classes = tmask = missed = sidx = cidx = NULL;                           \
    sstep = cstep = clstep = ncl = tmstep = ntm = msstep = mcstep = mm = mn = 0;    \
    sistep = cistep = k = l = m = n = 0;

/* Validates the mandatory training-data argument <param> (a matrix pointer): it must
   satisfy ICV_IS_MAT_OF_TYPE( param, CV_32FC1 ), otherwise CV_ERROR is invoked with
   CV_StsBadArg. On success fills the caller's data, sstep, cstep, m, n via
   ICV_RAWDATA according to <flags>, then sets k = n and l = m.
   Expects the variables of ICV_DECLARE_TRAIN_ARGS() to be in scope. */
#define ICV_TRAIN_DATA_REQUIRED( param, flags )                                     \
    if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) )                                  \
    {                                                                               \
        CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );                   \
    }                                                                               \
    else                                                                            \
    {                                                                               \
        ICV_RAWDATA( *(param), (flags), data, sstep, cstep, m, n );                 \
        k = n;                                                                      \
        l = m;                                                                      \
    }

/* Validates the mandatory class-label argument <param> (a matrix pointer): it must
   satisfy ICV_IS_MAT_OF_TYPE( param, CV_32FC1 ), otherwise CV_ERROR is invoked with
   CV_StsBadArg. On success fills the caller's classes, clstep, ncl via ICV_MAT2VEC and
   reports "Unmatched sizes" when ncl differs from the sample count m.
   Expects the variables of ICV_DECLARE_TRAIN_ARGS() to be in scope, with m already set. */
#define ICV_TRAIN_CLASSES_REQUIRED( param )                                         \
    if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) )                                  \
    {                                                                               \
        CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );                   \
    }                                                                               \
    else                                                                            \
    {                                                                               \
        ICV_MAT2VEC( *(param), classes, clstep, ncl );                              \
        if( m != ncl )                                                              \
        {                                                                           \
            CV_ERROR( CV_StsBadArg, "Unmatched sizes" );                            \
        }                                                                           \
    }

/* Rejects an argument that the calling function does not support: invokes CV_ERROR
   with CV_StsBadArg and the message "<param> parameter must be NULL" when <param>
   is not NULL; does nothing otherwise. */
#define ICV_ARG_NULL( param )                                                       \
    if( (param) != NULL )                                                           \
    {                                                                               \
        CV_ERROR( CV_StsBadArg, #param " parameter must be NULL" );                 \
    }

/* Handles the optional missing-measurements mask <param> (a matrix pointer or NULL).
   Nothing happens when <param> is NULL. Otherwise it must satisfy
   ICV_IS_MAT_OF_TYPE( param, CV_8UC1 ) (else CV_ERROR with CV_StsBadArg); the caller's
   missed, msstep, mcstep, mm, mn are filled via ICV_RAWDATA according to <flags>, and
   "Unmatched sizes" is reported unless mm == m and mn == n.
   <param> is parenthesized at every use so arbitrary pointer expressions are accepted.
   Expects the variables of ICV_DECLARE_TRAIN_ARGS() to be in scope, with m and n set. */
#define ICV_MISSED_MEASUREMENTS_OPTIONAL( param, flags )                            \
    if( (param) )                                                                   \
    {                                                                               \
        if( !ICV_IS_MAT_OF_TYPE( (param), CV_8UC1 ) )                               \
        {                                                                           \
            CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );               \
        }                                                                           \
        else                                                                        \
        {                                                                           \
            ICV_RAWDATA( *(param), (flags), missed, msstep, mcstep, mm, mn );       \
            if( mm != m || mn != n )                                                \
            {                                                                       \
                CV_ERROR( CV_StsBadArg, "Unmatched sizes" );                        \
            }                                                                       \
        }                                                                           \
    }

/* Handles the optional component-index argument <param> (a matrix pointer or NULL).
   Nothing happens when <param> is NULL. Otherwise it must satisfy
   ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) (else CV_ERROR with CV_StsBadArg); the caller's
   cidx, cistep, k are filled via ICV_MAT2VEC, and an error is reported when k > n.
   <param> is parenthesized at every use so arbitrary pointer expressions are accepted.
   Expects the variables of ICV_DECLARE_TRAIN_ARGS() to be in scope, with n set. */
#define ICV_COMP_IDX_OPTIONAL( param )                                              \
    if( (param) )                                                                   \
    {                                                                               \
        if( !ICV_IS_MAT_OF_TYPE( (param), CV_32SC1 ) )                              \
        {                                                                           \
            CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );               \
        }                                                                           \
        else                                                                        \
        {                                                                           \
            ICV_MAT2VEC( *(param), cidx, cistep, k );                               \
            if( k > n )                                                             \
                CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );           \
        }                                                                           \
    }

/* Handles the optional sample-index argument <param> (a matrix pointer or NULL).
   Nothing happens when <param> is NULL. Otherwise it must satisfy
   ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) (else CV_ERROR with CV_StsBadArg); the caller's
   sidx, sistep, l are filled via ICV_MAT2VEC, and an error is reported when l > m.
   The vector is read from <param> itself (the earlier version dereferenced a variable
   literally named sampleIdx regardless of the argument passed).
   Expects the variables of ICV_DECLARE_TRAIN_ARGS() to be in scope, with m set. */
#define ICV_SAMPLE_IDX_OPTIONAL( param )                                            \
    if( (param) )                                                                   \
    {                                                                               \
        if( !ICV_IS_MAT_OF_TYPE( (param), CV_32SC1 ) )                              \
        {                                                                           \
            CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );               \
        }                                                                           \
        else                                                                        \
        {                                                                           \
            ICV_MAT2VEC( *(param), sidx, sistep, l );                               \
            if( l > m )                                                             \
                CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" );           \
        }                                                                           \
    }

/****************************************************************************************/
/* Copies an array of float rows into the matrix <matrice> (a CvMat pointer of type
   CV_32FC1 or CV_64FC1, checked with CV_ASSERT). For every row i, (array)[i] is wrapped
   in a 1 x cols CV_32FC1 header and converted into row i of <matrice> with cvConvert.
   <array> is parenthesized so that any expression yielding the row-pointer array works.
   NOTE(review): the destination row pointer advances by cols * element size rather
   than by matrice->step, i.e. the rows are assumed to be densely packed - confirm
   that callers never pass a sub-matrix with a larger step. */
#define ICV_CONVERT_FLOAT_ARRAY_TO_MATRICE( array, matrice )        \
{                                                                   \
    CvMat a, b;                                                     \
    int dims = (matrice)->cols;                                     \
    int nsamples = (matrice)->rows;                                 \
    int type = CV_MAT_TYPE((matrice)->type);                        \
    int i, offset = dims;                                           \
                                                                    \
    CV_ASSERT( type == CV_32FC1 || type == CV_64FC1 );              \
    offset *= ((type == CV_32FC1) ? sizeof(float) : sizeof(double));\
                                                                    \
    b = cvMat( 1, dims, CV_32FC1 );                                 \
    cvGetRow( matrice, &a, 0 );                                     \
    for( i = 0; i < nsamples; i++, a.data.ptr += offset )           \
    {                                                               \
        b.data.fl = (float*)(array)[i];                             \
        CV_CALL( cvConvert( &b, &a ) );                             \
    }                                                               \
}

/****************************************************************************************\
*                       Auxiliary function declarations                                  *
\****************************************************************************************/

250 /* Generates a set of classes centers in quantity <num_of_clusters> that are generated as
251    uniform random vectors in parallelepiped, where <data> is concentrated. Vectors in
252    <data> should have horizontal orientation. If <centers> != NULL, the function doesn't
253    allocate any memory and stores generated centers in <centers>, returns <centers>.
254    If <centers> == NULL, the function allocates memory and creates the matrice. Centers
255    are supposed to be oriented horizontally. */
256 CvMat* icvGenerateRandomClusterCenters( int seed,
257                                         const CvMat* data,
258                                         int num_of_clusters,
259                                         CvMat* centers CV_DEFAULT(0));
260
261 /* Fills the <labels> using <probs> by choosing the maximal probability. Outliers are
262    fixed by <oulier_tresh> and have cluster label (-1). Function also controls that there
263    weren't "empty" clusters by filling empty clusters with the maximal probability vector.
264    If probs_sums != NULL, filles it with the sums of probabilities for each sample (it is
265    useful for normalizing probabilities' matrice of FCM) */
266 void icvFindClusterLabels( const CvMat* probs, float outlier_thresh, float r,
267                            const CvMat* labels );
268
/* One element of a sparse float vector: an integer index paired with its value. */
typedef struct CvSparseVecElem32f
{
    int idx;    /* element index */
    float val;  /* element value */
}
CvSparseVecElem32f;

/* Prepare training data and related parameters */
/* Bit flags (each constant is a distinct power of two, so they can be OR-ed together). */
#define CV_TRAIN_STATMODEL_DEFRAGMENT_TRAIN_DATA    1
#define CV_TRAIN_STATMODEL_SAMPLES_AS_ROWS          2
#define CV_TRAIN_STATMODEL_SAMPLES_AS_COLUMNS       4
#define CV_TRAIN_STATMODEL_CATEGORICAL_RESPONSE     8
#define CV_TRAIN_STATMODEL_ORDERED_RESPONSE         16
#define CV_TRAIN_STATMODEL_RESPONSES_ON_OUTPUT      32
#define CV_TRAIN_STATMODEL_ALWAYS_COPY_TRAIN_DATA   64
#define CV_TRAIN_STATMODEL_SPARSE_AS_SPARSE         128

286 int
287 cvPrepareTrainData( const char* /*funcname*/,
288                     const CvMat* train_data, int tflag,
289                     const CvMat* responses, int response_type,
290                     const CvMat* var_idx,
291                     const CvMat* sample_idx,
292                     bool always_copy_data,
293                     const float*** out_train_samples,
294                     int* _sample_count,
295                     int* _var_count,
296                     int* _var_all,
297                     CvMat** out_responses,
298                     CvMat** out_response_map,
299                     CvMat** out_var_idx,
300                     CvMat** out_sample_idx=0 );
301
302 void
303 cvSortSamplesByClasses( const float** samples, const CvMat* classes, 
304                         int* class_ranges, const uchar** mask CV_DEFAULT(0) );
305
306 void 
307 cvCombineResponseMaps (CvMat*  _responses,
308                  const CvMat*  old_response_map,
309                        CvMat*  new_response_map,
310                        CvMat** out_response_map);
311
312 void
313 cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx,
314                       int class_count, const CvMat* prob, float** row_sample,
315                       int as_sparse CV_DEFAULT(0) );
316
317 /* copies clustering [or batch "predict"] results
318    (labels and/or centers and/or probs) back to the output arrays */
319 void
320 cvWritebackLabels( const CvMat* labels, CvMat* dst_labels,
321                    const CvMat* centers, CvMat* dst_centers,
322                    const CvMat* probs, CvMat* dst_probs,
323                    const CvMat* sample_idx, int samples_all,
324                    const CvMat* comp_idx, int dims_all );
325 #define cvWritebackResponses cvWritebackLabels
326
327 #define XML_FIELD_NAME "_name"
328 CvFileNode* icvFileNodeGetChild(CvFileNode* father, const char* name);
329 CvFileNode* icvFileNodeGetChildArrayElem(CvFileNode* father, const char* name,int index);
330 CvFileNode* icvFileNodeGetNext(CvFileNode* n, const char* name);
331
332
333 void cvCheckTrainData( const CvMat* train_data, int tflag,
334                        const CvMat* missing_mask, 
335                        int* var_all, int* sample_all );
336
337 CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false );
338
339 CvMat* cvPreprocessVarType( const CvMat* type_mask, const CvMat* var_idx,
340                             int var_all, int* response_type );
341
342 CvMat* cvPreprocessOrderedResponses( const CvMat* responses,
343                 const CvMat* sample_idx, int sample_all );
344
345 CvMat* cvPreprocessCategoricalResponses( const CvMat* responses,
346                 const CvMat* sample_idx, int sample_all,
347                 CvMat** out_response_map, CvMat** class_counts=0 );
348
349 const float** cvGetTrainSamples( const CvMat* train_data, int tflag,
350                    const CvMat* var_idx, const CvMat* sample_idx,
351                    int* _var_count, int* _sample_count,
352                    bool always_copy_data=false );
353
#endif /* __ML_INTERNAL_H__ */