Initial check-in
[him-cellwriter] / src / preprocess.c
1
2 /*
3
4 cellwriter -- a character recognition input method
5 Copyright (C) 2007 Michael Levin <risujin@risujin.org>
6
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License
9 as published by the Free Software Foundation; either version 2
10 of the License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20
21 */
22
23 #include "config.h"
24 #include "common.h"
25 #include "recognize.h"
26 #include <string.h>
27
28 /*
29         Preprocessing engine
30 */
31
32 /* Maximum and variable versions of the number of samples to prepare for
33    thorough examination */
34 #define PREP_MAX     (SAMPLES_MAX * 4)
35 #define PREP_SAMPLES (samples_max * 4)
36
37 /* Greedy mapping */
38 #define VALUE_MAX 2048.f
39 #define VALUE_MIN 1024.f
40
41 /* Penalties (proportion of final score deducted) */
42 #define VERTICAL_PENALTY 16.00f
43 #define GLUABLE_PENALTY   0.08f
44 #define GLUE_PENALTY      0.02f
45
46 int ignore_stroke_dir = TRUE, ignore_stroke_num = TRUE, prep_examined;
47
48 static float measure_partial(Stroke *as, Stroke *b, Vec2 *offset, float scale_b)
49 {
50         Stroke *bs;
51         float value;
52         int b_len, min_len;
53
54         b_len = b->distance * scale_b / ROUGH_RESOLUTION + 0.5;
55         if (b_len < 4)
56                 b_len = 4;
57         min_len = as->len >= b_len ? b_len : as->len;
58         bs = sample_stroke(NULL, b, b_len, min_len);
59         value = measure_strokes(as, bs, (MeasureFunc)measure_distance, offset,
60                                 min_len, ROUGH_ELASTICITY);
61         stroke_free(bs);
62         return value;
63 }
64
65 static float greedy_map(Sample *larger, Sample *smaller, Transform *ptfm,
66                         Vec2 *offset)
67 {
68         Transform tfm;
69         int i, unmapped_len;
70         float total;
71
72         unmapped_len = larger->len;
73
74         /* Prepare transform structure */
75         memset(&tfm, 0, sizeof (tfm));
76         *ptfm = tfm;
77         tfm.valid = TRUE;
78
79         for (i = 0, total = 0.f; i < smaller->len; i++) {
80                 float best, best_reach = G_MAXFLOAT, best_value = G_MAXFLOAT,
81                       value, penalty = G_MAXFLOAT, seg_dist = 0.f;
82                 int j, last_j = 0, best_j = 0, glue = 0;
83
84         glue_more:
85                 for (j = 0, best = G_MAXFLOAT; j < larger->len; j++) {
86                         Stroke *stroke;
87                         float reach, scale;
88                         unsigned char gluable;
89
90                         if (tfm.order[j])
91                                 continue;
92                         tfm.reverse[j] = FALSE;
93
94                         /* Do not glue on oversize segments */
95                         if (seg_dist +
96                             larger->strokes[j]->distance / 2 >
97                             smaller->strokes[i]->distance &&
98                             (larger->strokes[j]->spread > DOT_SPREAD ||
99                              smaller->strokes[i]->spread > DOT_SPREAD))
100                                 continue;
101
102                         tfm.order[j] = i + 1;
103                         tfm.glue[j] = glue;
104
105                 measure:
106                         reach = 0.f;
107                         gluable = 0;
108                         if (glue) {
109                                 Vec2 v;
110                                 Point *p1, *p2;
111                                 unsigned char gluable2;
112
113                                 /* Can we glue these strokes together? */
114                                 if (!tfm.reverse[j]) {
115                                         gluable = larger->strokes[j]->
116                                                   gluable_start[last_j];
117                                         gluable2 = larger->strokes[last_j]->
118                                                    gluable_end[j];
119                                         if (gluable2 < gluable)
120                                                 gluable = gluable2;
121                                         if (gluable >= GLUABLE_MAX) {
122                                                 if (!ignore_stroke_dir)
123                                                         continue;
124                                                 tfm.reverse[j] = TRUE;
125                                         }
126                                 }
127                                 if (tfm.reverse[j]) {
128                                         gluable = larger->strokes[j]->
129                                                   gluable_end[last_j];
130                                         gluable2 = larger->strokes[last_j]->
131                                                    gluable_start[j];
132                                         if (gluable2 < gluable)
133                                                 gluable = gluable2;
134                                         if (gluable >= GLUABLE_MAX)
135                                                 continue;
136                                 }
137
138                                 /* Get the inter-stroke (reach) distance */
139                                 p1 = larger->strokes[last_j]->points +
140                                      (tfm.reverse[last_j] ? 0 :
141                                       larger->strokes[last_j]->len - 1);
142                                 p2 = larger->strokes[j]->points +
143                                      (!tfm.reverse[j] ? 0 :
144                                       larger->strokes[j]->len - 1);
145                                 vec2_set(&v, p2->x - p1->x,
146                                          p2->y - p1->y);
147                                 reach = vec2_mag(&v);
148                         }
149
150                         /* Transform and measure the distance */
151                         stroke = transform_stroke(larger, &tfm, i);
152                         scale = smaller->distance /
153                                 (reach + ptfm->reach + larger->distance);
154                         value = measure_partial(smaller->roughs[i], stroke,
155                                                 offset, scale);
156                         stroke_free(stroke);
157
158                         /* Keep track of the best result */
159                         if (value < best && value < VALUE_MAX) {
160                                 best = value;
161                                 best_j = j;
162                                 best_reach = reach;
163                                 *ptfm = tfm;
164
165                                 /* Penalize glue and reach distance */
166                                 penalty = glue * GLUE_PENALTY +
167                                           gluable * GLUABLE_PENALTY /
168                                                     GLUABLE_MAX;
169                         }
170
171                         /* Bail if we have a really good match */
172                         if (value < VALUE_MIN)
173                                 break;
174
175                         /* Glue on with reversed direction */
176                         if (ignore_stroke_dir && !tfm.reverse[j] &&
177                             larger->strokes[j]->spread > DOT_SPREAD) {
178                                 tfm.reverse[j] = TRUE;
179                                 goto measure;
180                         }
181
182                         tfm.reverse[j] = FALSE;
183                         tfm.order[j] = 0;
184                 }
185                 if (best < G_MAXFLOAT) {
186                         best_value = best;
187                         larger->penalty += penalty;
188                         smaller->penalty += penalty;
189                         seg_dist += best_reach +
190                                     larger->strokes[best_j]->distance;
191                         ptfm->reach += best_reach;
192                         tfm = *ptfm;
193
194                         /* If we still have strokes and we didn't just add on
195                            a dot, try gluing them on */
196                         unmapped_len--;
197                         if (unmapped_len >= smaller->len - i &&
198                             larger->strokes[best_j]->spread >
199                             DOT_SPREAD) {
200                                 last_j = best_j;
201                                 glue++;
202                                 goto glue_more;
203                         }
204                 }
205
206                 /* Didn't map a target stroke? */
207                 else if (!glue) {
208                         ptfm->valid = FALSE;
209                         return G_MAXFLOAT;
210                 }
211
212                 total += best_value;
213         }
214
215         /* Didn't assign all of the strokes? */
216         if (unmapped_len) {
217                 ptfm->valid = FALSE;
218                 return G_MAXFLOAT;
219         }
220
221         return total / smaller->len;
222 }
223
224 static int prep_sample(Sample *sample)
225 {
226         Vec2 offset;
227         float dist;
228
229         /* Structural disqualification */
230         if (!sample->used || !sample->enabled ||
231             (!ignore_stroke_num && sample->len != input->len))
232                 return FALSE;
233
234         prep_examined++;
235         sample->penalty = 0.f;
236
237         /* Account for displacement */
238         center_samples(&offset, sample, input);
239
240         /* Compare each input stroke to every stroke in the sample and
241            generate the stroke order information which will be used by other
242            engines */
243         if (input->len >= sample->len)
244                 dist = greedy_map(input, sample, &sample->transform, &offset);
245         else {
246                 vec2_set(&offset, -offset.x, -offset.y);
247                 dist = greedy_map(sample, input, &sample->transform, &offset);
248         }
249         if (!sample->transform.valid)
250                 return FALSE;
251
252         /* Undo square distortion */
253         dist = sqrtf(dist);
254         if (dist > MAX_DIST)
255                 return FALSE;
256
257         /* Penalize vertical displacement */
258         sample->penalty += VERTICAL_PENALTY *
259                            offset.y * offset.y / SCALE / SCALE;
260
261         sample->ratings[ENGINE_PREP] = RATING_MAX -
262                                        RATING_MAX * dist / MAX_DIST;
263         return TRUE;
264 }
265
266 void engine_prep(void)
267 {
268         Sample *sample, *list[PREP_MAX];
269         int i;
270
271         /* Rate every sample in every possible configuration */
272         list[0] = NULL;
273         prep_examined = 0;
274         sampleiter_reset();
275         while ((sample = sampleiter_next())) {
276                 sample->disqualified = TRUE;
277                 if (!sample->used || !sample->ch || !prep_sample(sample))
278                         continue;
279
280                 /* Bubble-sort sample into the list */
281                 for (i = 0; i < PREP_SAMPLES; i++)
282                         if (!list[i]) {
283                                 list[i] = sample;
284                                 if (i < PREP_MAX - 1)
285                                         list[i + 1] = NULL;
286                                 break;
287                         } else if (list[i]->ratings[ENGINE_PREP] <
288                                    sample->ratings[ENGINE_PREP]) {
289                                 memmove(list + i + 1, list + i,
290                                         (PREP_MAX - i - 1) * sizeof (*list));
291                                 list[i] = sample;
292                                 break;
293                         }
294         }
295
296         /* Qualify the best samples */
297         for (i = 0; i < PREP_SAMPLES && list[i]; i++)
298                 list[i]->disqualified = FALSE;
299 }
300