comparison mupdf-source/thirdparty/leptonica/src/recogtrain.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file recogtrain.c
29 * <pre>
30 *
31 * Training on labeled data
32 * l_int32 recogTrainLabeled()
33 * PIX *recogProcessLabeled()
34 * l_int32 recogAddSample()
35 * PIX *recogModifyTemplate()
36 * l_int32 recogAverageSamples()
37 * l_int32 pixaAccumulateSamples()
38 * l_int32 recogTrainingFinished()
39 * static l_int32 recogTemplatesAreOK()
40 * PIXA *recogFilterPixaBySize()
41 * PIXAA *recogSortPixaByClass()
42 * l_int32 recogRemoveOutliers1()
43 * PIXA *pixaRemoveOutliers1()
44 * l_int32 recogRemoveOutliers2()
45 * PIXA *pixaRemoveOutliers2()
46 *
47 * Training on unlabeled data
48 * L_RECOG recogTrainFromBoot()
49 *
50 * Padding the digit training set
51 * l_int32 recogPadDigitTrainingSet()
52 * l_int32 recogIsPaddingNeeded()
53 * static SARRAY *recogAddMissingClassStrings()
54 * PIXA *recogAddDigitPadTemplates()
55 * static l_int32 recogCharsetAvailable()
56 *
57 * Making a boot digit recognizer
58 * L_RECOG *recogMakeBootDigitRecog()
59 * PIXA *recogMakeBootDigitTemplates()
60 *
61 * Debugging
62 * l_int32 recogShowContent()
63 * l_int32 recogDebugAverages()
64 * l_int32 recogShowAverageTemplates()
65 * static PIX *pixDisplayOutliers()
66 * PIX *recogDisplayOutlier()
67 * PIX *recogShowMatchesInRange()
68 * PIX *recogShowMatch()
69 *
70 * These abbreviations are for the type of template to be used:
71 * * SI (for the scanned images)
72 * * WNL (for width-normalized lines, formed by first skeletonizing
73 * the scanned images, and then dilating to a fixed width)
74 * These abbreviations are for the type of recognizer:
75 * * BAR (book-adapted recognizer; the best type; can do identification
76 * with unscaled images and separation of touching characters).
77 * * BSR (bootstrap recognizer; used if more labeled templates are
78 * required for a BAR, either for finding more templates from
79 * the book, or making a hybrid BAR/BSR).
80 *
81 * The recog struct typically holds two versions of the input templates
82 * (e.g. from a pixa) that were used to generate it. One version is
83 * the unscaled input templates. The other version is the one that
84 * will be used by the recog to identify unlabeled data. That version
85 * depends on the input parameters when the recog is created. The choices
86 * for the latter version, and their suggested use, are:
87 * (1) unscaled SI -- typical for BAR, generated from book images
88 * (2) unscaled WNL -- ditto
89 * (3) scaled SI -- typical for recognizers containing template
90 * images from sources other than the book to be recognized
91 * (4) scaled WNL -- ditto
92 * For cases (3) and (4), we recommend scaling to fixed height; e.g.,
93 * scalew = 0, scaleh = 40.
94 * When using WNL, we recommend using a width of 5 in the template
95 * and 4 in the unlabeled data.
96 * It appears that better results for a BAR are usually obtained using
97 * SI than WNL, but more experimentation is needed.
98 *
99 * This utility is designed to build recognizers that are specifically
100 * adapted from a large amount of material, such as a book. These
101 * use labeled templates taken from the material, and not scaled.
102 * In addition, two special recognizers are useful:
103 * (1) Bootstrap recognizer (BSR). This uses height-scaled templates,
104 * that have been extended with several repetitions in one of two ways:
105 * (a) anisotropic width scaling (for either SI or WNL)
106 * (b) iterative erosions/dilations (for SI).
107 * (2) Outlier removal. This uses height scaled templates. It can be
108 * implemented without using templates that are aligned averages of all
109 * templates in a class.
110 *
111 * Recognizers are inexpensive to generate, for example, from a pixa
112 * of labeled templates. The general process of building a BAR is
113 * to start with labeled templates, e.g., in a pixa, make a BAR, and
114 * analyze new samples from the book to augment the BAR until it has
115 * enough samples for each character class. Along the way, samples
116 * from a BSR may be added for help in training. If not enough samples
117 * are available for the BAR, it can finally be augmented with BSR
118 * samples, in which case the resulting hybrid BAR/BSR recognizer
119 * must work on scaled images.
120 *
121 * Here are the steps in doing recog training:
122 * A. Generate a BAR from any existing labeled templates
123 * (1) Create a recog and add the templates, using recogAddSample().
124 * This stores the unscaled templates.
125 * [Note: this can be done in one step if the labeled templates are put
126 * into a pixa:
127 * L_Recog *rec = recogCreateFromPixa(pixa, ...); ]
128 * (2) Call recogTrainingFinished() to generate the (sometimes modified)
129 * templates to be used for correlation.
130 * (3) Optionally, remove outliers.
131 * If there are sufficient samples in the classes, we're done. Otherwise,
132 * B. Try to get more samples from the book to pad the BAR.
133 * (1) Save the unscaled, labeled templates from the BAR.
134 * (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
135 * (3) Do recognition on more unlabeled images, scaled to a fixed height
136 * (4) Add the unscaled, labeled images to the saved set.
137 * (5) Optionally, remove outliers.
138 * If there are sufficient samples in the classes, we're done. Otherwise,
139 * C. For classes without a sufficient number of templates, we can
140 * supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
141 * and do recognition scaled to a fixed height.
142 *
143 * Here are several methods that can be used for identifying outliers:
144 * (1) Compute average templates for each class and remove a candidate
145 * that is poorly correlated with the average. This is the most
146 * simple method. recogRemoveOutliers1() uses this, supplemented with
147 * a second threshold and a target number of templates to be saved.
148 * (2) Compute average templates for each class and remove a candidate
149 * that is more highly correlated with the average of some other class.
150 * This does not require setting a threshold for the correlation.
151 * recogRemoveOutliers2() uses this method, supplemented with a minimum
152 * correlation score.
153 * (3) For each candidate, find the average correlation with other
154 * members of its class, and remove those that have a relatively
155 * low average correlation. This is similar to (1), gives comparable
156 * results and because it does not use average templates, it requires
157 * a bit more computation.
158 * </pre>
159 */
160
161 #ifdef HAVE_CONFIG_H
162 #include <config_auto.h>
163 #endif /* HAVE_CONFIG_H */
164
165 #include <string.h>
166 #include "allheaders.h"
167 #include "pix_internal.h"
168
169 /* Static functions */
170 static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize,
171 l_float32 minfract, l_int32 *pok);
172 static SARRAY *recogAddMissingClassStrings(L_RECOG *recog);
173 static l_int32 recogCharsetAvailable(l_int32 type);
174 static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas);
175 static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp,
176 l_int32 maxclass, l_float32 maxscore);
177
178 /* Default parameters that are used in recogTemplatesAreOK() and
179 * in outlier removal functions, and that use template set size
180 * to decide if the set of templates (before outliers are removed)
181 * is valid. Values are set to accept most sets of sample templates. */
182 static const l_int32 DefaultMinSetSize = 1; /* minimum number of
183 samples for a valid class */
184 static const l_float32 DefaultMinSetFract = 0.4f; /* minimum fraction
185 of classes required for a valid recog */
186
187 /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */
188 static const l_float32 DefaultMinScore = 0.75; /* keep everything above */
189 static const l_int32 DefaultMinTarget = 3; /* to be kept if possible */
190 static const l_float32 LowerScoreThreshold = 0.5; /* templates can be
191 * kept down to this score to if needed to retain the
192 * desired minimum number of templates */
193
194
195 /*------------------------------------------------------------------------*
196 * Training *
197 *------------------------------------------------------------------------*/
198 /*!
199 * \brief recogTrainLabeled()
200 *
201 * \param[in] recog in training mode
202 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp
203 * \param[in] box [optional] cropping box
204 * \param[in] text [optional] if null, use text field in pix
205 * \param[in] debug 1 to display images of samples not captured
206 * \return 0 if OK, 1 on error
207 *
208 * <pre>
209 * Notes:
210 * (1) Training is restricted to the addition of a single
211 * character in an arbitrary (e.g., UTF8) charset
212 * (2) If box != null, it should represent the location in %pixs
213 * of the character image.
214 * </pre>
215 */
216 l_ok
217 recogTrainLabeled(L_RECOG *recog,
218 PIX *pixs,
219 BOX *box,
220 char *text,
221 l_int32 debug)
222 {
223 l_int32 ret;
224 PIX *pix;
225
226 if (!recog)
227 return ERROR_INT("recog not defined", __func__, 1);
228 if (!pixs)
229 return ERROR_INT("pixs not defined", __func__, 1);
230
231 /* Prepare the sample to be added. This step also acts
232 * as a filter, and can invalidate pixs as a template. */
233 ret = recogProcessLabeled(recog, pixs, box, text, &pix);
234 if (ret) {
235 pixDestroy(&pix);
236 L_WARNING("failure to get sample '%s' for training\n", __func__,
237 text);
238 return 1;
239 }
240
241 recogAddSample(recog, pix, debug);
242 pixDestroy(&pix);
243 return 0;
244 }
245
246
247 /*!
248 * \brief recogProcessLabeled()
249 *
250 * \param[in] recog in training mode
251 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp
252 * \param[in] box [optional] cropping box
253 * \param[in] text [optional] if null, use text field in pix
254 * \param[out] ppix addr of pix, 1 bpp, labeled
255 * \return 0 if OK, 1 on error
256 *
257 * <pre>
258 * Notes:
259 * (1) This crops and binarizes the input image, generating a pix
260 * of one character where the charval is inserted into the pix.
261 * </pre>
262 */
263 l_ok
264 recogProcessLabeled(L_RECOG *recog,
265 PIX *pixs,
266 BOX *box,
267 char *text,
268 PIX **ppix)
269 {
270 char *textdata;
271 l_int32 textinpix, textin, nsets;
272 NUMA *na;
273 PIX *pix1, *pix2, *pix3, *pix4;
274
275 if (!ppix)
276 return ERROR_INT("&pix not defined", __func__, 1);
277 *ppix = NULL;
278 if (!recog)
279 return ERROR_INT("recog not defined", __func__, 1);
280 if (!pixs)
281 return ERROR_INT("pixs not defined", __func__, 1);
282
283 /* Find the text; this will be stored with the output images */
284 textin = text && (text[0] != '\0');
285 textinpix = (pixs->text && (pixs->text[0] != '\0'));
286 if (!textin && !textinpix) {
287 L_ERROR("no text: %d\n", __func__, recog->num_samples);
288 return 1;
289 }
290 textdata = (textin) ? text : pixs->text; /* do not free */
291
292 /* Crop and binarize if necessary */
293 if (box)
294 pix1 = pixClipRectangle(pixs, box, NULL);
295 else
296 pix1 = pixClone(pixs);
297 if (pixGetDepth(pix1) > 1)
298 pix2 = pixConvertTo1(pix1, recog->threshold);
299 else
300 pix2 = pixClone(pix1);
301 pixDestroy(&pix1);
302
303 /* Remove isolated noise, using as a criterion all components
304 * that are removed by a vertical opening of size 5. */
305 pix3 = pixMorphSequence(pix2, "o1.5", 0); /* seed */
306 pixSeedfillBinary(pix3, pix3, pix2, 8); /* fill from seed; clip to pix2 */
307 pixDestroy(&pix2);
308
309 /* Clip to foreground */
310 pixClipToForeground(pix3, &pix4, NULL);
311 pixDestroy(&pix3);
312 if (!pix4)
313 return ERROR_INT("pix4 is empty", __func__, 1);
314
315 /* Verify that if there is more than 1 c.c., they all have
316 * horizontal overlap */
317 na = pixCountByColumn(pix4, NULL);
318 numaCountNonzeroRuns(na, &nsets);
319 numaDestroy(&na);
320 if (nsets > 1) {
321 L_WARNING("found %d sets of horiz separated c.c.; skipping\n",
322 __func__, nsets);
323 pixDestroy(&pix4);
324 return 1;
325 }
326
327 pixSetText(pix4, textdata);
328 *ppix = pix4;
329 return 0;
330 }
331
332
333 /*!
334 * \brief recogAddSample()
335 *
336 * \param[in] recog
337 * \param[in] pix a single character, 1 bpp
338 * \param[in] debug
339 * \return 0 if OK, 1 on error
340 *
341 * <pre>
342 * Notes:
343 * (1) The pix is 1 bpp, with the character string label embedded.
344 * (2) The pixaa_u array of the recog is initialized to accept
345 * up to 256 different classes. When training is finished,
346 * the arrays are truncated to the actual number of classes.
347 * To pad an existing recog from the boot recognizers, training
348 * is started again; if samples from a new class are added,
349 * the pixaa_u array is extended by adding a pixa to hold them.
350 * </pre>
351 */
352 l_ok
353 recogAddSample(L_RECOG *recog,
354 PIX *pix,
355 l_int32 debug)
356 {
357 char *text;
358 l_int32 npa, charint, index;
359 PIXA *pixa1;
360 PIXAA *paa;
361
362 if (!recog)
363 return ERROR_INT("recog not defined", __func__, 1);
364 if (!pix || pixGetDepth(pix) != 1)
365 return ERROR_INT("pix not defined or not 1 bpp\n", __func__, 1);
366 if (recog->train_done)
367 return ERROR_INT("not added: training has been completed", __func__, 1);
368 paa = recog->pixaa_u;
369
370 /* Make sure the character is in the set */
371 text = pixGetText(pix);
372 if (l_convertCharstrToInt(text, &charint) == 1) {
373 L_ERROR("invalid text: %s\n", __func__, text);
374 return 1;
375 }
376
377 /* Determine the class array index. Check if the class
378 * already exists, and if not, add it. */
379 if (recogGetClassIndex(recog, charint, text, &index) == 1) {
380 /* New class must be added */
381 npa = pixaaGetCount(paa, NULL);
382 if (index > npa) {
383 L_ERROR("oops: bad index %d > npa %d!!\n", __func__, index, npa);
384 return 1;
385 }
386 if (index == npa) { /* paa needs to be extended */
387 L_INFO("Adding new class and pixa: index = %d, text = %s\n",
388 __func__, index, text);
389 pixa1 = pixaCreate(10);
390 pixaaAddPixa(paa, pixa1, L_INSERT);
391 }
392 }
393 if (debug) {
394 L_INFO("Identified text label: %s\n", __func__, text);
395 L_INFO("Identified: charint = %d, index = %d\n",
396 __func__, charint, index);
397 }
398
399 /* Insert the unscaled character image into the right pixa.
400 * (Unscaled images are required to split touching characters.) */
401 recog->num_samples++;
402 pixaaAddPix(paa, index, pix, NULL, L_COPY);
403 return 0;
404 }
405
406
407 /*!
408 * \brief recogModifyTemplate()
409 *
410 * \param[in] recog
411 * \param[in] pixs 1 bpp, to be optionally scaled and turned into
412 * strokes of fixed width
413 * \return pixd modified pix if OK, NULL on error
414 */
415 PIX *
416 recogModifyTemplate(L_RECOG *recog,
417 PIX *pixs)
418 {
419 l_int32 w, h, empty;
420 PIX *pix1, *pix2;
421
422 if (!recog)
423 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
424 if (!pixs)
425 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
426
427 /* Scale first */
428 pixGetDimensions(pixs, &w, &h, NULL);
429 if ((recog->scalew == 0 || recog->scalew == w) &&
430 (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */
431 pix1 = pixCopy(NULL, pixs);
432 } else {
433 pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh);
434 }
435 if (!pix1)
436 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
437
438 /* Then optionally convert to lines */
439 if (recog->linew <= 0) {
440 pix2 = pixClone(pix1);
441 } else {
442 pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8);
443 }
444 pixDestroy(&pix1);
445 if (!pix2)
446 return (PIX *)ERROR_PTR("pix2 not made", __func__, NULL);
447
448 /* Make sure we still have some pixels */
449 pixZero(pix2, &empty);
450 if (empty) {
451 pixDestroy(&pix2);
452 return (PIX *)ERROR_PTR("modified template has no pixels",
453 __func__, NULL);
454 }
455 return pix2;
456 }
457
458
459 /*!
460 * \brief recogAverageSamples()
461 *
462 * \param[in] recog addr of existing recog
463 * \param[in] debug
464 * \return 0 on success, 1 on failure
465 *
466 * <pre>
467 * Notes:
468 * (1) This is only called in two situations:
469 * (a) When splitting characters using either the DID method
470 * recogDecode() or the the greedy splitter
471 * recogCorrelationBestRow()
472 * (b) By a special recognizer that is used to remove outliers.
473 * Both unscaled and scaled inputs are averaged.
474 * (2) If the data in any class is nonexistent (no samples), or
475 * very bad (no fg pixels in the average), or if the ratio
476 * of max/min average unscaled class template heights is
477 * greater than max_ht_ratio, this function fails. The caller
478 * must check the return value of the recog, and destroy the
479 * recog on failure.
480 * (3) Set debug = 1 to view the resulting templates and their centroids.
481 * </pre>
482 */
483 l_int32
484 recogAverageSamples(L_RECOG *recog,
485 l_int32 debug)
486 {
487 l_int32 i, nsamp, size, area, bx, by, badclass;
488 l_float32 x, y, hratio;
489 BOX *box;
490 PIXA *pixa1;
491 PIX *pix1, *pix2, *pix3;
492 PTA *pta1;
493
494 if (!recog)
495 return ERROR_INT("recog not defined", __func__, 1);
496
497 if (recog->ave_done) {
498 if (debug) /* always do this if requested */
499 recogShowAverageTemplates(recog);
500 return 0;
501 }
502
503 /* Remove any previous averaging data */
504 size = recog->setsize;
505 pixaDestroy(&recog->pixa_u);
506 ptaDestroy(&recog->pta_u);
507 numaDestroy(&recog->nasum_u);
508 recog->pixa_u = pixaCreate(size);
509 recog->pta_u = ptaCreate(size);
510 recog->nasum_u = numaCreate(size);
511
512 pixaDestroy(&recog->pixa);
513 ptaDestroy(&recog->pta);
514 numaDestroy(&recog->nasum);
515 recog->pixa = pixaCreate(size);
516 recog->pta = ptaCreate(size);
517 recog->nasum = numaCreate(size);
518
519 /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area.
520 * Note that when we threshold to 1 bpp the 8 bpp averaged template
521 * that is returned from the accumulator, it will not be cropped
522 * to the foreground. We must crop it, because the correlator
523 * makes that assumption and will return a zero value if the
524 * width or height of the two images differs by several pixels.
525 * But cropping to fg can cause the value of the centroid to
526 * change, if bx > 0 or by > 0. */
527 badclass = FALSE;
528 for (i = 0; i < size; i++) {
529 pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE);
530 pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE);
531 nsamp = pixaGetCount(pixa1);
532 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */
533 if (nsamp == 0) { /* no information for this class */
534 L_ERROR("no samples in class %d\n", __func__, i);
535 badclass = TRUE;
536 pixaDestroy(&pixa1);
537 ptaDestroy(&pta1);
538 break;
539 } else {
540 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
541 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
542 pixInvert(pix2, pix2);
543 pixClipToForeground(pix2, &pix3, &box);
544 if (!box) {
545 L_ERROR("no fg pixels in average for uclass %d\n", __func__, i);
546 badclass = TRUE;
547 pixDestroy(&pix1);
548 pixDestroy(&pix2);
549 pixaDestroy(&pixa1);
550 ptaDestroy(&pta1);
551 break;
552 } else {
553 boxGetGeometry(box, &bx, &by, NULL, NULL);
554 pixaAddPix(recog->pixa_u, pix3, L_INSERT);
555 ptaAddPt(recog->pta_u, x - bx, y - by); /* correct centroid */
556 pixCountPixels(pix3, &area, recog->sumtab);
557 numaAddNumber(recog->nasum_u, area); /* foreground */
558 boxDestroy(&box);
559 }
560 pixDestroy(&pix1);
561 pixDestroy(&pix2);
562 }
563 pixaDestroy(&pixa1);
564 ptaDestroy(&pta1);
565 }
566
567 /* Are any classes bad? */
568 if (badclass)
569 return ERROR_INT("at least 1 bad class", __func__, 1);
570
571 /* Get the range of sizes of the unscaled average templates.
572 * Reject if the height ratio is too large. */
573 pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u,
574 &recog->maxwidth_u, &recog->maxheight_u);
575 hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u;
576 if (hratio > recog->max_ht_ratio) {
577 L_ERROR("ratio of max/min height of average templates = %4.1f\n",
578 __func__, hratio);
579 return 1;
580 }
581
582 /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */
583 for (i = 0; i < size; i++) {
584 pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE);
585 pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE);
586 nsamp = pixaGetCount(pixa1);
587 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */
588 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
589 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
590 pixInvert(pix2, pix2);
591 pixClipToForeground(pix2, &pix3, &box);
592 if (!box) {
593 L_ERROR("no fg pixels in average for class %d\n", __func__, i);
594 badclass = TRUE;
595 pixDestroy(&pix1);
596 pixDestroy(&pix2);
597 pixaDestroy(&pixa1);
598 ptaDestroy(&pta1);
599 break;
600 } else {
601 boxGetGeometry(box, &bx, &by, NULL, NULL);
602 pixaAddPix(recog->pixa, pix3, L_INSERT);
603 ptaAddPt(recog->pta, x - bx, y - by); /* correct centroid */
604 pixCountPixels(pix3, &area, recog->sumtab);
605 numaAddNumber(recog->nasum, area); /* foreground */
606 boxDestroy(&box);
607 }
608 pixDestroy(&pix1);
609 pixDestroy(&pix2);
610 pixaDestroy(&pixa1);
611 ptaDestroy(&pta1);
612 }
613
614 if (badclass)
615 return ERROR_INT("no fg pixels in at least 1 class", __func__, 1);
616
617 /* Get the range of widths of the scaled average templates */
618 pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL);
619
620 /* Get dimensions useful for splitting */
621 recog->min_splitw = L_MAX(5, recog->minwidth_u - 5);
622 recog->max_splith = recog->maxheight_u + 12; /* allow for skew */
623
624 if (debug)
625 recogShowAverageTemplates(recog);
626
627 recog->ave_done = TRUE;
628 return 0;
629 }
630
631
632 /*!
633 * \brief pixaAccumulateSamples()
634 *
635 * \param[in] pixa of samples from the same class, 1 bpp
636 * \param[in] pta [optional] of centroids of the samples
637 * \param[out] ppixd accumulated samples, 8 bpp
638 * \param[out] px [optional] average x coordinate of centroids
639 * \param[out] py [optional] average y coordinate of centroids
640 * \return 0 on success, 1 on failure
641 *
642 * <pre>
643 * Notes:
644 * (1) This generates an aligned (by centroid) sum of the input pix.
645 * (2) We use only the first 256 samples; that's plenty.
646 * (3) If pta is not input, we generate two tables, and discard
647 * after use. If this is called many times, it is better
648 * to precompute the pta.
649 * </pre>
650 */
651 l_int32
652 pixaAccumulateSamples(PIXA *pixa,
653 PTA *pta,
654 PIX **ppixd,
655 l_float32 *px,
656 l_float32 *py)
657 {
658 l_int32 i, n, maxw, maxh, xdiff, ydiff;
659 l_int32 *centtab, *sumtab;
660 l_float32 xc, yc, xave, yave;
661 PIX *pix1, *pix2, *pixsum;
662 PTA *ptac;
663
664 if (px) *px = 0;
665 if (py) *py = 0;
666 if (!ppixd)
667 return ERROR_INT("&pixd not defined", __func__, 1);
668 *ppixd = NULL;
669 if (!pixa)
670 return ERROR_INT("pixa not defined", __func__, 1);
671
672 n = pixaGetCount(pixa);
673 if (pta && ptaGetCount(pta) != n)
674 return ERROR_INT("pta count differs from pixa count", __func__, 1);
675 n = L_MIN(n, 256); /* take the first 256 only */
676 if (n == 0)
677 return ERROR_INT("pixa array empty", __func__, 1);
678
679 /* Find the centroids */
680 if (pta) {
681 ptac = ptaClone(pta);
682 } else { /* generate them here */
683 ptac = ptaCreate(n);
684 centtab = makePixelCentroidTab8();
685 sumtab = makePixelSumTab8();
686 for (i = 0; i < n; i++) {
687 pix1 = pixaGetPix(pixa, i, L_CLONE);
688 pixCentroid(pix1, centtab, sumtab, &xc, &yc);
689 ptaAddPt(ptac, xc, yc);
690 }
691 LEPT_FREE(centtab);
692 LEPT_FREE(sumtab);
693 }
694
695 /* Find the average value of the centroids */
696 xave = yave = 0;
697 for (i = 0; i < n; i++) {
698 ptaGetPt(pta, i, &xc, &yc);
699 xave += xc;
700 yave += yc;
701 }
702 xave = xave / (l_float32)n;
703 yave = yave / (l_float32)n;
704 if (px) *px = xave;
705 if (py) *py = yave;
706
707 /* Place all pix with their centroids located at the average
708 * centroid value, and sum the results. Make the accumulator
709 * image slightly larger than the largest sample to insure
710 * that all pixels are represented in the accumulator. */
711 pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh);
712 pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0);
713 pix1 = pixCreate(maxw, maxh, 1);
714 for (i = 0; i < n; i++) {
715 pix2 = pixaGetPix(pixa, i, L_CLONE);
716 ptaGetPt(ptac, i, &xc, &yc);
717 xdiff = (l_int32)(xave - xc);
718 ydiff = (l_int32)(yave - yc);
719 pixClearAll(pix1);
720 pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC,
721 pix2, 0, 0);
722 pixAccumulate(pixsum, pix1, L_ARITH_ADD);
723 pixDestroy(&pix2);
724 }
725 *ppixd = pixFinalAccumulate(pixsum, 0, 8);
726
727 pixDestroy(&pix1);
728 pixDestroy(&pixsum);
729 ptaDestroy(&ptac);
730 return 0;
731 }
732
733
734 /*!
735 * \brief recogTrainingFinished()
736 *
737 * \param[in] precog addr of recog
738 * \param[in] modifyflag 1 to use recogModifyTemplate(); 0 otherwise
739 * \param[in] minsize set to -1 for default
740 * \param[in] minfract set to -1.0 for default
741 * \return 0 if OK, 1 on error (input recog will be destroyed)
742 *
743 * <pre>
744 * Notes:
745 * (1) This must be called after all training samples have been added.
746 * (2) If the templates are not good enough, the recog input is destroyed.
747 * (3) Usually, %modifyflag == 1, because we want to apply
748 * recogModifyTemplate() to generate the actual templates
749 * that will be used. The one exception is when reading a
750 * serialized recog: there we want to put the same set of
751 * templates in both the unscaled and modified pixaa.
752 * See recogReadStream() to see why we do this.
753 * (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
754 * (5) The following things are done here:
755 * (a) Allocate (or reallocate) storage for (possibly) modified
756 * bitmaps, centroids, and fg areas.
757 * (b) Generate the (possibly) modified bitmaps.
758 * (c) Compute centroid and fg area data for both unscaled and
759 * modified bitmaps.
760 * (d) Truncate the pixaa, ptaa and numaa arrays down from
761 * 256 to the actual size.
762 * (6) Putting these operations here makes it simple to recompute
763 * the recog with different modifications on the bitmaps.
764 * (7) Call recogShowContent() to display the templates, both
765 * unscaled and modified.
766 * </pre>
767 */
768 l_ok
769 recogTrainingFinished(L_RECOG **precog,
770 l_int32 modifyflag,
771 l_int32 minsize,
772 l_float32 minfract)
773 {
774 l_int32 ok, i, j, size, nc, ns, area;
775 l_float32 xave, yave;
776 PIX *pix, *pixd;
777 PIXA *pixa;
778 PIXAA *paa;
779 PTA *pta;
780 PTAA *ptaa;
781 L_RECOG *recog;
782
783 if (!precog)
784 return ERROR_INT("&recog not defined", __func__, 1);
785 if ((recog = *precog) == NULL)
786 return ERROR_INT("recog not defined", __func__, 1);
787 if (recog->train_done) return 0;
788
789 /* Test the input templates */
790 recogTemplatesAreOK(recog, minsize, minfract, &ok);
791 if (!ok) {
792 recogDestroy(precog);
793 return ERROR_INT("bad templates", __func__, 1);
794 }
795
796 /* Generate the storage for the possibly-scaled training bitmaps */
797 size = recog->maxarraysize;
798 paa = pixaaCreate(size);
799 pixa = pixaCreate(1);
800 pixaaInitFull(paa, pixa);
801 pixaDestroy(&pixa);
802 pixaaDestroy(&recog->pixaa);
803 recog->pixaa = paa;
804
805 /* Generate the storage for the unscaled centroid training data */
806 ptaa = ptaaCreate(size);
807 pta = ptaCreate(0);
808 ptaaInitFull(ptaa, pta);
809 ptaaDestroy(&recog->ptaa_u);
810 recog->ptaa_u = ptaa;
811
812 /* Generate the storage for the possibly-scaled centroid data */
813 ptaa = ptaaCreate(size);
814 ptaaInitFull(ptaa, pta);
815 ptaDestroy(&pta);
816 ptaaDestroy(&recog->ptaa);
817 recog->ptaa = ptaa;
818
819 /* Generate the storage for the fg area data */
820 numaaDestroy(&recog->naasum_u);
821 numaaDestroy(&recog->naasum);
822 recog->naasum_u = numaaCreateFull(size, 0);
823 recog->naasum = numaaCreateFull(size, 0);
824
825 paa = recog->pixaa_u;
826 nc = recog->setsize;
827 for (i = 0; i < nc; i++) {
828 pixa = pixaaGetPixa(paa, i, L_CLONE);
829 ns = pixaGetCount(pixa);
830 for (j = 0; j < ns; j++) {
831 /* Save centroid and area data for the unscaled pix */
832 pix = pixaGetPix(pixa, j, L_CLONE);
833 pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave);
834 ptaaAddPt(recog->ptaa_u, i, xave, yave);
835 pixCountPixels(pix, &area, recog->sumtab);
836 numaaAddNumber(recog->naasum_u, i, area); /* foreground */
837
838 /* Insert the (optionally) scaled character image, and
839 * save centroid and area data for it */
840 if (modifyflag == 1)
841 pixd = recogModifyTemplate(recog, pix);
842 else
843 pixd = pixClone(pix);
844 if (pixd) {
845 pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT);
846 pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave);
847 ptaaAddPt(recog->ptaa, i, xave, yave);
848 pixCountPixels(pixd, &area, recog->sumtab);
849 numaaAddNumber(recog->naasum, i, area);
850 } else {
851 L_ERROR("failed: modified template for class %d, sample %d\n",
852 __func__, i, j);
853 }
854 pixDestroy(&pix);
855 }
856 pixaDestroy(&pixa);
857 }
858
859 /* Truncate the arrays to those with non-empty containers */
860 pixaaTruncate(recog->pixaa_u);
861 pixaaTruncate(recog->pixaa);
862 ptaaTruncate(recog->ptaa_u);
863 ptaaTruncate(recog->ptaa);
864 numaaTruncate(recog->naasum_u);
865 numaaTruncate(recog->naasum);
866
867 recog->train_done = TRUE;
868 return 0;
869 }
870
871
872 /*!
873 * \brief recogTemplatesAreOK()
874 *
875 * \param[in] recog
876 * \param[in] minsize set to -1 for default
877 * \param[in] minfract set to -1.0 for default
878 * \param[out] pok set to 1 if template set is valid; 0 otherwise
879 * \return 1 on error; 0 otherwise. An invalid template set is not an error.
880 *
881 * <pre>
882 * Notes:
883 * (1) This is called by recogTrainingFinished(). A return value of 0
884 * will cause recogTrainingFinished() to destroy the recog.
885 * (2) %minsize is the minimum number of samples required for
886 * the class; -1 uses the default
887 * (3) %minfract is the minimum fraction of classes required for
888 * the recog to be usable; -1.0 uses the default
889 * </pre>
890 */
891 static l_int32
892 recogTemplatesAreOK(L_RECOG *recog,
893 l_int32 minsize,
894 l_float32 minfract,
895 l_int32 *pok)
896 {
897 l_int32 i, n, validsets, nt;
898 l_float32 ratio;
899 NUMA *na;
900
901 if (!pok)
902 return ERROR_INT("&ok not defined", __func__, 1);
903 *pok = 0;
904 if (!recog)
905 return ERROR_INT("recog not defined", __func__, 1);
906
907 minsize = (minsize < 0) ? DefaultMinSetSize : minsize;
908 minfract = (minfract < 0) ? DefaultMinSetFract : minfract;
909 n = pixaaGetCount(recog->pixaa_u, &na);
910 validsets = 0;
911 for (i = 0, validsets = 0; i < n; i++) {
912 numaGetIValue(na, i, &nt);
913 if (nt >= minsize)
914 validsets++;
915 }
916 numaDestroy(&na);
917 ratio = (l_float32)validsets / (l_float32)recog->charset_size;
918 *pok = (ratio >= minfract) ? 1 : 0;
919 return 0;
920 }
921
922
923 /*!
924 * \brief recogFilterPixaBySize()
925 *
926 * \param[in] pixas labeled templates
927 * \param[in] setsize size of character set (number of classes)
928 * \param[in] maxkeep max number of templates to keep in a class
929 * \param[in] max_ht_ratio max allowed height ratio (see below)
930 * \param[out] pna [optional] debug output, giving the number
931 * in each class after filtering; use NULL to skip
932 * \return pixa filtered templates, or NULL on error
933 *
934 * <pre>
935 * Notes:
936 * (1) The basic assumption is that the most common and larger
937 * templates in each class are more likely to represent the
938 * characters we are interested in. For example, larger digits
939 * are more likely to represent page numbers, and smaller digits
940 * could be data in tables. Therefore, we bias the first
941 * stage of filtering toward the larger characters by removing
942 * very small ones, and select based on proximity of the
943 * remaining characters to median height.
944 * (2) For each of the %setsize classes, order the templates
945 * increasingly by height. Take the rank 0.9 height. Eliminate
946 * all templates that are shorter by more than %max_ht_ratio.
947 * Of the remaining ones, select up to %maxkeep that are closest
948 * in rank order height to the median template.
949 * </pre>
950 */
951 PIXA *
952 recogFilterPixaBySize(PIXA *pixas,
953 l_int32 setsize,
954 l_int32 maxkeep,
955 l_float32 max_ht_ratio,
956 NUMA **pna)
957 {
958 l_int32 i, j, h90, hj, j1, j2, j90, n, nc;
959 l_float32 ratio;
960 NUMA *na;
961 PIXA *pixa1, *pixa2, *pixa3, *pixa4, *pixa5;
962 PIXAA *paa;
963
964 if (pna) *pna = NULL;
965 if (!pixas)
966 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
967
968 if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL)
969 return (PIXA *)ERROR_PTR("paa not made", __func__, NULL);
970 nc = pixaaGetCount(paa, NULL);
971 na = (pna) ? numaCreate(0) : NULL;
972 if (pna) *pna = na;
973 pixa5 = pixaCreate(0);
974 for (i = 0; i < nc; i++) {
975 pixa1 = pixaaGetPixa(paa, i, L_CLONE);
976 if ((n = pixaGetCount(pixa1)) == 0) {
977 pixaDestroy(&pixa1);
978 continue;
979 }
980 pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL,
981 L_COPY);
982 j90 = (l_int32)(0.9 * n);
983 pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL);
984 pixa3 = pixaCreate(n);
985 for (j = 0; j < n; j++) {
986 pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL);
987 ratio = (l_float32)h90 / (l_float32)hj;
988 if (ratio <= max_ht_ratio)
989 pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT);
990 }
991 n = pixaGetCount(pixa3);
992 if (n <= maxkeep) {
993 pixa4 = pixaCopy(pixa3, L_CLONE);
994 } else {
995 j1 = (n - maxkeep) / 2;
996 j2 = j1 + maxkeep - 1;
997 pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE);
998 }
999 if (na) numaAddNumber(na, pixaGetCount(pixa4));
1000 pixaJoin(pixa5, pixa4, 0, -1);
1001 pixaDestroy(&pixa1);
1002 pixaDestroy(&pixa2);
1003 pixaDestroy(&pixa3);
1004 pixaDestroy(&pixa4);
1005 }
1006
1007 pixaaDestroy(&paa);
1008 return pixa5;
1009 }
1010
1011
1012 /*!
1013 * \brief recogSortPixaByClass()
1014 *
1015 * \param[in] pixa labeled templates
1016 * \param[in] setsize size of character set (number of classes)
1017 * \return paa pixaa where each pixa has templates for one class,
1018 * or null on error
1019 */
1020 PIXAA *
1021 recogSortPixaByClass(PIXA *pixa,
1022 l_int32 setsize)
1023 {
1024 PIXAA *paa;
1025 L_RECOG *recog;
1026
1027 if (!pixa)
1028 return (PIXAA *)ERROR_PTR("pixa not defined", __func__, NULL);
1029
1030 if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL)
1031 return (PIXAA *)ERROR_PTR("recog not made", __func__, NULL);
1032 paa = recog->pixaa_u; /* grab the paa of unscaled templates */
1033 recog->pixaa_u = NULL;
1034 recogDestroy(&recog);
1035 return paa;
1036 }
1037
1038
1039 /*!
1040 * \brief recogRemoveOutliers1()
1041 *
1042 * \param[in] precog addr of recog with unscaled labeled templates
1043 * \param[in] minscore keep everything with at least this score
1044 * \param[in] mintarget minimum desired number to retain if possible
1045 * \param[in] minsize minimum number of samples required for a class
1046 * \param[out] ppixsave [optional debug] saved templates, with scores
1047 * \param[out] ppixrem [optional debug] removed templates, with scores
1048 * \return 0 if OK, 1 on error.
1049 *
1050 * <pre>
1051 * Notes:
1052 * (1) This is a convenience wrapper when using default parameters
1053 * for the recog. See pixaRemoveOutliers1() for details.
1054 * (2) If this succeeds, the new recog replaces the input recog;
1055 * if it fails, the input recog is destroyed.
1056 * </pre>
1057 */
1058 l_ok
1059 recogRemoveOutliers1(L_RECOG **precog,
1060 l_float32 minscore,
1061 l_int32 mintarget,
1062 l_int32 minsize,
1063 PIX **ppixsave,
1064 PIX **ppixrem)
1065 {
1066 PIXA *pixa1, *pixa2;
1067 L_RECOG *recog;
1068
1069 if (!precog)
1070 return ERROR_INT("&recog not defined", __func__, 1);
1071 if (*precog == NULL)
1072 return ERROR_INT("recog not defined", __func__, 1);
1073
1074 /* Extract the unscaled templates */
1075 pixa1 = recogExtractPixa(*precog);
1076 recogDestroy(precog);
1077
1078 pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize,
1079 ppixsave, ppixrem);
1080 pixaDestroy(&pixa1);
1081 if (!pixa2)
1082 return ERROR_INT("failure to remove outliers", __func__, 1);
1083
1084 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1085 pixaDestroy(&pixa2);
1086 if (!recog)
1087 return ERROR_INT("failure to make recog from pixa sans outliers",
1088 __func__, 1);
1089
1090 *precog = recog;
1091 return 0;
1092 }
1093
1094
1095 /*!
1096 * \brief pixaRemoveOutliers1()
1097 *
1098 * \param[in] pixas unscaled labeled templates
1099 * \param[in] minscore keep everything with at least this score;
1100 * use -1.0 for default.
1101 * \param[in] mintarget minimum desired number to retain if possible;
1102 * use -1 for default.
1103 * \param[in] minsize minimum number of samples required for a class;
1104 * use -1 for default.
1105 * \param[out] ppixsave [optional debug] saved templates, with scores
1106 * \param[out] ppixrem [optional debug] removed templates, with scores
1107 * \return pixa of unscaled templates to be kept, or NULL on error
1108 *
1109 * <pre>
1110 * Notes:
1111 * (1) Removing outliers is particularly important when recognition
1112 * goes against all the samples in the training set, as opposed
1113 * to the averages for each class. The reason is that we get
1114 * an identification error if a mislabeled template is a best
1115 * match for an input sample.
1116 * (2) Because the score values depend strongly on the quality
1117 * of the character images, to avoid losing too many samples
1118 * we supplement a minimum score for retention with a score
1119 * necessary to acquire the minimum target number of templates.
1120 * To do this we are willing to use a lower threshold,
1121 * LowerScoreThreshold, on the score. Consequently, with
1122 * poor quality templates, we may keep samples with a score
1123 * less than %minscore, but never less than LowerScoreThreshold.
1124 * And if the number of samples is less than %minsize, we do
1125 * not use any.
1126 * (3) This is meant to be used on a BAR, where the templates all
1127 * come from the same book; use minscore ~0.75.
1128 * (4) Method: make a scaled recog from the input %pixas. Then,
1129 * for each class: generate the averages, match each
1130 * scaled template against the average, and save unscaled
1131 * templates that had a sufficiently good match.
1132 * </pre>
1133 */
1134 PIXA *
1135 pixaRemoveOutliers1(PIXA *pixas,
1136 l_float32 minscore,
1137 l_int32 mintarget,
1138 l_int32 minsize,
1139 PIX **ppixsave,
1140 PIX **ppixrem)
1141 {
1142 l_int32 i, j, debug, n, area1, area2;
1143 l_float32 x1, y1, x2, y2, minfract, score, rankscore, threshscore;
1144 NUMA *nasum, *narem, *nasave, *nascore;
1145 PIX *pix1, *pix2;
1146 PIXA *pixa, *pixarem, *pixad;
1147 PTA *pta;
1148 L_RECOG *recog;
1149
1150 if (ppixsave) *ppixsave = NULL;
1151 if (ppixrem) *ppixrem = NULL;
1152 if (!pixas)
1153 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
1154 minscore = L_MIN(minscore, 1.0);
1155 if (minscore <= 0.0)
1156 minscore = DefaultMinScore;
1157 mintarget = L_MIN(mintarget, 3);
1158 if (mintarget <= 0)
1159 mintarget = DefaultMinTarget;
1160 if (minsize < 0)
1161 minsize = DefaultMinSetSize;
1162
1163 /* Make a special height-scaled recognizer with average templates */
1164 debug = (ppixsave || ppixrem) ? 1 : 0;
1165 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1166 if (!recog)
1167 return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL);
1168 if (recogAverageSamples(recog, debug) != 0) {
1169 recogDestroy(&recog);
1170 return (PIXA *)ERROR_PTR("bad templates", __func__, NULL);
1171 }
1172
1173 nasave = (ppixsave) ? numaCreate(0) : NULL;
1174 pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1175 narem = (ppixrem) ? numaCreate(0) : NULL;
1176
1177 pixad = pixaCreate(0);
1178 for (i = 0; i < recog->setsize; i++) {
1179 /* Access the average template and values for scaled
1180 * images in this class */
1181 pix1 = pixaGetPix(recog->pixa, i, L_CLONE);
1182 ptaGetPt(recog->pta, i, &x1, &y1);
1183 numaGetIValue(recog->nasum, i, &area1);
1184
1185 /* Get the scores for each sample in the class */
1186 pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
1187 pta = ptaaGetPta(recog->ptaa, i, L_CLONE); /* centroids */
1188 nasum = numaaGetNuma(recog->naasum, i, L_CLONE); /* fg areas */
1189 n = pixaGetCount(pixa);
1190 nascore = numaCreate(n);
1191 for (j = 0; j < n; j++) {
1192 pix2 = pixaGetPix(pixa, j, L_CLONE);
1193 ptaGetPt(pta, j, &x2, &y2); /* centroid average */
1194 numaGetIValue(nasum, j, &area2); /* fg sum average */
1195 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1196 x1 - x2, y1 - y2, 5, 5,
1197 recog->sumtab, &score);
1198 numaAddNumber(nascore, score);
1199 if (debug && score == 0.0) /* typ. large size difference */
1200 lept_stderr("Got 0 score for i = %d, j = %d\n", i, j);
1201 pixDestroy(&pix2);
1202 }
1203 pixDestroy(&pix1);
1204
1205 /* Find the rankscore, corresponding to the 1.0 - minfract.
1206 * To attempt to maintain the minfract of templates, use as a
1207 * cutoff the minimum of minscore and the rank score. However,
1208 * no template is saved with an actual score less than
1209 * that at least one template is kept. */
1210 minfract = (l_float32)mintarget / (l_float32)n;
1211 numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore);
1212 threshscore = L_MAX(LowerScoreThreshold,
1213 L_MIN(minscore, rankscore));
1214 if (debug) {
1215 L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n",
1216 __func__, minscore, rankscore, threshscore);
1217 }
1218
1219 /* Save templates that are at or above threshold.
1220 * Toss any classes with less than %minsize templates. */
1221 for (j = 0; j < n; j++) {
1222 numaGetFValue(nascore, j, &score);
1223 pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1224 if (score >= threshscore && n >= minsize) {
1225 pixaAddPix(pixad, pix1, L_INSERT);
1226 if (nasave) numaAddNumber(nasave, score);
1227 } else if (debug) {
1228 pixaAddPix(pixarem, pix1, L_INSERT);
1229 numaAddNumber(narem, score);
1230 } else {
1231 pixDestroy(&pix1);
1232 }
1233 }
1234
1235 pixaDestroy(&pixa);
1236 ptaDestroy(&pta);
1237 numaDestroy(&nasum);
1238 numaDestroy(&nascore);
1239 }
1240
1241 if (ppixsave) {
1242 *ppixsave = pixDisplayOutliers(pixad, nasave);
1243 numaDestroy(&nasave);
1244 }
1245 if (ppixrem) {
1246 *ppixrem = pixDisplayOutliers(pixarem, narem);
1247 pixaDestroy(&pixarem);
1248 numaDestroy(&narem);
1249 }
1250 recogDestroy(&recog);
1251 return pixad;
1252 }
1253
1254
1255 /*!
1256 * \brief recogRemoveOutliers2()
1257 *
1258 * \param[in] precog addr of recog with unscaled labeled templates
1259 * \param[in] minscore keep everything with at least this score
1260 * \param[in] minsize minimum number of samples required for a class
1261 * \param[out] ppixsave [optional debug] saved templates, with scores
1262 * \param[out] ppixrem [optional debug] removed templates, with scores
1263 * \return 0 if OK, 1 on error.
1264 *
1265 * <pre>
1266 * Notes:
1267 * (1) This is a convenience wrapper when using default parameters
1268 * for the recog. See pixaRemoveOutliers2() for details.
1269 * (2) If this succeeds, the new recog replaces the input recog;
1270 * if it fails, the input recog is destroyed.
1271 * </pre>
1272 */
1273 l_ok
1274 recogRemoveOutliers2(L_RECOG **precog,
1275 l_float32 minscore,
1276 l_int32 minsize,
1277 PIX **ppixsave,
1278 PIX **ppixrem)
1279 {
1280 PIXA *pixa1, *pixa2;
1281 L_RECOG *recog;
1282
1283 if (!precog)
1284 return ERROR_INT("&recog not defined", __func__, 1);
1285 if (*precog == NULL)
1286 return ERROR_INT("recog not defined", __func__, 1);
1287
1288 /* Extract the unscaled templates */
1289 pixa1 = recogExtractPixa(*precog);
1290 recogDestroy(precog);
1291
1292 pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem);
1293 pixaDestroy(&pixa1);
1294 if (!pixa2)
1295 return ERROR_INT("failure to remove outliers", __func__, 1);
1296
1297 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1);
1298 pixaDestroy(&pixa2);
1299 if (!recog)
1300 return ERROR_INT("failure to make recog from pixa sans outliers",
1301 __func__, 1);
1302
1303 *precog = recog;
1304 return 0;
1305 }
1306
1307
1308 /*!
1309 * \brief pixaRemoveOutliers2()
1310 *
1311 * \param[in] pixas unscaled labeled templates
1312 * \param[in] minscore keep everything with at least this score;
1313 * use -1.0 for default.
1314 * \param[in] minsize minimum number of samples required for a class;
1315 * use -1 for default.
1316 * \param[out] ppixsave [optional debug] saved templates, with scores
1317 * \param[out] ppixrem [optional debug] removed templates, with scores
1318 * \return pixa of unscaled templates to be kept, or NULL on error
1319 *
1320 * <pre>
1321 * Notes:
1322 * (1) Removing outliers is particularly important when recognition
1323 * goes against all the samples in the training set, as opposed
1324 * to the averages for each class. The reason is that we get
1325 * an identification error if a mislabeled template is a best
1326 * match for an input sample.
1327 * (2) This method compares each template against the average templates
1328 * of each class, and discards any template that has a higher
1329 * correlation to a class different from its own. It also
1330 * sets a lower bound on correlation scores with its class average.
1331 * (3) This is meant to be used on a BAR, where the templates all
1332 * come from the same book; use minscore ~0.75.
1333 * </pre>
1334 */
1335 PIXA *
1336 pixaRemoveOutliers2(PIXA *pixas,
1337 l_float32 minscore,
1338 l_int32 minsize,
1339 PIX **ppixsave,
1340 PIX **ppixrem)
1341 {
1342 l_int32 i, j, k, n, area1, area2, maxk, debug;
1343 l_float32 x1, y1, x2, y2, score, maxscore;
1344 NUMA *nan, *nascore, *nasave;
1345 PIX *pix1, *pix2, *pix3;
1346 PIXA *pixarem, *pixad;
1347 L_RECOG *recog;
1348
1349 if (ppixsave) *ppixsave = NULL;
1350 if (ppixrem) *ppixrem = NULL;
1351 if (!pixas)
1352 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
1353 minscore = L_MIN(minscore, 1.0);
1354 if (minscore <= 0.0)
1355 minscore = DefaultMinScore;
1356 if (minsize < 0)
1357 minsize = DefaultMinSetSize;
1358
1359 /* Make a special height-scaled recognizer with average templates */
1360 debug = (ppixsave || ppixrem) ? 1 : 0;
1361 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
1362 if (!recog)
1363 return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL);
1364 if (recogAverageSamples(recog, debug) != 0) {
1365 recogDestroy(&recog);
1366 return (PIXA *)ERROR_PTR("bad templates", __func__, NULL);
1367 }
1368
1369 nasave = (ppixsave) ? numaCreate(0) : NULL;
1370 pixarem = (ppixrem) ? pixaCreate(0) : NULL;
1371
1372 pixad = pixaCreate(0);
1373 pixaaGetCount(recog->pixaa, &nan); /* number of templates in each class */
1374 for (i = 0; i < recog->setsize; i++) {
1375 /* Get the scores for each sample in the class, when comparing
1376 * with averages from all the classes. */
1377 numaGetIValue(nan, i, &n);
1378 for (j = 0; j < n; j++) {
1379 pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE);
1380 ptaaGetPt(recog->ptaa, i, j, &x1, &y1); /* centroid */
1381 numaaGetValue(recog->naasum, i, j, NULL, &area1); /* fg sum */
1382 nascore = numaCreate(n);
1383 for (k = 0; k < recog->setsize; k++) { /* average templates */
1384 pix2 = pixaGetPix(recog->pixa, k, L_CLONE);
1385 ptaGetPt(recog->pta, k, &x2, &y2); /* average centroid */
1386 numaGetIValue(recog->nasum, k, &area2); /* average fg sum */
1387 pixCorrelationScoreSimple(pix1, pix2, area1, area2,
1388 x1 - x2, y1 - y2, 5, 5,
1389 recog->sumtab, &score);
1390 numaAddNumber(nascore, score);
1391 pixDestroy(&pix2);
1392 }
1393
1394 /* Save templates that are in the correct class and
1395 * at or above threshold. Toss any classes with less
1396 * than %minsize templates. */
1397 numaGetMax(nascore, &maxscore, &maxk);
1398 if (maxk == i && maxscore >= minscore && n >= minsize) {
1399 /* save it */
1400 pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
1401 pixaAddPix(pixad, pix3, L_INSERT);
1402 if (nasave) numaAddNumber(nasave, maxscore);
1403 } else if (ppixrem) { /* outlier */
1404 pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore);
1405 pixaAddPix(pixarem, pix3, L_INSERT);
1406 }
1407 numaDestroy(&nascore);
1408 pixDestroy(&pix1);
1409 }
1410 }
1411
1412 if (ppixsave) {
1413 *ppixsave = pixDisplayOutliers(pixad, nasave);
1414 numaDestroy(&nasave);
1415 }
1416 if (ppixrem) {
1417 *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2);
1418 pixaDestroy(&pixarem);
1419 }
1420
1421 numaDestroy(&nan);
1422 recogDestroy(&recog);
1423 return pixad;
1424 }
1425
1426
1427 /*------------------------------------------------------------------------*
1428 * Training on unlabeled data *
1429 *------------------------------------------------------------------------*/
1430 /*!
1431 * \brief recogTrainFromBoot()
1432 *
1433 * \param[in] recogboot labeled boot recognizer
1434 * \param[in] pixas set of unlabeled input characters
1435 * \param[in] minscore min score for accepting the example; e.g., 0.75
1436 * \param[in] threshold for binarization, if needed
1437 * \param[in] debug 1 for debug output saved to recogboot; 0 otherwise
1438 * \return pixad labeled version of input pixas, trained on a BSR,
1439 * or NULL on error
1440 *
1441 * <pre>
1442 * Notes:
1443 * (1) This takes %pixas of unscaled single characters and %recboot,
1444 * a bootstrep recognizer (BSR) that has been set up with parameters
1445 * * scaleh: scale all templates to this height
1446 * * linew: width of normalized strokes, or 0 if using
1447 * the input image
1448 * It modifies the pix in %pixas accordingly and correlates
1449 * with the templates in the BSR. It returns those input
1450 * images in %pixas whose best correlation with the BSR is at
1451 * or above %minscore. The returned pix have added text labels
1452 * for the text string of the class to which the best
1453 * correlated template belongs.
1454 * (2) Identification occurs in scaled mode (typically with h = 40),
1455 * optionally using a width-normalized line images derived
1456 * from those in %pixas.
1457 * </pre>
1458 */
1459 PIXA *
1460 recogTrainFromBoot(L_RECOG *recogboot,
1461 PIXA *pixas,
1462 l_float32 minscore,
1463 l_int32 threshold,
1464 l_int32 debug)
1465 {
1466 char *text;
1467 l_int32 i, n, same, maxd, scaleh, linew;
1468 l_float32 score;
1469 PIX *pix1, *pix2, *pixdb = NULL;
1470 PIXA *pixa1, *pixa2, *pixa3, *pixad;
1471
1472 if (!recogboot)
1473 return (PIXA *)ERROR_PTR("recogboot not defined", __func__, NULL);
1474 if (!pixas)
1475 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
1476
1477 /* Make sure all input pix are 1 bpp */
1478 if ((n = pixaGetCount(pixas)) == 0)
1479 return (PIXA *)ERROR_PTR("no pix in pixa", __func__, NULL);
1480 pixaVerifyDepth(pixas, &same, &maxd);
1481 if (maxd == 1) {
1482 pixa1 = pixaCopy(pixas, L_COPY);
1483 } else {
1484 pixa1 = pixaCreate(n);
1485 for (i = 0; i < n; i++) {
1486 pix1 = pixaGetPix(pixas, i, L_CLONE);
1487 pix2 = pixConvertTo1(pix1, threshold);
1488 pixaAddPix(pixa1, pix2, L_INSERT);
1489 pixDestroy(&pix1);
1490 }
1491 }
1492
1493 /* Scale the input images to match the BSR */
1494 scaleh = recogboot->scaleh;
1495 linew = recogboot->linew;
1496 pixa2 = pixaCreate(n);
1497 for (i = 0; i < n; i++) {
1498 pix1 = pixaGetPix(pixa1, i, L_CLONE);
1499 pix2 = pixScaleToSize(pix1, 0, scaleh);
1500 pixaAddPix(pixa2, pix2, L_INSERT);
1501 pixDestroy(&pix1);
1502 }
1503 pixaDestroy(&pixa1);
1504
1505 /* Optionally convert to width-normalized line */
1506 if (linew > 0)
1507 pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8);
1508 else
1509 pixa3 = pixaCopy(pixa2, L_CLONE);
1510 pixaDestroy(&pixa2);
1511
1512 /* Identify using recogboot */
1513 n = pixaGetCount(pixa3);
1514 pixad = pixaCreate(n);
1515 for (i = 0; i < n; i++) {
1516 pix1 = pixaGetPix(pixa3, i, L_COPY);
1517 pixSetText(pix1, NULL); /* remove any existing text or labelling */
1518 if (!debug) {
1519 recogIdentifyPix(recogboot, pix1, NULL);
1520 } else {
1521 recogIdentifyPix(recogboot, pix1, &pixdb);
1522 pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT);
1523 }
1524 rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL);
1525 if (score >= minscore) {
1526 pix2 = pixaGetPix(pixas, i, L_COPY);
1527 pixSetText(pix2, text);
1528 pixaAddPix(pixad, pix2, L_INSERT);
1529 pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY);
1530 }
1531 LEPT_FREE(text);
1532 pixDestroy(&pix1);
1533 }
1534 pixaDestroy(&pixa3);
1535
1536 return pixad;
1537 }
1538
1539
1540 /*------------------------------------------------------------------------*
1541 * Padding the digit training set *
1542 *------------------------------------------------------------------------*/
1543 /*!
1544 * \brief recogPadDigitTrainingSet()
1545 *
1546 * \param[in,out] precog trained; if padding is needed, it is replaced
1547 * by a a new padded recog
1548 * \param[in] scaleh must be > 0; suggest ~40.
1549 * \param[in] linew use 0 for original scanned images
1550 * \return 0 if OK, 1 on error
1551 *
1552 * <pre>
1553 * Notes:
1554 * (1) This is a no-op if padding is not needed. However,
1555 * if it is, this replaces the input recog with a new recog,
1556 * padded appropriately with templates from a boot recognizer,
1557 * and set up with correlation templates derived from
1558 * %scaleh and %linew.
1559 * </pre>
1560 */
1561 l_ok
1562 recogPadDigitTrainingSet(L_RECOG **precog,
1563 l_int32 scaleh,
1564 l_int32 linew)
1565 {
1566 PIXA *pixa;
1567 L_RECOG *recog1, *recog2;
1568 SARRAY *sa;
1569
1570 if (!precog)
1571 return ERROR_INT("&recog not defined", __func__, 1);
1572 recog1 = *precog;
1573
1574 recogIsPaddingNeeded(recog1, &sa);
1575 if (!sa) return 0;
1576
1577 /* Get a new pixa with the padding templates added */
1578 pixa = recogAddDigitPadTemplates(recog1, sa);
1579 sarrayDestroy(&sa);
1580 if (!pixa)
1581 return ERROR_INT("pixa not made", __func__, 1);
1582
1583 /* Need to use templates that are scaled to a fixed height. */
1584 if (scaleh <= 0) {
1585 L_WARNING("templates must be scaled to fixed height; using %d\n",
1586 __func__, 40);
1587 scaleh = 40;
1588 }
1589
1590 /* Create a hybrid recog, composed of templates from both
1591 * the original and bootstrap sources. */
1592 recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold,
1593 recog1->maxyshift);
1594 pixaDestroy(&pixa);
1595 recogDestroy(precog);
1596 *precog = recog2;
1597 return 0;
1598 }
1599
1600
1601 /*!
1602 * \brief recogIsPaddingNeeded()
1603 *
1604 * \param[in] recog trained
1605 * \param[out] psa addr of returned string containing text value
1606 * \return 1 on error; 0 if OK, whether or not additional padding
1607 * templates are required.
1608 *
1609 * <pre>
1610 * Notes:
1611 * (1) This returns a string array in &sa containing character values
1612 * for which extra templates are needed; this sarray is
1613 * used by recogGetPadTemplates(). It returns NULL
1614 * if no padding templates are needed.
1615 * </pre>
1616 */
1617 l_int32
1618 recogIsPaddingNeeded(L_RECOG *recog,
1619 SARRAY **psa)
1620 {
1621 char *str;
1622 l_int32 i, nt, min_nopad, nclass, allclasses;
1623 l_float32 minval;
1624 NUMA *naclass;
1625 SARRAY *sa;
1626
1627 if (!psa)
1628 return ERROR_INT("&sa not defined", __func__, 1);
1629 *psa = NULL;
1630 if (!recog)
1631 return ERROR_INT("recog not defined", __func__, 1);
1632
1633 /* Do we have samples from all classes? */
1634 nclass = pixaaGetCount(recog->pixaa_u, &naclass); /* unscaled bitmaps */
1635 allclasses = (nclass == recog->charset_size) ? 1 : 0;
1636
1637 /* Are there enough samples in each class already? */
1638 min_nopad = recog->min_nopad;
1639 numaGetMin(naclass, &minval, NULL);
1640 if (allclasses && (minval >= min_nopad)) {
1641 numaDestroy(&naclass);
1642 return 0;
1643 }
1644
1645 /* Are any classes not represented? */
1646 sa = recogAddMissingClassStrings(recog);
1647 *psa = sa;
1648
1649 /* Are any other classes under-represented? */
1650 for (i = 0; i < nclass; i++) {
1651 numaGetIValue(naclass, i, &nt);
1652 if (nt < min_nopad) {
1653 str = sarrayGetString(recog->sa_text, i, L_COPY);
1654 sarrayAddString(sa, str, L_INSERT);
1655 }
1656 }
1657 numaDestroy(&naclass);
1658 return 0;
1659 }
1660
1661
1662 /*!
1663 * \brief recogAddMissingClassStrings()
1664 *
1665 * \param[in] recog trained
1666 * \return sa of class string missing in %recog, or NULL on error
1667 *
1668 * <pre>
1669 * Notes:
1670 * (1) This returns an empty %sa if there is at least one template
1671 * in each class in %recog.
1672 * </pre>
1673 */
1674 static SARRAY *
1675 recogAddMissingClassStrings(L_RECOG *recog)
1676 {
1677 char *text;
1678 char str[4];
1679 l_int32 i, nclass, index, ival;
1680 NUMA *na;
1681 SARRAY *sa;
1682
1683 if (!recog)
1684 return (SARRAY *)ERROR_PTR("recog not defined", __func__, NULL);
1685
1686 /* Only handling digits */
1687 nclass = pixaaGetCount(recog->pixaa_u, NULL); /* unscaled bitmaps */
1688 if (recog->charset_type != 1 || nclass == 10)
1689 return sarrayCreate(0); /* empty */
1690
1691 /* Make an indicator array for missing classes */
1692 na = numaCreate(0);
1693 sa = sarrayCreate(0);
1694 for (i = 0; i < recog->charset_size; i++)
1695 numaAddNumber(na, 1);
1696 for (i = 0; i < nclass; i++) {
1697 text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
1698 index = text[0] - '0';
1699 numaSetValue(na, index, 0);
1700 }
1701
1702 /* Convert to string and add to output */
1703 for (i = 0; i < nclass; i++) {
1704 numaGetIValue(na, i, &ival);
1705 if (ival == 1) {
1706 str[0] = '0' + i;
1707 str[1] = '\0';
1708 sarrayAddString(sa, str, L_COPY);
1709 }
1710 }
1711 numaDestroy(&na);
1712 return sa;
1713 }
1714
1715
1716 /*!
1717 * \brief recogAddDigitPadTemplates()
1718 *
1719 * \param[in] recog trained
1720 * \param[in] sa set of text strings that need to be padded
1721 * \return pixa of all templates from %recog and the additional pad
1722 * templates from a boot recognizer; or NULL on error
1723 *
1724 * <pre>
1725 * Notes:
1726 * (1) Call recogIsPaddingNeeded() first, which returns %sa of
1727 * template text strings for classes where more templates
1728 * are needed.
1729 * </pre>
1730 */
1731 PIXA *
1732 recogAddDigitPadTemplates(L_RECOG *recog,
1733 SARRAY *sa)
1734 {
1735 char *str, *text;
1736 l_int32 i, j, n, nt;
1737 PIX *pix;
1738 PIXA *pixa1, *pixa2;
1739
1740 if (!recog)
1741 return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL);
1742 if (!sa)
1743 return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL);
1744 if (recogCharsetAvailable(recog->charset_type) == FALSE)
1745 return (PIXA *)ERROR_PTR("boot charset not available", __func__, NULL);
1746
1747 /* Make boot recog templates */
1748 pixa1 = recogMakeBootDigitTemplates(0, 0);
1749 n = pixaGetCount(pixa1);
1750
1751 /* Extract the unscaled templates from %recog */
1752 pixa2 = recogExtractPixa(recog);
1753
1754 /* Add selected boot recog templates based on the text strings in sa */
1755 nt = sarrayGetCount(sa);
1756 for (i = 0; i < n; i++) {
1757 pix = pixaGetPix(pixa1, i, L_CLONE);
1758 text = pixGetText(pix);
1759 for (j = 0; j < nt; j++) {
1760 str = sarrayGetString(sa, j, L_NOCOPY);
1761 if (!strcmp(text, str)) {
1762 pixaAddPix(pixa2, pix, L_COPY);
1763 break;
1764 }
1765 }
1766 pixDestroy(&pix);
1767 }
1768
1769 pixaDestroy(&pixa1);
1770 return pixa2;
1771 }
1772
1773
1774 /*!
1775 * \brief recogCharsetAvailable()
1776 *
1777 * \param[in] type of charset for padding
1778 * \return 1 if available; 0 if not.
1779 */
1780 static l_int32
1781 recogCharsetAvailable(l_int32 type)
1782 {
1783 l_int32 ret;
1784
1785 switch (type)
1786 {
1787 case L_ARABIC_NUMERALS:
1788 ret = TRUE;
1789 break;
1790 case L_LC_ROMAN_NUMERALS:
1791 case L_UC_ROMAN_NUMERALS:
1792 case L_LC_ALPHA:
1793 case L_UC_ALPHA:
1794 L_INFO("charset type %d not available\n", __func__, type);
1795 ret = FALSE;
1796 break;
1797 default:
1798 L_INFO("charset type %d is unknown\n", __func__, type);
1799 ret = FALSE;
1800 break;
1801 }
1802
1803 return ret;
1804 }
1805
1806
1807 /*------------------------------------------------------------------------*
1808 * Making a boot digit recognizer *
1809 *------------------------------------------------------------------------*/
1810 /*!
1811 * \brief recogMakeBootDigitRecog()
1812 *
1813 * \param[in] nsamp number of samples of each digit; or 0
1814 * \param[in] scaleh scale all heights to this; typ. use 40
1815 * \param[in] linew normalized line width; typ. use 5; 0 to skip
1816 * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1
1817 * \param[in] debug 1 for showing templates; 0 otherwise
1818 * \return recog, or NULL on error
1819 *
1820 * <pre>
1821 * Notes:
1822 * (1) This takes a set of pre-computed, labeled pixa of single
1823 * digits, and generates a recognizer from them.
1824 * The templates used in the recognizer can be modified by:
1825 * - scaling (isotropically to fixed height)
1826 * - generating a skeleton and thickening so that all strokes
1827 * have the same width.
1828 * (2) The resulting templates are scaled versions of either the
1829 * input bitmaps or images with fixed line widths. To use the
1830 * input bitmaps, set %linew = 0; otherwise, set %linew to the
1831 * desired line width.
1832 * (3) If %nsamp == 0, this uses and extends the output from
1833 * three boot generators:
1834 * l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3.
1835 * Otherwise, it uses exactly %nsamp templates of each digit,
1836 * extracted by l_bootnum_gen4.
1837 * </pre>
1838 */
1839 L_RECOG *
1840 recogMakeBootDigitRecog(l_int32 nsamp,
1841 l_int32 scaleh,
1842 l_int32 linew,
1843 l_int32 maxyshift,
1844 l_int32 debug)
1845
1846 {
1847 PIXA *pixa;
1848 L_RECOG *recog;
1849
1850 /* Get the templates, extended by horizontal scaling */
1851 pixa = recogMakeBootDigitTemplates(nsamp, debug);
1852
1853 /* Make the boot recog; recogModifyTemplate() will scale the
1854 * templates and optionally turn them into strokes of fixed width. */
1855 recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift);
1856 pixaDestroy(&pixa);
1857 if (debug)
1858 recogShowContent(stderr, recog, 0, 1);
1859
1860 return recog;
1861 }
1862
1863
1864 /*!
1865 * \brief recogMakeBootDigitTemplates()
1866 *
1867 * \param[in] nsamp number of samples of each digit; or 0
1868 * \param[in] debug 1 for display of templates
1869 * \return pixa of templates; or NULL on error
1870 *
1871 * <pre>
1872 * Notes:
1873 * (1) See recogMakeBootDigitRecog().
1874 * </pre>
1875 */
1876 PIXA *
1877 recogMakeBootDigitTemplates(l_int32 nsamp,
1878 l_int32 debug)
1879 {
1880 NUMA *na1;
1881 PIX *pix1, *pix2, *pix3;
1882 PIXA *pixa1, *pixa2, *pixa3;
1883
1884 if (nsamp > 0) {
1885 pixa1 = l_bootnum_gen4(nsamp);
1886 if (debug) {
1887 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10,
1888 2, 6, 0xff000000);
1889 pixDisplay(pix1, 0, 0);
1890 pixDestroy(&pix1);
1891 }
1892 return pixa1;
1893 }
1894
1895 /* Else, generate from 3 pixa */
1896 pixa1 = l_bootnum_gen1();
1897 pixa2 = l_bootnum_gen2();
1898 pixa3 = l_bootnum_gen3();
1899 if (debug) {
1900 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000);
1901 pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000);
1902 pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000);
1903 pixDisplay(pix1, 0, 0);
1904 pixDisplay(pix2, 600, 0);
1905 pixDisplay(pix3, 1200, 0);
1906 pixDestroy(&pix1);
1907 pixDestroy(&pix2);
1908 pixDestroy(&pix3);
1909 }
1910 pixaJoin(pixa1, pixa2, 0, -1);
1911 pixaJoin(pixa1, pixa3, 0, -1);
1912 pixaDestroy(&pixa2);
1913 pixaDestroy(&pixa3);
1914
1915 /* Extend by horizontal scaling */
1916 na1 = numaCreate(4);
1917 numaAddNumber(na1, 0.9f);
1918 numaAddNumber(na1, 1.1f);
1919 numaAddNumber(na1, 1.2f);
1920 pixa2 = pixaExtendByScaling(pixa1, na1, L_HORIZ, 1);
1921
1922 pixaDestroy(&pixa1);
1923 numaDestroy(&na1);
1924 return pixa2;
1925 }
1926
1927
1928 /*------------------------------------------------------------------------*
1929 * Debugging *
1930 *------------------------------------------------------------------------*/
1931 /*!
1932 * \brief recogShowContent()
1933 *
1934 * \param[in] fp file stream
1935 * \param[in] recog
1936 * \param[in] index for naming of output files of template images
1937 * \param[in] display 1 for showing template images; 0 otherwise
1938 * \return 0 if OK, 1 on error
1939 */
1940 l_ok
1941 recogShowContent(FILE *fp,
1942 L_RECOG *recog,
1943 l_int32 index,
1944 l_int32 display)
1945 {
1946 char buf[128];
1947 l_int32 i, val, count;
1948 PIX *pix;
1949 NUMA *na;
1950
1951 if (!fp)
1952 return ERROR_INT("stream not defined", __func__, 1);
1953 if (!recog)
1954 return ERROR_INT("recog not defined", __func__, 1);
1955
1956 fprintf(fp, "Debug print of recog contents\n");
1957 fprintf(fp, " Setsize: %d\n", recog->setsize);
1958 fprintf(fp, " Binarization threshold: %d\n", recog->threshold);
1959 fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift);
1960 if (recog->linew <= 0)
1961 fprintf(fp, " Using image templates for matching\n");
1962 else
1963 fprintf(fp, " Using templates with fixed line width for matching\n");
1964 if (recog->scalew == 0)
1965 fprintf(fp, " No width scaling of templates\n");
1966 else
1967 fprintf(fp, " Template width scaled to %d\n", recog->scalew);
1968 if (recog->scaleh == 0)
1969 fprintf(fp, " No height scaling of templates\n");
1970 else
1971 fprintf(fp, " Template height scaled to %d\n", recog->scaleh);
1972 fprintf(fp, " Number of samples in each class:\n");
1973 pixaaGetCount(recog->pixaa_u, &na);
1974 for (i = 0; i < recog->setsize; i++) {
1975 l_dnaGetIValue(recog->dna_tochar, i, &val);
1976 numaGetIValue(na, i, &count);
1977 if (val < 128)
1978 fprintf(fp, " class %d, char %c: %d\n", i, val, count);
1979 else
1980 fprintf(fp, " class %d, val %d: %d\n", i, val, count);
1981 }
1982 numaDestroy(&na);
1983
1984 if (display) {
1985 lept_mkdir("lept/recog");
1986 pix = pixaaDisplayByPixa(recog->pixaa_u, 50, 1.0, 20, 20, 0);
1987 snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index);
1988 pixWriteDebug(buf, pix, IFF_PNG);
1989 pixDisplay(pix, 0, 200 * index);
1990 pixDestroy(&pix);
1991 if (recog->train_done) {
1992 pix = pixaaDisplayByPixa(recog->pixaa, 50, 1.0, 20, 20, 0);
1993 snprintf(buf, sizeof(buf),
1994 "/tmp/lept/recog/templates.%d.png", index);
1995 pixWriteDebug(buf, pix, IFF_PNG);
1996 pixDisplay(pix, 800, 200 * index);
1997 pixDestroy(&pix);
1998 }
1999 }
2000 return 0;
2001 }
2002
2003
2004 /*!
2005 * \brief recogDebugAverages()
2006 *
2007 * \param[in] recog addr of recog
2008 * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both
2009 * \return 0 if OK, 1 on error
2010 *
2011 * <pre>
2012 * Notes:
2013 * (1) Generates an image that pairs each of the input images used
2014 * in training with the average template that it is best
2015 * correlated to. This is written into the recog.
2016 * (2) It also generates pixa_tr of all the input training images,
2017 * which can be used, e.g., in recogShowMatchesInRange().
2018 * (3) Returns an error if the averaging function finds bad classes.
2019 * </pre>
2020 */
2021 l_ok
2022 recogDebugAverages(L_RECOG *recog,
2023 l_int32 debug)
2024 {
2025 l_int32 i, j, n, np, index;
2026 l_float32 score;
2027 PIX *pix1, *pix2, *pix3;
2028 PIXA *pixa, *pixat;
2029 PIXAA *paa1, *paa2;
2030
2031 if (!recog)
2032 return ERROR_INT("recog not defined", __func__, 1);
2033
2034 /* Mark the training as finished if necessary, and make sure
2035 * that the average templates have been built. */
2036 if (recogAverageSamples(recog, 0) != 0)
2037 return ERROR_INT("averaging failed", __func__, 1);
2038
2039 /* Save a pixa of all the training examples */
2040 paa1 = recog->pixaa;
2041 if (!recog->pixa_tr)
2042 recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE);
2043
2044 /* Destroy any existing image and make a new one */
2045 if (recog->pixdb_ave)
2046 pixDestroy(&recog->pixdb_ave);
2047 n = pixaaGetCount(paa1, NULL);
2048 paa2 = pixaaCreate(n);
2049 for (i = 0; i < n; i++) {
2050 pixa = pixaCreate(0);
2051 pixat = pixaaGetPixa(paa1, i, L_CLONE);
2052 np = pixaGetCount(pixat);
2053 for (j = 0; j < np; j++) {
2054 pix1 = pixaaGetPix(paa1, i, j, L_CLONE);
2055 recogIdentifyPix(recog, pix1, &pix2);
2056 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL,
2057 NULL, NULL);
2058 if (debug >= 2)
2059 lept_stderr("index = %d, score = %7.3f\n", index, score);
2060 pix3 = pixAddBorder(pix2, 2, 1);
2061 pixaAddPix(pixa, pix3, L_INSERT);
2062 pixDestroy(&pix1);
2063 pixDestroy(&pix2);
2064 }
2065 pixaaAddPixa(paa2, pixa, L_INSERT);
2066 pixaDestroy(&pixat);
2067 }
2068 recog->pixdb_ave = pixaaDisplayByPixa(paa2, 50, 1.0, 20, 20, 0);
2069 if (debug % 2) {
2070 lept_mkdir("lept/recog");
2071 pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave,
2072 IFF_PNG);
2073 pixDisplay(recog->pixdb_ave, 100, 100);
2074 }
2075
2076 pixaaDestroy(&paa2);
2077 return 0;
2078 }
2079
2080
2081 /*!
2082 * \brief recogShowAverageTemplates()
2083 *
2084 * \param[in] recog
2085 * \return 0 on success, 1 on failure
2086 *
2087 * <pre>
2088 * Notes:
2089 * (1) This debug routine generates a display of the averaged templates,
2090 * both scaled and unscaled, with the centroid visible in red.
2091 * </pre>
2092 */
2093 l_int32
2094 recogShowAverageTemplates(L_RECOG *recog)
2095 {
2096 l_int32 i, size;
2097 l_float32 x, y;
2098 PIX *pix1, *pix2, *pixr;
2099 PIXA *pixat, *pixadb;
2100
2101 if (!recog)
2102 return ERROR_INT("recog not defined", __func__, 1);
2103
2104 lept_stderr("min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n",
2105 recog->minwidth_u, recog->maxwidth_u,
2106 recog->minheight_u, recog->maxheight_u);
2107 lept_stderr("min splitw = %d, max splith = %d\n",
2108 recog->min_splitw, recog->max_splith);
2109
2110 pixaDestroy(&recog->pixadb_ave);
2111
2112 pixr = pixCreate(3, 3, 32); /* 3x3 red square for centroid location */
2113 pixSetAllArbitrary(pixr, 0xff000000);
2114 pixadb = pixaCreate(2);
2115
2116 /* Unscaled bitmaps */
2117 size = recog->setsize;
2118 pixat = pixaCreate(size);
2119 for (i = 0; i < size; i++) {
2120 if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL)
2121 continue;
2122 pix2 = pixConvertTo32(pix1);
2123 ptaGetPt(recog->pta_u, i, &x, &y);
2124 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2125 PIX_SRC, pixr, 0, 0);
2126 pixaAddPix(pixat, pix2, L_INSERT);
2127 pixDestroy(&pix1);
2128 }
2129 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2130 pixaAddPix(pixadb, pix1, L_INSERT);
2131 pixDisplay(pix1, 100, 100);
2132 pixaDestroy(&pixat);
2133
2134 /* Scaled bitmaps */
2135 pixat = pixaCreate(size);
2136 for (i = 0; i < size; i++) {
2137 if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL)
2138 continue;
2139 pix2 = pixConvertTo32(pix1);
2140 ptaGetPt(recog->pta, i, &x, &y);
2141 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3,
2142 PIX_SRC, pixr, 0, 0);
2143 pixaAddPix(pixat, pix2, L_INSERT);
2144 pixDestroy(&pix1);
2145 }
2146 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0);
2147 pixaAddPix(pixadb, pix1, L_INSERT);
2148 pixDisplay(pix1, 100, 100);
2149 pixaDestroy(&pixat);
2150 pixDestroy(&pixr);
2151 recog->pixadb_ave = pixadb;
2152 return 0;
2153 }
2154
2155
2156 /*!
2157 * \brief pixDisplayOutliers()
2158 *
2159 * \param[in] pixas unscaled labeled templates
2160 * \param[in] nas scores of templates (against class averages)
2161 * \return pix tiled pixa with text and scores, or NULL on failure
2162 *
2163 * <pre>
2164 * Notes:
2165 * (1) This debug routine is called from recogRemoveOutliers2(),
2166 * and takes the saved templates and their scores as input.
2167 * </pre>
2168 */
2169 static PIX *
2170 pixDisplayOutliers(PIXA *pixas,
2171 NUMA *nas)
2172 {
2173 char *text;
2174 char buf[16];
2175 l_int32 i, n;
2176 l_float32 fval;
2177 PIX *pix1, *pix2;
2178 PIXA *pixa1;
2179
2180 if (!pixas)
2181 return (PIX *)ERROR_PTR("pixas not defined", __func__, NULL);
2182 if (!nas)
2183 return (PIX *)ERROR_PTR("nas not defined", __func__, NULL);
2184 n = pixaGetCount(pixas);
2185 if (numaGetCount(nas) != n)
2186 return (PIX *)ERROR_PTR("pixas and nas sizes differ", __func__, NULL);
2187
2188 pixa1 = pixaCreate(n);
2189 for (i = 0; i < n; i++) {
2190 pix1 = pixaGetPix(pixas, i, L_CLONE);
2191 pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL);
2192 text = pixGetText(pix1);
2193 numaGetFValue(nas, i, &fval);
2194 snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval);
2195 pixSetText(pix2, buf);
2196 pixaAddPix(pixa1, pix2, L_INSERT);
2197 pixDestroy(&pix1);
2198 }
2199 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000);
2200 pixaDestroy(&pixa1);
2201 return pix1;
2202 }
2203
2204
2205 /*!
2206 * \brief recogDisplayOutlier()
2207 *
2208 * \param[in] recog
2209 * \param[in] iclass sample is in this class
2210 * \param[in] jsamp index of sample is class i
2211 * \param[in] maxclass index of class with closest average to sample
2212 * \param[in] maxscore score of sample with average of class %maxclass
2213 * \return pix sample and template images, with score, or NULL on error
2214 *
2215 * <pre>
2216 * Notes:
2217 * (1) This shows three templates, side-by-side:
2218 * - The outlier sample
2219 * - The average template from the same class
2220 * - The average class template that best matched the outlier sample
2221 * </pre>
2222 */
2223 static PIX *
2224 recogDisplayOutlier(L_RECOG *recog,
2225 l_int32 iclass,
2226 l_int32 jsamp,
2227 l_int32 maxclass,
2228 l_float32 maxscore)
2229 {
2230 char buf[64];
2231 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2232 PIXA *pixa;
2233
2234 if (!recog)
2235 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
2236
2237 pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE);
2238 pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE);
2239 pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE);
2240 pixa = pixaCreate(3);
2241 pixaAddPix(pixa, pix1, L_INSERT);
2242 pixaAddPix(pixa, pix2, L_INSERT);
2243 pixaAddPix(pixa, pix3, L_INSERT);
2244 pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2);
2245 snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass,
2246 maxscore);
2247 pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000,
2248 L_ADD_BELOW, NULL);
2249 pixDestroy(&pix4);
2250 pixaDestroy(&pixa);
2251 return pix5;
2252 }
2253
2254
2255 /*!
2256 * \brief recogShowMatchesInRange()
2257 *
2258 * \param[in] recog
2259 * \param[in] pixa of 1 bpp images to match
2260 * \param[in] minscore min score to include output
2261 * \param[in] maxscore max score to include output
2262 * \param[in] display 1 to display the result
2263 * \return 0 if OK, 1 on error
2264 *
2265 * <pre>
2266 * Notes:
2267 * (1) This gives a visual output of the best matches for a given
2268 * range of scores. Each pair of images can optionally be
2269 * labeled with the index of the best match and the correlation.
2270 * (2) To use this, save a set of 1 bpp images (labeled or
2271 * unlabeled) that can be given to a recognizer in a pixa.
2272 * Then call this function with the pixa and parameters
2273 * to filter a range of scores.
2274 * </pre>
2275 */
2276 l_ok
2277 recogShowMatchesInRange(L_RECOG *recog,
2278 PIXA *pixa,
2279 l_float32 minscore,
2280 l_float32 maxscore,
2281 l_int32 display)
2282 {
2283 l_int32 i, n, index, depth;
2284 l_float32 score;
2285 NUMA *nascore, *naindex;
2286 PIX *pix1, *pix2;
2287 PIXA *pixa1, *pixa2;
2288
2289 if (!recog)
2290 return ERROR_INT("recog not defined", __func__, 1);
2291 if (!pixa)
2292 return ERROR_INT("pixa not defined", __func__, 1);
2293
2294 /* Run the recognizer on the set of images */
2295 n = pixaGetCount(pixa);
2296 nascore = numaCreate(n);
2297 naindex = numaCreate(n);
2298 pixa1 = pixaCreate(n);
2299 for (i = 0; i < n; i++) {
2300 pix1 = pixaGetPix(pixa, i, L_CLONE);
2301 recogIdentifyPix(recog, pix1, &pix2);
2302 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
2303 numaAddNumber(nascore, score);
2304 numaAddNumber(naindex, index);
2305 pixaAddPix(pixa1, pix2, L_INSERT);
2306 pixDestroy(&pix1);
2307 }
2308
2309 /* Filter the set and optionally add text to each */
2310 pixa2 = pixaCreate(n);
2311 depth = 1;
2312 for (i = 0; i < n; i++) {
2313 numaGetFValue(nascore, i, &score);
2314 if (score < minscore || score > maxscore) continue;
2315 pix1 = pixaGetPix(pixa1, i, L_CLONE);
2316 numaGetIValue(naindex, i, &index);
2317 pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score);
2318 if (i == 0) depth = pixGetDepth(pix2);
2319 pixaAddPix(pixa2, pix2, L_INSERT);
2320 pixDestroy(&pix1);
2321 }
2322
2323 /* Package it up */
2324 pixDestroy(&recog->pixdb_range);
2325 if (pixaGetCount(pixa2) > 0) {
2326 recog->pixdb_range =
2327 pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1);
2328 if (display)
2329 pixDisplay(recog->pixdb_range, 300, 100);
2330 } else {
2331 L_INFO("no character matches in the range of scores\n", __func__);
2332 }
2333
2334 pixaDestroy(&pixa1);
2335 pixaDestroy(&pixa2);
2336 numaDestroy(&nascore);
2337 numaDestroy(&naindex);
2338 return 0;
2339 }
2340
2341
2342 /*!
2343 * \brief recogShowMatch()
2344 *
2345 * \param[in] recog
2346 * \param[in] pix1 input pix; several possibilities
2347 * \param[in] pix2 [optional] matching template
2348 * \param[in] box [optional] region in pix1 for which pix2 matches
2349 * \param[in] index index of matching template; use -1 to disable printing
2350 * \param[in] score score of match
2351 * \return pixd pair of images, showing input pix and best template,
2352 * optionally with matching information, or NULL on error.
2353 *
2354 * <pre>
2355 * Notes:
2356 * (1) pix1 can be one of these:
2357 * (a) The input pix alone, which can be either a single character
2358 * (box == NULL) or several characters that need to be
2359 * segmented. If more than character is present, the box
2360 * region is displayed with an outline.
2361 * (b) Both the input pix and the matching template. In this case,
2362 * pix2 and box will both be null.
2363 * (2) If the bmf has been made (by a call to recogMakeBmf())
2364 * and the index >= 0, the text field, match score and index
2365 * will be rendered; otherwise their values will be ignored.
2366 * </pre>
2367 */
2368 PIX *
2369 recogShowMatch(L_RECOG *recog,
2370 PIX *pix1,
2371 PIX *pix2,
2372 BOX *box,
2373 l_int32 index,
2374 l_float32 score)
2375 {
2376 char buf[32];
2377 char *text;
2378 L_BMF *bmf;
2379 PIX *pix3, *pix4, *pix5, *pixd;
2380 PIXA *pixa;
2381
2382 if (!recog)
2383 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
2384 if (!pix1)
2385 return (PIX *)ERROR_PTR("pix1 not defined", __func__, NULL);
2386
2387 bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL;
2388 if (!pix2 && !box && !bmf) /* nothing to do */
2389 return pixCopy(NULL, pix1);
2390
2391 pix3 = pixConvertTo32(pix1);
2392 if (box)
2393 pixRenderBoxArb(pix3, box, 1, 255, 0, 0);
2394
2395 if (pix2) {
2396 pixa = pixaCreate(2);
2397 pixaAddPix(pixa, pix3, L_CLONE);
2398 pixaAddPix(pixa, pix2, L_CLONE);
2399 pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0);
2400 pixaDestroy(&pixa);
2401 } else {
2402 pix4 = pixCopy(NULL, pix3);
2403 }
2404 pixDestroy(&pix3);
2405
2406 if (bmf) {
2407 pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00);
2408 recogGetClassString(recog, index, &text);
2409 snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index);
2410 pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000,
2411 L_ADD_BELOW, NULL);
2412 pixDestroy(&pix5);
2413 LEPT_FREE(text);
2414 } else {
2415 pixd = pixClone(pix4);
2416 }
2417 pixDestroy(&pix4);
2418
2419 return pixd;
2420 }