Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/recogtrain.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file recogtrain.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Training on labeled data | |
| 32 * l_int32 recogTrainLabeled() | |
| 33 * PIX *recogProcessLabeled() | |
| 34 * l_int32 recogAddSample() | |
| 35 * PIX *recogModifyTemplate() | |
| 36 * l_int32 recogAverageSamples() | |
| 37 * l_int32 pixaAccumulateSamples() | |
| 38 * l_int32 recogTrainingFinished() | |
| 39 * static l_int32 recogTemplatesAreOK() | |
| 40 * PIXA *recogFilterPixaBySize() | |
| 41 * PIXAA *recogSortPixaByClass() | |
| 42 * l_int32 recogRemoveOutliers1() | |
| 43 * PIXA *pixaRemoveOutliers1() | |
| 44 * l_int32 recogRemoveOutliers2() | |
| 45 * PIXA *pixaRemoveOutliers2() | |
| 46 * | |
| 47 * Training on unlabeled data | |
| 48 * L_RECOG recogTrainFromBoot() | |
| 49 * | |
| 50 * Padding the digit training set | |
| 51 * l_int32 recogPadDigitTrainingSet() | |
| 52 * l_int32 recogIsPaddingNeeded() | |
| 53 * static SARRAY *recogAddMissingClassStrings() | |
| 54 * PIXA *recogAddDigitPadTemplates() | |
| 55 * static l_int32 recogCharsetAvailable() | |
| 56 * | |
| 57 * Making a boot digit recognizer | |
| 58 * L_RECOG *recogMakeBootDigitRecog() | |
| 59 * PIXA *recogMakeBootDigitTemplates() | |
| 60 * | |
| 61 * Debugging | |
| 62 * l_int32 recogShowContent() | |
| 63 * l_int32 recogDebugAverages() | |
| 64 * l_int32 recogShowAverageTemplates() | |
| 65 * static PIX *pixDisplayOutliers() | |
| 66 * PIX *recogDisplayOutlier() | |
| 67 * PIX *recogShowMatchesInRange() | |
| 68 * PIX *recogShowMatch() | |
| 69 * | |
| 70 * These abbreviations are for the type of template to be used: | |
| 71 * * SI (for the scanned images) | |
| 72 * * WNL (for width-normalized lines, formed by first skeletonizing | |
| 73 * the scanned images, and then dilating to a fixed width) | |
| 74 * These abbreviations are for the type of recognizer: | |
| 75 * * BAR (book-adapted recognizer; the best type; can do identification | |
| 76 * with unscaled images and separation of touching characters). | |
| 77 * * BSR (bootstrap recognizer; used if more labeled templates are | |
| 78 * required for a BAR, either for finding more templates from | |
| 79 * the book, or making a hybrid BAR/BSR). | |
| 80 * | |
| 81 * The recog struct typically holds two versions of the input templates | |
| 82 * (e.g. from a pixa) that were used to generate it. One version is | |
| 83 * the unscaled input templates. The other version is the one that | |
| 84 * will be used by the recog to identify unlabeled data. That version | |
| 85 * depends on the input parameters when the recog is created. The choices | |
| 86 * for the latter version, and their suggested use, are: | |
| 87 * (1) unscaled SI -- typical for BAR, generated from book images | |
| 88 * (2) unscaled WNL -- ditto | |
| 89 * (3) scaled SI -- typical for recognizers containing template | |
| 90 * images from sources other than the book to be recognized | |
| 91 * (4) scaled WNL -- ditto | |
| 92 * For cases (3) and (4), we recommend scaling to fixed height; e.g., | |
| 93 * scalew = 0, scaleh = 40. | |
| 94 * When using WNL, we recommend using a width of 5 in the template | |
| 95 * and 4 in the unlabeled data. | |
| 96 * It appears that better results for a BAR are usually obtained using | |
| 97 * SI than WNL, but more experimentation is needed. | |
| 98 * | |
| 99 * This utility is designed to build recognizers that are specifically | |
| 100 * adapted from a large amount of material, such as a book. These | |
| 101 * use labeled templates taken from the material, and not scaled. | |
| 102 * In addition, two special recognizers are useful: | |
| 103 * (1) Bootstrap recognizer (BSR). This uses height-scaled templates, | |
| 104 * that have been extended with several repetitions in one of two ways: | |
| 105 * (a) anisotropic width scaling (for either SI or WNL) | |
| 106 * (b) iterative erosions/dilations (for SI). | |
| 107 * (2) Outlier removal. This uses height scaled templates. It can be | |
| 108 * implemented without using templates that are aligned averages of all | |
| 109 * templates in a class. | |
| 110 * | |
| 111 * Recognizers are inexpensive to generate, for example, from a pixa | |
| 112 * of labeled templates. The general process of building a BAR is | |
| 113 * to start with labeled templates, e.g., in a pixa, make a BAR, and | |
| 114 * analyze new samples from the book to augment the BAR until it has | |
| 115 * enough samples for each character class. Along the way, samples | |
| 116 * from a BSR may be added for help in training. If not enough samples | |
| 117 * are available for the BAR, it can finally be augmented with BSR | |
| 118 * samples, in which case the resulting hybrid BAR/BSR recognizer | |
| 119 * must work on scaled images. | |
| 120 * | |
| 121 * Here are the steps in doing recog training: | |
| 122 * A. Generate a BAR from any existing labeled templates | |
| 123 * (1) Create a recog and add the templates, using recogAddSample(). | |
| 124 * This stores the unscaled templates. | |
| 125 * [Note: this can be done in one step if the labeled templates are put | |
| 126 * into a pixa: | |
| 127 * L_Recog *rec = recogCreateFromPixa(pixa, ...); ] | |
| 128 * (2) Call recogTrainingFinished() to generate the (sometimes modified) | |
| 129 * templates to be used for correlation. | |
| 130 * (3) Optionally, remove outliers. | |
| 131 * If there are sufficient samples in the classes, we're done. Otherwise, | |
| 132 * B. Try to get more samples from the book to pad the BAR. | |
| 133 * (1) Save the unscaled, labeled templates from the BAR. | |
| 134 * (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR. | |
| 135 * (3) Do recognition on more unlabeled images, scaled to a fixed height | |
| 136 * (4) Add the unscaled, labeled images to the saved set. | |
| 137 * (5) Optionally, remove outliers. | |
| 138 * If there are sufficient samples in the classes, we're done. Otherwise, | |
| 139 * C. For classes without a sufficient number of templates, we can | |
| 140 * supplement the BAR with templates from a BSR (a hybrid BAR/BSR), | |
| 141 * and do recognition scaled to a fixed height. | |
| 142 * | |
| 143 * Here are several methods that can be used for identifying outliers: | |
| 144 * (1) Compute average templates for each class and remove a candidate | |
| 145 * that is poorly correlated with the average. This is the most | |
| 146 * simple method. recogRemoveOutliers1() uses this, supplemented with | |
| 147 * a second threshold and a target number of templates to be saved. | |
| 148 * (2) Compute average templates for each class and remove a candidate | |
| 149 * that is more highly correlated with the average of some other class. | |
| 150 * This does not require setting a threshold for the correlation. | |
| 151 * recogRemoveOutliers2() uses this method, supplemented with a minimum | |
| 152 * correlation score. | |
| 153 * (3) For each candidate, find the average correlation with other | |
| 154 * members of its class, and remove those that have a relatively | |
| 155 * low average correlation. This is similar to (1), gives comparable | |
| 156 * results and because it does not use average templates, it requires | |
| 157 * a bit more computation. | |
| 158 * </pre> | |
| 159 */ | |
| 160 | |
| 161 #ifdef HAVE_CONFIG_H | |
| 162 #include <config_auto.h> | |
| 163 #endif /* HAVE_CONFIG_H */ | |
| 164 | |
| 165 #include <string.h> | |
| 166 #include "allheaders.h" | |
| 167 #include "pix_internal.h" | |
| 168 | |
| 169 /* Static functions */ | |
| 170 static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize, | |
| 171 l_float32 minfract, l_int32 *pok); | |
| 172 static SARRAY *recogAddMissingClassStrings(L_RECOG *recog); | |
| 173 static l_int32 recogCharsetAvailable(l_int32 type); | |
| 174 static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas); | |
| 175 static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp, | |
| 176 l_int32 maxclass, l_float32 maxscore); | |
| 177 | |
| 178 /* Default parameters that are used in recogTemplatesAreOK() and | |
| 179 * in outlier removal functions, and that use template set size | |
| 180 * to decide if the set of templates (before outliers are removed) | |
| 181 * is valid. Values are set to accept most sets of sample templates. */ | |
| 182 static const l_int32 DefaultMinSetSize = 1; /* minimum number of | |
| 183 samples for a valid class */ | |
| 184 static const l_float32 DefaultMinSetFract = 0.4f; /* minimum fraction | |
| 185 of classes required for a valid recog */ | |
| 186 | |
| 187 /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */ | |
| 188 static const l_float32 DefaultMinScore = 0.75; /* keep everything above */ | |
| 189 static const l_int32 DefaultMinTarget = 3; /* to be kept if possible */ | |
| 190 static const l_float32 LowerScoreThreshold = 0.5; /* templates can be | |
| 191 * kept down to this score to if needed to retain the | |
| 192 * desired minimum number of templates */ | |
| 193 | |
| 194 | |
| 195 /*------------------------------------------------------------------------* | |
| 196 * Training * | |
| 197 *------------------------------------------------------------------------*/ | |
| 198 /*! | |
| 199 * \brief recogTrainLabeled() | |
| 200 * | |
| 201 * \param[in] recog in training mode | |
| 202 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp | |
| 203 * \param[in] box [optional] cropping box | |
| 204 * \param[in] text [optional] if null, use text field in pix | |
| 205 * \param[in] debug 1 to display images of samples not captured | |
| 206 * \return 0 if OK, 1 on error | |
| 207 * | |
| 208 * <pre> | |
| 209 * Notes: | |
| 210 * (1) Training is restricted to the addition of a single | |
| 211 * character in an arbitrary (e.g., UTF8) charset | |
| 212 * (2) If box != null, it should represent the location in %pixs | |
| 213 * of the character image. | |
| 214 * </pre> | |
| 215 */ | |
| 216 l_ok | |
| 217 recogTrainLabeled(L_RECOG *recog, | |
| 218 PIX *pixs, | |
| 219 BOX *box, | |
| 220 char *text, | |
| 221 l_int32 debug) | |
| 222 { | |
| 223 l_int32 ret; | |
| 224 PIX *pix; | |
| 225 | |
| 226 if (!recog) | |
| 227 return ERROR_INT("recog not defined", __func__, 1); | |
| 228 if (!pixs) | |
| 229 return ERROR_INT("pixs not defined", __func__, 1); | |
| 230 | |
| 231 /* Prepare the sample to be added. This step also acts | |
| 232 * as a filter, and can invalidate pixs as a template. */ | |
| 233 ret = recogProcessLabeled(recog, pixs, box, text, &pix); | |
| 234 if (ret) { | |
| 235 pixDestroy(&pix); | |
| 236 L_WARNING("failure to get sample '%s' for training\n", __func__, | |
| 237 text); | |
| 238 return 1; | |
| 239 } | |
| 240 | |
| 241 recogAddSample(recog, pix, debug); | |
| 242 pixDestroy(&pix); | |
| 243 return 0; | |
| 244 } | |
| 245 | |
| 246 | |
| 247 /*! | |
| 248 * \brief recogProcessLabeled() | |
| 249 * | |
| 250 * \param[in] recog in training mode | |
| 251 * \param[in] pixs if depth > 1, will be thresholded to 1 bpp | |
| 252 * \param[in] box [optional] cropping box | |
| 253 * \param[in] text [optional] if null, use text field in pix | |
| 254 * \param[out] ppix addr of pix, 1 bpp, labeled | |
| 255 * \return 0 if OK, 1 on error | |
| 256 * | |
| 257 * <pre> | |
| 258 * Notes: | |
| 259 * (1) This crops and binarizes the input image, generating a pix | |
| 260 * of one character where the charval is inserted into the pix. | |
| 261 * </pre> | |
| 262 */ | |
| 263 l_ok | |
| 264 recogProcessLabeled(L_RECOG *recog, | |
| 265 PIX *pixs, | |
| 266 BOX *box, | |
| 267 char *text, | |
| 268 PIX **ppix) | |
| 269 { | |
| 270 char *textdata; | |
| 271 l_int32 textinpix, textin, nsets; | |
| 272 NUMA *na; | |
| 273 PIX *pix1, *pix2, *pix3, *pix4; | |
| 274 | |
| 275 if (!ppix) | |
| 276 return ERROR_INT("&pix not defined", __func__, 1); | |
| 277 *ppix = NULL; | |
| 278 if (!recog) | |
| 279 return ERROR_INT("recog not defined", __func__, 1); | |
| 280 if (!pixs) | |
| 281 return ERROR_INT("pixs not defined", __func__, 1); | |
| 282 | |
| 283 /* Find the text; this will be stored with the output images */ | |
| 284 textin = text && (text[0] != '\0'); | |
| 285 textinpix = (pixs->text && (pixs->text[0] != '\0')); | |
| 286 if (!textin && !textinpix) { | |
| 287 L_ERROR("no text: %d\n", __func__, recog->num_samples); | |
| 288 return 1; | |
| 289 } | |
| 290 textdata = (textin) ? text : pixs->text; /* do not free */ | |
| 291 | |
| 292 /* Crop and binarize if necessary */ | |
| 293 if (box) | |
| 294 pix1 = pixClipRectangle(pixs, box, NULL); | |
| 295 else | |
| 296 pix1 = pixClone(pixs); | |
| 297 if (pixGetDepth(pix1) > 1) | |
| 298 pix2 = pixConvertTo1(pix1, recog->threshold); | |
| 299 else | |
| 300 pix2 = pixClone(pix1); | |
| 301 pixDestroy(&pix1); | |
| 302 | |
| 303 /* Remove isolated noise, using as a criterion all components | |
| 304 * that are removed by a vertical opening of size 5. */ | |
| 305 pix3 = pixMorphSequence(pix2, "o1.5", 0); /* seed */ | |
| 306 pixSeedfillBinary(pix3, pix3, pix2, 8); /* fill from seed; clip to pix2 */ | |
| 307 pixDestroy(&pix2); | |
| 308 | |
| 309 /* Clip to foreground */ | |
| 310 pixClipToForeground(pix3, &pix4, NULL); | |
| 311 pixDestroy(&pix3); | |
| 312 if (!pix4) | |
| 313 return ERROR_INT("pix4 is empty", __func__, 1); | |
| 314 | |
| 315 /* Verify that if there is more than 1 c.c., they all have | |
| 316 * horizontal overlap */ | |
| 317 na = pixCountByColumn(pix4, NULL); | |
| 318 numaCountNonzeroRuns(na, &nsets); | |
| 319 numaDestroy(&na); | |
| 320 if (nsets > 1) { | |
| 321 L_WARNING("found %d sets of horiz separated c.c.; skipping\n", | |
| 322 __func__, nsets); | |
| 323 pixDestroy(&pix4); | |
| 324 return 1; | |
| 325 } | |
| 326 | |
| 327 pixSetText(pix4, textdata); | |
| 328 *ppix = pix4; | |
| 329 return 0; | |
| 330 } | |
| 331 | |
| 332 | |
| 333 /*! | |
| 334 * \brief recogAddSample() | |
| 335 * | |
| 336 * \param[in] recog | |
| 337 * \param[in] pix a single character, 1 bpp | |
| 338 * \param[in] debug | |
| 339 * \return 0 if OK, 1 on error | |
| 340 * | |
| 341 * <pre> | |
| 342 * Notes: | |
| 343 * (1) The pix is 1 bpp, with the character string label embedded. | |
| 344 * (2) The pixaa_u array of the recog is initialized to accept | |
| 345 * up to 256 different classes. When training is finished, | |
| 346 * the arrays are truncated to the actual number of classes. | |
| 347 * To pad an existing recog from the boot recognizers, training | |
| 348 * is started again; if samples from a new class are added, | |
| 349 * the pixaa_u array is extended by adding a pixa to hold them. | |
| 350 * </pre> | |
| 351 */ | |
| 352 l_ok | |
| 353 recogAddSample(L_RECOG *recog, | |
| 354 PIX *pix, | |
| 355 l_int32 debug) | |
| 356 { | |
| 357 char *text; | |
| 358 l_int32 npa, charint, index; | |
| 359 PIXA *pixa1; | |
| 360 PIXAA *paa; | |
| 361 | |
| 362 if (!recog) | |
| 363 return ERROR_INT("recog not defined", __func__, 1); | |
| 364 if (!pix || pixGetDepth(pix) != 1) | |
| 365 return ERROR_INT("pix not defined or not 1 bpp\n", __func__, 1); | |
| 366 if (recog->train_done) | |
| 367 return ERROR_INT("not added: training has been completed", __func__, 1); | |
| 368 paa = recog->pixaa_u; | |
| 369 | |
| 370 /* Make sure the character is in the set */ | |
| 371 text = pixGetText(pix); | |
| 372 if (l_convertCharstrToInt(text, &charint) == 1) { | |
| 373 L_ERROR("invalid text: %s\n", __func__, text); | |
| 374 return 1; | |
| 375 } | |
| 376 | |
| 377 /* Determine the class array index. Check if the class | |
| 378 * already exists, and if not, add it. */ | |
| 379 if (recogGetClassIndex(recog, charint, text, &index) == 1) { | |
| 380 /* New class must be added */ | |
| 381 npa = pixaaGetCount(paa, NULL); | |
| 382 if (index > npa) { | |
| 383 L_ERROR("oops: bad index %d > npa %d!!\n", __func__, index, npa); | |
| 384 return 1; | |
| 385 } | |
| 386 if (index == npa) { /* paa needs to be extended */ | |
| 387 L_INFO("Adding new class and pixa: index = %d, text = %s\n", | |
| 388 __func__, index, text); | |
| 389 pixa1 = pixaCreate(10); | |
| 390 pixaaAddPixa(paa, pixa1, L_INSERT); | |
| 391 } | |
| 392 } | |
| 393 if (debug) { | |
| 394 L_INFO("Identified text label: %s\n", __func__, text); | |
| 395 L_INFO("Identified: charint = %d, index = %d\n", | |
| 396 __func__, charint, index); | |
| 397 } | |
| 398 | |
| 399 /* Insert the unscaled character image into the right pixa. | |
| 400 * (Unscaled images are required to split touching characters.) */ | |
| 401 recog->num_samples++; | |
| 402 pixaaAddPix(paa, index, pix, NULL, L_COPY); | |
| 403 return 0; | |
| 404 } | |
| 405 | |
| 406 | |
| 407 /*! | |
| 408 * \brief recogModifyTemplate() | |
| 409 * | |
| 410 * \param[in] recog | |
| 411 * \param[in] pixs 1 bpp, to be optionally scaled and turned into | |
| 412 * strokes of fixed width | |
| 413 * \return pixd modified pix if OK, NULL on error | |
| 414 */ | |
| 415 PIX * | |
| 416 recogModifyTemplate(L_RECOG *recog, | |
| 417 PIX *pixs) | |
| 418 { | |
| 419 l_int32 w, h, empty; | |
| 420 PIX *pix1, *pix2; | |
| 421 | |
| 422 if (!recog) | |
| 423 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 424 if (!pixs) | |
| 425 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 426 | |
| 427 /* Scale first */ | |
| 428 pixGetDimensions(pixs, &w, &h, NULL); | |
| 429 if ((recog->scalew == 0 || recog->scalew == w) && | |
| 430 (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */ | |
| 431 pix1 = pixCopy(NULL, pixs); | |
| 432 } else { | |
| 433 pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh); | |
| 434 } | |
| 435 if (!pix1) | |
| 436 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL); | |
| 437 | |
| 438 /* Then optionally convert to lines */ | |
| 439 if (recog->linew <= 0) { | |
| 440 pix2 = pixClone(pix1); | |
| 441 } else { | |
| 442 pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8); | |
| 443 } | |
| 444 pixDestroy(&pix1); | |
| 445 if (!pix2) | |
| 446 return (PIX *)ERROR_PTR("pix2 not made", __func__, NULL); | |
| 447 | |
| 448 /* Make sure we still have some pixels */ | |
| 449 pixZero(pix2, &empty); | |
| 450 if (empty) { | |
| 451 pixDestroy(&pix2); | |
| 452 return (PIX *)ERROR_PTR("modified template has no pixels", | |
| 453 __func__, NULL); | |
| 454 } | |
| 455 return pix2; | |
| 456 } | |
| 457 | |
| 458 | |
| 459 /*! | |
| 460 * \brief recogAverageSamples() | |
| 461 * | |
| 462 * \param[in] recog addr of existing recog | |
| 463 * \param[in] debug | |
| 464 * \return 0 on success, 1 on failure | |
| 465 * | |
| 466 * <pre> | |
| 467 * Notes: | |
| 468 * (1) This is only called in two situations: | |
| 469 * (a) When splitting characters using either the DID method | |
| 470 * recogDecode() or the the greedy splitter | |
| 471 * recogCorrelationBestRow() | |
| 472 * (b) By a special recognizer that is used to remove outliers. | |
| 473 * Both unscaled and scaled inputs are averaged. | |
| 474 * (2) If the data in any class is nonexistent (no samples), or | |
| 475 * very bad (no fg pixels in the average), or if the ratio | |
| 476 * of max/min average unscaled class template heights is | |
| 477 * greater than max_ht_ratio, this function fails. The caller | |
| 478 * must check the return value of the recog, and destroy the | |
| 479 * recog on failure. | |
| 480 * (3) Set debug = 1 to view the resulting templates and their centroids. | |
| 481 * </pre> | |
| 482 */ | |
| 483 l_int32 | |
| 484 recogAverageSamples(L_RECOG *recog, | |
| 485 l_int32 debug) | |
| 486 { | |
| 487 l_int32 i, nsamp, size, area, bx, by, badclass; | |
| 488 l_float32 x, y, hratio; | |
| 489 BOX *box; | |
| 490 PIXA *pixa1; | |
| 491 PIX *pix1, *pix2, *pix3; | |
| 492 PTA *pta1; | |
| 493 | |
| 494 if (!recog) | |
| 495 return ERROR_INT("recog not defined", __func__, 1); | |
| 496 | |
| 497 if (recog->ave_done) { | |
| 498 if (debug) /* always do this if requested */ | |
| 499 recogShowAverageTemplates(recog); | |
| 500 return 0; | |
| 501 } | |
| 502 | |
| 503 /* Remove any previous averaging data */ | |
| 504 size = recog->setsize; | |
| 505 pixaDestroy(&recog->pixa_u); | |
| 506 ptaDestroy(&recog->pta_u); | |
| 507 numaDestroy(&recog->nasum_u); | |
| 508 recog->pixa_u = pixaCreate(size); | |
| 509 recog->pta_u = ptaCreate(size); | |
| 510 recog->nasum_u = numaCreate(size); | |
| 511 | |
| 512 pixaDestroy(&recog->pixa); | |
| 513 ptaDestroy(&recog->pta); | |
| 514 numaDestroy(&recog->nasum); | |
| 515 recog->pixa = pixaCreate(size); | |
| 516 recog->pta = ptaCreate(size); | |
| 517 recog->nasum = numaCreate(size); | |
| 518 | |
| 519 /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area. | |
| 520 * Note that when we threshold to 1 bpp the 8 bpp averaged template | |
| 521 * that is returned from the accumulator, it will not be cropped | |
| 522 * to the foreground. We must crop it, because the correlator | |
| 523 * makes that assumption and will return a zero value if the | |
| 524 * width or height of the two images differs by several pixels. | |
| 525 * But cropping to fg can cause the value of the centroid to | |
| 526 * change, if bx > 0 or by > 0. */ | |
| 527 badclass = FALSE; | |
| 528 for (i = 0; i < size; i++) { | |
| 529 pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE); | |
| 530 pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE); | |
| 531 nsamp = pixaGetCount(pixa1); | |
| 532 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ | |
| 533 if (nsamp == 0) { /* no information for this class */ | |
| 534 L_ERROR("no samples in class %d\n", __func__, i); | |
| 535 badclass = TRUE; | |
| 536 pixaDestroy(&pixa1); | |
| 537 ptaDestroy(&pta1); | |
| 538 break; | |
| 539 } else { | |
| 540 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); | |
| 541 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); | |
| 542 pixInvert(pix2, pix2); | |
| 543 pixClipToForeground(pix2, &pix3, &box); | |
| 544 if (!box) { | |
| 545 L_ERROR("no fg pixels in average for uclass %d\n", __func__, i); | |
| 546 badclass = TRUE; | |
| 547 pixDestroy(&pix1); | |
| 548 pixDestroy(&pix2); | |
| 549 pixaDestroy(&pixa1); | |
| 550 ptaDestroy(&pta1); | |
| 551 break; | |
| 552 } else { | |
| 553 boxGetGeometry(box, &bx, &by, NULL, NULL); | |
| 554 pixaAddPix(recog->pixa_u, pix3, L_INSERT); | |
| 555 ptaAddPt(recog->pta_u, x - bx, y - by); /* correct centroid */ | |
| 556 pixCountPixels(pix3, &area, recog->sumtab); | |
| 557 numaAddNumber(recog->nasum_u, area); /* foreground */ | |
| 558 boxDestroy(&box); | |
| 559 } | |
| 560 pixDestroy(&pix1); | |
| 561 pixDestroy(&pix2); | |
| 562 } | |
| 563 pixaDestroy(&pixa1); | |
| 564 ptaDestroy(&pta1); | |
| 565 } | |
| 566 | |
| 567 /* Are any classes bad? */ | |
| 568 if (badclass) | |
| 569 return ERROR_INT("at least 1 bad class", __func__, 1); | |
| 570 | |
| 571 /* Get the range of sizes of the unscaled average templates. | |
| 572 * Reject if the height ratio is too large. */ | |
| 573 pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u, | |
| 574 &recog->maxwidth_u, &recog->maxheight_u); | |
| 575 hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u; | |
| 576 if (hratio > recog->max_ht_ratio) { | |
| 577 L_ERROR("ratio of max/min height of average templates = %4.1f\n", | |
| 578 __func__, hratio); | |
| 579 return 1; | |
| 580 } | |
| 581 | |
| 582 /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */ | |
| 583 for (i = 0; i < size; i++) { | |
| 584 pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE); | |
| 585 pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE); | |
| 586 nsamp = pixaGetCount(pixa1); | |
| 587 nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ | |
| 588 pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); | |
| 589 pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); | |
| 590 pixInvert(pix2, pix2); | |
| 591 pixClipToForeground(pix2, &pix3, &box); | |
| 592 if (!box) { | |
| 593 L_ERROR("no fg pixels in average for class %d\n", __func__, i); | |
| 594 badclass = TRUE; | |
| 595 pixDestroy(&pix1); | |
| 596 pixDestroy(&pix2); | |
| 597 pixaDestroy(&pixa1); | |
| 598 ptaDestroy(&pta1); | |
| 599 break; | |
| 600 } else { | |
| 601 boxGetGeometry(box, &bx, &by, NULL, NULL); | |
| 602 pixaAddPix(recog->pixa, pix3, L_INSERT); | |
| 603 ptaAddPt(recog->pta, x - bx, y - by); /* correct centroid */ | |
| 604 pixCountPixels(pix3, &area, recog->sumtab); | |
| 605 numaAddNumber(recog->nasum, area); /* foreground */ | |
| 606 boxDestroy(&box); | |
| 607 } | |
| 608 pixDestroy(&pix1); | |
| 609 pixDestroy(&pix2); | |
| 610 pixaDestroy(&pixa1); | |
| 611 ptaDestroy(&pta1); | |
| 612 } | |
| 613 | |
| 614 if (badclass) | |
| 615 return ERROR_INT("no fg pixels in at least 1 class", __func__, 1); | |
| 616 | |
| 617 /* Get the range of widths of the scaled average templates */ | |
| 618 pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL); | |
| 619 | |
| 620 /* Get dimensions useful for splitting */ | |
| 621 recog->min_splitw = L_MAX(5, recog->minwidth_u - 5); | |
| 622 recog->max_splith = recog->maxheight_u + 12; /* allow for skew */ | |
| 623 | |
| 624 if (debug) | |
| 625 recogShowAverageTemplates(recog); | |
| 626 | |
| 627 recog->ave_done = TRUE; | |
| 628 return 0; | |
| 629 } | |
| 630 | |
| 631 | |
| 632 /*! | |
| 633 * \brief pixaAccumulateSamples() | |
| 634 * | |
| 635 * \param[in] pixa of samples from the same class, 1 bpp | |
| 636 * \param[in] pta [optional] of centroids of the samples | |
| 637 * \param[out] ppixd accumulated samples, 8 bpp | |
| 638 * \param[out] px [optional] average x coordinate of centroids | |
| 639 * \param[out] py [optional] average y coordinate of centroids | |
| 640 * \return 0 on success, 1 on failure | |
| 641 * | |
| 642 * <pre> | |
| 643 * Notes: | |
| 644 * (1) This generates an aligned (by centroid) sum of the input pix. | |
| 645 * (2) We use only the first 256 samples; that's plenty. | |
| 646 * (3) If pta is not input, we generate two tables, and discard | |
| 647 * after use. If this is called many times, it is better | |
| 648 * to precompute the pta. | |
| 649 * </pre> | |
| 650 */ | |
| 651 l_int32 | |
| 652 pixaAccumulateSamples(PIXA *pixa, | |
| 653 PTA *pta, | |
| 654 PIX **ppixd, | |
| 655 l_float32 *px, | |
| 656 l_float32 *py) | |
| 657 { | |
| 658 l_int32 i, n, maxw, maxh, xdiff, ydiff; | |
| 659 l_int32 *centtab, *sumtab; | |
| 660 l_float32 xc, yc, xave, yave; | |
| 661 PIX *pix1, *pix2, *pixsum; | |
| 662 PTA *ptac; | |
| 663 | |
| 664 if (px) *px = 0; | |
| 665 if (py) *py = 0; | |
| 666 if (!ppixd) | |
| 667 return ERROR_INT("&pixd not defined", __func__, 1); | |
| 668 *ppixd = NULL; | |
| 669 if (!pixa) | |
| 670 return ERROR_INT("pixa not defined", __func__, 1); | |
| 671 | |
| 672 n = pixaGetCount(pixa); | |
| 673 if (pta && ptaGetCount(pta) != n) | |
| 674 return ERROR_INT("pta count differs from pixa count", __func__, 1); | |
| 675 n = L_MIN(n, 256); /* take the first 256 only */ | |
| 676 if (n == 0) | |
| 677 return ERROR_INT("pixa array empty", __func__, 1); | |
| 678 | |
| 679 /* Find the centroids */ | |
| 680 if (pta) { | |
| 681 ptac = ptaClone(pta); | |
| 682 } else { /* generate them here */ | |
| 683 ptac = ptaCreate(n); | |
| 684 centtab = makePixelCentroidTab8(); | |
| 685 sumtab = makePixelSumTab8(); | |
| 686 for (i = 0; i < n; i++) { | |
| 687 pix1 = pixaGetPix(pixa, i, L_CLONE); | |
| 688 pixCentroid(pix1, centtab, sumtab, &xc, &yc); | |
| 689 ptaAddPt(ptac, xc, yc); | |
| 690 } | |
| 691 LEPT_FREE(centtab); | |
| 692 LEPT_FREE(sumtab); | |
| 693 } | |
| 694 | |
| 695 /* Find the average value of the centroids */ | |
| 696 xave = yave = 0; | |
| 697 for (i = 0; i < n; i++) { | |
| 698 ptaGetPt(pta, i, &xc, &yc); | |
| 699 xave += xc; | |
| 700 yave += yc; | |
| 701 } | |
| 702 xave = xave / (l_float32)n; | |
| 703 yave = yave / (l_float32)n; | |
| 704 if (px) *px = xave; | |
| 705 if (py) *py = yave; | |
| 706 | |
| 707 /* Place all pix with their centroids located at the average | |
| 708 * centroid value, and sum the results. Make the accumulator | |
| 709 * image slightly larger than the largest sample to insure | |
| 710 * that all pixels are represented in the accumulator. */ | |
| 711 pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh); | |
| 712 pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0); | |
| 713 pix1 = pixCreate(maxw, maxh, 1); | |
| 714 for (i = 0; i < n; i++) { | |
| 715 pix2 = pixaGetPix(pixa, i, L_CLONE); | |
| 716 ptaGetPt(ptac, i, &xc, &yc); | |
| 717 xdiff = (l_int32)(xave - xc); | |
| 718 ydiff = (l_int32)(yave - yc); | |
| 719 pixClearAll(pix1); | |
| 720 pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC, | |
| 721 pix2, 0, 0); | |
| 722 pixAccumulate(pixsum, pix1, L_ARITH_ADD); | |
| 723 pixDestroy(&pix2); | |
| 724 } | |
| 725 *ppixd = pixFinalAccumulate(pixsum, 0, 8); | |
| 726 | |
| 727 pixDestroy(&pix1); | |
| 728 pixDestroy(&pixsum); | |
| 729 ptaDestroy(&ptac); | |
| 730 return 0; | |
| 731 } | |
| 732 | |
| 733 | |
| 734 /*! | |
| 735 * \brief recogTrainingFinished() | |
| 736 * | |
| 737 * \param[in] precog addr of recog | |
| 738 * \param[in] modifyflag 1 to use recogModifyTemplate(); 0 otherwise | |
| 739 * \param[in] minsize set to -1 for default | |
| 740 * \param[in] minfract set to -1.0 for default | |
| 741 * \return 0 if OK, 1 on error (input recog will be destroyed) | |
| 742 * | |
| 743 * <pre> | |
| 744 * Notes: | |
| 745 * (1) This must be called after all training samples have been added. | |
| 746 * (2) If the templates are not good enough, the recog input is destroyed. | |
| 747 * (3) Usually, %modifyflag == 1, because we want to apply | |
| 748 * recogModifyTemplate() to generate the actual templates | |
| 749 * that will be used. The one exception is when reading a | |
| 750 * serialized recog: there we want to put the same set of | |
| 751 * templates in both the unscaled and modified pixaa. | |
| 752 * See recogReadStream() to see why we do this. | |
| 753 * (4) See recogTemplatesAreOK() for %minsize and %minfract usage. | |
| 754 * (5) The following things are done here: | |
| 755 * (a) Allocate (or reallocate) storage for (possibly) modified | |
| 756 * bitmaps, centroids, and fg areas. | |
| 757 * (b) Generate the (possibly) modified bitmaps. | |
| 758 * (c) Compute centroid and fg area data for both unscaled and | |
| 759 * modified bitmaps. | |
| 760 * (d) Truncate the pixaa, ptaa and numaa arrays down from | |
| 761 * 256 to the actual size. | |
| 762 * (6) Putting these operations here makes it simple to recompute | |
| 763 * the recog with different modifications on the bitmaps. | |
| 764 * (7) Call recogShowContent() to display the templates, both | |
| 765 * unscaled and modified. | |
| 766 * </pre> | |
| 767 */ | |
| 768 l_ok | |
| 769 recogTrainingFinished(L_RECOG **precog, | |
| 770 l_int32 modifyflag, | |
| 771 l_int32 minsize, | |
| 772 l_float32 minfract) | |
| 773 { | |
| 774 l_int32 ok, i, j, size, nc, ns, area; | |
| 775 l_float32 xave, yave; | |
| 776 PIX *pix, *pixd; | |
| 777 PIXA *pixa; | |
| 778 PIXAA *paa; | |
| 779 PTA *pta; | |
| 780 PTAA *ptaa; | |
| 781 L_RECOG *recog; | |
| 782 | |
| 783 if (!precog) | |
| 784 return ERROR_INT("&recog not defined", __func__, 1); | |
| 785 if ((recog = *precog) == NULL) | |
| 786 return ERROR_INT("recog not defined", __func__, 1); | |
| 787 if (recog->train_done) return 0; | |
| 788 | |
| 789 /* Test the input templates */ | |
| 790 recogTemplatesAreOK(recog, minsize, minfract, &ok); | |
| 791 if (!ok) { | |
| 792 recogDestroy(precog); | |
| 793 return ERROR_INT("bad templates", __func__, 1); | |
| 794 } | |
| 795 | |
| 796 /* Generate the storage for the possibly-scaled training bitmaps */ | |
| 797 size = recog->maxarraysize; | |
| 798 paa = pixaaCreate(size); | |
| 799 pixa = pixaCreate(1); | |
| 800 pixaaInitFull(paa, pixa); | |
| 801 pixaDestroy(&pixa); | |
| 802 pixaaDestroy(&recog->pixaa); | |
| 803 recog->pixaa = paa; | |
| 804 | |
| 805 /* Generate the storage for the unscaled centroid training data */ | |
| 806 ptaa = ptaaCreate(size); | |
| 807 pta = ptaCreate(0); | |
| 808 ptaaInitFull(ptaa, pta); | |
| 809 ptaaDestroy(&recog->ptaa_u); | |
| 810 recog->ptaa_u = ptaa; | |
| 811 | |
| 812 /* Generate the storage for the possibly-scaled centroid data */ | |
| 813 ptaa = ptaaCreate(size); | |
| 814 ptaaInitFull(ptaa, pta); | |
| 815 ptaDestroy(&pta); | |
| 816 ptaaDestroy(&recog->ptaa); | |
| 817 recog->ptaa = ptaa; | |
| 818 | |
| 819 /* Generate the storage for the fg area data */ | |
| 820 numaaDestroy(&recog->naasum_u); | |
| 821 numaaDestroy(&recog->naasum); | |
| 822 recog->naasum_u = numaaCreateFull(size, 0); | |
| 823 recog->naasum = numaaCreateFull(size, 0); | |
| 824 | |
| 825 paa = recog->pixaa_u; | |
| 826 nc = recog->setsize; | |
| 827 for (i = 0; i < nc; i++) { | |
| 828 pixa = pixaaGetPixa(paa, i, L_CLONE); | |
| 829 ns = pixaGetCount(pixa); | |
| 830 for (j = 0; j < ns; j++) { | |
| 831 /* Save centroid and area data for the unscaled pix */ | |
| 832 pix = pixaGetPix(pixa, j, L_CLONE); | |
| 833 pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave); | |
| 834 ptaaAddPt(recog->ptaa_u, i, xave, yave); | |
| 835 pixCountPixels(pix, &area, recog->sumtab); | |
| 836 numaaAddNumber(recog->naasum_u, i, area); /* foreground */ | |
| 837 | |
| 838 /* Insert the (optionally) scaled character image, and | |
| 839 * save centroid and area data for it */ | |
| 840 if (modifyflag == 1) | |
| 841 pixd = recogModifyTemplate(recog, pix); | |
| 842 else | |
| 843 pixd = pixClone(pix); | |
| 844 if (pixd) { | |
| 845 pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT); | |
| 846 pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave); | |
| 847 ptaaAddPt(recog->ptaa, i, xave, yave); | |
| 848 pixCountPixels(pixd, &area, recog->sumtab); | |
| 849 numaaAddNumber(recog->naasum, i, area); | |
| 850 } else { | |
| 851 L_ERROR("failed: modified template for class %d, sample %d\n", | |
| 852 __func__, i, j); | |
| 853 } | |
| 854 pixDestroy(&pix); | |
| 855 } | |
| 856 pixaDestroy(&pixa); | |
| 857 } | |
| 858 | |
| 859 /* Truncate the arrays to those with non-empty containers */ | |
| 860 pixaaTruncate(recog->pixaa_u); | |
| 861 pixaaTruncate(recog->pixaa); | |
| 862 ptaaTruncate(recog->ptaa_u); | |
| 863 ptaaTruncate(recog->ptaa); | |
| 864 numaaTruncate(recog->naasum_u); | |
| 865 numaaTruncate(recog->naasum); | |
| 866 | |
| 867 recog->train_done = TRUE; | |
| 868 return 0; | |
| 869 } | |
| 870 | |
| 871 | |
| 872 /*! | |
| 873 * \brief recogTemplatesAreOK() | |
| 874 * | |
| 875 * \param[in] recog | |
| 876 * \param[in] minsize set to -1 for default | |
| 877 * \param[in] minfract set to -1.0 for default | |
| 878 * \param[out] pok set to 1 if template set is valid; 0 otherwise | |
| 879 * \return 1 on error; 0 otherwise. An invalid template set is not an error. | |
| 880 * | |
| 881 * <pre> | |
| 882 * Notes: | |
| 883 * (1) This is called by recogTrainingFinished(). A return value of 0 | |
| 884 * will cause recogTrainingFinished() to destroy the recog. | |
| 885 * (2) %minsize is the minimum number of samples required for | |
| 886 * the class; -1 uses the default | |
| 887 * (3) %minfract is the minimum fraction of classes required for | |
| 888 * the recog to be usable; -1.0 uses the default | |
| 889 * </pre> | |
| 890 */ | |
| 891 static l_int32 | |
| 892 recogTemplatesAreOK(L_RECOG *recog, | |
| 893 l_int32 minsize, | |
| 894 l_float32 minfract, | |
| 895 l_int32 *pok) | |
| 896 { | |
| 897 l_int32 i, n, validsets, nt; | |
| 898 l_float32 ratio; | |
| 899 NUMA *na; | |
| 900 | |
| 901 if (!pok) | |
| 902 return ERROR_INT("&ok not defined", __func__, 1); | |
| 903 *pok = 0; | |
| 904 if (!recog) | |
| 905 return ERROR_INT("recog not defined", __func__, 1); | |
| 906 | |
| 907 minsize = (minsize < 0) ? DefaultMinSetSize : minsize; | |
| 908 minfract = (minfract < 0) ? DefaultMinSetFract : minfract; | |
| 909 n = pixaaGetCount(recog->pixaa_u, &na); | |
| 910 validsets = 0; | |
| 911 for (i = 0, validsets = 0; i < n; i++) { | |
| 912 numaGetIValue(na, i, &nt); | |
| 913 if (nt >= minsize) | |
| 914 validsets++; | |
| 915 } | |
| 916 numaDestroy(&na); | |
| 917 ratio = (l_float32)validsets / (l_float32)recog->charset_size; | |
| 918 *pok = (ratio >= minfract) ? 1 : 0; | |
| 919 return 0; | |
| 920 } | |
| 921 | |
| 922 | |
| 923 /*! | |
| 924 * \brief recogFilterPixaBySize() | |
| 925 * | |
| 926 * \param[in] pixas labeled templates | |
| 927 * \param[in] setsize size of character set (number of classes) | |
| 928 * \param[in] maxkeep max number of templates to keep in a class | |
| 929 * \param[in] max_ht_ratio max allowed height ratio (see below) | |
| 930 * \param[out] pna [optional] debug output, giving the number | |
| 931 * in each class after filtering; use NULL to skip | |
| 932 * \return pixa filtered templates, or NULL on error | |
| 933 * | |
| 934 * <pre> | |
| 935 * Notes: | |
| 936 * (1) The basic assumption is that the most common and larger | |
| 937 * templates in each class are more likely to represent the | |
| 938 * characters we are interested in. For example, larger digits | |
| 939 * are more likely to represent page numbers, and smaller digits | |
| 940 * could be data in tables. Therefore, we bias the first | |
| 941 * stage of filtering toward the larger characters by removing | |
| 942 * very small ones, and select based on proximity of the | |
| 943 * remaining characters to median height. | |
| 944 * (2) For each of the %setsize classes, order the templates | |
| 945 * increasingly by height. Take the rank 0.9 height. Eliminate | |
| 946 * all templates that are shorter by more than %max_ht_ratio. | |
| 947 * Of the remaining ones, select up to %maxkeep that are closest | |
| 948 * in rank order height to the median template. | |
| 949 * </pre> | |
| 950 */ | |
| 951 PIXA * | |
| 952 recogFilterPixaBySize(PIXA *pixas, | |
| 953 l_int32 setsize, | |
| 954 l_int32 maxkeep, | |
| 955 l_float32 max_ht_ratio, | |
| 956 NUMA **pna) | |
| 957 { | |
| 958 l_int32 i, j, h90, hj, j1, j2, j90, n, nc; | |
| 959 l_float32 ratio; | |
| 960 NUMA *na; | |
| 961 PIXA *pixa1, *pixa2, *pixa3, *pixa4, *pixa5; | |
| 962 PIXAA *paa; | |
| 963 | |
| 964 if (pna) *pna = NULL; | |
| 965 if (!pixas) | |
| 966 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 967 | |
| 968 if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL) | |
| 969 return (PIXA *)ERROR_PTR("paa not made", __func__, NULL); | |
| 970 nc = pixaaGetCount(paa, NULL); | |
| 971 na = (pna) ? numaCreate(0) : NULL; | |
| 972 if (pna) *pna = na; | |
| 973 pixa5 = pixaCreate(0); | |
| 974 for (i = 0; i < nc; i++) { | |
| 975 pixa1 = pixaaGetPixa(paa, i, L_CLONE); | |
| 976 if ((n = pixaGetCount(pixa1)) == 0) { | |
| 977 pixaDestroy(&pixa1); | |
| 978 continue; | |
| 979 } | |
| 980 pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL, | |
| 981 L_COPY); | |
| 982 j90 = (l_int32)(0.9 * n); | |
| 983 pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL); | |
| 984 pixa3 = pixaCreate(n); | |
| 985 for (j = 0; j < n; j++) { | |
| 986 pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL); | |
| 987 ratio = (l_float32)h90 / (l_float32)hj; | |
| 988 if (ratio <= max_ht_ratio) | |
| 989 pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT); | |
| 990 } | |
| 991 n = pixaGetCount(pixa3); | |
| 992 if (n <= maxkeep) { | |
| 993 pixa4 = pixaCopy(pixa3, L_CLONE); | |
| 994 } else { | |
| 995 j1 = (n - maxkeep) / 2; | |
| 996 j2 = j1 + maxkeep - 1; | |
| 997 pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE); | |
| 998 } | |
| 999 if (na) numaAddNumber(na, pixaGetCount(pixa4)); | |
| 1000 pixaJoin(pixa5, pixa4, 0, -1); | |
| 1001 pixaDestroy(&pixa1); | |
| 1002 pixaDestroy(&pixa2); | |
| 1003 pixaDestroy(&pixa3); | |
| 1004 pixaDestroy(&pixa4); | |
| 1005 } | |
| 1006 | |
| 1007 pixaaDestroy(&paa); | |
| 1008 return pixa5; | |
| 1009 } | |
| 1010 | |
| 1011 | |
| 1012 /*! | |
| 1013 * \brief recogSortPixaByClass() | |
| 1014 * | |
| 1015 * \param[in] pixa labeled templates | |
| 1016 * \param[in] setsize size of character set (number of classes) | |
| 1017 * \return paa pixaa where each pixa has templates for one class, | |
| 1018 * or null on error | |
| 1019 */ | |
| 1020 PIXAA * | |
| 1021 recogSortPixaByClass(PIXA *pixa, | |
| 1022 l_int32 setsize) | |
| 1023 { | |
| 1024 PIXAA *paa; | |
| 1025 L_RECOG *recog; | |
| 1026 | |
| 1027 if (!pixa) | |
| 1028 return (PIXAA *)ERROR_PTR("pixa not defined", __func__, NULL); | |
| 1029 | |
| 1030 if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL) | |
| 1031 return (PIXAA *)ERROR_PTR("recog not made", __func__, NULL); | |
| 1032 paa = recog->pixaa_u; /* grab the paa of unscaled templates */ | |
| 1033 recog->pixaa_u = NULL; | |
| 1034 recogDestroy(&recog); | |
| 1035 return paa; | |
| 1036 } | |
| 1037 | |
| 1038 | |
| 1039 /*! | |
| 1040 * \brief recogRemoveOutliers1() | |
| 1041 * | |
| 1042 * \param[in] precog addr of recog with unscaled labeled templates | |
| 1043 * \param[in] minscore keep everything with at least this score | |
| 1044 * \param[in] mintarget minimum desired number to retain if possible | |
| 1045 * \param[in] minsize minimum number of samples required for a class | |
| 1046 * \param[out] ppixsave [optional debug] saved templates, with scores | |
| 1047 * \param[out] ppixrem [optional debug] removed templates, with scores | |
| 1048 * \return 0 if OK, 1 on error. | |
| 1049 * | |
| 1050 * <pre> | |
| 1051 * Notes: | |
| 1052 * (1) This is a convenience wrapper when using default parameters | |
| 1053 * for the recog. See pixaRemoveOutliers1() for details. | |
| 1054 * (2) If this succeeds, the new recog replaces the input recog; | |
| 1055 * if it fails, the input recog is destroyed. | |
| 1056 * </pre> | |
| 1057 */ | |
| 1058 l_ok | |
| 1059 recogRemoveOutliers1(L_RECOG **precog, | |
| 1060 l_float32 minscore, | |
| 1061 l_int32 mintarget, | |
| 1062 l_int32 minsize, | |
| 1063 PIX **ppixsave, | |
| 1064 PIX **ppixrem) | |
| 1065 { | |
| 1066 PIXA *pixa1, *pixa2; | |
| 1067 L_RECOG *recog; | |
| 1068 | |
| 1069 if (!precog) | |
| 1070 return ERROR_INT("&recog not defined", __func__, 1); | |
| 1071 if (*precog == NULL) | |
| 1072 return ERROR_INT("recog not defined", __func__, 1); | |
| 1073 | |
| 1074 /* Extract the unscaled templates */ | |
| 1075 pixa1 = recogExtractPixa(*precog); | |
| 1076 recogDestroy(precog); | |
| 1077 | |
| 1078 pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize, | |
| 1079 ppixsave, ppixrem); | |
| 1080 pixaDestroy(&pixa1); | |
| 1081 if (!pixa2) | |
| 1082 return ERROR_INT("failure to remove outliers", __func__, 1); | |
| 1083 | |
| 1084 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); | |
| 1085 pixaDestroy(&pixa2); | |
| 1086 if (!recog) | |
| 1087 return ERROR_INT("failure to make recog from pixa sans outliers", | |
| 1088 __func__, 1); | |
| 1089 | |
| 1090 *precog = recog; | |
| 1091 return 0; | |
| 1092 } | |
| 1093 | |
| 1094 | |
| 1095 /*! | |
| 1096 * \brief pixaRemoveOutliers1() | |
| 1097 * | |
| 1098 * \param[in] pixas unscaled labeled templates | |
| 1099 * \param[in] minscore keep everything with at least this score; | |
| 1100 * use -1.0 for default. | |
| 1101 * \param[in] mintarget minimum desired number to retain if possible; | |
| 1102 * use -1 for default. | |
| 1103 * \param[in] minsize minimum number of samples required for a class; | |
| 1104 * use -1 for default. | |
| 1105 * \param[out] ppixsave [optional debug] saved templates, with scores | |
| 1106 * \param[out] ppixrem [optional debug] removed templates, with scores | |
| 1107 * \return pixa of unscaled templates to be kept, or NULL on error | |
| 1108 * | |
| 1109 * <pre> | |
| 1110 * Notes: | |
| 1111 * (1) Removing outliers is particularly important when recognition | |
| 1112 * goes against all the samples in the training set, as opposed | |
| 1113 * to the averages for each class. The reason is that we get | |
| 1114 * an identification error if a mislabeled template is a best | |
| 1115 * match for an input sample. | |
| 1116 * (2) Because the score values depend strongly on the quality | |
| 1117 * of the character images, to avoid losing too many samples | |
| 1118 * we supplement a minimum score for retention with a score | |
| 1119 * necessary to acquire the minimum target number of templates. | |
| 1120 * To do this we are willing to use a lower threshold, | |
| 1121 * LowerScoreThreshold, on the score. Consequently, with | |
| 1122 * poor quality templates, we may keep samples with a score | |
| 1123 * less than %minscore, but never less than LowerScoreThreshold. | |
| 1124 * And if the number of samples is less than %minsize, we do | |
| 1125 * not use any. | |
| 1126 * (3) This is meant to be used on a BAR, where the templates all | |
| 1127 * come from the same book; use minscore ~0.75. | |
| 1128 * (4) Method: make a scaled recog from the input %pixas. Then, | |
| 1129 * for each class: generate the averages, match each | |
| 1130 * scaled template against the average, and save unscaled | |
| 1131 * templates that had a sufficiently good match. | |
| 1132 * </pre> | |
| 1133 */ | |
| 1134 PIXA * | |
| 1135 pixaRemoveOutliers1(PIXA *pixas, | |
| 1136 l_float32 minscore, | |
| 1137 l_int32 mintarget, | |
| 1138 l_int32 minsize, | |
| 1139 PIX **ppixsave, | |
| 1140 PIX **ppixrem) | |
| 1141 { | |
| 1142 l_int32 i, j, debug, n, area1, area2; | |
| 1143 l_float32 x1, y1, x2, y2, minfract, score, rankscore, threshscore; | |
| 1144 NUMA *nasum, *narem, *nasave, *nascore; | |
| 1145 PIX *pix1, *pix2; | |
| 1146 PIXA *pixa, *pixarem, *pixad; | |
| 1147 PTA *pta; | |
| 1148 L_RECOG *recog; | |
| 1149 | |
| 1150 if (ppixsave) *ppixsave = NULL; | |
| 1151 if (ppixrem) *ppixrem = NULL; | |
| 1152 if (!pixas) | |
| 1153 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 1154 minscore = L_MIN(minscore, 1.0); | |
| 1155 if (minscore <= 0.0) | |
| 1156 minscore = DefaultMinScore; | |
| 1157 mintarget = L_MIN(mintarget, 3); | |
| 1158 if (mintarget <= 0) | |
| 1159 mintarget = DefaultMinTarget; | |
| 1160 if (minsize < 0) | |
| 1161 minsize = DefaultMinSetSize; | |
| 1162 | |
| 1163 /* Make a special height-scaled recognizer with average templates */ | |
| 1164 debug = (ppixsave || ppixrem) ? 1 : 0; | |
| 1165 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); | |
| 1166 if (!recog) | |
| 1167 return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL); | |
| 1168 if (recogAverageSamples(recog, debug) != 0) { | |
| 1169 recogDestroy(&recog); | |
| 1170 return (PIXA *)ERROR_PTR("bad templates", __func__, NULL); | |
| 1171 } | |
| 1172 | |
| 1173 nasave = (ppixsave) ? numaCreate(0) : NULL; | |
| 1174 pixarem = (ppixrem) ? pixaCreate(0) : NULL; | |
| 1175 narem = (ppixrem) ? numaCreate(0) : NULL; | |
| 1176 | |
| 1177 pixad = pixaCreate(0); | |
| 1178 for (i = 0; i < recog->setsize; i++) { | |
| 1179 /* Access the average template and values for scaled | |
| 1180 * images in this class */ | |
| 1181 pix1 = pixaGetPix(recog->pixa, i, L_CLONE); | |
| 1182 ptaGetPt(recog->pta, i, &x1, &y1); | |
| 1183 numaGetIValue(recog->nasum, i, &area1); | |
| 1184 | |
| 1185 /* Get the scores for each sample in the class */ | |
| 1186 pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); | |
| 1187 pta = ptaaGetPta(recog->ptaa, i, L_CLONE); /* centroids */ | |
| 1188 nasum = numaaGetNuma(recog->naasum, i, L_CLONE); /* fg areas */ | |
| 1189 n = pixaGetCount(pixa); | |
| 1190 nascore = numaCreate(n); | |
| 1191 for (j = 0; j < n; j++) { | |
| 1192 pix2 = pixaGetPix(pixa, j, L_CLONE); | |
| 1193 ptaGetPt(pta, j, &x2, &y2); /* centroid average */ | |
| 1194 numaGetIValue(nasum, j, &area2); /* fg sum average */ | |
| 1195 pixCorrelationScoreSimple(pix1, pix2, area1, area2, | |
| 1196 x1 - x2, y1 - y2, 5, 5, | |
| 1197 recog->sumtab, &score); | |
| 1198 numaAddNumber(nascore, score); | |
| 1199 if (debug && score == 0.0) /* typ. large size difference */ | |
| 1200 lept_stderr("Got 0 score for i = %d, j = %d\n", i, j); | |
| 1201 pixDestroy(&pix2); | |
| 1202 } | |
| 1203 pixDestroy(&pix1); | |
| 1204 | |
| 1205 /* Find the rankscore, corresponding to the 1.0 - minfract. | |
| 1206 * To attempt to maintain the minfract of templates, use as a | |
| 1207 * cutoff the minimum of minscore and the rank score. However, | |
| 1208 * no template is saved with an actual score less than | |
| 1209 * that at least one template is kept. */ | |
| 1210 minfract = (l_float32)mintarget / (l_float32)n; | |
| 1211 numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore); | |
| 1212 threshscore = L_MAX(LowerScoreThreshold, | |
| 1213 L_MIN(minscore, rankscore)); | |
| 1214 if (debug) { | |
| 1215 L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n", | |
| 1216 __func__, minscore, rankscore, threshscore); | |
| 1217 } | |
| 1218 | |
| 1219 /* Save templates that are at or above threshold. | |
| 1220 * Toss any classes with less than %minsize templates. */ | |
| 1221 for (j = 0; j < n; j++) { | |
| 1222 numaGetFValue(nascore, j, &score); | |
| 1223 pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); | |
| 1224 if (score >= threshscore && n >= minsize) { | |
| 1225 pixaAddPix(pixad, pix1, L_INSERT); | |
| 1226 if (nasave) numaAddNumber(nasave, score); | |
| 1227 } else if (debug) { | |
| 1228 pixaAddPix(pixarem, pix1, L_INSERT); | |
| 1229 numaAddNumber(narem, score); | |
| 1230 } else { | |
| 1231 pixDestroy(&pix1); | |
| 1232 } | |
| 1233 } | |
| 1234 | |
| 1235 pixaDestroy(&pixa); | |
| 1236 ptaDestroy(&pta); | |
| 1237 numaDestroy(&nasum); | |
| 1238 numaDestroy(&nascore); | |
| 1239 } | |
| 1240 | |
| 1241 if (ppixsave) { | |
| 1242 *ppixsave = pixDisplayOutliers(pixad, nasave); | |
| 1243 numaDestroy(&nasave); | |
| 1244 } | |
| 1245 if (ppixrem) { | |
| 1246 *ppixrem = pixDisplayOutliers(pixarem, narem); | |
| 1247 pixaDestroy(&pixarem); | |
| 1248 numaDestroy(&narem); | |
| 1249 } | |
| 1250 recogDestroy(&recog); | |
| 1251 return pixad; | |
| 1252 } | |
| 1253 | |
| 1254 | |
| 1255 /*! | |
| 1256 * \brief recogRemoveOutliers2() | |
| 1257 * | |
| 1258 * \param[in] precog addr of recog with unscaled labeled templates | |
| 1259 * \param[in] minscore keep everything with at least this score | |
| 1260 * \param[in] minsize minimum number of samples required for a class | |
| 1261 * \param[out] ppixsave [optional debug] saved templates, with scores | |
| 1262 * \param[out] ppixrem [optional debug] removed templates, with scores | |
| 1263 * \return 0 if OK, 1 on error. | |
| 1264 * | |
| 1265 * <pre> | |
| 1266 * Notes: | |
| 1267 * (1) This is a convenience wrapper when using default parameters | |
| 1268 * for the recog. See pixaRemoveOutliers2() for details. | |
| 1269 * (2) If this succeeds, the new recog replaces the input recog; | |
| 1270 * if it fails, the input recog is destroyed. | |
| 1271 * </pre> | |
| 1272 */ | |
| 1273 l_ok | |
| 1274 recogRemoveOutliers2(L_RECOG **precog, | |
| 1275 l_float32 minscore, | |
| 1276 l_int32 minsize, | |
| 1277 PIX **ppixsave, | |
| 1278 PIX **ppixrem) | |
| 1279 { | |
| 1280 PIXA *pixa1, *pixa2; | |
| 1281 L_RECOG *recog; | |
| 1282 | |
| 1283 if (!precog) | |
| 1284 return ERROR_INT("&recog not defined", __func__, 1); | |
| 1285 if (*precog == NULL) | |
| 1286 return ERROR_INT("recog not defined", __func__, 1); | |
| 1287 | |
| 1288 /* Extract the unscaled templates */ | |
| 1289 pixa1 = recogExtractPixa(*precog); | |
| 1290 recogDestroy(precog); | |
| 1291 | |
| 1292 pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem); | |
| 1293 pixaDestroy(&pixa1); | |
| 1294 if (!pixa2) | |
| 1295 return ERROR_INT("failure to remove outliers", __func__, 1); | |
| 1296 | |
| 1297 recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); | |
| 1298 pixaDestroy(&pixa2); | |
| 1299 if (!recog) | |
| 1300 return ERROR_INT("failure to make recog from pixa sans outliers", | |
| 1301 __func__, 1); | |
| 1302 | |
| 1303 *precog = recog; | |
| 1304 return 0; | |
| 1305 } | |
| 1306 | |
| 1307 | |
| 1308 /*! | |
| 1309 * \brief pixaRemoveOutliers2() | |
| 1310 * | |
| 1311 * \param[in] pixas unscaled labeled templates | |
| 1312 * \param[in] minscore keep everything with at least this score; | |
| 1313 * use -1.0 for default. | |
| 1314 * \param[in] minsize minimum number of samples required for a class; | |
| 1315 * use -1 for default. | |
| 1316 * \param[out] ppixsave [optional debug] saved templates, with scores | |
| 1317 * \param[out] ppixrem [optional debug] removed templates, with scores | |
| 1318 * \return pixa of unscaled templates to be kept, or NULL on error | |
| 1319 * | |
| 1320 * <pre> | |
| 1321 * Notes: | |
| 1322 * (1) Removing outliers is particularly important when recognition | |
| 1323 * goes against all the samples in the training set, as opposed | |
| 1324 * to the averages for each class. The reason is that we get | |
| 1325 * an identification error if a mislabeled template is a best | |
| 1326 * match for an input sample. | |
| 1327 * (2) This method compares each template against the average templates | |
| 1328 * of each class, and discards any template that has a higher | |
| 1329 * correlation to a class different from its own. It also | |
| 1330 * sets a lower bound on correlation scores with its class average. | |
| 1331 * (3) This is meant to be used on a BAR, where the templates all | |
| 1332 * come from the same book; use minscore ~0.75. | |
| 1333 * </pre> | |
| 1334 */ | |
| 1335 PIXA * | |
| 1336 pixaRemoveOutliers2(PIXA *pixas, | |
| 1337 l_float32 minscore, | |
| 1338 l_int32 minsize, | |
| 1339 PIX **ppixsave, | |
| 1340 PIX **ppixrem) | |
| 1341 { | |
| 1342 l_int32 i, j, k, n, area1, area2, maxk, debug; | |
| 1343 l_float32 x1, y1, x2, y2, score, maxscore; | |
| 1344 NUMA *nan, *nascore, *nasave; | |
| 1345 PIX *pix1, *pix2, *pix3; | |
| 1346 PIXA *pixarem, *pixad; | |
| 1347 L_RECOG *recog; | |
| 1348 | |
| 1349 if (ppixsave) *ppixsave = NULL; | |
| 1350 if (ppixrem) *ppixrem = NULL; | |
| 1351 if (!pixas) | |
| 1352 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 1353 minscore = L_MIN(minscore, 1.0); | |
| 1354 if (minscore <= 0.0) | |
| 1355 minscore = DefaultMinScore; | |
| 1356 if (minsize < 0) | |
| 1357 minsize = DefaultMinSetSize; | |
| 1358 | |
| 1359 /* Make a special height-scaled recognizer with average templates */ | |
| 1360 debug = (ppixsave || ppixrem) ? 1 : 0; | |
| 1361 recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); | |
| 1362 if (!recog) | |
| 1363 return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL); | |
| 1364 if (recogAverageSamples(recog, debug) != 0) { | |
| 1365 recogDestroy(&recog); | |
| 1366 return (PIXA *)ERROR_PTR("bad templates", __func__, NULL); | |
| 1367 } | |
| 1368 | |
| 1369 nasave = (ppixsave) ? numaCreate(0) : NULL; | |
| 1370 pixarem = (ppixrem) ? pixaCreate(0) : NULL; | |
| 1371 | |
| 1372 pixad = pixaCreate(0); | |
| 1373 pixaaGetCount(recog->pixaa, &nan); /* number of templates in each class */ | |
| 1374 for (i = 0; i < recog->setsize; i++) { | |
| 1375 /* Get the scores for each sample in the class, when comparing | |
| 1376 * with averages from all the classes. */ | |
| 1377 numaGetIValue(nan, i, &n); | |
| 1378 for (j = 0; j < n; j++) { | |
| 1379 pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE); | |
| 1380 ptaaGetPt(recog->ptaa, i, j, &x1, &y1); /* centroid */ | |
| 1381 numaaGetValue(recog->naasum, i, j, NULL, &area1); /* fg sum */ | |
| 1382 nascore = numaCreate(n); | |
| 1383 for (k = 0; k < recog->setsize; k++) { /* average templates */ | |
| 1384 pix2 = pixaGetPix(recog->pixa, k, L_CLONE); | |
| 1385 ptaGetPt(recog->pta, k, &x2, &y2); /* average centroid */ | |
| 1386 numaGetIValue(recog->nasum, k, &area2); /* average fg sum */ | |
| 1387 pixCorrelationScoreSimple(pix1, pix2, area1, area2, | |
| 1388 x1 - x2, y1 - y2, 5, 5, | |
| 1389 recog->sumtab, &score); | |
| 1390 numaAddNumber(nascore, score); | |
| 1391 pixDestroy(&pix2); | |
| 1392 } | |
| 1393 | |
| 1394 /* Save templates that are in the correct class and | |
| 1395 * at or above threshold. Toss any classes with less | |
| 1396 * than %minsize templates. */ | |
| 1397 numaGetMax(nascore, &maxscore, &maxk); | |
| 1398 if (maxk == i && maxscore >= minscore && n >= minsize) { | |
| 1399 /* save it */ | |
| 1400 pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); | |
| 1401 pixaAddPix(pixad, pix3, L_INSERT); | |
| 1402 if (nasave) numaAddNumber(nasave, maxscore); | |
| 1403 } else if (ppixrem) { /* outlier */ | |
| 1404 pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore); | |
| 1405 pixaAddPix(pixarem, pix3, L_INSERT); | |
| 1406 } | |
| 1407 numaDestroy(&nascore); | |
| 1408 pixDestroy(&pix1); | |
| 1409 } | |
| 1410 } | |
| 1411 | |
| 1412 if (ppixsave) { | |
| 1413 *ppixsave = pixDisplayOutliers(pixad, nasave); | |
| 1414 numaDestroy(&nasave); | |
| 1415 } | |
| 1416 if (ppixrem) { | |
| 1417 *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2); | |
| 1418 pixaDestroy(&pixarem); | |
| 1419 } | |
| 1420 | |
| 1421 numaDestroy(&nan); | |
| 1422 recogDestroy(&recog); | |
| 1423 return pixad; | |
| 1424 } | |
| 1425 | |
| 1426 | |
| 1427 /*------------------------------------------------------------------------* | |
| 1428 * Training on unlabeled data * | |
| 1429 *------------------------------------------------------------------------*/ | |
| 1430 /*! | |
| 1431 * \brief recogTrainFromBoot() | |
| 1432 * | |
| 1433 * \param[in] recogboot labeled boot recognizer | |
| 1434 * \param[in] pixas set of unlabeled input characters | |
| 1435 * \param[in] minscore min score for accepting the example; e.g., 0.75 | |
| 1436 * \param[in] threshold for binarization, if needed | |
| 1437 * \param[in] debug 1 for debug output saved to recogboot; 0 otherwise | |
| 1438 * \return pixad labeled version of input pixas, trained on a BSR, | |
| 1439 * or NULL on error | |
| 1440 * | |
| 1441 * <pre> | |
| 1442 * Notes: | |
| 1443 * (1) This takes %pixas of unscaled single characters and %recboot, | |
| 1444 * a bootstrep recognizer (BSR) that has been set up with parameters | |
| 1445 * * scaleh: scale all templates to this height | |
| 1446 * * linew: width of normalized strokes, or 0 if using | |
| 1447 * the input image | |
| 1448 * It modifies the pix in %pixas accordingly and correlates | |
| 1449 * with the templates in the BSR. It returns those input | |
| 1450 * images in %pixas whose best correlation with the BSR is at | |
| 1451 * or above %minscore. The returned pix have added text labels | |
| 1452 * for the text string of the class to which the best | |
| 1453 * correlated template belongs. | |
| 1454 * (2) Identification occurs in scaled mode (typically with h = 40), | |
| 1455 * optionally using a width-normalized line images derived | |
| 1456 * from those in %pixas. | |
| 1457 * </pre> | |
| 1458 */ | |
| 1459 PIXA * | |
| 1460 recogTrainFromBoot(L_RECOG *recogboot, | |
| 1461 PIXA *pixas, | |
| 1462 l_float32 minscore, | |
| 1463 l_int32 threshold, | |
| 1464 l_int32 debug) | |
| 1465 { | |
| 1466 char *text; | |
| 1467 l_int32 i, n, same, maxd, scaleh, linew; | |
| 1468 l_float32 score; | |
| 1469 PIX *pix1, *pix2, *pixdb = NULL; | |
| 1470 PIXA *pixa1, *pixa2, *pixa3, *pixad; | |
| 1471 | |
| 1472 if (!recogboot) | |
| 1473 return (PIXA *)ERROR_PTR("recogboot not defined", __func__, NULL); | |
| 1474 if (!pixas) | |
| 1475 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 1476 | |
| 1477 /* Make sure all input pix are 1 bpp */ | |
| 1478 if ((n = pixaGetCount(pixas)) == 0) | |
| 1479 return (PIXA *)ERROR_PTR("no pix in pixa", __func__, NULL); | |
| 1480 pixaVerifyDepth(pixas, &same, &maxd); | |
| 1481 if (maxd == 1) { | |
| 1482 pixa1 = pixaCopy(pixas, L_COPY); | |
| 1483 } else { | |
| 1484 pixa1 = pixaCreate(n); | |
| 1485 for (i = 0; i < n; i++) { | |
| 1486 pix1 = pixaGetPix(pixas, i, L_CLONE); | |
| 1487 pix2 = pixConvertTo1(pix1, threshold); | |
| 1488 pixaAddPix(pixa1, pix2, L_INSERT); | |
| 1489 pixDestroy(&pix1); | |
| 1490 } | |
| 1491 } | |
| 1492 | |
| 1493 /* Scale the input images to match the BSR */ | |
| 1494 scaleh = recogboot->scaleh; | |
| 1495 linew = recogboot->linew; | |
| 1496 pixa2 = pixaCreate(n); | |
| 1497 for (i = 0; i < n; i++) { | |
| 1498 pix1 = pixaGetPix(pixa1, i, L_CLONE); | |
| 1499 pix2 = pixScaleToSize(pix1, 0, scaleh); | |
| 1500 pixaAddPix(pixa2, pix2, L_INSERT); | |
| 1501 pixDestroy(&pix1); | |
| 1502 } | |
| 1503 pixaDestroy(&pixa1); | |
| 1504 | |
| 1505 /* Optionally convert to width-normalized line */ | |
| 1506 if (linew > 0) | |
| 1507 pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8); | |
| 1508 else | |
| 1509 pixa3 = pixaCopy(pixa2, L_CLONE); | |
| 1510 pixaDestroy(&pixa2); | |
| 1511 | |
| 1512 /* Identify using recogboot */ | |
| 1513 n = pixaGetCount(pixa3); | |
| 1514 pixad = pixaCreate(n); | |
| 1515 for (i = 0; i < n; i++) { | |
| 1516 pix1 = pixaGetPix(pixa3, i, L_COPY); | |
| 1517 pixSetText(pix1, NULL); /* remove any existing text or labelling */ | |
| 1518 if (!debug) { | |
| 1519 recogIdentifyPix(recogboot, pix1, NULL); | |
| 1520 } else { | |
| 1521 recogIdentifyPix(recogboot, pix1, &pixdb); | |
| 1522 pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT); | |
| 1523 } | |
| 1524 rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL); | |
| 1525 if (score >= minscore) { | |
| 1526 pix2 = pixaGetPix(pixas, i, L_COPY); | |
| 1527 pixSetText(pix2, text); | |
| 1528 pixaAddPix(pixad, pix2, L_INSERT); | |
| 1529 pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY); | |
| 1530 } | |
| 1531 LEPT_FREE(text); | |
| 1532 pixDestroy(&pix1); | |
| 1533 } | |
| 1534 pixaDestroy(&pixa3); | |
| 1535 | |
| 1536 return pixad; | |
| 1537 } | |
| 1538 | |
| 1539 | |
| 1540 /*------------------------------------------------------------------------* | |
| 1541 * Padding the digit training set * | |
| 1542 *------------------------------------------------------------------------*/ | |
| 1543 /*! | |
| 1544 * \brief recogPadDigitTrainingSet() | |
| 1545 * | |
| 1546 * \param[in,out] precog trained; if padding is needed, it is replaced | |
| 1547 * by a a new padded recog | |
| 1548 * \param[in] scaleh must be > 0; suggest ~40. | |
| 1549 * \param[in] linew use 0 for original scanned images | |
| 1550 * \return 0 if OK, 1 on error | |
| 1551 * | |
| 1552 * <pre> | |
| 1553 * Notes: | |
| 1554 * (1) This is a no-op if padding is not needed. However, | |
| 1555 * if it is, this replaces the input recog with a new recog, | |
| 1556 * padded appropriately with templates from a boot recognizer, | |
| 1557 * and set up with correlation templates derived from | |
| 1558 * %scaleh and %linew. | |
| 1559 * </pre> | |
| 1560 */ | |
| 1561 l_ok | |
| 1562 recogPadDigitTrainingSet(L_RECOG **precog, | |
| 1563 l_int32 scaleh, | |
| 1564 l_int32 linew) | |
| 1565 { | |
| 1566 PIXA *pixa; | |
| 1567 L_RECOG *recog1, *recog2; | |
| 1568 SARRAY *sa; | |
| 1569 | |
| 1570 if (!precog) | |
| 1571 return ERROR_INT("&recog not defined", __func__, 1); | |
| 1572 recog1 = *precog; | |
| 1573 | |
| 1574 recogIsPaddingNeeded(recog1, &sa); | |
| 1575 if (!sa) return 0; | |
| 1576 | |
| 1577 /* Get a new pixa with the padding templates added */ | |
| 1578 pixa = recogAddDigitPadTemplates(recog1, sa); | |
| 1579 sarrayDestroy(&sa); | |
| 1580 if (!pixa) | |
| 1581 return ERROR_INT("pixa not made", __func__, 1); | |
| 1582 | |
| 1583 /* Need to use templates that are scaled to a fixed height. */ | |
| 1584 if (scaleh <= 0) { | |
| 1585 L_WARNING("templates must be scaled to fixed height; using %d\n", | |
| 1586 __func__, 40); | |
| 1587 scaleh = 40; | |
| 1588 } | |
| 1589 | |
| 1590 /* Create a hybrid recog, composed of templates from both | |
| 1591 * the original and bootstrap sources. */ | |
| 1592 recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold, | |
| 1593 recog1->maxyshift); | |
| 1594 pixaDestroy(&pixa); | |
| 1595 recogDestroy(precog); | |
| 1596 *precog = recog2; | |
| 1597 return 0; | |
| 1598 } | |
| 1599 | |
| 1600 | |
| 1601 /*! | |
| 1602 * \brief recogIsPaddingNeeded() | |
| 1603 * | |
| 1604 * \param[in] recog trained | |
| 1605 * \param[out] psa addr of returned string containing text value | |
| 1606 * \return 1 on error; 0 if OK, whether or not additional padding | |
| 1607 * templates are required. | |
| 1608 * | |
| 1609 * <pre> | |
| 1610 * Notes: | |
| 1611 * (1) This returns a string array in &sa containing character values | |
| 1612 * for which extra templates are needed; this sarray is | |
| 1613 * used by recogGetPadTemplates(). It returns NULL | |
| 1614 * if no padding templates are needed. | |
| 1615 * </pre> | |
| 1616 */ | |
| 1617 l_int32 | |
| 1618 recogIsPaddingNeeded(L_RECOG *recog, | |
| 1619 SARRAY **psa) | |
| 1620 { | |
| 1621 char *str; | |
| 1622 l_int32 i, nt, min_nopad, nclass, allclasses; | |
| 1623 l_float32 minval; | |
| 1624 NUMA *naclass; | |
| 1625 SARRAY *sa; | |
| 1626 | |
| 1627 if (!psa) | |
| 1628 return ERROR_INT("&sa not defined", __func__, 1); | |
| 1629 *psa = NULL; | |
| 1630 if (!recog) | |
| 1631 return ERROR_INT("recog not defined", __func__, 1); | |
| 1632 | |
| 1633 /* Do we have samples from all classes? */ | |
| 1634 nclass = pixaaGetCount(recog->pixaa_u, &naclass); /* unscaled bitmaps */ | |
| 1635 allclasses = (nclass == recog->charset_size) ? 1 : 0; | |
| 1636 | |
| 1637 /* Are there enough samples in each class already? */ | |
| 1638 min_nopad = recog->min_nopad; | |
| 1639 numaGetMin(naclass, &minval, NULL); | |
| 1640 if (allclasses && (minval >= min_nopad)) { | |
| 1641 numaDestroy(&naclass); | |
| 1642 return 0; | |
| 1643 } | |
| 1644 | |
| 1645 /* Are any classes not represented? */ | |
| 1646 sa = recogAddMissingClassStrings(recog); | |
| 1647 *psa = sa; | |
| 1648 | |
| 1649 /* Are any other classes under-represented? */ | |
| 1650 for (i = 0; i < nclass; i++) { | |
| 1651 numaGetIValue(naclass, i, &nt); | |
| 1652 if (nt < min_nopad) { | |
| 1653 str = sarrayGetString(recog->sa_text, i, L_COPY); | |
| 1654 sarrayAddString(sa, str, L_INSERT); | |
| 1655 } | |
| 1656 } | |
| 1657 numaDestroy(&naclass); | |
| 1658 return 0; | |
| 1659 } | |
| 1660 | |
| 1661 | |
| 1662 /*! | |
| 1663 * \brief recogAddMissingClassStrings() | |
| 1664 * | |
| 1665 * \param[in] recog trained | |
| 1666 * \return sa of class string missing in %recog, or NULL on error | |
| 1667 * | |
| 1668 * <pre> | |
| 1669 * Notes: | |
| 1670 * (1) This returns an empty %sa if there is at least one template | |
| 1671 * in each class in %recog. | |
| 1672 * </pre> | |
| 1673 */ | |
| 1674 static SARRAY * | |
| 1675 recogAddMissingClassStrings(L_RECOG *recog) | |
| 1676 { | |
| 1677 char *text; | |
| 1678 char str[4]; | |
| 1679 l_int32 i, nclass, index, ival; | |
| 1680 NUMA *na; | |
| 1681 SARRAY *sa; | |
| 1682 | |
| 1683 if (!recog) | |
| 1684 return (SARRAY *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 1685 | |
| 1686 /* Only handling digits */ | |
| 1687 nclass = pixaaGetCount(recog->pixaa_u, NULL); /* unscaled bitmaps */ | |
| 1688 if (recog->charset_type != 1 || nclass == 10) | |
| 1689 return sarrayCreate(0); /* empty */ | |
| 1690 | |
| 1691 /* Make an indicator array for missing classes */ | |
| 1692 na = numaCreate(0); | |
| 1693 sa = sarrayCreate(0); | |
| 1694 for (i = 0; i < recog->charset_size; i++) | |
| 1695 numaAddNumber(na, 1); | |
| 1696 for (i = 0; i < nclass; i++) { | |
| 1697 text = sarrayGetString(recog->sa_text, i, L_NOCOPY); | |
| 1698 index = text[0] - '0'; | |
| 1699 numaSetValue(na, index, 0); | |
| 1700 } | |
| 1701 | |
| 1702 /* Convert to string and add to output */ | |
| 1703 for (i = 0; i < nclass; i++) { | |
| 1704 numaGetIValue(na, i, &ival); | |
| 1705 if (ival == 1) { | |
| 1706 str[0] = '0' + i; | |
| 1707 str[1] = '\0'; | |
| 1708 sarrayAddString(sa, str, L_COPY); | |
| 1709 } | |
| 1710 } | |
| 1711 numaDestroy(&na); | |
| 1712 return sa; | |
| 1713 } | |
| 1714 | |
| 1715 | |
| 1716 /*! | |
| 1717 * \brief recogAddDigitPadTemplates() | |
| 1718 * | |
| 1719 * \param[in] recog trained | |
| 1720 * \param[in] sa set of text strings that need to be padded | |
| 1721 * \return pixa of all templates from %recog and the additional pad | |
| 1722 * templates from a boot recognizer; or NULL on error | |
| 1723 * | |
| 1724 * <pre> | |
| 1725 * Notes: | |
| 1726 * (1) Call recogIsPaddingNeeded() first, which returns %sa of | |
| 1727 * template text strings for classes where more templates | |
| 1728 * are needed. | |
| 1729 * </pre> | |
| 1730 */ | |
| 1731 PIXA * | |
| 1732 recogAddDigitPadTemplates(L_RECOG *recog, | |
| 1733 SARRAY *sa) | |
| 1734 { | |
| 1735 char *str, *text; | |
| 1736 l_int32 i, j, n, nt; | |
| 1737 PIX *pix; | |
| 1738 PIXA *pixa1, *pixa2; | |
| 1739 | |
| 1740 if (!recog) | |
| 1741 return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 1742 if (!sa) | |
| 1743 return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL); | |
| 1744 if (recogCharsetAvailable(recog->charset_type) == FALSE) | |
| 1745 return (PIXA *)ERROR_PTR("boot charset not available", __func__, NULL); | |
| 1746 | |
| 1747 /* Make boot recog templates */ | |
| 1748 pixa1 = recogMakeBootDigitTemplates(0, 0); | |
| 1749 n = pixaGetCount(pixa1); | |
| 1750 | |
| 1751 /* Extract the unscaled templates from %recog */ | |
| 1752 pixa2 = recogExtractPixa(recog); | |
| 1753 | |
| 1754 /* Add selected boot recog templates based on the text strings in sa */ | |
| 1755 nt = sarrayGetCount(sa); | |
| 1756 for (i = 0; i < n; i++) { | |
| 1757 pix = pixaGetPix(pixa1, i, L_CLONE); | |
| 1758 text = pixGetText(pix); | |
| 1759 for (j = 0; j < nt; j++) { | |
| 1760 str = sarrayGetString(sa, j, L_NOCOPY); | |
| 1761 if (!strcmp(text, str)) { | |
| 1762 pixaAddPix(pixa2, pix, L_COPY); | |
| 1763 break; | |
| 1764 } | |
| 1765 } | |
| 1766 pixDestroy(&pix); | |
| 1767 } | |
| 1768 | |
| 1769 pixaDestroy(&pixa1); | |
| 1770 return pixa2; | |
| 1771 } | |
| 1772 | |
| 1773 | |
| 1774 /*! | |
| 1775 * \brief recogCharsetAvailable() | |
| 1776 * | |
| 1777 * \param[in] type of charset for padding | |
| 1778 * \return 1 if available; 0 if not. | |
| 1779 */ | |
| 1780 static l_int32 | |
| 1781 recogCharsetAvailable(l_int32 type) | |
| 1782 { | |
| 1783 l_int32 ret; | |
| 1784 | |
| 1785 switch (type) | |
| 1786 { | |
| 1787 case L_ARABIC_NUMERALS: | |
| 1788 ret = TRUE; | |
| 1789 break; | |
| 1790 case L_LC_ROMAN_NUMERALS: | |
| 1791 case L_UC_ROMAN_NUMERALS: | |
| 1792 case L_LC_ALPHA: | |
| 1793 case L_UC_ALPHA: | |
| 1794 L_INFO("charset type %d not available\n", __func__, type); | |
| 1795 ret = FALSE; | |
| 1796 break; | |
| 1797 default: | |
| 1798 L_INFO("charset type %d is unknown\n", __func__, type); | |
| 1799 ret = FALSE; | |
| 1800 break; | |
| 1801 } | |
| 1802 | |
| 1803 return ret; | |
| 1804 } | |
| 1805 | |
| 1806 | |
| 1807 /*------------------------------------------------------------------------* | |
| 1808 * Making a boot digit recognizer * | |
| 1809 *------------------------------------------------------------------------*/ | |
| 1810 /*! | |
| 1811 * \brief recogMakeBootDigitRecog() | |
| 1812 * | |
| 1813 * \param[in] nsamp number of samples of each digit; or 0 | |
| 1814 * \param[in] scaleh scale all heights to this; typ. use 40 | |
| 1815 * \param[in] linew normalized line width; typ. use 5; 0 to skip | |
| 1816 * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1 | |
| 1817 * \param[in] debug 1 for showing templates; 0 otherwise | |
| 1818 * \return recog, or NULL on error | |
| 1819 * | |
| 1820 * <pre> | |
| 1821 * Notes: | |
| 1822 * (1) This takes a set of pre-computed, labeled pixa of single | |
| 1823 * digits, and generates a recognizer from them. | |
| 1824 * The templates used in the recognizer can be modified by: | |
| 1825 * - scaling (isotropically to fixed height) | |
| 1826 * - generating a skeleton and thickening so that all strokes | |
| 1827 * have the same width. | |
| 1828 * (2) The resulting templates are scaled versions of either the | |
| 1829 * input bitmaps or images with fixed line widths. To use the | |
| 1830 * input bitmaps, set %linew = 0; otherwise, set %linew to the | |
| 1831 * desired line width. | |
| 1832 * (3) If %nsamp == 0, this uses and extends the output from | |
| 1833 * three boot generators: | |
| 1834 * l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3. | |
| 1835 * Otherwise, it uses exactly %nsamp templates of each digit, | |
| 1836 * extracted by l_bootnum_gen4. | |
| 1837 * </pre> | |
| 1838 */ | |
| 1839 L_RECOG * | |
| 1840 recogMakeBootDigitRecog(l_int32 nsamp, | |
| 1841 l_int32 scaleh, | |
| 1842 l_int32 linew, | |
| 1843 l_int32 maxyshift, | |
| 1844 l_int32 debug) | |
| 1845 | |
| 1846 { | |
| 1847 PIXA *pixa; | |
| 1848 L_RECOG *recog; | |
| 1849 | |
| 1850 /* Get the templates, extended by horizontal scaling */ | |
| 1851 pixa = recogMakeBootDigitTemplates(nsamp, debug); | |
| 1852 | |
| 1853 /* Make the boot recog; recogModifyTemplate() will scale the | |
| 1854 * templates and optionally turn them into strokes of fixed width. */ | |
| 1855 recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift); | |
| 1856 pixaDestroy(&pixa); | |
| 1857 if (debug) | |
| 1858 recogShowContent(stderr, recog, 0, 1); | |
| 1859 | |
| 1860 return recog; | |
| 1861 } | |
| 1862 | |
| 1863 | |
| 1864 /*! | |
| 1865 * \brief recogMakeBootDigitTemplates() | |
| 1866 * | |
| 1867 * \param[in] nsamp number of samples of each digit; or 0 | |
| 1868 * \param[in] debug 1 for display of templates | |
| 1869 * \return pixa of templates; or NULL on error | |
| 1870 * | |
| 1871 * <pre> | |
| 1872 * Notes: | |
| 1873 * (1) See recogMakeBootDigitRecog(). | |
| 1874 * </pre> | |
| 1875 */ | |
| 1876 PIXA * | |
| 1877 recogMakeBootDigitTemplates(l_int32 nsamp, | |
| 1878 l_int32 debug) | |
| 1879 { | |
| 1880 NUMA *na1; | |
| 1881 PIX *pix1, *pix2, *pix3; | |
| 1882 PIXA *pixa1, *pixa2, *pixa3; | |
| 1883 | |
| 1884 if (nsamp > 0) { | |
| 1885 pixa1 = l_bootnum_gen4(nsamp); | |
| 1886 if (debug) { | |
| 1887 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, | |
| 1888 2, 6, 0xff000000); | |
| 1889 pixDisplay(pix1, 0, 0); | |
| 1890 pixDestroy(&pix1); | |
| 1891 } | |
| 1892 return pixa1; | |
| 1893 } | |
| 1894 | |
| 1895 /* Else, generate from 3 pixa */ | |
| 1896 pixa1 = l_bootnum_gen1(); | |
| 1897 pixa2 = l_bootnum_gen2(); | |
| 1898 pixa3 = l_bootnum_gen3(); | |
| 1899 if (debug) { | |
| 1900 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000); | |
| 1901 pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000); | |
| 1902 pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000); | |
| 1903 pixDisplay(pix1, 0, 0); | |
| 1904 pixDisplay(pix2, 600, 0); | |
| 1905 pixDisplay(pix3, 1200, 0); | |
| 1906 pixDestroy(&pix1); | |
| 1907 pixDestroy(&pix2); | |
| 1908 pixDestroy(&pix3); | |
| 1909 } | |
| 1910 pixaJoin(pixa1, pixa2, 0, -1); | |
| 1911 pixaJoin(pixa1, pixa3, 0, -1); | |
| 1912 pixaDestroy(&pixa2); | |
| 1913 pixaDestroy(&pixa3); | |
| 1914 | |
| 1915 /* Extend by horizontal scaling */ | |
| 1916 na1 = numaCreate(4); | |
| 1917 numaAddNumber(na1, 0.9f); | |
| 1918 numaAddNumber(na1, 1.1f); | |
| 1919 numaAddNumber(na1, 1.2f); | |
| 1920 pixa2 = pixaExtendByScaling(pixa1, na1, L_HORIZ, 1); | |
| 1921 | |
| 1922 pixaDestroy(&pixa1); | |
| 1923 numaDestroy(&na1); | |
| 1924 return pixa2; | |
| 1925 } | |
| 1926 | |
| 1927 | |
| 1928 /*------------------------------------------------------------------------* | |
| 1929 * Debugging * | |
| 1930 *------------------------------------------------------------------------*/ | |
| 1931 /*! | |
| 1932 * \brief recogShowContent() | |
| 1933 * | |
| 1934 * \param[in] fp file stream | |
| 1935 * \param[in] recog | |
| 1936 * \param[in] index for naming of output files of template images | |
| 1937 * \param[in] display 1 for showing template images; 0 otherwise | |
| 1938 * \return 0 if OK, 1 on error | |
| 1939 */ | |
| 1940 l_ok | |
| 1941 recogShowContent(FILE *fp, | |
| 1942 L_RECOG *recog, | |
| 1943 l_int32 index, | |
| 1944 l_int32 display) | |
| 1945 { | |
| 1946 char buf[128]; | |
| 1947 l_int32 i, val, count; | |
| 1948 PIX *pix; | |
| 1949 NUMA *na; | |
| 1950 | |
| 1951 if (!fp) | |
| 1952 return ERROR_INT("stream not defined", __func__, 1); | |
| 1953 if (!recog) | |
| 1954 return ERROR_INT("recog not defined", __func__, 1); | |
| 1955 | |
| 1956 fprintf(fp, "Debug print of recog contents\n"); | |
| 1957 fprintf(fp, " Setsize: %d\n", recog->setsize); | |
| 1958 fprintf(fp, " Binarization threshold: %d\n", recog->threshold); | |
| 1959 fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift); | |
| 1960 if (recog->linew <= 0) | |
| 1961 fprintf(fp, " Using image templates for matching\n"); | |
| 1962 else | |
| 1963 fprintf(fp, " Using templates with fixed line width for matching\n"); | |
| 1964 if (recog->scalew == 0) | |
| 1965 fprintf(fp, " No width scaling of templates\n"); | |
| 1966 else | |
| 1967 fprintf(fp, " Template width scaled to %d\n", recog->scalew); | |
| 1968 if (recog->scaleh == 0) | |
| 1969 fprintf(fp, " No height scaling of templates\n"); | |
| 1970 else | |
| 1971 fprintf(fp, " Template height scaled to %d\n", recog->scaleh); | |
| 1972 fprintf(fp, " Number of samples in each class:\n"); | |
| 1973 pixaaGetCount(recog->pixaa_u, &na); | |
| 1974 for (i = 0; i < recog->setsize; i++) { | |
| 1975 l_dnaGetIValue(recog->dna_tochar, i, &val); | |
| 1976 numaGetIValue(na, i, &count); | |
| 1977 if (val < 128) | |
| 1978 fprintf(fp, " class %d, char %c: %d\n", i, val, count); | |
| 1979 else | |
| 1980 fprintf(fp, " class %d, val %d: %d\n", i, val, count); | |
| 1981 } | |
| 1982 numaDestroy(&na); | |
| 1983 | |
| 1984 if (display) { | |
| 1985 lept_mkdir("lept/recog"); | |
| 1986 pix = pixaaDisplayByPixa(recog->pixaa_u, 50, 1.0, 20, 20, 0); | |
| 1987 snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index); | |
| 1988 pixWriteDebug(buf, pix, IFF_PNG); | |
| 1989 pixDisplay(pix, 0, 200 * index); | |
| 1990 pixDestroy(&pix); | |
| 1991 if (recog->train_done) { | |
| 1992 pix = pixaaDisplayByPixa(recog->pixaa, 50, 1.0, 20, 20, 0); | |
| 1993 snprintf(buf, sizeof(buf), | |
| 1994 "/tmp/lept/recog/templates.%d.png", index); | |
| 1995 pixWriteDebug(buf, pix, IFF_PNG); | |
| 1996 pixDisplay(pix, 800, 200 * index); | |
| 1997 pixDestroy(&pix); | |
| 1998 } | |
| 1999 } | |
| 2000 return 0; | |
| 2001 } | |
| 2002 | |
| 2003 | |
| 2004 /*! | |
| 2005 * \brief recogDebugAverages() | |
| 2006 * | |
| 2007 * \param[in] recog addr of recog | |
| 2008 * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both | |
| 2009 * \return 0 if OK, 1 on error | |
| 2010 * | |
| 2011 * <pre> | |
| 2012 * Notes: | |
| 2013 * (1) Generates an image that pairs each of the input images used | |
| 2014 * in training with the average template that it is best | |
| 2015 * correlated to. This is written into the recog. | |
| 2016 * (2) It also generates pixa_tr of all the input training images, | |
| 2017 * which can be used, e.g., in recogShowMatchesInRange(). | |
| 2018 * (3) Returns an error if the averaging function finds bad classes. | |
| 2019 * </pre> | |
| 2020 */ | |
| 2021 l_ok | |
| 2022 recogDebugAverages(L_RECOG *recog, | |
| 2023 l_int32 debug) | |
| 2024 { | |
| 2025 l_int32 i, j, n, np, index; | |
| 2026 l_float32 score; | |
| 2027 PIX *pix1, *pix2, *pix3; | |
| 2028 PIXA *pixa, *pixat; | |
| 2029 PIXAA *paa1, *paa2; | |
| 2030 | |
| 2031 if (!recog) | |
| 2032 return ERROR_INT("recog not defined", __func__, 1); | |
| 2033 | |
| 2034 /* Mark the training as finished if necessary, and make sure | |
| 2035 * that the average templates have been built. */ | |
| 2036 if (recogAverageSamples(recog, 0) != 0) | |
| 2037 return ERROR_INT("averaging failed", __func__, 1); | |
| 2038 | |
| 2039 /* Save a pixa of all the training examples */ | |
| 2040 paa1 = recog->pixaa; | |
| 2041 if (!recog->pixa_tr) | |
| 2042 recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE); | |
| 2043 | |
| 2044 /* Destroy any existing image and make a new one */ | |
| 2045 if (recog->pixdb_ave) | |
| 2046 pixDestroy(&recog->pixdb_ave); | |
| 2047 n = pixaaGetCount(paa1, NULL); | |
| 2048 paa2 = pixaaCreate(n); | |
| 2049 for (i = 0; i < n; i++) { | |
| 2050 pixa = pixaCreate(0); | |
| 2051 pixat = pixaaGetPixa(paa1, i, L_CLONE); | |
| 2052 np = pixaGetCount(pixat); | |
| 2053 for (j = 0; j < np; j++) { | |
| 2054 pix1 = pixaaGetPix(paa1, i, j, L_CLONE); | |
| 2055 recogIdentifyPix(recog, pix1, &pix2); | |
| 2056 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, | |
| 2057 NULL, NULL); | |
| 2058 if (debug >= 2) | |
| 2059 lept_stderr("index = %d, score = %7.3f\n", index, score); | |
| 2060 pix3 = pixAddBorder(pix2, 2, 1); | |
| 2061 pixaAddPix(pixa, pix3, L_INSERT); | |
| 2062 pixDestroy(&pix1); | |
| 2063 pixDestroy(&pix2); | |
| 2064 } | |
| 2065 pixaaAddPixa(paa2, pixa, L_INSERT); | |
| 2066 pixaDestroy(&pixat); | |
| 2067 } | |
| 2068 recog->pixdb_ave = pixaaDisplayByPixa(paa2, 50, 1.0, 20, 20, 0); | |
| 2069 if (debug % 2) { | |
| 2070 lept_mkdir("lept/recog"); | |
| 2071 pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave, | |
| 2072 IFF_PNG); | |
| 2073 pixDisplay(recog->pixdb_ave, 100, 100); | |
| 2074 } | |
| 2075 | |
| 2076 pixaaDestroy(&paa2); | |
| 2077 return 0; | |
| 2078 } | |
| 2079 | |
| 2080 | |
| 2081 /*! | |
| 2082 * \brief recogShowAverageTemplates() | |
| 2083 * | |
| 2084 * \param[in] recog | |
| 2085 * \return 0 on success, 1 on failure | |
| 2086 * | |
| 2087 * <pre> | |
| 2088 * Notes: | |
| 2089 * (1) This debug routine generates a display of the averaged templates, | |
| 2090 * both scaled and unscaled, with the centroid visible in red. | |
| 2091 * </pre> | |
| 2092 */ | |
| 2093 l_int32 | |
| 2094 recogShowAverageTemplates(L_RECOG *recog) | |
| 2095 { | |
| 2096 l_int32 i, size; | |
| 2097 l_float32 x, y; | |
| 2098 PIX *pix1, *pix2, *pixr; | |
| 2099 PIXA *pixat, *pixadb; | |
| 2100 | |
| 2101 if (!recog) | |
| 2102 return ERROR_INT("recog not defined", __func__, 1); | |
| 2103 | |
| 2104 lept_stderr("min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n", | |
| 2105 recog->minwidth_u, recog->maxwidth_u, | |
| 2106 recog->minheight_u, recog->maxheight_u); | |
| 2107 lept_stderr("min splitw = %d, max splith = %d\n", | |
| 2108 recog->min_splitw, recog->max_splith); | |
| 2109 | |
| 2110 pixaDestroy(&recog->pixadb_ave); | |
| 2111 | |
| 2112 pixr = pixCreate(3, 3, 32); /* 3x3 red square for centroid location */ | |
| 2113 pixSetAllArbitrary(pixr, 0xff000000); | |
| 2114 pixadb = pixaCreate(2); | |
| 2115 | |
| 2116 /* Unscaled bitmaps */ | |
| 2117 size = recog->setsize; | |
| 2118 pixat = pixaCreate(size); | |
| 2119 for (i = 0; i < size; i++) { | |
| 2120 if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL) | |
| 2121 continue; | |
| 2122 pix2 = pixConvertTo32(pix1); | |
| 2123 ptaGetPt(recog->pta_u, i, &x, &y); | |
| 2124 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, | |
| 2125 PIX_SRC, pixr, 0, 0); | |
| 2126 pixaAddPix(pixat, pix2, L_INSERT); | |
| 2127 pixDestroy(&pix1); | |
| 2128 } | |
| 2129 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); | |
| 2130 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 2131 pixDisplay(pix1, 100, 100); | |
| 2132 pixaDestroy(&pixat); | |
| 2133 | |
| 2134 /* Scaled bitmaps */ | |
| 2135 pixat = pixaCreate(size); | |
| 2136 for (i = 0; i < size; i++) { | |
| 2137 if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL) | |
| 2138 continue; | |
| 2139 pix2 = pixConvertTo32(pix1); | |
| 2140 ptaGetPt(recog->pta, i, &x, &y); | |
| 2141 pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, | |
| 2142 PIX_SRC, pixr, 0, 0); | |
| 2143 pixaAddPix(pixat, pix2, L_INSERT); | |
| 2144 pixDestroy(&pix1); | |
| 2145 } | |
| 2146 pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); | |
| 2147 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 2148 pixDisplay(pix1, 100, 100); | |
| 2149 pixaDestroy(&pixat); | |
| 2150 pixDestroy(&pixr); | |
| 2151 recog->pixadb_ave = pixadb; | |
| 2152 return 0; | |
| 2153 } | |
| 2154 | |
| 2155 | |
| 2156 /*! | |
| 2157 * \brief pixDisplayOutliers() | |
| 2158 * | |
| 2159 * \param[in] pixas unscaled labeled templates | |
| 2160 * \param[in] nas scores of templates (against class averages) | |
| 2161 * \return pix tiled pixa with text and scores, or NULL on failure | |
| 2162 * | |
| 2163 * <pre> | |
| 2164 * Notes: | |
| 2165 * (1) This debug routine is called from recogRemoveOutliers2(), | |
| 2166 * and takes the saved templates and their scores as input. | |
| 2167 * </pre> | |
| 2168 */ | |
| 2169 static PIX * | |
| 2170 pixDisplayOutliers(PIXA *pixas, | |
| 2171 NUMA *nas) | |
| 2172 { | |
| 2173 char *text; | |
| 2174 char buf[16]; | |
| 2175 l_int32 i, n; | |
| 2176 l_float32 fval; | |
| 2177 PIX *pix1, *pix2; | |
| 2178 PIXA *pixa1; | |
| 2179 | |
| 2180 if (!pixas) | |
| 2181 return (PIX *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 2182 if (!nas) | |
| 2183 return (PIX *)ERROR_PTR("nas not defined", __func__, NULL); | |
| 2184 n = pixaGetCount(pixas); | |
| 2185 if (numaGetCount(nas) != n) | |
| 2186 return (PIX *)ERROR_PTR("pixas and nas sizes differ", __func__, NULL); | |
| 2187 | |
| 2188 pixa1 = pixaCreate(n); | |
| 2189 for (i = 0; i < n; i++) { | |
| 2190 pix1 = pixaGetPix(pixas, i, L_CLONE); | |
| 2191 pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL); | |
| 2192 text = pixGetText(pix1); | |
| 2193 numaGetFValue(nas, i, &fval); | |
| 2194 snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval); | |
| 2195 pixSetText(pix2, buf); | |
| 2196 pixaAddPix(pixa1, pix2, L_INSERT); | |
| 2197 pixDestroy(&pix1); | |
| 2198 } | |
| 2199 pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000); | |
| 2200 pixaDestroy(&pixa1); | |
| 2201 return pix1; | |
| 2202 } | |
| 2203 | |
| 2204 | |
| 2205 /*! | |
| 2206 * \brief recogDisplayOutlier() | |
| 2207 * | |
| 2208 * \param[in] recog | |
| 2209 * \param[in] iclass sample is in this class | |
| 2210 * \param[in] jsamp index of sample is class i | |
| 2211 * \param[in] maxclass index of class with closest average to sample | |
| 2212 * \param[in] maxscore score of sample with average of class %maxclass | |
| 2213 * \return pix sample and template images, with score, or NULL on error | |
| 2214 * | |
| 2215 * <pre> | |
| 2216 * Notes: | |
| 2217 * (1) This shows three templates, side-by-side: | |
| 2218 * - The outlier sample | |
| 2219 * - The average template from the same class | |
| 2220 * - The average class template that best matched the outlier sample | |
| 2221 * </pre> | |
| 2222 */ | |
| 2223 static PIX * | |
| 2224 recogDisplayOutlier(L_RECOG *recog, | |
| 2225 l_int32 iclass, | |
| 2226 l_int32 jsamp, | |
| 2227 l_int32 maxclass, | |
| 2228 l_float32 maxscore) | |
| 2229 { | |
| 2230 char buf[64]; | |
| 2231 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 2232 PIXA *pixa; | |
| 2233 | |
| 2234 if (!recog) | |
| 2235 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 2236 | |
| 2237 pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE); | |
| 2238 pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE); | |
| 2239 pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE); | |
| 2240 pixa = pixaCreate(3); | |
| 2241 pixaAddPix(pixa, pix1, L_INSERT); | |
| 2242 pixaAddPix(pixa, pix2, L_INSERT); | |
| 2243 pixaAddPix(pixa, pix3, L_INSERT); | |
| 2244 pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2); | |
| 2245 snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass, | |
| 2246 maxscore); | |
| 2247 pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000, | |
| 2248 L_ADD_BELOW, NULL); | |
| 2249 pixDestroy(&pix4); | |
| 2250 pixaDestroy(&pixa); | |
| 2251 return pix5; | |
| 2252 } | |
| 2253 | |
| 2254 | |
| 2255 /*! | |
| 2256 * \brief recogShowMatchesInRange() | |
| 2257 * | |
| 2258 * \param[in] recog | |
| 2259 * \param[in] pixa of 1 bpp images to match | |
| 2260 * \param[in] minscore min score to include output | |
| 2261 * \param[in] maxscore max score to include output | |
| 2262 * \param[in] display 1 to display the result | |
| 2263 * \return 0 if OK, 1 on error | |
| 2264 * | |
| 2265 * <pre> | |
| 2266 * Notes: | |
| 2267 * (1) This gives a visual output of the best matches for a given | |
| 2268 * range of scores. Each pair of images can optionally be | |
| 2269 * labeled with the index of the best match and the correlation. | |
| 2270 * (2) To use this, save a set of 1 bpp images (labeled or | |
| 2271 * unlabeled) that can be given to a recognizer in a pixa. | |
| 2272 * Then call this function with the pixa and parameters | |
| 2273 * to filter a range of scores. | |
| 2274 * </pre> | |
| 2275 */ | |
| 2276 l_ok | |
| 2277 recogShowMatchesInRange(L_RECOG *recog, | |
| 2278 PIXA *pixa, | |
| 2279 l_float32 minscore, | |
| 2280 l_float32 maxscore, | |
| 2281 l_int32 display) | |
| 2282 { | |
| 2283 l_int32 i, n, index, depth; | |
| 2284 l_float32 score; | |
| 2285 NUMA *nascore, *naindex; | |
| 2286 PIX *pix1, *pix2; | |
| 2287 PIXA *pixa1, *pixa2; | |
| 2288 | |
| 2289 if (!recog) | |
| 2290 return ERROR_INT("recog not defined", __func__, 1); | |
| 2291 if (!pixa) | |
| 2292 return ERROR_INT("pixa not defined", __func__, 1); | |
| 2293 | |
| 2294 /* Run the recognizer on the set of images */ | |
| 2295 n = pixaGetCount(pixa); | |
| 2296 nascore = numaCreate(n); | |
| 2297 naindex = numaCreate(n); | |
| 2298 pixa1 = pixaCreate(n); | |
| 2299 for (i = 0; i < n; i++) { | |
| 2300 pix1 = pixaGetPix(pixa, i, L_CLONE); | |
| 2301 recogIdentifyPix(recog, pix1, &pix2); | |
| 2302 rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL); | |
| 2303 numaAddNumber(nascore, score); | |
| 2304 numaAddNumber(naindex, index); | |
| 2305 pixaAddPix(pixa1, pix2, L_INSERT); | |
| 2306 pixDestroy(&pix1); | |
| 2307 } | |
| 2308 | |
| 2309 /* Filter the set and optionally add text to each */ | |
| 2310 pixa2 = pixaCreate(n); | |
| 2311 depth = 1; | |
| 2312 for (i = 0; i < n; i++) { | |
| 2313 numaGetFValue(nascore, i, &score); | |
| 2314 if (score < minscore || score > maxscore) continue; | |
| 2315 pix1 = pixaGetPix(pixa1, i, L_CLONE); | |
| 2316 numaGetIValue(naindex, i, &index); | |
| 2317 pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score); | |
| 2318 if (i == 0) depth = pixGetDepth(pix2); | |
| 2319 pixaAddPix(pixa2, pix2, L_INSERT); | |
| 2320 pixDestroy(&pix1); | |
| 2321 } | |
| 2322 | |
| 2323 /* Package it up */ | |
| 2324 pixDestroy(&recog->pixdb_range); | |
| 2325 if (pixaGetCount(pixa2) > 0) { | |
| 2326 recog->pixdb_range = | |
| 2327 pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1); | |
| 2328 if (display) | |
| 2329 pixDisplay(recog->pixdb_range, 300, 100); | |
| 2330 } else { | |
| 2331 L_INFO("no character matches in the range of scores\n", __func__); | |
| 2332 } | |
| 2333 | |
| 2334 pixaDestroy(&pixa1); | |
| 2335 pixaDestroy(&pixa2); | |
| 2336 numaDestroy(&nascore); | |
| 2337 numaDestroy(&naindex); | |
| 2338 return 0; | |
| 2339 } | |
| 2340 | |
| 2341 | |
| 2342 /*! | |
| 2343 * \brief recogShowMatch() | |
| 2344 * | |
| 2345 * \param[in] recog | |
| 2346 * \param[in] pix1 input pix; several possibilities | |
| 2347 * \param[in] pix2 [optional] matching template | |
| 2348 * \param[in] box [optional] region in pix1 for which pix2 matches | |
| 2349 * \param[in] index index of matching template; use -1 to disable printing | |
| 2350 * \param[in] score score of match | |
| 2351 * \return pixd pair of images, showing input pix and best template, | |
| 2352 * optionally with matching information, or NULL on error. | |
| 2353 * | |
| 2354 * <pre> | |
| 2355 * Notes: | |
| 2356 * (1) pix1 can be one of these: | |
| 2357 * (a) The input pix alone, which can be either a single character | |
| 2358 * (box == NULL) or several characters that need to be | |
| 2359 * segmented. If more than character is present, the box | |
| 2360 * region is displayed with an outline. | |
| 2361 * (b) Both the input pix and the matching template. In this case, | |
| 2362 * pix2 and box will both be null. | |
| 2363 * (2) If the bmf has been made (by a call to recogMakeBmf()) | |
| 2364 * and the index >= 0, the text field, match score and index | |
| 2365 * will be rendered; otherwise their values will be ignored. | |
| 2366 * </pre> | |
| 2367 */ | |
| 2368 PIX * | |
| 2369 recogShowMatch(L_RECOG *recog, | |
| 2370 PIX *pix1, | |
| 2371 PIX *pix2, | |
| 2372 BOX *box, | |
| 2373 l_int32 index, | |
| 2374 l_float32 score) | |
| 2375 { | |
| 2376 char buf[32]; | |
| 2377 char *text; | |
| 2378 L_BMF *bmf; | |
| 2379 PIX *pix3, *pix4, *pix5, *pixd; | |
| 2380 PIXA *pixa; | |
| 2381 | |
| 2382 if (!recog) | |
| 2383 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 2384 if (!pix1) | |
| 2385 return (PIX *)ERROR_PTR("pix1 not defined", __func__, NULL); | |
| 2386 | |
| 2387 bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL; | |
| 2388 if (!pix2 && !box && !bmf) /* nothing to do */ | |
| 2389 return pixCopy(NULL, pix1); | |
| 2390 | |
| 2391 pix3 = pixConvertTo32(pix1); | |
| 2392 if (box) | |
| 2393 pixRenderBoxArb(pix3, box, 1, 255, 0, 0); | |
| 2394 | |
| 2395 if (pix2) { | |
| 2396 pixa = pixaCreate(2); | |
| 2397 pixaAddPix(pixa, pix3, L_CLONE); | |
| 2398 pixaAddPix(pixa, pix2, L_CLONE); | |
| 2399 pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0); | |
| 2400 pixaDestroy(&pixa); | |
| 2401 } else { | |
| 2402 pix4 = pixCopy(NULL, pix3); | |
| 2403 } | |
| 2404 pixDestroy(&pix3); | |
| 2405 | |
| 2406 if (bmf) { | |
| 2407 pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00); | |
| 2408 recogGetClassString(recog, index, &text); | |
| 2409 snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index); | |
| 2410 pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000, | |
| 2411 L_ADD_BELOW, NULL); | |
| 2412 pixDestroy(&pix5); | |
| 2413 LEPT_FREE(text); | |
| 2414 } else { | |
| 2415 pixd = pixClone(pix4); | |
| 2416 } | |
| 2417 pixDestroy(&pix4); | |
| 2418 | |
| 2419 return pixd; | |
| 2420 } |
