Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/recogident.c @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file recogident.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Top-level identification | |
| 32 * l_int32 recogIdentifyMultiple() | |
| 33 * | |
| 34 * Segmentation and noise removal | |
| 35 * l_int32 recogSplitIntoCharacters() | |
| 36 * | |
| 37 * Greedy character splitting | |
| 38 * l_int32 recogCorrelationBestRow() | |
| 39 * l_int32 recogCorrelationBestChar() | |
| 40 * static l_int32 pixCorrelationBestShift() | |
| 41 * | |
| 42 * Low-level identification of single characters | |
| 43 * l_int32 recogIdentifyPixa() | |
| 44 * l_int32 recogIdentifyPix() | |
| 45 * l_int32 recogSkipIdentify() | |
| 46 * | |
| 47 * Operations for handling identification results | |
| 48 * static L_RCHA *rchaCreate() | |
| 49 * void rchaDestroy() | |
| 50 * static L_RCH *rchCreate() | |
| 51 * void rchDestroy() | |
| 52 * l_int32 rchaExtract() | |
| 53 * l_int32 rchExtract() | |
| 54 * static l_int32 transferRchToRcha() | |
| 55 * | |
| 56 * Preprocessing and filtering | |
| 57 * l_int32 recogProcessToIdentify() | |
| 58 * static PIX *recogPreSplittingFilter() | |
| 59 * static l_int32 recogSplittingFilter() | |
| 60 * | |
| 61 * Postprocessing | |
| 62 * SARRAY *recogExtractNumbers() | |
| 63 * PIX *showExtractNumbers() | |
| 64 * | |
| 65 * Static debug helper | |
| 66 * static void l_showIndicatorSplitValues() | |
| 67 * | |
| 68 * See recogbasic.c for examples of training a recognizer, which is | |
| 69 * required before it can be used for identification. | |
| 70 * | |
| 71 * The character splitter repeatedly does a greedy correlation with each | |
| 72 * averaged unscaled template, at all pixel locations along the text to | |
| 73 * be identified. The vertical alignment is between the template | |
| 74 * centroid and the (moving) windowed centroid, including a delta of | |
| 75 * 1 pixel above and below. The best match then removes part of the | |
| 76 * input image, leaving 1 or 2 pieces, which, after filtering, | |
| 77 * are put in a queue. The process ends when the queue is empty. | |
| 78 * The filtering is based on the size and aspect ratio of the | |
| 79 * remaining pieces; the intent is to remove anything that is | |
| 80 * unlikely to be text, such as small pieces and line graphics. | |
| 81 * | |
| 82 * After splitting, the selected segments are identified using | |
| 83 * the input parameters that were initially specified for the | |
| 84 * recognizer. Unlike the splitter, which uses the averaged | |
| 85 * templates from the unscaled input, the recognizer can use | |
| 86 * either all training examples or averaged templates, and these | |
| 87 * can be either scaled or unscaled. These choices are specified | |
| 88 * when the recognizer is constructed. | |
| 89 * </pre> | |
| 90 */ | |
| 91 | |
| 92 #ifdef HAVE_CONFIG_H | |
| 93 #include <config_auto.h> | |
| 94 #endif /* HAVE_CONFIG_H */ | |
| 95 | |
| 96 #include <string.h> | |
| 97 #include "allheaders.h" | |
| 98 #include "array_internal.h" | |
| 99 | |
| 100 /* There are two methods for splitting characters: DID and greedy. | |
| 101 * The default method is DID. */ | |
| 102 #define SPLIT_WITH_DID 1 | |
| 103 | |
| 104 /* Padding on pix1: added before correlations and removed from result */ | |
| 105 static const l_int32 LeftRightPadding = 32; | |
| 106 | |
| 107 /* Parameters for filtering and sorting connected components in splitter */ | |
| 108 static const l_float32 MinFillFactor = 0.10f; | |
| 109 static const l_int32 DefaultMinHeight = 15; /* min unscaled height */ | |
| 110 static const l_int32 MinOverlap1 = 6; /* in pass 1 of boxaSort2d() */ | |
| 111 static const l_int32 MinOverlap2 = 6; /* in pass 2 of boxaSort2d() */ | |
| 112 static const l_int32 MinHeightPass1 = 5; /* min height to start pass 1 */ | |
| 113 | |
| 114 | |
| 115 static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1, | |
| 116 NUMA *namoment1, l_int32 area2, | |
| 117 l_int32 ycent2, l_int32 maxyshift, | |
| 118 l_int32 *tab8, l_int32 *pdelx, | |
| 119 l_int32 *pdely, l_float32 *pscore, | |
| 120 l_int32 debugflag ); | |
| 121 static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text, | |
| 122 l_int32 sample, l_int32 xloc, l_int32 yloc, | |
| 123 l_int32 width); | |
| 124 static L_RCHA *rchaCreate(); | |
| 125 static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha); | |
| 126 static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, | |
| 127 l_float32 minaf, l_int32 debug); | |
| 128 static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min, | |
| 129 l_float32 minaf, l_int32 *premove, | |
| 130 l_int32 debug); | |
| 131 static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3, | |
| 132 NUMA *na4, NUMA *na5, NUMA *na6); | |
| 133 | |
| 134 /*------------------------------------------------------------------------* | |
| 135 * Identification | |
| 136 *------------------------------------------------------------------------*/ | |
| 137 /*! | |
| 138 * \brief recogIdentifyMultiple() | |
| 139 * | |
| 140 * \param[in] recog with training finished | |
| 141 * \param[in] pixs containing typically a small number of characters | |
| 142 * \param[in] minh remove shorter components; use 0 for default | |
| 143 * \param[in] skipsplit 1 to skip the splitting step | |
| 144 * \param[out] pboxa [optional] locations of identified components | |
| 145 * \param[out] ppixa [optional] images of identified components | |
| 146 * \param[out] ppixdb [optional] debug pix: inputs and best fits | |
| 147 * \param[in] debugsplit 1 returns pix split debugging images | |
| 148 * \return 0 if OK; 1 if nothing is found; 2 for other errors. | |
| 149 * | |
| 150 * <pre> | |
| 151 * Notes: | |
| 152 * (1) This filters the input pixa and calls recogIdentifyPixa() | |
| 153 * (2) Splitting is relatively slow, because it tries to match all | |
| 154 * character templates to all locations. This step can be skipped. | |
| 155 * (3) An attempt is made to order the (optionally) returned images | |
| 156 * and boxes in 2-dimensional sorted order. These can then | |
| 157 * be used to aggregate identified characters into numbers or words. | |
| 158 * One typically wants the pixa, which contains a boxa of the | |
| 159 * extracted subimages. | |
| 160 * </pre> | |
| 161 */ | |
| 162 l_ok | |
| 163 recogIdentifyMultiple(L_RECOG *recog, | |
| 164 PIX *pixs, | |
| 165 l_int32 minh, | |
| 166 l_int32 skipsplit, | |
| 167 BOXA **pboxa, | |
| 168 PIXA **ppixa, | |
| 169 PIX **ppixdb, | |
| 170 l_int32 debugsplit) | |
| 171 { | |
| 172 l_int32 n; | |
| 173 BOXA *boxa; | |
| 174 PIX *pixb; | |
| 175 PIXA *pixa; | |
| 176 | |
| 177 if (pboxa) *pboxa = NULL; | |
| 178 if (ppixa) *ppixa = NULL; | |
| 179 if (ppixdb) *ppixdb = NULL; | |
| 180 if (!recog) | |
| 181 return ERROR_INT("recog not defined", __func__, 2); | |
| 182 if (!recog->train_done) | |
| 183 return ERROR_INT("training not finished", __func__, 2); | |
| 184 if (!pixs) | |
| 185 return ERROR_INT("pixs not defined", __func__, 2); | |
| 186 | |
| 187 /* Binarize if necessary */ | |
| 188 if (pixGetDepth(pixs) > 1) | |
| 189 pixb = pixConvertTo1(pixs, recog->threshold); | |
| 190 else | |
| 191 pixb = pixClone(pixs); | |
| 192 | |
| 193 /* Noise removal and splitting of touching characters */ | |
| 194 recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa, | |
| 195 debugsplit); | |
| 196 pixDestroy(&pixb); | |
| 197 if (!pixa || (n = pixaGetCount(pixa)) == 0) { | |
| 198 pixaDestroy(&pixa); | |
| 199 boxaDestroy(&boxa); | |
| 200 L_WARNING("nothing found\n", __func__); | |
| 201 return 1; | |
| 202 } | |
| 203 | |
| 204 recogIdentifyPixa(recog, pixa, ppixdb); | |
| 205 if (pboxa) | |
| 206 *pboxa = boxa; | |
| 207 else | |
| 208 boxaDestroy(&boxa); | |
| 209 if (ppixa) | |
| 210 *ppixa = pixa; | |
| 211 else | |
| 212 pixaDestroy(&pixa); | |
| 213 return 0; | |
| 214 } | |
| 215 | |
| 216 | |
| 217 /*------------------------------------------------------------------------* | |
| 218 * Segmentation and noise removal * | |
| 219 *------------------------------------------------------------------------*/ | |
| 220 /*! | |
| 221 * \brief recogSplitIntoCharacters() | |
| 222 * | |
| 223 * \param[in] recog | |
| 224 * \param[in] pixs 1 bpp, contains only mostly deskewed text | |
| 225 * \param[in] minh remove shorter components; use 0 for default | |
| 226 * \param[in] skipsplit 1 to skip the splitting step | |
| 227 * \param[out] pboxa character bounding boxes | |
| 228 * \param[out] ppixa character images | |
| 229 * \param[in] debug 1 for results written to pixadb_split | |
| 230 * \return 0 if OK, 1 on error or if no components are returned | |
| 231 * | |
| 232 * <pre> | |
| 233 * Notes: | |
| 234 * (1) This can be given an image that has an arbitrary number | |
| 235 * of text characters. It optionally splits connected | |
| 236 * components based on document image decoding in recogDecode(). | |
| 237 * The returned pixa includes the boxes from which the | |
| 238 * (possibly split) components are extracted. | |
| 239 * (2) After noise filtering, the resulting components are put in | |
| 240 * row-major (2D) order, and the smaller of overlapping | |
| 241 * components are removed if they satisfy conditions of | |
| 242 * relative size and fractional overlap. | |
| 243 * (3) Note that the splitting function uses unscaled templates | |
| 244 * and does not bother returning the class results and scores. | |
| 245 * These are more accurately found later using the scaled templates. | |
| 246 * </pre> | |
| 247 */ | |
| 248 l_ok | |
| 249 recogSplitIntoCharacters(L_RECOG *recog, | |
| 250 PIX *pixs, | |
| 251 l_int32 minh, | |
| 252 l_int32 skipsplit, | |
| 253 BOXA **pboxa, | |
| 254 PIXA **ppixa, | |
| 255 l_int32 debug) | |
| 256 { | |
| 257 static l_int32 ind = 0; | |
| 258 char buf[32]; | |
| 259 l_int32 i, xoff, yoff, empty, maxw, bw, ncomp, scaling; | |
| 260 BOX *box; | |
| 261 BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxad; | |
| 262 BOXAA *baa; | |
| 263 PIX *pix, *pix1, *pix2, *pix3; | |
| 264 PIXA *pixa; | |
| 265 | |
| 266 lept_mkdir("lept/recog"); | |
| 267 | |
| 268 if (pboxa) *pboxa = NULL; | |
| 269 if (ppixa) *ppixa = NULL; | |
| 270 if (!pboxa || !ppixa) | |
| 271 return ERROR_INT("&boxa and &pixa not defined", __func__, 1); | |
| 272 if (!recog) | |
| 273 return ERROR_INT("recog not defined", __func__, 1); | |
| 274 if (!recog->train_done) | |
| 275 return ERROR_INT("training not finished", __func__, 1); | |
| 276 if (!pixs || pixGetDepth(pixs) != 1) | |
| 277 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 278 if (minh <= 0) minh = DefaultMinHeight; | |
| 279 pixZero(pixs, &empty); | |
| 280 if (empty) return 1; | |
| 281 | |
| 282 /* Small vertical close for consolidation. Don't do a horizontal | |
| 283 * closing, because it might join separate characters. */ | |
| 284 pix1 = pixMorphSequence(pixs, "c1.3", 0); | |
| 285 | |
| 286 /* Carefully filter out noise */ | |
| 287 pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug); | |
| 288 pixDestroy(&pix1); | |
| 289 | |
| 290 /* Get the 8-connected components to be split/identified */ | |
| 291 boxa1 = pixConnComp(pix2, NULL, 8); | |
| 292 pixDestroy(&pix2); | |
| 293 ncomp = boxaGetCount(boxa1); | |
| 294 if (ncomp == 0) { | |
| 295 boxaDestroy(&boxa1); | |
| 296 L_WARNING("all components removed\n", __func__); | |
| 297 return 1; | |
| 298 } | |
| 299 | |
| 300 /* Save everything and split the large components */ | |
| 301 boxa2 = boxaCreate(ncomp); | |
| 302 maxw = recog->maxwidth_u + 5; | |
| 303 scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; | |
| 304 pixa = (debug) ? pixaCreate(ncomp) : NULL; | |
| 305 for (i = 0; i < ncomp; i++) { | |
| 306 box = boxaGetBox(boxa1, i, L_CLONE); | |
| 307 boxGetGeometry(box, &xoff, &yoff, &bw, NULL); | |
| 308 /* Treat as one character if it is small, if the images | |
| 309 * have been scaled, or if splitting is not to be run. */ | |
| 310 if (bw <= maxw || scaling || skipsplit) { | |
| 311 boxaAddBox(boxa2, box, L_INSERT); | |
| 312 } else { | |
| 313 pix = pixClipRectangle(pixs, box, NULL); | |
| 314 #if SPLIT_WITH_DID | |
| 315 if (!debug) { | |
| 316 boxa3 = recogDecode(recog, pix, 2, NULL); | |
| 317 } else { | |
| 318 boxa3 = recogDecode(recog, pix, 2, &pix2); | |
| 319 pixaAddPix(pixa, pix2, L_INSERT); | |
| 320 } | |
| 321 #else /* use greedy splitting */ | |
| 322 recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL, | |
| 323 NULL, debug); | |
| 324 if (debug) { | |
| 325 pix2 = pixConvertTo32(pix); | |
| 326 pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); | |
| 327 pixaAddPix(pixa, pix2, L_INSERT); | |
| 328 } | |
| 329 #endif /* SPLIT_WITH_DID */ | |
| 330 pixDestroy(&pix); | |
| 331 boxDestroy(&box); | |
| 332 if (!boxa3) { | |
| 333 L_ERROR("boxa3 not found for component %d\n", __func__, i); | |
| 334 } else { | |
| 335 boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0); | |
| 336 boxaJoin(boxa2, boxa4, 0, -1); | |
| 337 boxaDestroy(&boxa3); | |
| 338 boxaDestroy(&boxa4); | |
| 339 } | |
| 340 } | |
| 341 } | |
| 342 boxaDestroy(&boxa1); | |
| 343 if (pixa) { /* debug */ | |
| 344 pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2); | |
| 345 snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++); | |
| 346 pixWrite(buf, pix3, IFF_PNG); | |
| 347 pixaDestroy(&pixa); | |
| 348 pixDestroy(&pix3); | |
| 349 } | |
| 350 | |
| 351 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D. | |
| 352 * For the 2D sort, to add a box to an existing boxa, we require | |
| 353 * specified minimum vertical overlaps for the first two passes | |
| 354 * of the 2D sort. In pass 1, only components with sufficient | |
| 355 * height can start a new boxa. */ | |
| 356 baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1); | |
| 357 boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE); | |
| 358 boxaaDestroy(&baa); | |
| 359 boxaDestroy(&boxa2); | |
| 360 | |
| 361 /* Remove smaller components of overlapping pairs. | |
| 362 * We only remove the small component if the overlap is | |
| 363 * at least half its area and if its area is no more | |
| 364 * than 30% of the area of the large component. Because the | |
| 365 * components are in a flattened 2D sort, we don't need to | |
| 366 * look far ahead in the array to find all overlapping boxes; | |
| 367 * 10 boxes is plenty. */ | |
| 368 boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5f, 0.3f, NULL); | |
| 369 boxaDestroy(&boxa3); | |
| 370 | |
| 371 /* Extract and save the image pieces from the input image. */ | |
| 372 *ppixa = pixClipRectangles(pixs, boxad); | |
| 373 *pboxa = boxad; | |
| 374 return 0; | |
| 375 } | |
| 376 | |
| 377 | |
| 378 /*------------------------------------------------------------------------* | |
| 379 * Greedy character splitting * | |
| 380 *------------------------------------------------------------------------*/ | |
| 381 /*! | |
| 382 * \brief recogCorrelationBestRow() | |
| 383 * | |
| 384 * \param[in] recog with LUT's pre-computed | |
| 385 * \param[in] pixs typically of multiple touching characters, 1 bpp | |
| 386 * \param[out] pboxa bounding boxs of best fit character | |
| 387 * \param[out] pnascore [optional] correlation scores | |
| 388 * \param[out] pnaindex [optional] indices of classes | |
| 389 * \param[out] psachar [optional] array of character strings | |
| 390 * \param[in] debug 1 for results written to pixadb_split | |
| 391 * \return 0 if OK, 1 on error | |
| 392 * | |
| 393 * <pre> | |
| 394 * Notes: | |
| 395 * (1) Supervises character matching for (in general) a c.c with | |
| 396 * multiple touching characters. Finds the best match greedily. | |
| 397 * Rejects small parts that are left over after splitting. | |
| 398 * (2) Matching is to the average, and without character scaling. | |
| 399 * </pre> | |
| 400 */ | |
| 401 l_ok | |
| 402 recogCorrelationBestRow(L_RECOG *recog, | |
| 403 PIX *pixs, | |
| 404 BOXA **pboxa, | |
| 405 NUMA **pnascore, | |
| 406 NUMA **pnaindex, | |
| 407 SARRAY **psachar, | |
| 408 l_int32 debug) | |
| 409 { | |
| 410 char *charstr; | |
| 411 l_int32 index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3; | |
| 412 l_float32 score; | |
| 413 BOX *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt; | |
| 414 BOXA *boxat; | |
| 415 NUMA *nascoret, *naindext, *nasort; | |
| 416 PIX *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd; | |
| 417 PIXA *pixar, *pixadb; | |
| 418 SARRAY *sachart; | |
| 419 | |
| 420 l_int32 iter; | |
| 421 | |
| 422 if (pnascore) *pnascore = NULL; | |
| 423 if (pnaindex) *pnaindex = NULL; | |
| 424 if (psachar) *psachar = NULL; | |
| 425 if (!pboxa) | |
| 426 return ERROR_INT("&boxa not defined", __func__, 1); | |
| 427 *pboxa = NULL; | |
| 428 if (!recog) | |
| 429 return ERROR_INT("recog not defined", __func__, 1); | |
| 430 if (!pixs || pixGetDepth(pixs) != 1) | |
| 431 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 432 if (pixGetWidth(pixs) < recog->minwidth_u - 4) | |
| 433 return ERROR_INT("pixs too narrow", __func__, 1); | |
| 434 if (!recog->train_done) | |
| 435 return ERROR_INT("training not finished", __func__, 1); | |
| 436 | |
| 437 /* Binarize and crop to foreground if necessary */ | |
| 438 pixb = recogProcessToIdentify(recog, pixs, 0); | |
| 439 | |
| 440 /* Initialize the arrays */ | |
| 441 boxat = boxaCreate(4); | |
| 442 nascoret = numaCreate(4); | |
| 443 naindext = numaCreate(4); | |
| 444 sachart = sarrayCreate(4); | |
| 445 pixadb = (debug) ? pixaCreate(4) : NULL; | |
| 446 | |
| 447 /* Initialize the images remaining to be processed with the input. | |
| 448 * These are stored in pixar, which is used here as a queue, | |
| 449 * on which we only put image fragments that are large enough to | |
| 450 * contain at least one character. */ | |
| 451 pixar = pixaCreate(1); | |
| 452 pixGetDimensions(pixb, &w, &h, NULL); | |
| 453 box = boxCreate(0, 0, w, h); | |
| 454 pixaAddPix(pixar, pixb, L_INSERT); | |
| 455 pixaAddBox(pixar, box, L_INSERT); | |
| 456 | |
| 457 /* Successively split on the best match until nothing is left. | |
| 458 * To be safe, we limit the search to 10 characters. */ | |
| 459 for (iter = 0; iter < 11; iter++) { | |
| 460 if (pixaGetCount(pixar) == 0) | |
| 461 break; | |
| 462 if (iter == 10) { | |
| 463 L_WARNING("more than 10 chars; ending search\n", __func__); | |
| 464 break; | |
| 465 } | |
| 466 | |
| 467 /* Pop one from the queue */ | |
| 468 pixaRemovePixAndSave(pixar, 0, &pixc, &boxc); | |
| 469 boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL); | |
| 470 | |
| 471 /* This is a single component; if noise, remove it */ | |
| 472 recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug); | |
| 473 if (debug) | |
| 474 lept_stderr("iter = %d, removed = %d\n", iter, remove); | |
| 475 if (remove) { | |
| 476 pixDestroy(&pixc); | |
| 477 boxDestroy(&boxc); | |
| 478 continue; | |
| 479 } | |
| 480 | |
| 481 /* Find the best character match */ | |
| 482 if (debug) { | |
| 483 recogCorrelationBestChar(recog, pixc, &box, &score, | |
| 484 &index, &charstr, &pixdb); | |
| 485 pixaAddPix(pixadb, pixdb, L_INSERT); | |
| 486 } else { | |
| 487 recogCorrelationBestChar(recog, pixc, &box, &score, | |
| 488 &index, &charstr, NULL); | |
| 489 } | |
| 490 | |
| 491 /* Find the box in original coordinates, and append | |
| 492 * the results to the arrays. */ | |
| 493 boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0); | |
| 494 boxaAddBox(boxat, boxtrans, L_INSERT); | |
| 495 numaAddNumber(nascoret, score); | |
| 496 numaAddNumber(naindext, index); | |
| 497 sarrayAddString(sachart, charstr, L_INSERT); | |
| 498 | |
| 499 /* Split the current pixc into three regions and save | |
| 500 * each region if it is large enough. */ | |
| 501 boxGetGeometry(box, &bx, NULL, &bw, NULL); | |
| 502 w1 = bx; | |
| 503 w2 = bw; | |
| 504 w3 = bwc - bx - bw; | |
| 505 if (debug) | |
| 506 lept_stderr(" w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3); | |
| 507 if (w1 < recog->minwidth_u - 4) { | |
| 508 if (debug) L_INFO("discarding width %d on left\n", __func__, w1); | |
| 509 } else { /* extract and save left region */ | |
| 510 boxl = boxCreate(0, 0, bx + 1, h); | |
| 511 pixl = pixClipRectangle(pixc, boxl, NULL); | |
| 512 boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0); | |
| 513 pixaAddPix(pixar, pixl, L_INSERT); | |
| 514 pixaAddBox(pixar, boxlt, L_INSERT); | |
| 515 boxDestroy(&boxl); | |
| 516 } | |
| 517 if (w3 < recog->minwidth_u - 4) { | |
| 518 if (debug) L_INFO("discarding width %d on right\n", __func__, w3); | |
| 519 } else { /* extract and save left region */ | |
| 520 boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h); | |
| 521 pixr = pixClipRectangle(pixc, boxr, NULL); | |
| 522 boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0); | |
| 523 pixaAddPix(pixar, pixr, L_INSERT); | |
| 524 pixaAddBox(pixar, boxrt, L_INSERT); | |
| 525 boxDestroy(&boxr); | |
| 526 } | |
| 527 pixDestroy(&pixc); | |
| 528 boxDestroy(&box); | |
| 529 boxDestroy(&boxc); | |
| 530 } | |
| 531 pixaDestroy(&pixar); | |
| 532 | |
| 533 | |
| 534 /* Sort the output results by left-to-right in the boxa */ | |
| 535 *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort); | |
| 536 if (pnascore) | |
| 537 *pnascore = numaSortByIndex(nascoret, nasort); | |
| 538 if (pnaindex) | |
| 539 *pnaindex = numaSortByIndex(naindext, nasort); | |
| 540 if (psachar) | |
| 541 *psachar = sarraySortByIndex(sachart, nasort); | |
| 542 numaDestroy(&nasort); | |
| 543 boxaDestroy(&boxat); | |
| 544 numaDestroy(&nascoret); | |
| 545 numaDestroy(&naindext); | |
| 546 sarrayDestroy(&sachart); | |
| 547 | |
| 548 /* Final debug output */ | |
| 549 if (debug) { | |
| 550 pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2); | |
| 551 pixDisplay(pixd, 400, 400); | |
| 552 pixaAddPix(recog->pixadb_split, pixd, L_INSERT); | |
| 553 pixaDestroy(&pixadb); | |
| 554 } | |
| 555 return 0; | |
| 556 } | |
| 557 | |
| 558 | |
| 559 /*! | |
| 560 * \brief recogCorrelationBestChar() | |
| 561 * | |
| 562 * \param[in] recog with LUT's pre-computed | |
| 563 * \param[in] pixs can be of multiple touching characters, 1 bpp | |
| 564 * \param[out] pbox bounding box of best fit character | |
| 565 * \param[out] pscore correlation score | |
| 566 * \param[out] pindex [optional] index of class | |
| 567 * \param[out] pcharstr [optional] character string of class | |
| 568 * \param[out] ppixdb [optional] debug pix showing input and best fit | |
| 569 * \return 0 if OK, 1 on error | |
| 570 * | |
| 571 * <pre> | |
| 572 * Notes: | |
| 573 * (1) Basic matching character splitter. Finds the best match among | |
| 574 * all templates to some region of the image. This can result | |
| 575 * in splitting the image into two parts. This is "image decoding" | |
| 576 * without dynamic programming, because we don't use a setwidth | |
| 577 * and compute the best matching score for the entire image. | |
| 578 * (2) Matching is to the average templates, without character scaling. | |
| 579 * </pre> | |
| 580 */ | |
| 581 l_ok | |
| 582 recogCorrelationBestChar(L_RECOG *recog, | |
| 583 PIX *pixs, | |
| 584 BOX **pbox, | |
| 585 l_float32 *pscore, | |
| 586 l_int32 *pindex, | |
| 587 char **pcharstr, | |
| 588 PIX **ppixdb) | |
| 589 { | |
| 590 l_int32 i, n, w1, h1, w2, area2, ycent2, delx, dely; | |
| 591 l_int32 bestdelx, bestdely, bestindex; | |
| 592 l_float32 score, bestscore; | |
| 593 BOX *box; | |
| 594 BOXA *boxa; | |
| 595 NUMA *nasum, *namoment; | |
| 596 PIX *pix1, *pix2; | |
| 597 | |
| 598 if (pindex) *pindex = 0; | |
| 599 if (pcharstr) *pcharstr = NULL; | |
| 600 if (ppixdb) *ppixdb = NULL; | |
| 601 if (pbox) *pbox = NULL; | |
| 602 if (pscore) *pscore = 0.0; | |
| 603 if (!pbox || !pscore) | |
| 604 return ERROR_INT("&box and &score not both defined", __func__, 1); | |
| 605 if (!recog) | |
| 606 return ERROR_INT("recog not defined", __func__, 1); | |
| 607 if (!pixs || pixGetDepth(pixs) != 1) | |
| 608 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 609 if (!recog->train_done) | |
| 610 return ERROR_INT("training not finished", __func__, 1); | |
| 611 | |
| 612 /* Binarize and crop to foreground if necessary. Add padding | |
| 613 * to both the left and right side; this is compensated for | |
| 614 * when reporting the bounding box of the best matched character. */ | |
| 615 pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding); | |
| 616 pixGetDimensions(pix1, &w1, &h1, NULL); | |
| 617 | |
| 618 /* Compute vertical sum and moment arrays */ | |
| 619 nasum = pixCountPixelsByColumn(pix1); | |
| 620 namoment = pixGetMomentByColumn(pix1, 1); | |
| 621 | |
| 622 /* Do shifted correlation against all averaged templates. */ | |
| 623 n = recog->setsize; | |
| 624 boxa = boxaCreate(n); /* location of best fits for each character */ | |
| 625 bestscore = 0.0; | |
| 626 bestindex = bestdelx = bestdely = 0; | |
| 627 for (i = 0; i < n; i++) { | |
| 628 pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE); | |
| 629 w2 = pixGetWidth(pix2); | |
| 630 /* Note that the slightly expended w1 is typically larger | |
| 631 * than w2 (the template). */ | |
| 632 if (w1 >= w2) { | |
| 633 numaGetIValue(recog->nasum_u, i, &area2); | |
| 634 ptaGetIPt(recog->pta_u, i, NULL, &ycent2); | |
| 635 pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2, | |
| 636 recog->maxyshift, recog->sumtab, &delx, | |
| 637 &dely, &score, 1); | |
| 638 if (ppixdb) { | |
| 639 lept_stderr( | |
| 640 "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n", | |
| 641 i, delx, dely, score); | |
| 642 } | |
| 643 /* Compensate for padding */ | |
| 644 box = boxCreate(delx - LeftRightPadding, 0, w2, h1); | |
| 645 if (score > bestscore) { | |
| 646 bestscore = score; | |
| 647 bestdelx = delx - LeftRightPadding; | |
| 648 bestdely = dely; | |
| 649 bestindex = i; | |
| 650 } | |
| 651 } else { | |
| 652 box = boxCreate(0, 0, 1, 1); /* placeholder */ | |
| 653 if (ppixdb) | |
| 654 lept_stderr("Component too thin: w1 = %d, w2 = %d\n", w1, w2); | |
| 655 } | |
| 656 boxaAddBox(boxa, box, L_INSERT); | |
| 657 pixDestroy(&pix2); | |
| 658 } | |
| 659 | |
| 660 *pscore = bestscore; | |
| 661 *pbox = boxaGetBox(boxa, bestindex, L_COPY); | |
| 662 if (pindex) *pindex = bestindex; | |
| 663 if (pcharstr) | |
| 664 recogGetClassString(recog, bestindex, pcharstr); | |
| 665 | |
| 666 if (ppixdb) { | |
| 667 L_INFO("Best match: class %d; shifts (%d, %d)\n", | |
| 668 __func__, bestindex, bestdelx, bestdely); | |
| 669 pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE); | |
| 670 *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); | |
| 671 pixDestroy(&pix2); | |
| 672 } | |
| 673 | |
| 674 pixDestroy(&pix1); | |
| 675 boxaDestroy(&boxa); | |
| 676 numaDestroy(&nasum); | |
| 677 numaDestroy(&namoment); | |
| 678 return 0; | |
| 679 } | |
| 680 | |
| 681 | |
| 682 /*! | |
| 683 * \brief pixCorrelationBestShift() | |
| 684 * | |
| 685 * \param[in] pix1 1 bpp, the unknown image; typically larger | |
| 686 * \param[in] pix2 1 bpp, the matching template image) | |
| 687 * \param[in] nasum1 vertical column pixel sums for pix1 | |
| 688 * \param[in] namoment1 vertical column first moment of pixels for pix1 | |
| 689 * \param[in] area2 number of on pixels in pix2 | |
| 690 * \param[in] ycent2 y component of centroid of pix2 | |
| 691 * \param[in] maxyshift max y shift of pix2 around the location where | |
| 692 * the centroids of pix2 and a windowed part of pix1 | |
| 693 * are vertically aligned | |
| 694 * \param[in] tab8 [optional] sum tab for ON pixels in byte; | |
| 695 * can be NULL | |
| 696 * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 | |
| 697 * \param[out] pdely [optional] best y shift of pix2 relative to pix1 | |
| 698 * \param[out] pscore [optional] maximum score found; can be NULL | |
| 699 * \param[in] debugflag <= 0 to skip; positive to generate output; | |
| 700 * the integer is used to label the debug image. | |
| 701 * \return 0 if OK, 1 on error | |
| 702 * | |
| 703 * <pre> | |
| 704 * Notes: | |
| 705 * (1) This maximizes the correlation score between two 1 bpp images, | |
| 706 * one of which is typically wider. In a typical example, | |
| 707 * pix1 is a bitmap of 2 or more touching characters and pix2 is | |
| 708 * a single character template. This finds the location of pix2 | |
| 709 * that gives the largest correlation. | |
| 710 * (2) The windowed area of fg pixels and windowed first moment | |
| 711 * in the y direction are computed from the input sum and moment | |
| 712 * column arrays, %nasum1 and %namoment1 | |
| 713 * (3) This is a brute force operation. We compute the correlation | |
| 714 * at every x shift for which pix2 fits entirely within pix1, | |
| 715 * and where the centroid of pix2 is aligned, within +-maxyshift, | |
| 716 * with the centroid of a window of pix1 of the same width. | |
| 717 * The correlation is taken over the full height of pix1. | |
| 718 * This can be made more efficient. | |
| 719 * </pre> | |
| 720 */ | |
| 721 static l_int32 | |
| 722 pixCorrelationBestShift(PIX *pix1, | |
| 723 PIX *pix2, | |
| 724 NUMA *nasum1, | |
| 725 NUMA *namoment1, | |
| 726 l_int32 area2, | |
| 727 l_int32 ycent2, | |
| 728 l_int32 maxyshift, | |
| 729 l_int32 *tab8, | |
| 730 l_int32 *pdelx, | |
| 731 l_int32 *pdely, | |
| 732 l_float32 *pscore, | |
| 733 l_int32 debugflag) | |
| 734 { | |
| 735 l_int32 w1, w2, h1, h2, i, j, nx, shifty, delx, dely; | |
| 736 l_int32 sum, moment, count; | |
| 737 l_int32 *tab, *area1, *arraysum, *arraymoment; | |
| 738 l_float32 maxscore, score; | |
| 739 l_float32 *ycent1; | |
| 740 FPIX *fpix = NULL; | |
| 741 PIX *pixt, *pixt1, *pixt2; | |
| 742 | |
| 743 if (pdelx) *pdelx = 0; | |
| 744 if (pdely) *pdely = 0; | |
| 745 if (pscore) *pscore = 0.0; | |
| 746 if (!pix1 || pixGetDepth(pix1) != 1) | |
| 747 return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1); | |
| 748 if (!pix2 || pixGetDepth(pix2) != 1) | |
| 749 return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1); | |
| 750 if (!nasum1 || !namoment1) | |
| 751 return ERROR_INT("nasum1 and namoment1 not both defined", __func__, 1); | |
| 752 if (area2 <= 0 || ycent2 <= 0) | |
| 753 return ERROR_INT("area2 and ycent2 must be > 0", __func__, 1); | |
| 754 | |
| 755 /* If pix1 (the unknown image) is narrower than pix2, | |
| 756 * don't bother to try the match. pix1 is already padded with | |
| 757 * 2 pixels on each side. */ | |
| 758 pixGetDimensions(pix1, &w1, &h1, NULL); | |
| 759 pixGetDimensions(pix2, &w2, &h2, NULL); | |
| 760 if (w1 < w2) { | |
| 761 if (debugflag > 0) { | |
| 762 L_INFO("skipping match with w1 = %d and w2 = %d\n", | |
| 763 __func__, w1, w2); | |
| 764 } | |
| 765 return 0; | |
| 766 } | |
| 767 nx = w1 - w2 + 1; | |
| 768 | |
| 769 if (debugflag > 0) | |
| 770 fpix = fpixCreate(nx, 2 * maxyshift + 1); | |
| 771 if (!tab8) | |
| 772 tab = makePixelSumTab8(); | |
| 773 else | |
| 774 tab = tab8; | |
| 775 | |
| 776 /* Set up the arrays for area1 and ycent1. We have to do this | |
| 777 * for each template (pix2) because the window width is w2. */ | |
| 778 area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); | |
| 779 ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_float32)); | |
| 780 arraysum = numaGetIArray(nasum1); | |
| 781 arraymoment = numaGetIArray(namoment1); | |
| 782 for (i = 0, sum = 0, moment = 0; i < w2; i++) { | |
| 783 sum += arraysum[i]; | |
| 784 moment += arraymoment[i]; | |
| 785 } | |
| 786 for (i = 0; i < nx - 1; i++) { | |
| 787 area1[i] = sum; | |
| 788 ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; | |
| 789 sum += arraysum[w2 + i] - arraysum[i]; | |
| 790 moment += arraymoment[w2 + i] - arraymoment[i]; | |
| 791 } | |
| 792 area1[nx - 1] = sum; | |
| 793 ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; | |
| 794 | |
| 795 /* Find the best match location for pix2. At each location, | |
| 796 * to ensure that pixels are ON only within the intersection of | |
| 797 * pix1 and the shifted pix2: | |
| 798 * (1) Start with pixt cleared; it is w2 wide and h1 high. | |
| 799 * (2) Blit pix2, shifted vertically, onto pixt. | |
| 800 * (3) AND the w2-wide window of pix1 starting at x = i into pixt; | |
| 801 * the ON pixels that remain are in the intersection. */ | |
| 802 pixt = pixCreate(w2, h1, 1); | |
| 803 maxscore = 0; | |
| 804 delx = 0; | |
| 805 dely = 0; /* amount to shift pix2 relative to pix1 to get alignment */ | |
| 806 for (i = 0; i < nx; i++) { | |
| 807 shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); | |
| 808 for (j = -maxyshift; j <= maxyshift; j++) { | |
| 809 pixClearAll(pixt); | |
| 810 pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0); | |
| 811 pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); | |
| 812 pixCountPixels(pixt, &count, tab); | |
| 813 score = (area1[i] == 0) ? 0.0f : (l_float32)count * (l_float32)count / | |
| 814 ((l_float32)area1[i] * (l_float32)area2); /* empty window scores 0, not 0/0 */ | |
| 815 if (score > maxscore) { | |
| 816 maxscore = score; | |
| 817 delx = i; | |
| 818 dely = shifty + j; | |
| 819 } | |
| 820 | |
| 821 if (debugflag > 0) | |
| 822 fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score); | |
| 823 } | |
| 824 } | |
| 825 | |
| 826 if (debugflag > 0) { | |
| 827 char buf[128]; | |
| 828 lept_mkdir("lept/recog"); | |
| 829 pixt1 = fpixDisplayMaxDynamicRange(fpix); | |
| 830 pixt2 = pixExpandReplicate(pixt1, 5); | |
| 831 snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag); | |
| 832 pixWrite(buf, pixt2, IFF_PNG); | |
| 833 pixDestroy(&pixt1); | |
| 834 pixDestroy(&pixt2); | |
| 835 fpixDestroy(&fpix); | |
| 836 } | |
| 837 | |
| 838 if (pdelx) *pdelx = delx; | |
| 839 if (pdely) *pdely = dely; | |
| 840 if (pscore) *pscore = maxscore; | |
| 841 if (!tab8) LEPT_FREE(tab); | |
| 842 LEPT_FREE(area1); | |
| 843 LEPT_FREE(ycent1); | |
| 844 LEPT_FREE(arraysum); | |
| 845 LEPT_FREE(arraymoment); | |
| 846 pixDestroy(&pixt); | |
| 847 return 0; | |
| 848 } | |
| 849 | |
| 850 | |
| 851 /*------------------------------------------------------------------------* | |
| 852 * Low-level identification * | |
| 853 *------------------------------------------------------------------------*/ | |
| 854 /*! | |
| 855 * \brief recogIdentifyPixa() | |
| 856 * | |
| 857 * \param[in] recog | |
| 858 * \param[in] pixa of 1 bpp images to match | |
| 859 * \param[out] ppixdb [optional] pix showing inputs and best fits | |
| 860 * \return 0 if OK, 1 on error | |
| 861 * | |
| 862 * <pre> | |
| 863 * Notes: | |
| 864 * (1) This should be called by recogIdentifyMultiple(), which | |
| 865 * binarizes and splits characters before sending %pixa here. | |
| 866 * (2) This calls recogIdentifyPix() on each pix in %pixa. The result | |
| 867 * for each best correlation match is appended to recog->rcha, | |
| 868 * which is destroyed and recreated on every call, and the | |
| 869 * character string is set as the text of the pix. | |
| 870 * </pre> | |
| 871 */ | |
| 872 l_ok | |
| 873 recogIdentifyPixa(L_RECOG *recog, | |
| 874 PIXA *pixa, | |
| 875 PIX **ppixdb) | |
| 876 { | |
| 877 char *text; | |
| 878 l_int32 i, n, fail, index, depth; | |
| 879 l_float32 score; | |
| 880 PIX *pix1, *pix2, *pix3; | |
| 881 PIXA *pixa1; | |
| 882 L_RCH *rch; | |
| 883 | |
| 884 if (ppixdb) *ppixdb = NULL; | |
| 885 if (!recog) | |
| 886 return ERROR_INT("recog not defined", __func__, 1); | |
| 887 if (!pixa) | |
| 888 return ERROR_INT("pixa not defined", __func__, 1); | |
| 889 | |
| 890 /* Run the recognizer on the set of images. This writes | |
| 891 * the text string into each pix in pixa. */ | |
| 892 n = pixaGetCount(pixa); | |
| 893 rchaDestroy(&recog->rcha); | |
| 894 recog->rcha = rchaCreate(); | |
| 895 pixa1 = (ppixdb) ? pixaCreate(n) : NULL; | |
| 896 depth = 1; | |
| 897 for (i = 0; i < n; i++) { | |
| 898 pix1 = pixaGetPix(pixa, i, L_CLONE); | |
| 899 pix2 = NULL; | |
| 900 fail = FALSE; | |
| 901 if (!ppixdb) | |
| 902 fail = recogIdentifyPix(recog, pix1, NULL); | |
| 903 else | |
| 904 fail = recogIdentifyPix(recog, pix1, &pix2); | |
| 905 if (fail) | |
| 906 recogSkipIdentify(recog); | |
| 907 if ((rch = recog->rch) == NULL) { | |
| 908 L_ERROR("rch not found for char %d\n", __func__, i); | |
| 909 pixDestroy(&pix1); | |
| 910 pixDestroy(&pix2); | |
| 911 continue; | |
| 912 } | |
| 913 rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL); | |
| 914 pixSetText(pix1, text); | |
| 915 LEPT_FREE(text); | |
| 916 if (ppixdb) { | |
| 917 rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL); | |
| 918 pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score); | |
| 919 if (pixaGetCount(pixa1) == 0) depth = pixGetDepth(pix3); /* depth of first image added, even if char 0 was skipped */ | |
| 920 pixaAddPix(pixa1, pix3, L_INSERT); | |
| 921 pixDestroy(&pix2); | |
| 922 } | |
| 923 transferRchToRcha(rch, recog->rcha); | |
| 924 pixDestroy(&pix1); | |
| 925 } | |
| 926 | |
| 927 /* Package the images for debug */ | |
| 928 if (ppixdb) { | |
| 929 *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1); | |
| 930 pixaDestroy(&pixa1); | |
| 931 } | |
| 932 | |
| 933 return 0; | |
| 934 } | |
| 935 | |
| 936 | |
| 937 /*! | |
| 938 * \brief recogIdentifyPix() | |
| 939 * | |
| 940 * \param[in] recog with LUT's pre-computed | |
| 941 * \param[in] pixs of a single character, 1 bpp | |
| 942 * \param[out] ppixdb [optional] debug pix showing input and best fit | |
| 943 * \return 0 if OK, 1 on error | |
| 944 * | |
| 945 * <pre> | |
| 946 * Notes: | |
| 947 * (1) Basic recognition function for a single character. The result is stored in recog->rch, replacing any previous result. | |
| 948 * (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default | |
| 949 * situation, matching is attempted to every bitmap in the recog, | |
| 950 * and the identity of the best match is returned. | |
| 951 * (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and | |
| 952 * matching is only attempted to the averaged bitmaps. For this | |
| 953 * case, the index of the bestsample is meaningless (0 is returned | |
| 954 * if requested). | |
| 955 * (4) The score is related to the confidence (probability of correct | |
| 956 * identification), in that a higher score is correlated with | |
| 957 * a higher probability. However, the actual relation between | |
| 958 * the correlation (score) and the probability is not known; | |
| 959 * we call this a "score" because "confidence" can be misinterpreted | |
| 960 * as an actual probability. | |
| 961 * </pre> | |
| 962 */ | |
| 963 l_ok | |
| 964 recogIdentifyPix(L_RECOG *recog, | |
| 965 PIX *pixs, | |
| 966 PIX **ppixdb) | |
| 967 { | |
| 968 char *text; | |
| 969 l_int32 i, j, n, bestindex, bestsample, area1, area2, ret; | |
| 970 l_int32 shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift; | |
| 971 l_float32 x1, y1, x2, y2, delx, dely, score, maxscore; | |
| 972 NUMA *numa; | |
| 973 PIX *pix0, *pix1, *pix2; | |
| 974 PIXA *pixa; | |
| 975 PTA *pta; | |
| 976 | |
| 977 if (ppixdb) *ppixdb = NULL; | |
| 978 if (!recog) | |
| 979 return ERROR_INT("recog not defined", __func__, 1); | |
| 980 if (!pixs || pixGetDepth(pixs) != 1) | |
| 981 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 982 | |
| 983 /* Do the averaging if required and not yet done. */ | |
| 984 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) { | |
| 985 ret = recogAverageSamples(recog, 0); | |
| 986 if (ret) | |
| 987 return ERROR_INT("averaging failed", __func__, 1); | |
| 988 } | |
| 989 | |
| 990 /* Binarize and crop to foreground if necessary */ | |
| 991 if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL) | |
| 992 return ERROR_INT("no fg pixels in pix0", __func__, 1); | |
| 993 | |
| 994 /* Optionally scale and/or convert to fixed stroke width */ | |
| 995 pix1 = recogModifyTemplate(recog, pix0); | |
| 996 pixDestroy(&pix0); | |
| 997 if (!pix1) | |
| 998 return ERROR_INT("no fg pixels in pix1", __func__, 1); | |
| 999 | |
| 1000 /* Do correlation at all positions within +-maxyshift of | |
| 1001 * the nominal centroid alignment, in both x and y. */ | |
| 1002 pixCountPixels(pix1, &area1, recog->sumtab); | |
| 1003 pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1); | |
| 1004 bestindex = bestsample = bestdelx = bestdely = bestwidth = 0; | |
| 1005 maxscore = 0.0; | |
| 1006 maxyshift = recog->maxyshift; | |
| 1007 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { | |
| 1008 for (i = 0; i < recog->setsize; i++) { | |
| 1009 numaGetIValue(recog->nasum, i, &area2); | |
| 1010 if (area2 == 0) continue; /* no template available */ | |
| 1011 pix2 = pixaGetPix(recog->pixa, i, L_CLONE); | |
| 1012 ptaGetPt(recog->pta, i, &x2, &y2); | |
| 1013 delx = x1 - x2; | |
| 1014 dely = y1 - y2; | |
| 1015 for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { | |
| 1016 for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { | |
| 1017 pixCorrelationScoreSimple(pix1, pix2, area1, area2, | |
| 1018 delx + shiftx, dely + shifty, | |
| 1019 5, 5, recog->sumtab, &score); | |
| 1020 if (score > maxscore) { | |
| 1021 bestindex = i; | |
| 1022 bestdelx = delx + shiftx; | |
| 1023 bestdely = dely + shifty; | |
| 1024 maxscore = score; | |
| 1025 } | |
| 1026 } | |
| 1027 } | |
| 1028 pixDestroy(&pix2); | |
| 1029 } | |
| 1030 } else { /* use all the samples */ | |
| 1031 for (i = 0; i < recog->setsize; i++) { | |
| 1032 pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); | |
| 1033 n = pixaGetCount(pixa); | |
| 1034 if (n == 0) { | |
| 1035 pixaDestroy(&pixa); | |
| 1036 continue; | |
| 1037 } | |
| 1038 numa = numaaGetNuma(recog->naasum, i, L_CLONE); | |
| 1039 pta = ptaaGetPta(recog->ptaa, i, L_CLONE); | |
| 1040 for (j = 0; j < n; j++) { | |
| 1041 pix2 = pixaGetPix(pixa, j, L_CLONE); | |
| 1042 numaGetIValue(numa, j, &area2); | |
| 1043 ptaGetPt(pta, j, &x2, &y2); | |
| 1044 delx = x1 - x2; | |
| 1045 dely = y1 - y2; | |
| 1046 for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { | |
| 1047 for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { | |
| 1048 pixCorrelationScoreSimple(pix1, pix2, area1, area2, | |
| 1049 delx + shiftx, dely + shifty, | |
| 1050 5, 5, recog->sumtab, &score); | |
| 1051 if (score > maxscore) { | |
| 1052 bestindex = i; | |
| 1053 bestsample = j; | |
| 1054 bestdelx = delx + shiftx; | |
| 1055 bestdely = dely + shifty; | |
| 1056 maxscore = score; | |
| 1057 bestwidth = pixGetWidth(pix2); | |
| 1058 } | |
| 1059 } | |
| 1060 } | |
| 1061 pixDestroy(&pix2); | |
| 1062 } | |
| 1063 pixaDestroy(&pixa); | |
| 1064 numaDestroy(&numa); | |
| 1065 ptaDestroy(&pta); | |
| 1066 } | |
| 1067 } | |
| 1068 | |
| 1069 /* Package up the results; the rch takes ownership of text */ | |
| 1070 recogGetClassString(recog, bestindex, &text); | |
| 1071 rchDestroy(&recog->rch); | |
| 1072 recog->rch = rchCreate(bestindex, maxscore, text, bestsample, | |
| 1073 bestdelx, bestdely, bestwidth); | |
| 1074 | |
| 1075 if (ppixdb) { | |
| 1076 if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { | |
| 1077 L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n", | |
| 1078 __func__, text, bestindex, bestdelx, bestdely, maxscore); | |
| 1079 pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE); | |
| 1080 } else { /* L_USE_ALL_TEMPLATES */ | |
| 1081 L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n", | |
| 1082 __func__, text, bestsample, bestindex, maxscore); | |
| 1083 if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) { | |
| 1084 L_INFO(" Best shift: (%d, %d)\n", | |
| 1085 __func__, bestdelx, bestdely); | |
| 1086 } | |
| 1087 pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE); | |
| 1088 } | |
| 1089 *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); | |
| 1090 pixDestroy(&pix2); | |
| 1091 } | |
| 1092 | |
| 1093 pixDestroy(&pix1); | |
| 1094 return 0; | |
| 1095 } | |
| 1096 | |
| 1097 | |
| 1098 /*! | |
| 1099 * \brief recogSkipIdentify() | |
| 1100 * | |
| 1101 * \param[in] recog | |
| 1102 * \return 0 if OK, 1 on error | |
| 1103 * | |
| 1104 * <pre> | |
| 1105 * Notes: | |
| 1106 * (1) This just writes a "dummy" result with 0 score and empty | |
| 1107 * string id into recog->rch, after destroying any previous rch. | |
| 1108 * </pre> | |
| 1109 */ | |
| 1110 l_ok | |
| 1111 recogSkipIdentify(L_RECOG *recog) | |
| 1112 { | |
| 1113 if (!recog) | |
| 1114 return ERROR_INT("recog not defined", __func__, 1); | |
| 1115 | |
| 1116 /* Package up placeholder results; all numeric fields are 0 */ | |
| 1117 rchDestroy(&recog->rch); | |
| 1118 recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0); | |
| 1119 return 0; | |
| 1120 } | |
| 1121 | |
| 1122 | |
| 1123 /*------------------------------------------------------------------------* | |
| 1124 * Operations for handling identification results * | |
| 1125 *------------------------------------------------------------------------*/ | |
| 1126 /*! | |
| 1127 * \brief rchaCreate() | |
| 1128 * | |
| 1129 * Return: rcha, a new L_RCHA holding newly created numas and an sarray | |
| 1130 * | |
| 1131 * Notes: | |
| 1132 * (1) Be sure to destroy any existing rcha before assigning this. | |
| 1133 */ | |
| 1134 static L_RCHA * | |
| 1135 rchaCreate() | |
| 1136 { | |
| 1137 L_RCHA *rcha; | |
| 1138 | |
| 1139 rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA)); | |
| 1140 rcha->naindex = numaCreate(0); | |
| 1141 rcha->nascore = numaCreate(0); | |
| 1142 rcha->satext = sarrayCreate(0); | |
| 1143 rcha->nasample = numaCreate(0); | |
| 1144 rcha->naxloc = numaCreate(0); | |
| 1145 rcha->nayloc = numaCreate(0); | |
| 1146 rcha->nawidth = numaCreate(0); | |
| 1147 return rcha; | |
| 1148 } | |
| 1149 | |
| 1150 | |
| 1151 /*! | |
| 1152 * \brief rchaDestroy() | |
| 1153 * | |
| 1154 * \param[in,out] prcha set to NULL on return; nothing is done if *prcha is already NULL | |
| 1155 */ | |
| 1156 void | |
| 1157 rchaDestroy(L_RCHA **prcha) | |
| 1158 { | |
| 1159 L_RCHA *rcha; | |
| 1160 | |
| 1161 if (prcha == NULL) { | |
| 1162 L_WARNING("&rcha is null!\n", __func__); | |
| 1163 return; | |
| 1164 } | |
| 1165 if ((rcha = *prcha) == NULL) | |
| 1166 return; | |
| 1167 | |
| 1168 numaDestroy(&rcha->naindex); | |
| 1169 numaDestroy(&rcha->nascore); | |
| 1170 sarrayDestroy(&rcha->satext); | |
| 1171 numaDestroy(&rcha->nasample); | |
| 1172 numaDestroy(&rcha->naxloc); | |
| 1173 numaDestroy(&rcha->nayloc); | |
| 1174 numaDestroy(&rcha->nawidth); | |
| 1175 LEPT_FREE(rcha); | |
| 1176 *prcha = NULL; | |
| 1177 } | |
| 1178 | |
| 1179 | |
| 1180 /*! | |
| 1181 * \brief rchCreate() | |
| 1182 * | |
| 1183 * \param[in] index index of best template | |
| 1184 * \param[in] score correlation score of best template | |
| 1185 * \param[in] text character string of best template | |
| 1186 * \param[in] sample index of best sample; not meaningful if averages are used (recogIdentifyPix() passes 0) | |
| 1187 * \param[in] xloc x-location of template: delx + shiftx | |
| 1188 * \param[in] yloc y-location of template: dely + shifty | |
| 1189 * \param[in] width width of best template | |
| 1190 * \return rch, a new L_RCH filled with the input values | |
| 1191 * | |
| 1192 * <pre> | |
| 1193 * Notes: | |
| 1194 * (1) Be sure to destroy any existing rch before assigning this. | |
| 1195 * (2) This stores the text string, not a copy of it, so the | |
| 1196 * caller must not destroy the string. | |
| 1197 * </pre> | |
| 1198 */ | |
| 1199 static L_RCH * | |
| 1200 rchCreate(l_int32 index, | |
| 1201 l_float32 score, | |
| 1202 char *text, | |
| 1203 l_int32 sample, | |
| 1204 l_int32 xloc, | |
| 1205 l_int32 yloc, | |
| 1206 l_int32 width) | |
| 1207 { | |
| 1208 L_RCH *rch; | |
| 1209 | |
| 1210 rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH)); | |
| 1211 rch->index = index; | |
| 1212 rch->score = score; | |
| 1213 rch->text = text; | |
| 1214 rch->sample = sample; | |
| 1215 rch->xloc = xloc; | |
| 1216 rch->yloc = yloc; | |
| 1217 rch->width = width; | |
| 1218 return rch; | |
| 1219 } | |
| 1220 | |
| 1221 | |
| 1222 /*! | |
| 1223 * \brief rchDestroy() | |
| 1224 * | |
| 1225 * \param[in,out] prch set to NULL on return; the stored text string is freed along with the rch | |
| 1226 */ | |
| 1227 void | |
| 1228 rchDestroy(L_RCH **prch) | |
| 1229 { | |
| 1230 L_RCH *rch; | |
| 1231 | |
| 1232 if (prch == NULL) { | |
| 1233 L_WARNING("&rch is null!\n", __func__); | |
| 1234 return; | |
| 1235 } | |
| 1236 if ((rch = *prch) == NULL) | |
| 1237 return; | |
| 1238 LEPT_FREE(rch->text); | |
| 1239 LEPT_FREE(rch); | |
| 1240 *prch = NULL; | |
| 1241 } | |
| 1242 | |
| 1243 | |
| 1244 /*! | |
| 1245 * \brief rchaExtract() | |
| 1246 * | |
| 1247 * \param[in] rcha | |
| 1248 * \param[out] pnaindex [optional] indices of best templates | |
| 1249 * \param[out] pnascore [optional] correl scores of best templates | |
| 1250 * \param[out] psatext [optional] character strings of best templates | |
| 1251 * \param[out] pnasample [optional] indices of best samples | |
| 1252 * \param[out] pnaxloc [optional] x-locations of templates | |
| 1253 * \param[out] pnayloc [optional] y-locations of templates | |
| 1254 * \param[out] pnawidth [optional] widths of best templates | |
| 1255 * \return 0 if OK, 1 on error | |
| 1256 * | |
| 1257 * <pre> | |
| 1258 * Notes: | |
| 1259 * (1) This returns clones of the number and string arrays. They must | |
| 1260 * be destroyed by the caller. All requested outputs are NULL if %rcha is not defined. | |
| 1261 * </pre> | |
| 1262 */ | |
| 1263 l_ok | |
| 1264 rchaExtract(L_RCHA *rcha, | |
| 1265 NUMA **pnaindex, | |
| 1266 NUMA **pnascore, | |
| 1267 SARRAY **psatext, | |
| 1268 NUMA **pnasample, | |
| 1269 NUMA **pnaxloc, | |
| 1270 NUMA **pnayloc, | |
| 1271 NUMA **pnawidth) | |
| 1272 { | |
| 1273 if (pnaindex) *pnaindex = NULL; | |
| 1274 if (pnascore) *pnascore = NULL; | |
| 1275 if (psatext) *psatext = NULL; | |
| 1276 if (pnasample) *pnasample = NULL; | |
| 1277 if (pnaxloc) *pnaxloc = NULL; | |
| 1278 if (pnayloc) *pnayloc = NULL; | |
| 1279 if (pnawidth) *pnawidth = NULL; | |
| 1280 if (!rcha) | |
| 1281 return ERROR_INT("rcha not defined", __func__, 1); | |
| 1282 | |
| 1283 if (pnaindex) *pnaindex = numaClone(rcha->naindex); | |
| 1284 if (pnascore) *pnascore = numaClone(rcha->nascore); | |
| 1285 if (psatext) *psatext = sarrayClone(rcha->satext); | |
| 1286 if (pnasample) *pnasample = numaClone(rcha->nasample); | |
| 1287 if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc); | |
| 1288 if (pnayloc) *pnayloc = numaClone(rcha->nayloc); | |
| 1289 if (pnawidth) *pnawidth = numaClone(rcha->nawidth); | |
| 1290 return 0; | |
| 1291 } | |
| 1292 | |
| 1293 | |
| 1294 /*! | |
| 1295 * \brief rchExtract() | |
| 1296 * | |
| 1297 * \param[in] rch | |
| 1298 * \param[out] pindex [optional] index of best template | |
| 1299 * \param[out] pscore [optional] correlation score of best template | |
| 1300 * \param[out] ptext [optional] copy of the character string of best template; must be freed by the caller | |
| 1301 * \param[out] psample [optional] index of best sample | |
| 1302 * \param[out] pxloc [optional] x-location of template | |
| 1303 * \param[out] pyloc [optional] y-location of template | |
| 1304 * \param[out] pwidth [optional] width of best template | |
| 1305 * \return 0 if OK, 1 on error; on error the requested outputs are 0 or NULL | |
| 1306 */ | |
| 1307 l_ok | |
| 1308 rchExtract(L_RCH *rch, | |
| 1309 l_int32 *pindex, | |
| 1310 l_float32 *pscore, | |
| 1311 char **ptext, | |
| 1312 l_int32 *psample, | |
| 1313 l_int32 *pxloc, | |
| 1314 l_int32 *pyloc, | |
| 1315 l_int32 *pwidth) | |
| 1316 { | |
| 1317 if (pindex) *pindex = 0; | |
| 1318 if (pscore) *pscore = 0.0; | |
| 1319 if (ptext) *ptext = NULL; | |
| 1320 if (psample) *psample = 0; | |
| 1321 if (pxloc) *pxloc = 0; | |
| 1322 if (pyloc) *pyloc = 0; | |
| 1323 if (pwidth) *pwidth = 0; | |
| 1324 if (!rch) | |
| 1325 return ERROR_INT("rch not defined", __func__, 1); | |
| 1326 | |
| 1327 if (pindex) *pindex = rch->index; | |
| 1328 if (pscore) *pscore = rch->score; | |
| 1329 if (ptext) *ptext = stringNew(rch->text); /* new string: owned by caller */ | |
| 1330 if (psample) *psample = rch->sample; | |
| 1331 if (pxloc) *pxloc = rch->xloc; | |
| 1332 if (pyloc) *pyloc = rch->yloc; | |
| 1333 if (pwidth) *pwidth = rch->width; | |
| 1334 return 0; | |
| 1335 } | |
| 1336 | |
| 1337 | |
| 1338 /*! | |
| 1339 * \brief transferRchToRcha() | |
| 1340 * | |
| 1341 * \param[in] rch source of data | |
| 1342 * \param[in] rcha append to arrays in this destination | |
| 1343 * \return 0 if OK, 1 on error | |
| 1344 * | |
| 1345 * <pre> | |
| 1346 * Notes: | |
| 1347 * (1) This is used to append the results of a single character | |
| 1348 * identification to the rcha arrays. The text string is added with L_COPY. | |
| 1349 * </pre> | |
| 1350 */ | |
| 1351 static l_int32 | |
| 1352 transferRchToRcha(L_RCH *rch, | |
| 1353 L_RCHA *rcha) | |
| 1354 { | |
| 1355 | |
| 1356 if (!rch) | |
| 1357 return ERROR_INT("rch not defined", __func__, 1); | |
| 1358 if (!rcha) | |
| 1359 return ERROR_INT("rcha not defined", __func__, 1); | |
| 1360 | |
| 1361 numaAddNumber(rcha->naindex, rch->index); | |
| 1362 numaAddNumber(rcha->nascore, rch->score); | |
| 1363 sarrayAddString(rcha->satext, rch->text, L_COPY); | |
| 1364 numaAddNumber(rcha->nasample, rch->sample); | |
| 1365 numaAddNumber(rcha->naxloc, rch->xloc); | |
| 1366 numaAddNumber(rcha->nayloc, rch->yloc); | |
| 1367 numaAddNumber(rcha->nawidth, rch->width); | |
| 1368 return 0; | |
| 1369 } | |
| 1370 | |
| 1371 | |
| 1372 /*------------------------------------------------------------------------* | |
| 1373 * Preprocessing and filtering * | |
| 1374 *------------------------------------------------------------------------*/ | |
| 1375 /*! | |
| 1376 * \brief recogProcessToIdentify() | |
| 1377 * | |
| 1378 * \param[in] recog with LUT's pre-computed | |
| 1379 * \param[in] pixs typ. single character, possibly d > 1 and uncropped | |
| 1380 * \param[in] pad extra pixels added to left and right sides | |
| 1381 * \return pixd 1 bpp, clipped to foreground, or NULL if there | |
| 1382 * are no fg pixels or on error. | |
| 1383 * | |
| 1384 * <pre> | |
| 1385 * Notes: | |
| 1386 * (1) This is a lightweight operation to ensure that the input | |
| 1387 * image is 1 bpp, properly cropped, and padded on each side. | |
| 1388 * If bpp > 1, the image is thresholded using recog->threshold. | |
| 1389 * </pre> | |
| 1390 */ | |
| 1391 PIX * | |
| 1392 recogProcessToIdentify(L_RECOG *recog, | |
| 1393 PIX *pixs, | |
| 1394 l_int32 pad) | |
| 1395 { | |
| 1396 l_int32 canclip; | |
| 1397 PIX *pix1, *pix2, *pixd; | |
| 1398 | |
| 1399 if (!recog) | |
| 1400 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 1401 if (!pixs) | |
| 1402 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1403 | |
| 1404 if (pixGetDepth(pixs) != 1) | |
| 1405 pix1 = pixThresholdToBinary(pixs, recog->threshold); | |
| 1406 else | |
| 1407 pix1 = pixClone(pixs); | |
| 1408 pixTestClipToForeground(pix1, &canclip); | |
| 1409 if (canclip) | |
| 1410 pixClipToForeground(pix1, &pix2, NULL); | |
| 1411 else | |
| 1412 pix2 = pixClone(pix1); | |
| 1413 pixDestroy(&pix1); | |
| 1414 if (!pix2) | |
| 1415 return (PIX *)ERROR_PTR("no foreground pixels", __func__, NULL); | |
| 1416 | |
| 1417 pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0); | |
| 1418 pixDestroy(&pix2); | |
| 1419 return pixd; | |
| 1420 } | |
| 1421 | |
| 1422 | |
| 1423 /*! | |
| 1424 * \brief recogPreSplittingFilter() | |
| 1425 * | |
| 1426 * \param[in] recog | |
| 1427 * \param[in] pixs 1 bpp, many connected components | |
| 1428 * \param[in] minh minimum height of components to be retained | |
| 1429 * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained | |
| 1430 * \param[in] debug 1 to output indicator arrays | |
| 1431 * \return pixd with filtered components removed or NULL on error | |
| 1432 */ | |
| 1433 static PIX * | |
| 1434 recogPreSplittingFilter(L_RECOG *recog, | |
| 1435 PIX *pixs, | |
| 1436 l_int32 minh, | |
| 1437 l_float32 minaf, | |
| 1438 l_int32 debug) | |
| 1439 { | |
| 1440 l_int32 scaling, minsplitw, maxsplith, maxasp; | |
| 1441 BOXA *boxas; | |
| 1442 NUMA *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7; | |
| 1443 PIX *pixd; | |
| 1444 PIXA *pixas; | |
| 1445 | |
| 1446 if (!recog) | |
| 1447 return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 1448 if (!pixs) | |
| 1449 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1450 | |
| 1451 /* If there is scaling, do not remove components based on the | |
| 1452 * values of min_splitw and max_splith. */ | |
| 1453 scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; | |
| 1454 minsplitw = (scaling) ? 1 : recog->min_splitw - 3; | |
| 1455 maxsplith = (scaling) ? 150 : recog->max_splith; | |
| 1456 maxasp = recog->max_wh_ratio; | |
| 1457 | |
| 1458 /* Generate an indicator array of connected components to remove: | |
| 1459 * narrow stuff and short stuff | |
| 1460 * tall stuff | |
| 1461 * components with large width/height ratio | |
| 1462 * components with small area fill fraction. | |
| 1463 * The individual indicators are OR-ed (L_UNION) into na1. */ | |
| 1463 boxas = pixConnComp(pixs, &pixas, 8); | |
| 1464 pixaFindDimensions(pixas, &naw, &nah); | |
| 1465 na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT); | |
| 1466 na1c = numaCopy(na1); | |
| 1467 na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT); | |
| 1468 na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT); | |
| 1469 na4 = pixaFindWidthHeightRatio(pixas); | |
| 1470 na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT); | |
| 1471 na6 = pixaFindAreaFraction(pixas); | |
| 1472 na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT); | |
| 1473 numaLogicalOp(na1, na1, na2, L_UNION); | |
| 1474 numaLogicalOp(na1, na1, na3, L_UNION); | |
| 1475 numaLogicalOp(na1, na1, na5, L_UNION); | |
| 1476 numaLogicalOp(na1, na1, na7, L_UNION); | |
| 1477 pixd = pixCopy(NULL, pixs); | |
| 1478 pixRemoveWithIndicator(pixd, pixas, na1); | |
| 1479 if (debug) | |
| 1480 l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1); | |
| 1481 numaDestroy(&naw); | |
| 1482 numaDestroy(&nah); | |
| 1483 numaDestroy(&na1); | |
| 1484 numaDestroy(&na1c); | |
| 1485 numaDestroy(&na2); | |
| 1486 numaDestroy(&na3); | |
| 1487 numaDestroy(&na4); | |
| 1488 numaDestroy(&na5); | |
| 1489 numaDestroy(&na6); | |
| 1490 numaDestroy(&na7); | |
| 1491 boxaDestroy(&boxas); | |
| 1492 pixaDestroy(&pixas); | |
| 1493 return pixd; | |
| 1494 } | |
| 1495 | |
| 1496 | |
| 1497 /*! | |
| 1498 * \brief recogSplittingFilter() | |
| 1499 * | |
| 1500 * \param[in] recog | |
| 1501 * \param[in] pixs 1 bpp, single connected component | |
| 1502 * \param[in] minh minimum height of component; <= 0 for default | |
| 1503 * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained | |
| 1504 * \param[out] premove 0 to save, 1 to remove | |
| 1505 * \param[in] debug 1 to log the reason a component is removed | |
| 1506 * \return 0 if OK, 1 on error | |
| 1507 */ | |
| 1508 static l_int32 | |
| 1509 recogSplittingFilter(L_RECOG *recog, | |
| 1510 PIX *pixs, | |
| 1511 l_int32 minh, | |
| 1512 l_float32 minaf, | |
| 1513 l_int32 *premove, | |
| 1514 l_int32 debug) | |
| 1515 { | |
| 1516 l_int32 w, h; | |
| 1517 l_float32 aspratio, fract; | |
| 1518 | |
| 1519 if (!premove) | |
| 1520 return ERROR_INT("&remove not defined", __func__, 1); | |
| 1521 *premove = 0; | |
| 1522 if (!recog) | |
| 1523 return ERROR_INT("recog not defined", __func__, 1); | |
| 1524 if (!pixs) | |
| 1525 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1526 if (minh <= 0) minh = DefaultMinHeight; | |
| 1527 | |
| 1528 /* Remove from further consideration: | |
| 1529 * small stuff (narrower than min_splitw or shorter than minh) | |
| 1530 * components with large width/height ratio | |
| 1531 * components with small area fill fraction */ | |
| 1532 pixGetDimensions(pixs, &w, &h, NULL); | |
| 1533 if (w < recog->min_splitw) { | |
| 1534 if (debug) L_INFO("w = %d < %d\n", __func__, w, recog->min_splitw); | |
| 1535 *premove = 1; | |
| 1536 return 0; | |
| 1537 } | |
| 1538 if (h < minh) { | |
| 1539 if (debug) L_INFO("h = %d < %d\n", __func__, h, minh); | |
| 1540 *premove = 1; | |
| 1541 return 0; | |
| 1542 } | |
| 1543 aspratio = (l_float32)w / (l_float32)h; | |
| 1544 if (aspratio > recog->max_wh_ratio) { | |
| 1545 if (debug) L_INFO("w/h = %5.3f too large\n", __func__, aspratio); | |
| 1546 *premove = 1; | |
| 1547 return 0; | |
| 1548 } | |
| 1549 pixFindAreaFraction(pixs, recog->sumtab, &fract); | |
| 1550 if (fract < minaf) { | |
| 1551 if (debug) L_INFO("area fill fract %5.3f < %5.3f\n", | |
| 1552 __func__, fract, minaf); | |
| 1553 *premove = 1; | |
| 1554 return 0; | |
| 1555 } | |
| 1556 | |
| 1557 return 0; | |
| 1558 } | |
| 1559 | |
| 1560 | |
| 1561 /*------------------------------------------------------------------------* | |
| 1562 * Postprocessing * | |
| 1563 *------------------------------------------------------------------------*/ | |
| 1564 /*! | |
| 1565 * \brief recogExtractNumbers() | |
| 1566 * | |
| 1567 * \param[in] recog | |
| 1568 * \param[in] boxas location of components | |
| 1569 * \param[in] scorethresh min score for which we accept a component | |
| 1570 * \param[in] spacethresh max horizontal distance allowed between digits; | |
| 1571 * use -1 for default | |
| 1572 * \param[out] pbaa [optional] bounding boxes of identified numbers | |
| 1573 * \param[out] pnaa [optional] scores of identified digits | |
| 1574 * \return sa of identified numbers, or NULL on error | |
| 1575 * | |
| 1576 * <pre> | |
| 1577 * Notes: | |
| 1578 * (1) This extracts digit data after recogIdentifyMultiple() or | |
| 1579 * lower-level identification has taken place. | |
| 1580 * (2) Each string in the returned sa contains a sequence of ascii | |
| 1581 * digits in a number. | |
| 1582 * (3) The horizontal distance between boxes (limited by %spacethresh) | |
| 1583 * is the negative of the horizontal overlap. | |
| 1584 * (4) Components with a score less than %scorethresh, which may | |
| 1585 * be hyphens or other small characters, will signal the | |
| 1586 * end of the current sequence of digits in the number. A typical | |
| 1587 * value for %scorethresh is 0.60. | |
| 1588 * (5) We allow two digits to be combined if these conditions apply: | |
| 1589 * (a) the first is to the left of the second | |
| 1590 * (b) the second has a horizontal separation less than %spacethresh | |
| 1591 * (c) the vertical overlap >= 0 (vertical separation < 0) | |
| 1592 * (d) both have a score that exceeds %scorethresh | |
| 1593 * (6) Each numa in the optionally returned naa contains the digit | |
| 1594 * scores of a number. Each boxa in the optionally returned baa | |
| 1595 * contains the bounding boxes of the digits in the number. | |
| 1596 * </pre> | |
| 1597 */ | |
| 1598 SARRAY * | |
| 1599 recogExtractNumbers(L_RECOG *recog, | |
| 1600 BOXA *boxas, | |
| 1601 l_float32 scorethresh, | |
| 1602 l_int32 spacethresh, | |
| 1603 BOXAA **pbaa, | |
| 1604 NUMAA **pnaa) | |
| 1605 { | |
| 1606 char *str, *text; | |
| 1607 l_int32 i, n, x1, x2, h_ovl, v_ovl, h_sep, v_sep; | |
| 1608 l_float32 score; | |
| 1609 BOX *box, *prebox; | |
| 1610 BOXA *ba = NULL; | |
| 1611 BOXAA *baa; | |
| 1612 NUMA *nascore, *na = NULL; | |
| 1613 NUMAA *naa; | |
| 1614 SARRAY *satext, *sa = NULL, *saout; | |
| 1615 | |
| 1616 if (pbaa) *pbaa = NULL; | |
| 1617 if (pnaa) *pnaa = NULL; | |
| 1618 if (!recog || !recog->rcha) | |
| 1619 return (SARRAY *)ERROR_PTR("recog and rcha not both defined", | |
| 1620 __func__, NULL); | |
| 1621 if (!boxas) | |
| 1622 return (SARRAY *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 1623 | |
| 1624 if (spacethresh < 0) | |
| 1625 spacethresh = L_MAX(recog->maxheight_u, 20); | |
| 1626 rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL); | |
| 1627 if (!nascore || !satext) { | |
| 1628 numaDestroy(&nascore); | |
| 1629 sarrayDestroy(&satext); | |
| 1630 return (SARRAY *)ERROR_PTR("nascore and satext not both returned", | |
| 1631 __func__, NULL); | |
| 1632 } | |
| 1633 | |
| 1634 saout = sarrayCreate(0); | |
| 1635 naa = numaaCreate(0); | |
| 1636 baa = boxaaCreate(0); | |
| 1637 prebox = NULL; | |
| 1638 n = numaGetCount(nascore); | |
| 1639 for (i = 0; i < n; i++) { | |
| 1640 numaGetFValue(nascore, i, &score); | |
| 1641 text = sarrayGetString(satext, i, L_NOCOPY); | |
| 1642 if (prebox == NULL) { /* no current run */ | |
| 1643 if (score < scorethresh) { | |
| 1644 continue; | |
| 1645 } else { /* start a number run */ | |
| 1646 sa = sarrayCreate(0); | |
| 1647 ba = boxaCreate(0); | |
| 1648 na = numaCreate(0); | |
| 1649 sarrayAddString(sa, text, L_COPY); | |
| 1650 prebox = boxaGetBox(boxas, i, L_CLONE); | |
| 1651 boxaAddBox(ba, prebox, L_COPY); | |
| 1652 numaAddNumber(na, score); | |
| 1653 } | |
| 1654 } else { /* in a current number run */ | |
| 1655 box = boxaGetBox(boxas, i, L_CLONE); | |
| 1656 boxGetGeometry(prebox, &x1, NULL, NULL, NULL); | |
| 1657 boxGetGeometry(box, &x2, NULL, NULL, NULL); | |
| 1658 boxOverlapDistance(box, prebox, &h_ovl, &v_ovl); | |
| 1659 h_sep = -h_ovl; | |
| 1660 v_sep = -v_ovl; | |
| 1661 boxDestroy(&prebox); | |
| 1662 if (x1 < x2 && h_sep <= spacethresh && | |
| 1663 v_sep < 0 && score >= scorethresh) { /* add to number */ | |
| 1664 sarrayAddString(sa, text, L_COPY); | |
| 1665 boxaAddBox(ba, box, L_COPY); | |
| 1666 numaAddNumber(na, score); | |
| 1667 prebox = box; | |
| 1668 } else { /* save the completed number */ | |
| 1669 str = sarrayToString(sa, 0); | |
| 1670 sarrayAddString(saout, str, L_INSERT); | |
| 1671 sarrayDestroy(&sa); | |
| 1672 boxaaAddBoxa(baa, ba, L_INSERT); | |
| 1673 numaaAddNuma(naa, na, L_INSERT); | |
| 1674 boxDestroy(&box); | |
| 1675 if (score >= scorethresh) { /* start a new number */ | |
| 1676 i--; | |
| 1677 continue; | |
| 1678 } | |
| 1679 } | |
| 1680 } | |
| 1681 } | |
| 1682 | |
| 1683 if (prebox) { /* save the last number */ | |
| 1684 str = sarrayToString(sa, 0); | |
| 1685 sarrayAddString(saout, str, L_INSERT); | |
| 1686 boxaaAddBoxa(baa, ba, L_INSERT); | |
| 1687 numaaAddNuma(naa, na, L_INSERT); | |
| 1688 sarrayDestroy(&sa); | |
| 1689 boxDestroy(&prebox); | |
| 1690 } | |
| 1691 | |
| 1692 numaDestroy(&nascore); | |
| 1693 sarrayDestroy(&satext); | |
| 1694 if (sarrayGetCount(saout) == 0) { | |
| 1695 sarrayDestroy(&saout); | |
| 1696 boxaaDestroy(&baa); | |
| 1697 numaaDestroy(&naa); | |
| 1698 L_INFO("saout has no identified text\n", __func__); | |
| 1699 return NULL; | |
| 1700 } | |
| 1701 | |
| 1702 if (pbaa) | |
| 1703 *pbaa = baa; | |
| 1704 else | |
| 1705 boxaaDestroy(&baa); | |
| 1706 if (pnaa) | |
| 1707 *pnaa = naa; | |
| 1708 else | |
| 1709 numaaDestroy(&naa); | |
| 1710 return saout; | |
| 1711 } | |
| 1712 | |
| 1713 /*! | |
| 1714 * \brief showExtractNumbers() | |
| 1715 * | |
| 1716 * \param[in] pixs input 1 bpp image | |
| 1717 * \param[in] sa recognized text strings | |
| 1718 * \param[in] baa boxa array for location of characters in each string | |
| 1719 * \param[in] naa numa array for scores of characters in each string | |
| 1720 * \param[out] ppixdb [optional] input pixs with identified chars outlined | |
| 1721 * \return pixa of identified strings with text and scores, or NULL on error | |
| 1722 * | |
| 1723 * <pre> | |
| 1724 * Notes: | |
| 1725 * (1) This is a debugging routine on digit identification; e.g.: | |
| 1726 * recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0); | |
| 1727 * sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa); | |
| 1728 * pixa = showExtractNumbers(pixs, sa, baa, naa, NULL); | |
| 1729 * </pre> | |
| 1730 */ | |
| 1731 PIXA * | |
| 1732 showExtractNumbers(PIX *pixs, | |
| 1733 SARRAY *sa, | |
| 1734 BOXAA *baa, | |
| 1735 NUMAA *naa, | |
| 1736 PIX **ppixdb) | |
| 1737 { | |
| 1738 char buf[128]; | |
| 1739 char *textstr, *scorestr; | |
| 1740 l_int32 i, j, n, nchar, len; | |
| 1741 l_float32 score; | |
| 1742 L_BMF *bmf; | |
| 1743 BOX *box1, *box2; | |
| 1744 BOXA *ba; | |
| 1745 NUMA *na; | |
| 1746 PIX *pix1, *pix2, *pix3, *pix4; | |
| 1747 PIXA *pixa; | |
| 1748 | |
| 1749 if (ppixdb) *ppixdb = NULL; | |
| 1750 if (!pixs) | |
| 1751 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1752 if (!sa) | |
| 1753 return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL); | |
| 1754 if (!baa) | |
| 1755 return (PIXA *)ERROR_PTR("baa not defined", __func__, NULL); | |
| 1756 if (!naa) | |
| 1757 return (PIXA *)ERROR_PTR("naa not defined", __func__, NULL); | |
| 1758 | |
| 1759 n = sarrayGetCount(sa); | |
| 1760 pixa = pixaCreate(n); | |
| 1761 bmf = bmfCreate(NULL, 6); | |
| 1762 if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1); | |
| 1763 for (i = 0; i < n; i++) { | |
| 1764 textstr = sarrayGetString(sa, i, L_NOCOPY); | |
| 1765 ba = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1766 na = numaaGetNuma(naa, i, L_CLONE); | |
| 1767 boxaGetExtent(ba, NULL, NULL, &box1); | |
| 1768 box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5); | |
| 1769 if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0); | |
| 1770 pix1 = pixClipRectangle(pixs, box1, NULL); | |
| 1771 len = strlen(textstr) + 1; | |
| 1772 pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len, | |
| 1773 5, 3, L_SET_WHITE); | |
| 1774 pix3 = pixConvertTo8(pix2, 1); | |
| 1775 nchar = numaGetCount(na); | |
| 1776 scorestr = NULL; | |
| 1777 for (j = 0; j < nchar; j++) { | |
| 1778 numaGetFValue(na, j, &score); | |
| 1779 snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score)); | |
| 1780 stringJoinIP(&scorestr, buf); | |
| 1781 if (j < nchar - 1) stringJoinIP(&scorestr, ","); | |
| 1782 } | |
| 1783 snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr); | |
| 1784 pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW); | |
| 1785 pixaAddPix(pixa, pix4, L_INSERT); | |
| 1786 boxDestroy(&box1); | |
| 1787 boxDestroy(&box2); | |
| 1788 pixDestroy(&pix1); | |
| 1789 pixDestroy(&pix2); | |
| 1790 pixDestroy(&pix3); | |
| 1791 boxaDestroy(&ba); | |
| 1792 numaDestroy(&na); | |
| 1793 LEPT_FREE(scorestr); | |
| 1794 } | |
| 1795 | |
| 1796 bmfDestroy(&bmf); | |
| 1797 return pixa; | |
| 1798 } | |
| 1799 | |
| 1800 | |
| 1801 /*------------------------------------------------------------------------* | |
| 1802 * Static debug helper * | |
| 1803 *------------------------------------------------------------------------*/ | |
| 1804 /*! | |
| 1805 * \brief l_showIndicatorSplitValues() | |
| 1806 * | |
| 1807 * \param[in] na1, na2, na3, na4, na5, na6 6 indicator array | |
| 1808 * | |
| 1809 * <pre> | |
| 1810 * Notes: | |
| 1811 * (1) The values indicate that specific criteria has been met | |
| 1812 * for component removal by pre-splitting filter.. | |
| 1813 * The 'result' line shows which components have been removed. | |
| 1814 * </pre> | |
| 1815 */ | |
| 1816 static void | |
| 1817 l_showIndicatorSplitValues(NUMA *na1, | |
| 1818 NUMA *na2, | |
| 1819 NUMA *na3, | |
| 1820 NUMA *na4, | |
| 1821 NUMA *na5, | |
| 1822 NUMA *na6) | |
| 1823 { | |
| 1824 l_int32 i, n; | |
| 1825 | |
| 1826 n = numaGetCount(na1); | |
| 1827 lept_stderr("================================================\n"); | |
| 1828 lept_stderr("lt minw: "); | |
| 1829 for (i = 0; i < n; i++) | |
| 1830 lept_stderr("%4d ", (l_int32)na1->array[i]); | |
| 1831 lept_stderr("\nlt minh: "); | |
| 1832 for (i = 0; i < n; i++) | |
| 1833 lept_stderr("%4d ", (l_int32)na2->array[i]); | |
| 1834 lept_stderr("\ngt maxh: "); | |
| 1835 for (i = 0; i < n; i++) | |
| 1836 lept_stderr("%4d ", (l_int32)na3->array[i]); | |
| 1837 lept_stderr("\ngt maxasp: "); | |
| 1838 for (i = 0; i < n; i++) | |
| 1839 lept_stderr("%4d ", (l_int32)na4->array[i]); | |
| 1840 lept_stderr("\nlt minaf: "); | |
| 1841 for (i = 0; i < n; i++) | |
| 1842 lept_stderr("%4d ", (l_int32)na5->array[i]); | |
| 1843 lept_stderr("\n------------------------------------------------"); | |
| 1844 lept_stderr("\nresult: "); | |
| 1845 for (i = 0; i < n; i++) | |
| 1846 lept_stderr("%4d ", (l_int32)na6->array[i]); | |
| 1847 lept_stderr("\n================================================\n"); | |
| 1848 } |
