Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/recogident.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/recogident.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1848 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file recogident.c + * <pre> + * + * Top-level identification + * l_int32 recogIdentifyMultiple() + * + * Segmentation and noise removal + * l_int32 recogSplitIntoCharacters() + * + * Greedy character splitting + * l_int32 recogCorrelationBestRow() + * l_int32 recogCorrelationBestChar() + * static l_int32 pixCorrelationBestShift() + * + * Low-level identification of single characters + * l_int32 recogIdentifyPixa() + * l_int32 recogIdentifyPix() + * l_int32 recogSkipIdentify() + * + * Operations for handling identification results + * static L_RCHA *rchaCreate() + * void rchaDestroy() + * static L_RCH *rchCreate() + * void rchDestroy() + * l_int32 rchaExtract() + * l_int32 rchExtract() + * static l_int32 transferRchToRcha() + * + * Preprocessing and filtering + * l_int32 recogProcessToIdentify() + * static PIX *recogPreSplittingFilter() + * static PIX *recogSplittingFilter() + * + * Postprocessing + * SARRAY *recogExtractNumbers() + * PIX *showExtractNumbers() + * + * Static debug helper + * static void l_showIndicatorSplitValues() + * + * See recogbasic.c for examples of training a recognizer, which is + * required before it can be used for identification. + * + * The character splitter repeatedly does a greedy correlation with each + * averaged unscaled template, at all pixel locations along the text to + * be identified. The vertical alignment is between the template + * centroid and the (moving) windowed centroid, including a delta of + * 1 pixel above and below. The best match then removes part of the + * input image, leaving 1 or 2 pieces, which, after filtering, + * are put in a queue. The process ends when the queue is empty. + * The filtering is based on the size and aspect ratio of the + * remaining pieces; the intent is to remove anything that is + * unlikely to be text, such as small pieces and line graphics. + * + * After splitting, the selected segments are identified using + * the input parameters that were initially specified for the + * recognizer. Unlike the splitter, which uses the averaged + * templates from the unscaled input, the recognizer can use + * either all training examples or averaged templates, and these + * can be either scaled or unscaled. These choices are specified + * when the recognizer is constructed. + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> +#include "allheaders.h" +#include "array_internal.h" + + /* There are two methods for splitting characters: DID and greedy. + * The default method is DID. */ +#define SPLIT_WITH_DID 1 + + /* Padding on pix1: added before correlations and removed from result */ +static const l_int32 LeftRightPadding = 32; + + /* Parameters for filtering and sorting connected components in splitter */ +static const l_float32 MinFillFactor = 0.10f; +static const l_int32 DefaultMinHeight = 15; /* min unscaled height */ +static const l_int32 MinOverlap1 = 6; /* in pass 1 of boxaSort2d() */ +static const l_int32 MinOverlap2 = 6; /* in pass 2 of boxaSort2d() */ +static const l_int32 MinHeightPass1 = 5; /* min height to start pass 1 */ + + +static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1, + NUMA *namoment1, l_int32 area2, + l_int32 ycent2, l_int32 maxyshift, + l_int32 *tab8, l_int32 *pdelx, + l_int32 *pdely, l_float32 *pscore, + l_int32 debugflag ); +static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text, + l_int32 sample, l_int32 xloc, l_int32 yloc, + l_int32 width); +static L_RCHA *rchaCreate(); +static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha); +static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, + l_float32 minaf, l_int32 debug); +static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min, + l_float32 minaf, l_int32 *premove, + l_int32 debug); +static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3, + NUMA *na4, NUMA *na5, NUMA *na6); + +/*------------------------------------------------------------------------* + * Identification + *------------------------------------------------------------------------*/ +/*! + * \brief recogIdentifyMultiple() + * + * \param[in] recog with training finished + * \param[in] pixs containing typically a small number of characters + * \param[in] minh remove shorter components; use 0 for default + * \param[in] skipsplit 1 to skip the splitting step + * \param[out] pboxa [optional] locations of identified components + * \param[out] ppixa [optional] images of identified components + * \param[out] ppixdb [optional] debug pix: inputs and best fits + * \param[in] debugsplit 1 returns pix split debugging images + * \return 0 if OK; 1 if nothing is found; 2 for other errors. + * + * <pre> + * Notes: + * (1) This filters the input pixa and calls recogIdentifyPixa() + * (2) Splitting is relatively slow, because it tries to match all + * character templates to all locations. This step can be skipped. + * (3) An attempt is made to order the (optionally) returned images + * and boxes in 2-dimensional sorted order. These can then + * be used to aggregate identified characters into numbers or words. + * One typically wants the pixa, which contains a boxa of the + * extracted subimages. + * </pre> + */ +l_ok +recogIdentifyMultiple(L_RECOG *recog, + PIX *pixs, + l_int32 minh, + l_int32 skipsplit, + BOXA **pboxa, + PIXA **ppixa, + PIX **ppixdb, + l_int32 debugsplit) +{ +l_int32 n; +BOXA *boxa; +PIX *pixb; +PIXA *pixa; + + if (pboxa) *pboxa = NULL; + if (ppixa) *ppixa = NULL; + if (ppixdb) *ppixdb = NULL; + if (!recog) + return ERROR_INT("recog not defined", __func__, 2); + if (!recog->train_done) + return ERROR_INT("training not finished", __func__, 2); + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 2); + + /* Binarize if necessary */ + if (pixGetDepth(pixs) > 1) + pixb = pixConvertTo1(pixs, recog->threshold); + else + pixb = pixClone(pixs); + + /* Noise removal and splitting of touching characters */ + recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa, + debugsplit); + pixDestroy(&pixb); + if (!pixa || (n = pixaGetCount(pixa)) == 0) { + pixaDestroy(&pixa); + boxaDestroy(&boxa); + L_WARNING("nothing found\n", __func__); + return 1; + } + + recogIdentifyPixa(recog, pixa, ppixdb); + if (pboxa) + *pboxa = boxa; + else + boxaDestroy(&boxa); + if (ppixa) + *ppixa = pixa; + else + pixaDestroy(&pixa); + return 0; +} + + +/*------------------------------------------------------------------------* + * Segmentation and noise removal * + *------------------------------------------------------------------------*/ +/*! + * \brief recogSplitIntoCharacters() + * + * \param[in] recog + * \param[in] pixs 1 bpp, contains only mostly deskewed text + * \param[in] minh remove shorter components; use 0 for default + * \param[in] skipsplit 1 to skip the splitting step + * \param[out] pboxa character bounding boxes + * \param[out] ppixa character images + * \param[in] debug 1 for results written to pixadb_split + * \return 0 if OK, 1 on error or if no components are returned + * + * <pre> + * Notes: + * (1) This can be given an image that has an arbitrary number + * of text characters. It optionally splits connected + * components based on document image decoding in recogDecode(). + * The returned pixa includes the boxes from which the + * (possibly split) components are extracted. + * (2) After noise filtering, the resulting components are put in + * row-major (2D) order, and the smaller of overlapping + * components are removed if they satisfy conditions of + * relative size and fractional overlap. + * (3) Note that the splitting function uses unscaled templates + * and does not bother returning the class results and scores. + * These are more accurately found later using the scaled templates. + * </pre> + */ +l_ok +recogSplitIntoCharacters(L_RECOG *recog, + PIX *pixs, + l_int32 minh, + l_int32 skipsplit, + BOXA **pboxa, + PIXA **ppixa, + l_int32 debug) +{ +static l_int32 ind = 0; +char buf[32]; +l_int32 i, xoff, yoff, empty, maxw, bw, ncomp, scaling; +BOX *box; +BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxad; +BOXAA *baa; +PIX *pix, *pix1, *pix2, *pix3; +PIXA *pixa; + + lept_mkdir("lept/recog"); + + if (pboxa) *pboxa = NULL; + if (ppixa) *ppixa = NULL; + if (!pboxa || !ppixa) + return ERROR_INT("&boxa and &pixa not defined", __func__, 1); + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!recog->train_done) + return ERROR_INT("training not finished", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + if (minh <= 0) minh = DefaultMinHeight; + pixZero(pixs, &empty); + if (empty) return 1; + + /* Small vertical close for consolidation. Don't do a horizontal + * closing, because it might join separate characters. */ + pix1 = pixMorphSequence(pixs, "c1.3", 0); + + /* Carefully filter out noise */ + pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug); + pixDestroy(&pix1); + + /* Get the 8-connected components to be split/identified */ + boxa1 = pixConnComp(pix2, NULL, 8); + pixDestroy(&pix2); + ncomp = boxaGetCount(boxa1); + if (ncomp == 0) { + boxaDestroy(&boxa1); + L_WARNING("all components removed\n", __func__); + return 1; + } + + /* Save everything and split the large components */ + boxa2 = boxaCreate(ncomp); + maxw = recog->maxwidth_u + 5; + scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; + pixa = (debug) ? pixaCreate(ncomp) : NULL; + for (i = 0; i < ncomp; i++) { + box = boxaGetBox(boxa1, i, L_CLONE); + boxGetGeometry(box, &xoff, &yoff, &bw, NULL); + /* Treat as one character if it is small, if the images + * have been scaled, or if splitting is not to be run. */ + if (bw <= maxw || scaling || skipsplit) { + boxaAddBox(boxa2, box, L_INSERT); + } else { + pix = pixClipRectangle(pixs, box, NULL); +#if SPLIT_WITH_DID + if (!debug) { + boxa3 = recogDecode(recog, pix, 2, NULL); + } else { + boxa3 = recogDecode(recog, pix, 2, &pix2); + pixaAddPix(pixa, pix2, L_INSERT); + } +#else /* use greedy splitting */ + recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL, + NULL, debug); + if (debug) { + pix2 = pixConvertTo32(pix); + pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); + pixaAddPix(pixa, pix2, L_INSERT); + } +#endif /* SPLIT_WITH_DID */ + pixDestroy(&pix); + boxDestroy(&box); + if (!boxa3) { + L_ERROR("boxa3 not found for component %d\n", __func__, i); + } else { + boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0); + boxaJoin(boxa2, boxa4, 0, -1); + boxaDestroy(&boxa3); + boxaDestroy(&boxa4); + } + } + } + boxaDestroy(&boxa1); + if (pixa) { /* debug */ + pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2); + snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++); + pixWrite(buf, pix3, IFF_PNG); + pixaDestroy(&pixa); + pixDestroy(&pix3); + } + + /* Do a 2D sort on the bounding boxes, and flatten the result to 1D. + * For the 2D sort, to add a box to an existing boxa, we require + * specified minimum vertical overlaps for the first two passes + * of the 2D sort. In pass 1, only components with sufficient + * height can start a new boxa. */ + baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1); + boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE); + boxaaDestroy(&baa); + boxaDestroy(&boxa2); + + /* Remove smaller components of overlapping pairs. + * We only remove the small component if the overlap is + * at least half its area and if its area is no more + * than 30% of the area of the large component. Because the + * components are in a flattened 2D sort, we don't need to + * look far ahead in the array to find all overlapping boxes; + * 10 boxes is plenty. */ + boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5f, 0.3f, NULL); + boxaDestroy(&boxa3); + + /* Extract and save the image pieces from the input image. */ + *ppixa = pixClipRectangles(pixs, boxad); + *pboxa = boxad; + return 0; +} + + +/*------------------------------------------------------------------------* + * Greedy character splitting * + *------------------------------------------------------------------------*/ +/*! + * \brief recogCorrelationBestRow() + * + * \param[in] recog with LUT's pre-computed + * \param[in] pixs typically of multiple touching characters, 1 bpp + * \param[out] pboxa bounding boxs of best fit character + * \param[out] pnascore [optional] correlation scores + * \param[out] pnaindex [optional] indices of classes + * \param[out] psachar [optional] array of character strings + * \param[in] debug 1 for results written to pixadb_split + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Supervises character matching for (in general) a c.c with + * multiple touching characters. Finds the best match greedily. + * Rejects small parts that are left over after splitting. + * (2) Matching is to the average, and without character scaling. + * </pre> + */ +l_ok +recogCorrelationBestRow(L_RECOG *recog, + PIX *pixs, + BOXA **pboxa, + NUMA **pnascore, + NUMA **pnaindex, + SARRAY **psachar, + l_int32 debug) +{ +char *charstr; +l_int32 index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3; +l_float32 score; +BOX *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt; +BOXA *boxat; +NUMA *nascoret, *naindext, *nasort; +PIX *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd; +PIXA *pixar, *pixadb; +SARRAY *sachart; + +l_int32 iter; + + if (pnascore) *pnascore = NULL; + if (pnaindex) *pnaindex = NULL; + if (psachar) *psachar = NULL; + if (!pboxa) + return ERROR_INT("&boxa not defined", __func__, 1); + *pboxa = NULL; + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + if (pixGetWidth(pixs) < recog->minwidth_u - 4) + return ERROR_INT("pixs too narrow", __func__, 1); + if (!recog->train_done) + return ERROR_INT("training not finished", __func__, 1); + + /* Binarize and crop to foreground if necessary */ + pixb = recogProcessToIdentify(recog, pixs, 0); + + /* Initialize the arrays */ + boxat = boxaCreate(4); + nascoret = numaCreate(4); + naindext = numaCreate(4); + sachart = sarrayCreate(4); + pixadb = (debug) ? pixaCreate(4) : NULL; + + /* Initialize the images remaining to be processed with the input. + * These are stored in pixar, which is used here as a queue, + * on which we only put image fragments that are large enough to + * contain at least one character. */ + pixar = pixaCreate(1); + pixGetDimensions(pixb, &w, &h, NULL); + box = boxCreate(0, 0, w, h); + pixaAddPix(pixar, pixb, L_INSERT); + pixaAddBox(pixar, box, L_INSERT); + + /* Successively split on the best match until nothing is left. + * To be safe, we limit the search to 10 characters. */ + for (iter = 0; iter < 11; iter++) { + if (pixaGetCount(pixar) == 0) + break; + if (iter == 10) { + L_WARNING("more than 10 chars; ending search\n", __func__); + break; + } + + /* Pop one from the queue */ + pixaRemovePixAndSave(pixar, 0, &pixc, &boxc); + boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL); + + /* This is a single component; if noise, remove it */ + recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug); + if (debug) + lept_stderr("iter = %d, removed = %d\n", iter, remove); + if (remove) { + pixDestroy(&pixc); + boxDestroy(&boxc); + continue; + } + + /* Find the best character match */ + if (debug) { + recogCorrelationBestChar(recog, pixc, &box, &score, + &index, &charstr, &pixdb); + pixaAddPix(pixadb, pixdb, L_INSERT); + } else { + recogCorrelationBestChar(recog, pixc, &box, &score, + &index, &charstr, NULL); + } + + /* Find the box in original coordinates, and append + * the results to the arrays. */ + boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0); + boxaAddBox(boxat, boxtrans, L_INSERT); + numaAddNumber(nascoret, score); + numaAddNumber(naindext, index); + sarrayAddString(sachart, charstr, L_INSERT); + + /* Split the current pixc into three regions and save + * each region if it is large enough. */ + boxGetGeometry(box, &bx, NULL, &bw, NULL); + w1 = bx; + w2 = bw; + w3 = bwc - bx - bw; + if (debug) + lept_stderr(" w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3); + if (w1 < recog->minwidth_u - 4) { + if (debug) L_INFO("discarding width %d on left\n", __func__, w1); + } else { /* extract and save left region */ + boxl = boxCreate(0, 0, bx + 1, h); + pixl = pixClipRectangle(pixc, boxl, NULL); + boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0); + pixaAddPix(pixar, pixl, L_INSERT); + pixaAddBox(pixar, boxlt, L_INSERT); + boxDestroy(&boxl); + } + if (w3 < recog->minwidth_u - 4) { + if (debug) L_INFO("discarding width %d on right\n", __func__, w3); + } else { /* extract and save left region */ + boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h); + pixr = pixClipRectangle(pixc, boxr, NULL); + boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0); + pixaAddPix(pixar, pixr, L_INSERT); + pixaAddBox(pixar, boxrt, L_INSERT); + boxDestroy(&boxr); + } + pixDestroy(&pixc); + boxDestroy(&box); + boxDestroy(&boxc); + } + pixaDestroy(&pixar); + + + /* Sort the output results by left-to-right in the boxa */ + *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort); + if (pnascore) + *pnascore = numaSortByIndex(nascoret, nasort); + if (pnaindex) + *pnaindex = numaSortByIndex(naindext, nasort); + if (psachar) + *psachar = sarraySortByIndex(sachart, nasort); + numaDestroy(&nasort); + boxaDestroy(&boxat); + numaDestroy(&nascoret); + numaDestroy(&naindext); + sarrayDestroy(&sachart); + + /* Final debug output */ + if (debug) { + pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2); + pixDisplay(pixd, 400, 400); + pixaAddPix(recog->pixadb_split, pixd, L_INSERT); + pixaDestroy(&pixadb); + } + return 0; +} + + +/*! + * \brief recogCorrelationBestChar() + * + * \param[in] recog with LUT's pre-computed + * \param[in] pixs can be of multiple touching characters, 1 bpp + * \param[out] pbox bounding box of best fit character + * \param[out] pscore correlation score + * \param[out] pindex [optional] index of class + * \param[out] pcharstr [optional] character string of class + * \param[out] ppixdb [optional] debug pix showing input and best fit + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Basic matching character splitter. Finds the best match among + * all templates to some region of the image. This can result + * in splitting the image into two parts. This is "image decoding" + * without dynamic programming, because we don't use a setwidth + * and compute the best matching score for the entire image. + * (2) Matching is to the average templates, without character scaling. + * </pre> + */ +l_ok +recogCorrelationBestChar(L_RECOG *recog, + PIX *pixs, + BOX **pbox, + l_float32 *pscore, + l_int32 *pindex, + char **pcharstr, + PIX **ppixdb) +{ +l_int32 i, n, w1, h1, w2, area2, ycent2, delx, dely; +l_int32 bestdelx, bestdely, bestindex; +l_float32 score, bestscore; +BOX *box; +BOXA *boxa; +NUMA *nasum, *namoment; +PIX *pix1, *pix2; + + if (pindex) *pindex = 0; + if (pcharstr) *pcharstr = NULL; + if (ppixdb) *ppixdb = NULL; + if (pbox) *pbox = NULL; + if (pscore) *pscore = 0.0; + if (!pbox || !pscore) + return ERROR_INT("&box and &score not both defined", __func__, 1); + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + if (!recog->train_done) + return ERROR_INT("training not finished", __func__, 1); + + /* Binarize and crop to foreground if necessary. Add padding + * to both the left and right side; this is compensated for + * when reporting the bounding box of the best matched character. */ + pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding); + pixGetDimensions(pix1, &w1, &h1, NULL); + + /* Compute vertical sum and moment arrays */ + nasum = pixCountPixelsByColumn(pix1); + namoment = pixGetMomentByColumn(pix1, 1); + + /* Do shifted correlation against all averaged templates. */ + n = recog->setsize; + boxa = boxaCreate(n); /* location of best fits for each character */ + bestscore = 0.0; + bestindex = bestdelx = bestdely = 0; + for (i = 0; i < n; i++) { + pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE); + w2 = pixGetWidth(pix2); + /* Note that the slightly expended w1 is typically larger + * than w2 (the template). */ + if (w1 >= w2) { + numaGetIValue(recog->nasum_u, i, &area2); + ptaGetIPt(recog->pta_u, i, NULL, &ycent2); + pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2, + recog->maxyshift, recog->sumtab, &delx, + &dely, &score, 1); + if (ppixdb) { + lept_stderr( + "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n", + i, delx, dely, score); + } + /* Compensate for padding */ + box = boxCreate(delx - LeftRightPadding, 0, w2, h1); + if (score > bestscore) { + bestscore = score; + bestdelx = delx - LeftRightPadding; + bestdely = dely; + bestindex = i; + } + } else { + box = boxCreate(0, 0, 1, 1); /* placeholder */ + if (ppixdb) + lept_stderr("Component too thin: w1 = %d, w2 = %d\n", w1, w2); + } + boxaAddBox(boxa, box, L_INSERT); + pixDestroy(&pix2); + } + + *pscore = bestscore; + *pbox = boxaGetBox(boxa, bestindex, L_COPY); + if (pindex) *pindex = bestindex; + if (pcharstr) + recogGetClassString(recog, bestindex, pcharstr); + + if (ppixdb) { + L_INFO("Best match: class %d; shifts (%d, %d)\n", + __func__, bestindex, bestdelx, bestdely); + pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE); + *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); + pixDestroy(&pix2); + } + + pixDestroy(&pix1); + boxaDestroy(&boxa); + numaDestroy(&nasum); + numaDestroy(&namoment); + return 0; +} + + +/*! + * \brief pixCorrelationBestShift() + * + * \param[in] pix1 1 bpp, the unknown image; typically larger + * \param[in] pix2 1 bpp, the matching template image) + * \param[in] nasum1 vertical column pixel sums for pix1 + * \param[in] namoment1 vertical column first moment of pixels for pix1 + * \param[in] area2 number of on pixels in pix2 + * \param[in] ycent2 y component of centroid of pix2 + * \param[in] maxyshift max y shift of pix2 around the location where + * the centroids of pix2 and a windowed part of pix1 + * are vertically aligned + * \param[in] tab8 [optional] sum tab for ON pixels in byte; + * can be NULL + * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 + * \param[out] pdely [optional] best y shift of pix2 relative to pix1 + * \param[out] pscore [optional] maximum score found; can be NULL + * \param[in] debugflag <= 0 to skip; positive to generate output; + * the integer is used to label the debug image. + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This maximizes the correlation score between two 1 bpp images, + * one of which is typically wider. In a typical example, + * pix1 is a bitmap of 2 or more touching characters and pix2 is + * a single character template. This finds the location of pix2 + * that gives the largest correlation. + * (2) The windowed area of fg pixels and windowed first moment + * in the y direction are computed from the input sum and moment + * column arrays, %nasum1 and %namoment1 + * (3) This is a brute force operation. We compute the correlation + * at every x shift for which pix2 fits entirely within pix1, + * and where the centroid of pix2 is aligned, within +-maxyshift, + * with the centroid of a window of pix1 of the same width. + * The correlation is taken over the full height of pix1. + * This can be made more efficient. + * </pre> + */ +static l_int32 +pixCorrelationBestShift(PIX *pix1, + PIX *pix2, + NUMA *nasum1, + NUMA *namoment1, + l_int32 area2, + l_int32 ycent2, + l_int32 maxyshift, + l_int32 *tab8, + l_int32 *pdelx, + l_int32 *pdely, + l_float32 *pscore, + l_int32 debugflag) +{ +l_int32 w1, w2, h1, h2, i, j, nx, shifty, delx, dely; +l_int32 sum, moment, count; +l_int32 *tab, *area1, *arraysum, *arraymoment; +l_float32 maxscore, score; +l_float32 *ycent1; +FPIX *fpix = NULL; +PIX *pixt, *pixt1, *pixt2; + + if (pdelx) *pdelx = 0; + if (pdely) *pdely = 0; + if (pscore) *pscore = 0.0; + if (!pix1 || pixGetDepth(pix1) != 1) + return ERROR_INT("pix1 not defined or not 1 bpp", __func__, 1); + if (!pix2 || pixGetDepth(pix2) != 1) + return ERROR_INT("pix2 not defined or not 1 bpp", __func__, 1); + if (!nasum1 || !namoment1) + return ERROR_INT("nasum1 and namoment1 not both defined", __func__, 1); + if (area2 <= 0 || ycent2 <= 0) + return ERROR_INT("area2 and ycent2 must be > 0", __func__, 1); + + /* If pix1 (the unknown image) is narrower than pix2, + * don't bother to try the match. pix1 is already padded with + * 2 pixels on each side. */ + pixGetDimensions(pix1, &w1, &h1, NULL); + pixGetDimensions(pix2, &w2, &h2, NULL); + if (w1 < w2) { + if (debugflag > 0) { + L_INFO("skipping match with w1 = %d and w2 = %d\n", + __func__, w1, w2); + } + return 0; + } + nx = w1 - w2 + 1; + + if (debugflag > 0) + fpix = fpixCreate(nx, 2 * maxyshift + 1); + if (!tab8) + tab = makePixelSumTab8(); + else + tab = tab8; + + /* Set up the arrays for area1 and ycent1. We have to do this + * for each template (pix2) because the window width is w2. */ + area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); + ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_int32)); + arraysum = numaGetIArray(nasum1); + arraymoment = numaGetIArray(namoment1); + for (i = 0, sum = 0, moment = 0; i < w2; i++) { + sum += arraysum[i]; + moment += arraymoment[i]; + } + for (i = 0; i < nx - 1; i++) { + area1[i] = sum; + ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; + sum += arraysum[w2 + i] - arraysum[i]; + moment += arraymoment[w2 + i] - arraymoment[i]; + } + area1[nx - 1] = sum; + ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; + + /* Find the best match location for pix2. At each location, + * to insure that pixels are ON only within the intersection of + * pix and the shifted pix2: + * (1) Start with pixt cleared and equal in size to pix1. + * (2) Blit the shifted pix2 onto pixt. Then all ON pixels + * are within the intersection of pix1 and the shifted pix2. + * (3) AND pix1 with pixt. */ + pixt = pixCreate(w2, h1, 1); + maxscore = 0; + delx = 0; + dely = 0; /* amount to shift pix2 relative to pix1 to get alignment */ + for (i = 0; i < nx; i++) { + shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); + for (j = -maxyshift; j <= maxyshift; j++) { + pixClearAll(pixt); + pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0); + pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); + pixCountPixels(pixt, &count, tab); + score = (l_float32)count * (l_float32)count / + ((l_float32)area1[i] * (l_float32)area2); + if (score > maxscore) { + maxscore = score; + delx = i; + dely = shifty + j; + } + + if (debugflag > 0) + fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score); + } + } + + if (debugflag > 0) { + char buf[128]; + lept_mkdir("lept/recog"); + pixt1 = fpixDisplayMaxDynamicRange(fpix); + pixt2 = pixExpandReplicate(pixt1, 5); + snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag); + pixWrite(buf, pixt2, IFF_PNG); + pixDestroy(&pixt1); + pixDestroy(&pixt2); + fpixDestroy(&fpix); + } + + if (pdelx) *pdelx = delx; + if (pdely) *pdely = dely; + if (pscore) *pscore = maxscore; + if (!tab8) LEPT_FREE(tab); + LEPT_FREE(area1); + LEPT_FREE(ycent1); + LEPT_FREE(arraysum); + LEPT_FREE(arraymoment); + pixDestroy(&pixt); + return 0; +} + + +/*------------------------------------------------------------------------* + * Low-level identification * + *------------------------------------------------------------------------*/ +/*! + * \brief recogIdentifyPixa() + * + * \param[in] recog + * \param[in] pixa of 1 bpp images to match + * \param[out] ppixdb [optional] pix showing inputs and best fits + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This should be called by recogIdentifyMuliple(), which + * binarizes and splits characters before sending %pixa here. + * (2) This calls recogIdentifyPix(), which does the same operation + * on each pix in %pixa, and optionally returns the arrays + * of results (scores, class index and character string) + * for the best correlation match. + * </pre> + */ +l_ok +recogIdentifyPixa(L_RECOG *recog, + PIXA *pixa, + PIX **ppixdb) +{ +char *text; +l_int32 i, n, fail, index, depth; +l_float32 score; +PIX *pix1, *pix2, *pix3; +PIXA *pixa1; +L_RCH *rch; + + if (ppixdb) *ppixdb = NULL; + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!pixa) + return ERROR_INT("pixa not defined", __func__, 1); + + /* Run the recognizer on the set of images. This writes + * the text string into each pix in pixa. */ + n = pixaGetCount(pixa); + rchaDestroy(&recog->rcha); + recog->rcha = rchaCreate(); + pixa1 = (ppixdb) ? pixaCreate(n) : NULL; + depth = 1; + for (i = 0; i < n; i++) { + pix1 = pixaGetPix(pixa, i, L_CLONE); + pix2 = NULL; + fail = FALSE; + if (!ppixdb) + fail = recogIdentifyPix(recog, pix1, NULL); + else + fail = recogIdentifyPix(recog, pix1, &pix2); + if (fail) + recogSkipIdentify(recog); + if ((rch = recog->rch) == NULL) { + L_ERROR("rch not found for char %d\n", __func__, i); + pixDestroy(&pix1); + pixDestroy(&pix2); + continue; + } + rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL); + pixSetText(pix1, text); + LEPT_FREE(text); + if (ppixdb) { + rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL); + pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score); + if (i == 0) depth = pixGetDepth(pix3); + pixaAddPix(pixa1, pix3, L_INSERT); + pixDestroy(&pix2); + } + transferRchToRcha(rch, recog->rcha); + pixDestroy(&pix1); + } + + /* Package the images for debug */ + if (ppixdb) { + *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1); + pixaDestroy(&pixa1); + } + + return 0; +} + + +/*! + * \brief recogIdentifyPix() + * + * \param[in] recog with LUT's pre-computed + * \param[in] pixs of a single character, 1 bpp + * \param[out] ppixdb [optional] debug pix showing input and best fit + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Basic recognition function for a single character. + * (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default + * situation, matching is attempted to every bitmap in the recog, + * and the identify of the best match is returned. + * (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and + * matching is only attemplted to the averaged bitmaps. For this + * case, the index of the bestsample is meaningless (0 is returned + * if requested). + * (4) The score is related to the confidence (probability of correct + * identification), in that a higher score is correlated with + * a higher probability. However, the actual relation between + * the correlation (score) and the probability is not known; + * we call this a "score" because "confidence" can be misinterpreted + * as an actual probability. + * </pre> + */ +l_ok +recogIdentifyPix(L_RECOG *recog, + PIX *pixs, + PIX **ppixdb) +{ +char *text; +l_int32 i, j, n, bestindex, bestsample, area1, area2, ret; +l_int32 shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift; +l_float32 x1, y1, x2, y2, delx, dely, score, maxscore; +NUMA *numa; +PIX *pix0, *pix1, *pix2; +PIXA *pixa; +PTA *pta; + + if (ppixdb) *ppixdb = NULL; + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + + /* Do the averaging if required and not yet done. */ + if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) { + ret = recogAverageSamples(recog, 0); + if (ret) + return ERROR_INT("averaging failed", __func__, 1); + } + + /* Binarize and crop to foreground if necessary */ + if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL) + return ERROR_INT("no fg pixels in pix0", __func__, 1); + + /* Optionally scale and/or convert to fixed stroke width */ + pix1 = recogModifyTemplate(recog, pix0); + pixDestroy(&pix0); + if (!pix1) + return ERROR_INT("no fg pixels in pix1", __func__, 1); + + /* Do correlation at all positions within +-maxyshift of + * the nominal centroid alignment. */ + pixCountPixels(pix1, &area1, recog->sumtab); + pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1); + bestindex = bestsample = bestdelx = bestdely = bestwidth = 0; + maxscore = 0.0; + maxyshift = recog->maxyshift; + if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { + for (i = 0; i < recog->setsize; i++) { + numaGetIValue(recog->nasum, i, &area2); + if (area2 == 0) continue; /* no template available */ + pix2 = pixaGetPix(recog->pixa, i, L_CLONE); + ptaGetPt(recog->pta, i, &x2, &y2); + delx = x1 - x2; + dely = y1 - y2; + for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { + for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { + pixCorrelationScoreSimple(pix1, pix2, area1, area2, + delx + shiftx, dely + shifty, + 5, 5, recog->sumtab, &score); + if (score > maxscore) { + bestindex = i; + bestdelx = delx + shiftx; + bestdely = dely + shifty; + maxscore = score; + } + } + } + pixDestroy(&pix2); + } + } else { /* use all the samples */ + for (i = 0; i < recog->setsize; i++) { + pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); + n = pixaGetCount(pixa); + if (n == 0) { + pixaDestroy(&pixa); + continue; + } + numa = numaaGetNuma(recog->naasum, i, L_CLONE); + pta = ptaaGetPta(recog->ptaa, i, L_CLONE); + for (j = 0; j < n; j++) { + pix2 = pixaGetPix(pixa, j, L_CLONE); + numaGetIValue(numa, j, &area2); + ptaGetPt(pta, j, &x2, &y2); + delx = x1 - x2; + dely = y1 - y2; + for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { + for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { + pixCorrelationScoreSimple(pix1, pix2, area1, area2, + delx + shiftx, dely + shifty, + 5, 5, recog->sumtab, &score); + if (score > maxscore) { + bestindex = i; + bestsample = j; + bestdelx = delx + shiftx; + bestdely = dely + shifty; + maxscore = score; + bestwidth = pixGetWidth(pix2); + } + } + } + pixDestroy(&pix2); + } + pixaDestroy(&pixa); + numaDestroy(&numa); + ptaDestroy(&pta); + } + } + + /* Package up the results */ + recogGetClassString(recog, bestindex, &text); + rchDestroy(&recog->rch); + recog->rch = rchCreate(bestindex, maxscore, text, bestsample, + bestdelx, bestdely, bestwidth); + + if (ppixdb) { + if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { + L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n", + __func__, text, bestindex, bestdelx, bestdely, maxscore); + pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE); + } else { /* L_USE_ALL_TEMPLATES */ + L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n", + __func__, text, bestsample, bestindex, maxscore); + if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) { + L_INFO(" Best shift: (%d, %d)\n", + __func__, bestdelx, bestdely); + } + pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE); + } + *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); + pixDestroy(&pix2); + } + + pixDestroy(&pix1); + return 0; +} + + +/*! + * \brief recogSkipIdentify() + * + * \param[in] recog + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This just writes a "dummy" result with 0 score and empty + * string id into the rch. + * </pre> + */ +l_ok +recogSkipIdentify(L_RECOG *recog) +{ + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + + /* Package up placeholder results */ + rchDestroy(&recog->rch); + recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0); + return 0; +} + + +/*------------------------------------------------------------------------* + * Operations for handling identification results * + *------------------------------------------------------------------------*/ +/*! + * \brief rchaCreate() + * + * Return: 0 if OK, 1 on error + * + * Notes: + * (1) Be sure to destroy any existing rcha before assigning this. + */ +static L_RCHA * +rchaCreate() +{ +L_RCHA *rcha; + + rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA)); + rcha->naindex = numaCreate(0); + rcha->nascore = numaCreate(0); + rcha->satext = sarrayCreate(0); + rcha->nasample = numaCreate(0); + rcha->naxloc = numaCreate(0); + rcha->nayloc = numaCreate(0); + rcha->nawidth = numaCreate(0); + return rcha; +} + + +/*! + * \brief rchaDestroy() + * + * \param[in,out] prcha to be nulled + */ +void +rchaDestroy(L_RCHA **prcha) +{ +L_RCHA *rcha; + + if (prcha == NULL) { + L_WARNING("&rcha is null!\n", __func__); + return; + } + if ((rcha = *prcha) == NULL) + return; + + numaDestroy(&rcha->naindex); + numaDestroy(&rcha->nascore); + sarrayDestroy(&rcha->satext); + numaDestroy(&rcha->nasample); + numaDestroy(&rcha->naxloc); + numaDestroy(&rcha->nayloc); + numaDestroy(&rcha->nawidth); + LEPT_FREE(rcha); + *prcha = NULL; +} + + +/*! + * \brief rchCreate() + * + * \param[in] index index of best template + * \param[in] score correlation score of best template + * \param[in] text character string of best template + * \param[in] sample index of best sample; -1 if averages are used + * \param[in] xloc x-location of template: delx + shiftx + * \param[in] yloc y-location of template: dely + shifty + * \param[in] width width of best template + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Be sure to destroy any existing rch before assigning this. + * (2) This stores the text string, not a copy of it, so the + * caller must not destroy the string. + * </pre> + */ +static L_RCH * +rchCreate(l_int32 index, + l_float32 score, + char *text, + l_int32 sample, + l_int32 xloc, + l_int32 yloc, + l_int32 width) +{ +L_RCH *rch; + + rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH)); + rch->index = index; + rch->score = score; + rch->text = text; + rch->sample = sample; + rch->xloc = xloc; + rch->yloc = yloc; + rch->width = width; + return rch; +} + + +/*! + * \brief rchDestroy() + * + * \param[in,out] prch to be nulled + */ +void +rchDestroy(L_RCH **prch) +{ +L_RCH *rch; + + if (prch == NULL) { + L_WARNING("&rch is null!\n", __func__); + return; + } + if ((rch = *prch) == NULL) + return; + LEPT_FREE(rch->text); + LEPT_FREE(rch); + *prch = NULL; +} + + +/*! + * \brief rchaExtract() + * + * \param[in] rcha + * \param[out] pnaindex [optional] indices of best templates + * \param[out] pnascore [optional] correl scores of best templates + * \param[out] psatext [optional] character strings of best templates + * \param[out] pnasample [optional] indices of best samples + * \param[out] pnaxloc [optional] x-locations of templates + * \param[out] pnayloc [optional] y-locations of templates + * \param[out] pnawidth [optional] widths of best templates + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This returns clones of the number and string arrays. They must + * be destroyed by the caller. + * </pre> + */ +l_ok +rchaExtract(L_RCHA *rcha, + NUMA **pnaindex, + NUMA **pnascore, + SARRAY **psatext, + NUMA **pnasample, + NUMA **pnaxloc, + NUMA **pnayloc, + NUMA **pnawidth) +{ + if (pnaindex) *pnaindex = NULL; + if (pnascore) *pnascore = NULL; + if (psatext) *psatext = NULL; + if (pnasample) *pnasample = NULL; + if (pnaxloc) *pnaxloc = NULL; + if (pnayloc) *pnayloc = NULL; + if (pnawidth) *pnawidth = NULL; + if (!rcha) + return ERROR_INT("rcha not defined", __func__, 1); + + if (pnaindex) *pnaindex = numaClone(rcha->naindex); + if (pnascore) *pnascore = numaClone(rcha->nascore); + if (psatext) *psatext = sarrayClone(rcha->satext); + if (pnasample) *pnasample = numaClone(rcha->nasample); + if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc); + if (pnayloc) *pnayloc = numaClone(rcha->nayloc); + if (pnawidth) *pnawidth = numaClone(rcha->nawidth); + return 0; +} + + +/*! + * \brief rchExtract() + * + * \param[in] rch + * \param[out] pindex [optional] index of best template + * \param[out] pscore [optional] correlation score of best template + * \param[out] ptext [optional] character string of best template + * \param[out] psample [optional] index of best sample + * \param[out] pxloc [optional] x-location of template + * \param[out] pyloc [optional] y-location of template + * \param[out] pwidth [optional] width of best template + * \return 0 if OK, 1 on error + */ +l_ok +rchExtract(L_RCH *rch, + l_int32 *pindex, + l_float32 *pscore, + char **ptext, + l_int32 *psample, + l_int32 *pxloc, + l_int32 *pyloc, + l_int32 *pwidth) +{ + if (pindex) *pindex = 0; + if (pscore) *pscore = 0.0; + if (ptext) *ptext = NULL; + if (psample) *psample = 0; + if (pxloc) *pxloc = 0; + if (pyloc) *pyloc = 0; + if (pwidth) *pwidth = 0; + if (!rch) + return ERROR_INT("rch not defined", __func__, 1); + + if (pindex) *pindex = rch->index; + if (pscore) *pscore = rch->score; + if (ptext) *ptext = stringNew(rch->text); /* new string: owned by caller */ + if (psample) *psample = rch->sample; + if (pxloc) *pxloc = rch->xloc; + if (pyloc) *pyloc = rch->yloc; + if (pwidth) *pwidth = rch->width; + return 0; +} + + +/*! + * \brief transferRchToRcha() + * + * \param[in] rch source of data + * \param[in] rcha append to arrays in this destination + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This is used to transfer the results of a single character + * identification to an rcha array for the array of characters. + * </pre> + */ +static l_int32 +transferRchToRcha(L_RCH *rch, + L_RCHA *rcha) +{ + + if (!rch) + return ERROR_INT("rch not defined", __func__, 1); + if (!rcha) + return ERROR_INT("rcha not defined", __func__, 1); + + numaAddNumber(rcha->naindex, rch->index); + numaAddNumber(rcha->nascore, rch->score); + sarrayAddString(rcha->satext, rch->text, L_COPY); + numaAddNumber(rcha->nasample, rch->sample); + numaAddNumber(rcha->naxloc, rch->xloc); + numaAddNumber(rcha->nayloc, rch->yloc); + numaAddNumber(rcha->nawidth, rch->width); + return 0; +} + + +/*------------------------------------------------------------------------* + * Preprocessing and filtering * + *------------------------------------------------------------------------*/ +/*! + * \brief recogProcessToIdentify() + * + * \param[in] recog with LUT's pre-computed + * \param[in] pixs typ. single character, possibly d > 1 and uncropped + * \param[in] pad extra pixels added to left and right sides + * \return pixd 1 bpp, clipped to foreground, or NULL if there + * are no fg pixels or on error. + * + * <pre> + * Notes: + * (1) This is a lightweight operation to insure that the input + * image is 1 bpp, properly cropped, and padded on each side. + * If bpp > 1, the image is thresholded. + * </pre> + */ +PIX * +recogProcessToIdentify(L_RECOG *recog, + PIX *pixs, + l_int32 pad) +{ +l_int32 canclip; +PIX *pix1, *pix2, *pixd; + + if (!recog) + return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + + if (pixGetDepth(pixs) != 1) + pix1 = pixThresholdToBinary(pixs, recog->threshold); + else + pix1 = pixClone(pixs); + pixTestClipToForeground(pix1, &canclip); + if (canclip) + pixClipToForeground(pix1, &pix2, NULL); + else + pix2 = pixClone(pix1); + pixDestroy(&pix1); + if (!pix2) + return (PIX *)ERROR_PTR("no foreground pixels", __func__, NULL); + + pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0); + pixDestroy(&pix2); + return pixd; +} + + +/*! + * \brief recogPreSplittingFilter() + * + * \param[in] recog + * \param[in] pixs 1 bpp, many connected components + * \param[in] minh minimum height of components to be retained + * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained + * \param[in] debug 1 to output indicator arrays + * \return pixd with filtered components removed or NULL on error + */ +static PIX * +recogPreSplittingFilter(L_RECOG *recog, + PIX *pixs, + l_int32 minh, + l_float32 minaf, + l_int32 debug) +{ +l_int32 scaling, minsplitw, maxsplith, maxasp; +BOXA *boxas; +NUMA *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7; +PIX *pixd; +PIXA *pixas; + + if (!recog) + return (PIX *)ERROR_PTR("recog not defined", __func__, NULL); + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + + /* If there is scaling, do not remove components based on the + * values of min_splitw and max_splith. */ + scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; + minsplitw = (scaling) ? 1 : recog->min_splitw - 3; + maxsplith = (scaling) ? 150 : recog->max_splith; + maxasp = recog->max_wh_ratio; + + /* Generate an indicator array of connected components to remove: + * short stuff + * tall stuff + * components with large width/height ratio + * components with small area fill fraction */ + boxas = pixConnComp(pixs, &pixas, 8); + pixaFindDimensions(pixas, &naw, &nah); + na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT); + na1c = numaCopy(na1); + na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT); + na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT); + na4 = pixaFindWidthHeightRatio(pixas); + na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT); + na6 = pixaFindAreaFraction(pixas); + na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT); + numaLogicalOp(na1, na1, na2, L_UNION); + numaLogicalOp(na1, na1, na3, L_UNION); + numaLogicalOp(na1, na1, na5, L_UNION); + numaLogicalOp(na1, na1, na7, L_UNION); + pixd = pixCopy(NULL, pixs); + pixRemoveWithIndicator(pixd, pixas, na1); + if (debug) + l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1); + numaDestroy(&naw); + numaDestroy(&nah); + numaDestroy(&na1); + numaDestroy(&na1c); + numaDestroy(&na2); + numaDestroy(&na3); + numaDestroy(&na4); + numaDestroy(&na5); + numaDestroy(&na6); + numaDestroy(&na7); + boxaDestroy(&boxas); + pixaDestroy(&pixas); + return pixd; +} + + +/*! + * \brief recogSplittingFilter() + * + * \param[in] recog + * \param[in] pixs 1 bpp, single connected component + * \param[in] minh minimum height of component; 0 for default + * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained + * \param[out] premove 0 to save, 1 to remove + * \param[in] debug 1 to output indicator arrays + * \return 0 if OK, 1 on error + */ +static l_int32 +recogSplittingFilter(L_RECOG *recog, + PIX *pixs, + l_int32 minh, + l_float32 minaf, + l_int32 *premove, + l_int32 debug) +{ +l_int32 w, h; +l_float32 aspratio, fract; + + if (!premove) + return ERROR_INT("&remove not defined", __func__, 1); + *premove = 0; + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + if (minh <= 0) minh = DefaultMinHeight; + + /* Remove from further consideration: + * small stuff + * components with large width/height ratio + * components with small area fill fraction */ + pixGetDimensions(pixs, &w, &h, NULL); + if (w < recog->min_splitw) { + if (debug) L_INFO("w = %d < %d\n", __func__, w, recog->min_splitw); + *premove = 1; + return 0; + } + if (h < minh) { + if (debug) L_INFO("h = %d < %d\n", __func__, h, minh); + *premove = 1; + return 0; + } + aspratio = (l_float32)w / (l_float32)h; + if (aspratio > recog->max_wh_ratio) { + if (debug) L_INFO("w/h = %5.3f too large\n", __func__, aspratio); + *premove = 1; + return 0; + } + pixFindAreaFraction(pixs, recog->sumtab, &fract); + if (fract < minaf) { + if (debug) L_INFO("area fill fract %5.3f < %5.3f\n", + __func__, fract, minaf); + *premove = 1; + return 0; + } + + return 0; +} + + +/*------------------------------------------------------------------------* + * Postprocessing * + *------------------------------------------------------------------------*/ +/*! + * \brief recogExtractNumbers() + * + * \param[in] recog + * \param[in] boxas location of components + * \param[in] scorethresh min score for which we accept a component + * \param[in] spacethresh max horizontal distance allowed between digits; + * use -1 for default + * \param[out] pbaa [optional] bounding boxes of identified numbers + * \param[out] pnaa [optional] scores of identified digits + * \return sa of identified numbers, or NULL on error + * + * <pre> + * Notes: + * (1) This extracts digit data after recogaIdentifyMultiple() or + * lower-level identification has taken place. + * (2) Each string in the returned sa contains a sequence of ascii + * digits in a number. + * (3) The horizontal distance between boxes (limited by %spacethresh) + * is the negative of the horizontal overlap. + * (4) Components with a score less than %scorethresh, which may + * be hyphens or other small characters, will signal the + * end of the current sequence of digits in the number. A typical + * value for %scorethresh is 0.60. + * (5) We allow two digits to be combined if these conditions apply: + * (a) the first is to the left of the second + * (b) the second has a horizontal separation less than %spacethresh + * (c) the vertical overlap >= 0 (vertical separation < 0) + * (d) both have a score that exceeds %scorethresh + * (6) Each numa in the optionally returned naa contains the digit + * scores of a number. Each boxa in the optionally returned baa + * contains the bounding boxes of the digits in the number. + * </pre> + */ +SARRAY * +recogExtractNumbers(L_RECOG *recog, + BOXA *boxas, + l_float32 scorethresh, + l_int32 spacethresh, + BOXAA **pbaa, + NUMAA **pnaa) +{ +char *str, *text; +l_int32 i, n, x1, x2, h_ovl, v_ovl, h_sep, v_sep; +l_float32 score; +BOX *box, *prebox; +BOXA *ba = NULL; +BOXAA *baa; +NUMA *nascore, *na = NULL; +NUMAA *naa; +SARRAY *satext, *sa = NULL, *saout; + + if (pbaa) *pbaa = NULL; + if (pnaa) *pnaa = NULL; + if (!recog || !recog->rcha) + return (SARRAY *)ERROR_PTR("recog and rcha not both defined", + __func__, NULL); + if (!boxas) + return (SARRAY *)ERROR_PTR("boxas not defined", __func__, NULL); + + if (spacethresh < 0) + spacethresh = L_MAX(recog->maxheight_u, 20); + rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL); + if (!nascore || !satext) { + numaDestroy(&nascore); + sarrayDestroy(&satext); + return (SARRAY *)ERROR_PTR("nascore and satext not both returned", + __func__, NULL); + } + + saout = sarrayCreate(0); + naa = numaaCreate(0); + baa = boxaaCreate(0); + prebox = NULL; + n = numaGetCount(nascore); + for (i = 0; i < n; i++) { + numaGetFValue(nascore, i, &score); + text = sarrayGetString(satext, i, L_NOCOPY); + if (prebox == NULL) { /* no current run */ + if (score < scorethresh) { + continue; + } else { /* start a number run */ + sa = sarrayCreate(0); + ba = boxaCreate(0); + na = numaCreate(0); + sarrayAddString(sa, text, L_COPY); + prebox = boxaGetBox(boxas, i, L_CLONE); + boxaAddBox(ba, prebox, L_COPY); + numaAddNumber(na, score); + } + } else { /* in a current number run */ + box = boxaGetBox(boxas, i, L_CLONE); + boxGetGeometry(prebox, &x1, NULL, NULL, NULL); + boxGetGeometry(box, &x2, NULL, NULL, NULL); + boxOverlapDistance(box, prebox, &h_ovl, &v_ovl); + h_sep = -h_ovl; + v_sep = -v_ovl; + boxDestroy(&prebox); + if (x1 < x2 && h_sep <= spacethresh && + v_sep < 0 && score >= scorethresh) { /* add to number */ + sarrayAddString(sa, text, L_COPY); + boxaAddBox(ba, box, L_COPY); + numaAddNumber(na, score); + prebox = box; + } else { /* save the completed number */ + str = sarrayToString(sa, 0); + sarrayAddString(saout, str, L_INSERT); + sarrayDestroy(&sa); + boxaaAddBoxa(baa, ba, L_INSERT); + numaaAddNuma(naa, na, L_INSERT); + boxDestroy(&box); + if (score >= scorethresh) { /* start a new number */ + i--; + continue; + } + } + } + } + + if (prebox) { /* save the last number */ + str = sarrayToString(sa, 0); + sarrayAddString(saout, str, L_INSERT); + boxaaAddBoxa(baa, ba, L_INSERT); + numaaAddNuma(naa, na, L_INSERT); + sarrayDestroy(&sa); + boxDestroy(&prebox); + } + + numaDestroy(&nascore); + sarrayDestroy(&satext); + if (sarrayGetCount(saout) == 0) { + sarrayDestroy(&saout); + boxaaDestroy(&baa); + numaaDestroy(&naa); + L_INFO("saout has no identified text\n", __func__); + return NULL; + } + + if (pbaa) + *pbaa = baa; + else + boxaaDestroy(&baa); + if (pnaa) + *pnaa = naa; + else + numaaDestroy(&naa); + return saout; +} + +/*! + * \brief showExtractNumbers() + * + * \param[in] pixs input 1 bpp image + * \param[in] sa recognized text strings + * \param[in] baa boxa array for location of characters in each string + * \param[in] naa numa array for scores of characters in each string + * \param[out] ppixdb [optional] input pixs with identified chars outlined + * \return pixa of identified strings with text and scores, or NULL on error + * + * <pre> + * Notes: + * (1) This is a debugging routine on digit identification; e.g.: + * recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0); + * sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa); + * pixa = showExtractNumbers(pixs, sa, baa, naa, NULL); + * </pre> + */ +PIXA * +showExtractNumbers(PIX *pixs, + SARRAY *sa, + BOXAA *baa, + NUMAA *naa, + PIX **ppixdb) +{ +char buf[128]; +char *textstr, *scorestr; +l_int32 i, j, n, nchar, len; +l_float32 score; +L_BMF *bmf; +BOX *box1, *box2; +BOXA *ba; +NUMA *na; +PIX *pix1, *pix2, *pix3, *pix4; +PIXA *pixa; + + if (ppixdb) *ppixdb = NULL; + if (!pixs) + return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); + if (!sa) + return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL); + if (!baa) + return (PIXA *)ERROR_PTR("baa not defined", __func__, NULL); + if (!naa) + return (PIXA *)ERROR_PTR("naa not defined", __func__, NULL); + + n = sarrayGetCount(sa); + pixa = pixaCreate(n); + bmf = bmfCreate(NULL, 6); + if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1); + for (i = 0; i < n; i++) { + textstr = sarrayGetString(sa, i, L_NOCOPY); + ba = boxaaGetBoxa(baa, i, L_CLONE); + na = numaaGetNuma(naa, i, L_CLONE); + boxaGetExtent(ba, NULL, NULL, &box1); + box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5); + if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0); + pix1 = pixClipRectangle(pixs, box1, NULL); + len = strlen(textstr) + 1; + pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len, + 5, 3, L_SET_WHITE); + pix3 = pixConvertTo8(pix2, 1); + nchar = numaGetCount(na); + scorestr = NULL; + for (j = 0; j < nchar; j++) { + numaGetFValue(na, j, &score); + snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score)); + stringJoinIP(&scorestr, buf); + if (j < nchar - 1) stringJoinIP(&scorestr, ","); + } + snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr); + pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW); + pixaAddPix(pixa, pix4, L_INSERT); + boxDestroy(&box1); + boxDestroy(&box2); + pixDestroy(&pix1); + pixDestroy(&pix2); + pixDestroy(&pix3); + boxaDestroy(&ba); + numaDestroy(&na); + LEPT_FREE(scorestr); + } + + bmfDestroy(&bmf); + return pixa; +} + + +/*------------------------------------------------------------------------* + * Static debug helper * + *------------------------------------------------------------------------*/ +/*! + * \brief l_showIndicatorSplitValues() + * + * \param[in] na1, na2, na3, na4, na5, na6 6 indicator array + * + * <pre> + * Notes: + * (1) The values indicate that specific criteria has been met + * for component removal by pre-splitting filter.. + * The 'result' line shows which components have been removed. + * </pre> + */ +static void +l_showIndicatorSplitValues(NUMA *na1, + NUMA *na2, + NUMA *na3, + NUMA *na4, + NUMA *na5, + NUMA *na6) +{ +l_int32 i, n; + + n = numaGetCount(na1); + lept_stderr("================================================\n"); + lept_stderr("lt minw: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na1->array[i]); + lept_stderr("\nlt minh: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na2->array[i]); + lept_stderr("\ngt maxh: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na3->array[i]); + lept_stderr("\ngt maxasp: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na4->array[i]); + lept_stderr("\nlt minaf: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na5->array[i]); + lept_stderr("\n------------------------------------------------"); + lept_stderr("\nresult: "); + for (i = 0; i < n; i++) + lept_stderr("%4d ", (l_int32)na6->array[i]); + lept_stderr("\n================================================\n"); +}
