Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/leptonica/src/recogtrain.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/recogtrain.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,2420 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file recogtrain.c
+ * <pre>
+ *
+ *      Training on labeled data
+ *         l_int32             recogTrainLabeled()
+ *         PIX                *recogProcessLabeled()
+ *         l_int32             recogAddSample()
+ *         PIX                *recogModifyTemplate()
+ *         l_int32             recogAverageSamples()
+ *         l_int32             pixaAccumulateSamples()
+ *         l_int32             recogTrainingFinished()
+ *         static l_int32      recogTemplatesAreOK()
+ *         PIXA               *recogFilterPixaBySize()
+ *         PIXAA              *recogSortPixaByClass()
+ *         l_int32             recogRemoveOutliers1()
+ *         PIXA               *pixaRemoveOutliers1()
+ *         l_int32             recogRemoveOutliers2()
+ *         PIXA               *pixaRemoveOutliers2()
+ *
+ *      Training on unlabeled data
+ *         L_RECOG             recogTrainFromBoot()
+ *
+ *      Padding the digit training set
+ *         l_int32             recogPadDigitTrainingSet()
+ *         l_int32             recogIsPaddingNeeded()
+ *         static SARRAY      *recogAddMissingClassStrings()
+ *         PIXA               *recogAddDigitPadTemplates()
+ *         static l_int32      recogCharsetAvailable()
+ *
+ *      Making a boot digit recognizer
+ *         L_RECOG            *recogMakeBootDigitRecog()
+ *         PIXA               *recogMakeBootDigitTemplates()
+ *
+ *      Debugging
+ *         l_int32             recogShowContent()
+ *         l_int32             recogDebugAverages()
+ *         l_int32             recogShowAverageTemplates()
+ *         static PIX         *pixDisplayOutliers()
+ *         PIX                *recogDisplayOutlier()
+ *         PIX                *recogShowMatchesInRange()
+ *         PIX                *recogShowMatch()
+ *
+ *  These abbreviations are for the type of template to be used:
+ *    * SI (for the scanned images)
+ *    * WNL (for width-normalized lines, formed by first skeletonizing
+ *           the scanned images, and then dilating to a fixed width)
+ *  These abbreviations are for the type of recognizer:
+ *    * BAR (book-adapted recognizer; the best type; can do identification
+ *           with unscaled images and separation of touching characters)
+ *    * BSR (bootstrap recognizer; used if more labeled templates are
+ *           required for a BAR, either for finding more templates from
+ *           the book, or making a hybrid BAR/BSR)
+ *
+ *  The recog struct typically holds two versions of the input templates
+ *  (e.g. from a pixa) that were used to generate it.  One version is
+ *  the unscaled input templates.  The other version is the one that
+ *  will be used by the recog to identify unlabeled data.  That version
+ *  depends on the input parameters when the recog is created.  The choices
+ *  for the latter version, and their suggested use, are:
+ *  (1) unscaled SI -- typical for BAR, generated from book images
+ *  (2) unscaled WNL -- ditto
+ *  (3) scaled SI -- typical for recognizers containing template
+ *      images from sources other than the book to be recognized
+ *  (4) scaled WNL -- ditto
+ *  For cases (3) and (4), we recommend scaling to fixed height; e.g.,
+ *  scalew = 0, scaleh = 40.
+ *  When using WNL, we recommend using a width of 5 in the template
+ *  and 4 in the unlabeled data.
+ *  It appears that better results for a BAR are usually obtained using
+ *  SI than WNL, but more experimentation is needed.
+ *
+ *  This utility is designed to build recognizers that are specifically
+ *  adapted from a large amount of material, such as a book.  These
+ *  use labeled templates taken from the material, and not scaled.
+ *  In addition, two special recognizers are useful:
+ *  (1) Bootstrap recognizer (BSR).  This uses height-scaled templates,
+ *      that have been extended with several repetitions in one of two ways:
+ *      (a) anisotropic width scaling (for either SI or WNL)
+ *      (b) iterative erosions/dilations (for SI).
+ *  (2) Outlier removal.  This uses height scaled templates.  It can be
+ *      implemented without using templates that are aligned averages of all
+ *      templates in a class.
+ *
+ *  Recognizers are inexpensive to generate, for example, from a pixa
+ *  of labeled templates.  The general process of building a BAR is
+ *  to start with labeled templates, e.g., in a pixa, make a BAR, and
+ *  analyze new samples from the book to augment the BAR until it has
+ *  enough samples for each character class.  Along the way, samples
+ *  from a BSR may be added for help in training.  If not enough samples
+ *  are available for the BAR, it can finally be augmented with BSR
+ *  samples, in which case the resulting hybrid BAR/BSR recognizer
+ *  must work on scaled images.
+ *
+ *  Here are the steps in doing recog training:
+ *  A. Generate a BAR from any existing labeled templates
+ *    (1) Create a recog and add the templates, using recogAddSample().
+ *        This stores the unscaled templates.
+ *        [Note: this can be done in one step if the labeled templates are put
+ *         into a pixa:
+ *           L_Recog *rec = recogCreateFromPixa(pixa, ...);  ]
+ *    (2) Call recogTrainingFinished() to generate the (sometimes modified)
+ *        templates to be used for correlation.
+ *    (3) Optionally, remove outliers.
+ *    If there are sufficient samples in the classes, we're done. Otherwise,
+ *  B. Try to get more samples from the book to pad the BAR.
+ *     (1) Save the unscaled, labeled templates from the BAR.
+ *     (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
+ *     (3) Do recognition on more unlabeled images, scaled to a fixed height
+ *     (4) Add the unscaled, labeled images to the saved set.
+ *     (5) Optionally, remove outliers.
+ *     If there are sufficient samples in the classes, we're done. Otherwise,
+ *  C. For classes without a sufficient number of templates, we can
+ *     supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
+ *     and do recognition scaled to a fixed height.
+ *
+ *  Here are several methods that can be used for identifying outliers:
+ *  (1) Compute average templates for each class and remove a candidate
+ *      that is poorly correlated with the average.  This is the most
+ *      simple method.  recogRemoveOutliers1() uses this, supplemented with
+ *      a second threshold and a target number of templates to be saved.
+ *  (2) Compute average templates for each class and remove a candidate
+ *      that is more highly correlated with the average of some other class.
+ *      This does not require setting a threshold for the correlation.
+ *      recogRemoveOutliers2() uses this method, supplemented with a minimum
+ *      correlation score.
+ *  (3) For each candidate, find the average correlation with other
+ *      members of its class, and remove those that have a relatively
+ *      low average correlation.  This is similar to (1), gives comparable
+ *      results and because it does not use average templates, it requires
+ *      a bit more computation.
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include "allheaders.h"
+#include "pix_internal.h"
+
+    /* Static functions: forward declarations of the file-local helpers,
+     * so that they can be called ahead of their definitions */
+static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize,
+                                   l_float32 minfract, l_int32 *pok);
+static SARRAY *recogAddMissingClassStrings(L_RECOG  *recog);
+static l_int32 recogCharsetAvailable(l_int32 type);
+static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas);
+static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp,
+                                l_int32 maxclass, l_float32 maxscore);
+
+    /* Default parameters that are used in recogTemplatesAreOK() and
+     * in outlier removal functions.  They use the template set size
+     * to decide if the set of templates (before outliers are removed)
+     * is valid.  Values are set to accept most sets of sample templates. */
+static const l_int32    DefaultMinSetSize = 1;  /* minimum number of
+                                       samples for a valid class */
+static const l_float32  DefaultMinSetFract = 0.4f;  /* minimum fraction
+                               of classes required for a valid recog */
+
+    /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */
+static const l_float32  DefaultMinScore = 0.75; /* keep everything above */
+static const l_int32    DefaultMinTarget = 3;  /* to be kept if possible */
+static const l_float32  LowerScoreThreshold = 0.5;  /* templates can be
+                 * kept down to this score, if needed to retain the
+                 * desired minimum number of templates */
+
+
+/*------------------------------------------------------------------------*
+ *                                Training                                *
+ *------------------------------------------------------------------------*/
+/*!
+ * \brief   recogTrainLabeled()
+ *
+ * \param[in]    recog     in training mode
+ * \param[in]    pixs      if depth > 1, will be thresholded to 1 bpp
+ * \param[in]    box       [optional] cropping box
+ * \param[in]    text      [optional] if null, use text field in pix
+ * \param[in]    debug     1 to display images of samples not captured
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Training is restricted to the addition of a single
+ *          character in an arbitrary (e.g., UTF8) charset
+ *      (2) If box != null, it should represent the location in %pixs
+ *          of the character image.
+ *      (3) The sample is first prepared by recogProcessLabeled(), which
+ *          can reject it, and is then handed to recogAddSample().
+ *          A failure in either step gives a return value of 1.
+ * </pre>
+ */
+l_ok
+recogTrainLabeled(L_RECOG  *recog,
+                  PIX      *pixs,
+                  BOX      *box,
+                  char     *text,
+                  l_int32   debug)
+{
+const char  *label;
+l_int32      ret;
+PIX         *pix = NULL;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (!pixs)
+        return ERROR_INT("pixs not defined", __func__, 1);
+
+        /* Prepare the sample to be added. This step also acts
+         * as a filter, and can invalidate pixs as a template. */
+    ret = recogProcessLabeled(recog, pixs, box, text, &pix);
+    if (ret) {
+        pixDestroy(&pix);
+            /* %text is optional, so report the label that would have
+             * been used and never pass a null pointer to "%s" */
+        label = (text && text[0] != '\0') ? text : pixGetText(pixs);
+        if (!label)
+            label = "";
+        L_WARNING("failure to get sample '%s' for training\n", __func__,
+                  label);
+        return 1;
+    }
+
+        /* The recog stores its own copy of the sample, so the local
+         * pix is released whether or not the sample was accepted. */
+    ret = recogAddSample(recog, pix, debug);
+    pixDestroy(&pix);
+    return (ret) ? 1 : 0;
+}
+
+
+/*!
+ * \brief   recogProcessLabeled()
+ *
+ * \param[in]    recog   in training mode
+ * \param[in]    pixs    binarized with recog->threshold if depth > 1
+ * \param[in]    box     [optional] region of %pixs to crop to
+ * \param[in]    text    [optional] label; if null or empty, the text
+ *                       field of %pixs is used instead
+ * \param[out]   ppix    addr of the resulting 1 bpp pix, with the label
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The input is cropped (if %box is given), binarized (if
+ *          needed), cleaned of isolated noise and clipped to its
+ *          foreground.  The label is stored in the text field of
+ *          the returned pix.
+ *      (2) The sample is rejected if no label is available, if nothing
+ *          is left after clipping, or if the foreground falls into
+ *          more than one horizontally separated set of columns.
+ * </pre>
+ */
+l_ok
+recogProcessLabeled(L_RECOG  *recog,
+                    PIX      *pixs,
+                    BOX      *box,
+                    char     *text,
+                    PIX     **ppix)
+{
+char    *label;
+l_int32  nruns;
+NUMA    *nacols;
+PIX     *pixcrop, *pixbin, *pixclean, *pixfg;
+
+    if (!ppix)
+        return ERROR_INT("&pix not defined", __func__, 1);
+    *ppix = NULL;
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (!pixs)
+        return ERROR_INT("pixs not defined", __func__, 1);
+
+        /* Choose the label: the explicit argument wins over the text
+         * field of pixs.  The label is borrowed; do not free it. */
+    if (text && text[0] != '\0') {
+        label = text;
+    } else if (pixs->text && pixs->text[0] != '\0') {
+        label = pixs->text;
+    } else {
+        L_ERROR("no text: %d\n", __func__, recog->num_samples);
+        return 1;
+    }
+
+        /* Crop, then binarize, each only if required */
+    pixcrop = (box) ? pixClipRectangle(pixs, box, NULL) : pixClone(pixs);
+    pixbin = (pixGetDepth(pixcrop) > 1)
+                 ? pixConvertTo1(pixcrop, recog->threshold)
+                 : pixClone(pixcrop);
+    pixDestroy(&pixcrop);
+
+        /* Isolated noise is whatever does not survive a vertical
+         * opening of size 5.  The opened image is the seed, and it
+         * is filled back into the binarized image used as the mask. */
+    pixclean = pixMorphSequence(pixbin, "o1.5", 0);
+    pixSeedfillBinary(pixclean, pixclean, pixbin, 8);
+    pixDestroy(&pixbin);
+
+        /* Keep only the foreground region */
+    pixClipToForeground(pixclean, &pixfg, NULL);
+    pixDestroy(&pixclean);
+    if (!pixfg)
+        return ERROR_INT("pix4 is empty", __func__, 1);
+
+        /* All components must overlap horizontally: the column
+         * counts may form only a single run of nonzero values. */
+    nacols = pixCountByColumn(pixfg, NULL);
+    numaCountNonzeroRuns(nacols, &nruns);
+    numaDestroy(&nacols);
+    if (nruns > 1) {
+        L_WARNING("found %d sets of horiz separated c.c.; skipping\n",
+                  __func__, nruns);
+        pixDestroy(&pixfg);
+        return 1;
+    }
+
+    pixSetText(pixfg, label);
+    *ppix = pixfg;
+    return 0;
+}
+
+
+/*!
+ * \brief   recogAddSample()
+ *
+ * \param[in]    recog
+ * \param[in]    pix         a single character, 1 bpp, labeled
+ * \param[in]    debug       nonzero to log the label, charint and index
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The character string label is taken from the text field
+ *          of the 1 bpp %pix.
+ *      (2) The pixaa_u array of the recog is initialized to accept
+ *          up to 256 different classes.  When training is finished,
+ *          the arrays are truncated to the actual number of classes.
+ *          To pad an existing recog from the boot recognizers, training
+ *          is started again; if samples from a new class are added,
+ *          the pixaa_u array is extended by adding a pixa to hold them.
+ *      (3) Samples are refused once training has been completed.
+ *      (4) A copy of %pix is stored; the caller keeps ownership of %pix.
+ * </pre>
+ */
+l_ok
+recogAddSample(L_RECOG  *recog,
+               PIX      *pix,
+               l_int32   debug)
+{
+char    *label;
+l_int32  nclasses, charval, classindex;
+PIXAA   *paa_u;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (!(pix && pixGetDepth(pix) == 1))
+        return ERROR_INT("pix not defined or not 1 bpp\n", __func__, 1);
+    if (recog->train_done)
+        return ERROR_INT("not added: training has been completed", __func__, 1);
+    paa_u = recog->pixaa_u;
+
+        /* The label must convert to an integer character value */
+    label = pixGetText(pix);
+    if (l_convertCharstrToInt(label, &charval) == 1) {
+        L_ERROR("invalid text: %s\n", __func__, label);
+        return 1;
+    }
+
+        /* Look up the class index.  A return of 1 means that the
+         * class is new, and the storage may need one more pixa. */
+    if (recogGetClassIndex(recog, charval, label, &classindex) == 1) {
+        nclasses = pixaaGetCount(paa_u, NULL);
+        if (classindex == nclasses) {  /* extend paa by one pixa */
+            L_INFO("Adding new class and pixa: index = %d, text = %s\n",
+                   __func__, classindex, label);
+            pixaaAddPixa(paa_u, pixaCreate(10), L_INSERT);
+        } else if (classindex > nclasses) {
+            L_ERROR("oops: bad index %d > npa %d!!\n", __func__,
+                    classindex, nclasses);
+            return 1;
+        }
+    }
+    if (debug) {
+        L_INFO("Identified text label: %s\n", __func__, label);
+        L_INFO("Identified: charint = %d, index = %d\n",
+               __func__, charval, classindex);
+    }
+
+        /* Store a copy of the unscaled character image in the pixa
+         * for its class.  (Unscaled images are required to split
+         * touching characters.) */
+    recog->num_samples++;
+    pixaaAddPix(paa_u, classindex, pix, NULL, L_COPY);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogModifyTemplate()
+ *
+ * \param[in]    recog
+ * \param[in]    pixs   1 bpp template; it is not modified
+ * \return  pixd   new pix, optionally scaled and optionally converted
+ *                 to strokes of fixed width, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Scaling is skipped when each of recog->scalew and
+ *          recog->scaleh is either 0 or equal to the corresponding
+ *          dimension of %pixs.
+ *      (2) Conversion to lines is done only if recog->linew > 0.
+ *      (3) A result with no pixels set is treated as an error.
+ * </pre>
+ */
+PIX *
+recogModifyTemplate(L_RECOG  *recog,
+                    PIX      *pixs)
+{
+l_int32  wsrc, hsrc, keepw, keeph, nofg;
+PIX     *pixsc, *pixd;
+
+    if (!recog)
+        return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
+    if (!pixs)
+        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
+
+        /* Step 1: scale, unless both dimensions are to be kept */
+    pixGetDimensions(pixs, &wsrc, &hsrc, NULL);
+    keepw = (recog->scalew == 0 || recog->scalew == wsrc);
+    keeph = (recog->scaleh == 0 || recog->scaleh == hsrc);
+    if (keepw && keeph)
+        pixsc = pixCopy(NULL, pixs);
+    else
+        pixsc = pixScaleToSize(pixs, recog->scalew, recog->scaleh);
+    if (!pixsc)
+        return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
+
+        /* Step 2: convert to lines of fixed width, if requested */
+    pixd = (recog->linew > 0) ? pixSetStrokeWidth(pixsc, recog->linew, 1, 8)
+                              : pixClone(pixsc);
+    pixDestroy(&pixsc);
+    if (!pixd)
+        return (PIX *)ERROR_PTR("pix2 not made", __func__, NULL);
+
+        /* Step 3: the template is only useful if pixels remain */
+    pixZero(pixd, &nofg);
+    if (!nofg)
+        return pixd;
+    pixDestroy(&pixd);
+    return (PIX *)ERROR_PTR("modified template has no pixels",
+                            __func__, NULL);
+}
+
+
+/*!
+ * \brief   recogAverageSamples()
+ *
+ * \param[in]   recog    existing recog
+ * \param[in]   debug    nonzero to display the averaged templates
+ * \return  0 on success, 1 on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is only called in two situations:
+ *          (a) When splitting characters using either the DID method
+ *              recogDecode() or the greedy splitter
+ *              recogCorrelationBestRow()
+ *          (b) By a special recognizer that is used to remove outliers.
+ *          Both unscaled and scaled inputs are averaged.
+ *      (2) If the data in any class is nonexistent (no samples), or
+ *          very bad (no fg pixels in the average), or if the ratio
+ *          of max/min average unscaled class template heights is
+ *          greater than max_ht_ratio, this function fails.  The caller
+ *          must check the return value of the recog, and destroy the
+ *          recog on failure.
+ *      (3) Set debug = 1 to view the resulting templates and their centroids.
+ *      (4) If the averages have already been computed (recog->ave_done),
+ *          nothing is recomputed; the templates are still displayed
+ *          if %debug is set.
+ *      (5) On failure, the previous averaging data has already been
+ *          replaced by new arrays that may be only partially filled,
+ *          and recog->ave_done is not set.
+ * </pre>
+ */
+l_int32
+recogAverageSamples(L_RECOG  *recog,
+                    l_int32   debug)
+{
+l_int32    i, nsamp, size, area, bx, by, badclass;
+l_float32  x, y, hratio;
+BOX       *box;
+PIXA      *pixa1;
+PIX       *pix1, *pix2, *pix3;
+PTA       *pta1;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+    if (recog->ave_done) {  /* nothing to recompute */
+        if (debug)  /* always do this if requested */
+            recogShowAverageTemplates(recog);
+        return 0;
+    }
+
+        /* Remove any previous averaging data, and make empty arrays
+         * for the unscaled and for the scaled averages.  Each array
+         * is to receive one entry per class. */
+    size = recog->setsize;
+    pixaDestroy(&recog->pixa_u);
+    ptaDestroy(&recog->pta_u);
+    numaDestroy(&recog->nasum_u);
+    recog->pixa_u = pixaCreate(size);
+    recog->pta_u = ptaCreate(size);
+    recog->nasum_u = numaCreate(size);
+
+    pixaDestroy(&recog->pixa);
+    ptaDestroy(&recog->pta);
+    numaDestroy(&recog->nasum);
+    recog->pixa = pixaCreate(size);
+    recog->pta = ptaCreate(size);
+    recog->nasum = numaCreate(size);
+
+        /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area.
+         * Note that when we threshold to 1 bpp the 8 bpp averaged template
+         * that is returned from the accumulator, it will not be cropped
+         * to the foreground.  We must crop it, because the correlator
+         * makes that assumption and will return a zero value if the
+         * width or height of the two images differs by several pixels.
+         * But cropping to fg can cause the value of the centroid to
+         * change, if bx > 0 or by > 0.
+         * The loop stops at the first bad class.  Every exit path from
+         * the loop body releases the clones and temporaries that were
+         * made in that iteration. */
+    badclass = FALSE;
+    for (i = 0; i < size; i++) {
+        pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE);
+        pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE);
+        nsamp = pixaGetCount(pixa1);
+        nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
+        if (nsamp == 0) {  /* no information for this class */
+            L_ERROR("no samples in class %d\n", __func__, i);
+            badclass = TRUE;
+            pixaDestroy(&pixa1);
+            ptaDestroy(&pta1);
+            break;
+        } else {
+            pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
+            pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
+            pixInvert(pix2, pix2);
+            pixClipToForeground(pix2, &pix3, &box);
+            if (!box) {  /* treated as an average without fg pixels */
+                L_ERROR("no fg pixels in average for uclass %d\n", __func__, i);
+                badclass = TRUE;
+                pixDestroy(&pix1);
+                pixDestroy(&pix2);
+                pixaDestroy(&pixa1);
+                ptaDestroy(&pta1);
+                break;
+            } else {
+                boxGetGeometry(box, &bx, &by, NULL, NULL);
+                pixaAddPix(recog->pixa_u, pix3, L_INSERT);  /* pix3 is
+                              * inserted, so it is not destroyed here */
+                ptaAddPt(recog->pta_u, x - bx, y - by);  /* correct centroid */
+                pixCountPixels(pix3, &area, recog->sumtab);
+                numaAddNumber(recog->nasum_u, area);  /* foreground */
+                boxDestroy(&box);
+            }
+            pixDestroy(&pix1);
+            pixDestroy(&pix2);
+        }
+        pixaDestroy(&pixa1);
+        ptaDestroy(&pta1);
+    }
+
+        /* Are any classes bad?  */
+    if (badclass)
+        return ERROR_INT("at least 1 bad class", __func__, 1);
+
+        /* Get the range of sizes of the unscaled average templates.
+         * Reject if the height ratio is too large.  */
+    pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u,
+                  &recog->maxwidth_u, &recog->maxheight_u);
+    hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u;
+    if (hratio > recog->max_ht_ratio) {
+        L_ERROR("ratio of max/min height of average templates = %4.1f\n",
+                __func__, hratio);
+        return 1;
+    }
+
+        /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area.
+         * This follows the unscaled case, but there is no separate
+         * test for an empty class here. */
+    for (i = 0; i < size; i++) {
+        pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE);
+        pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE);
+        nsamp = pixaGetCount(pixa1);
+        nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
+        pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y);
+        pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2));
+        pixInvert(pix2, pix2);
+        pixClipToForeground(pix2, &pix3, &box);
+        if (!box) {  /* treated as an average without fg pixels */
+            L_ERROR("no fg pixels in average for class %d\n", __func__, i);
+            badclass = TRUE;
+            pixDestroy(&pix1);
+            pixDestroy(&pix2);
+            pixaDestroy(&pixa1);
+            ptaDestroy(&pta1);
+            break;
+        } else {
+            boxGetGeometry(box, &bx, &by, NULL, NULL);
+            pixaAddPix(recog->pixa, pix3, L_INSERT);  /* pix3 is
+                            * inserted, so it is not destroyed here */
+            ptaAddPt(recog->pta, x - bx, y - by);  /* correct centroid */
+            pixCountPixels(pix3, &area, recog->sumtab);
+            numaAddNumber(recog->nasum, area);  /* foreground */
+            boxDestroy(&box);
+        }
+        pixDestroy(&pix1);
+        pixDestroy(&pix2);
+        pixaDestroy(&pixa1);
+        ptaDestroy(&pta1);
+    }
+
+    if (badclass)
+        return ERROR_INT("no fg pixels in at least 1 class", __func__, 1);
+
+        /* Get the range of widths of the scaled average templates */
+    pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL);
+
+       /* Get dimensions useful for splitting */
+    recog->min_splitw = L_MAX(5, recog->minwidth_u - 5);
+    recog->max_splith = recog->maxheight_u + 12;  /* allow for skew */
+
+    if (debug)
+        recogShowAverageTemplates(recog);
+
+    recog->ave_done = TRUE;  /* later calls return early */
+    return 0;
+}
+
+
+/*!
+ * \brief   pixaAccumulateSamples()
+ *
+ * \param[in]    pixa     of samples from the same class, 1 bpp
+ * \param[in]    pta      [optional] of centroids of the samples
+ * \param[out]   ppixd    accumulated samples, 8 bpp
+ * \param[out]   px       [optional] average x coordinate of centroids
+ * \param[out]   py       [optional] average y coordinate of centroids
+ * \return  0 on success, 1 on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This generates an aligned (by centroid) sum of the input pix.
+ *      (2) We use only the first 256 samples; that's plenty.
+ *      (3) If pta is not input, we generate two tables to compute the
+ *          centroids here, and discard them after use.  If this is
+ *          called many times, it is better to precompute the pta.
+ *      (4) If pta is input, its count must equal the count of %pixa.
+ *      (5) The average centroid is always computed from the centroids
+ *          that are actually used for the alignment, whether they
+ *          were input in %pta or generated here.
+ * </pre>
+ */
+l_int32
+pixaAccumulateSamples(PIXA       *pixa,
+                      PTA        *pta,
+                      PIX       **ppixd,
+                      l_float32  *px,
+                      l_float32  *py)
+{
+l_int32    i, n, maxw, maxh, xdiff, ydiff;
+l_int32   *centtab, *sumtab;
+l_float32  xc, yc, xave, yave;
+PIX       *pix1, *pix2, *pixsum;
+PTA       *ptac;
+
+    if (px) *px = 0;
+    if (py) *py = 0;
+    if (!ppixd)
+        return ERROR_INT("&pixd not defined", __func__, 1);
+    *ppixd = NULL;
+    if (!pixa)
+        return ERROR_INT("pixa not defined", __func__, 1);
+
+    n = pixaGetCount(pixa);
+    if (pta && ptaGetCount(pta) != n)
+        return ERROR_INT("pta count differs from pixa count", __func__, 1);
+    n = L_MIN(n, 256);  /* take the first 256 only */
+    if (n == 0)
+        return ERROR_INT("pixa array empty", __func__, 1);
+
+        /* Find the centroids.  From here on, ptac holds them in
+         * both cases; pta may be null and must not be read. */
+    if (pta) {
+        ptac = ptaClone(pta);
+    } else {  /* generate them here */
+        ptac = ptaCreate(n);
+        centtab = makePixelCentroidTab8();
+        sumtab = makePixelSumTab8();
+        for (i = 0; i < n; i++) {
+            pix1 = pixaGetPix(pixa, i, L_CLONE);
+            pixCentroid(pix1, centtab, sumtab, &xc, &yc);
+            ptaAddPt(ptac, xc, yc);
+            pixDestroy(&pix1);  /* release the clone */
+        }
+        LEPT_FREE(centtab);
+        LEPT_FREE(sumtab);
+    }
+
+        /* Find the average value of the centroids */
+    xave = yave = 0;
+    for (i = 0; i < n; i++) {
+        ptaGetPt(ptac, i, &xc, &yc);
+        xave += xc;
+        yave += yc;
+    }
+    xave = xave / (l_float32)n;
+    yave = yave / (l_float32)n;
+    if (px) *px = xave;
+    if (py) *py = yave;
+
+        /* Place all pix with their centroids located at the average
+         * centroid value, and sum the results.  Make the accumulator
+         * image slightly larger than the largest sample to insure
+         * that all pixels are represented in the accumulator.  */
+    pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh);
+    pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0);
+    pix1 = pixCreate(maxw, maxh, 1);  /* scratch image, reused per sample */
+    for (i = 0; i < n; i++) {
+        pix2 = pixaGetPix(pixa, i, L_CLONE);
+        ptaGetPt(ptac, i, &xc, &yc);
+        xdiff = (l_int32)(xave - xc);
+        ydiff = (l_int32)(yave - yc);
+        pixClearAll(pix1);
+        pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC,
+                    pix2, 0, 0);
+        pixAccumulate(pixsum, pix1, L_ARITH_ADD);
+        pixDestroy(&pix2);
+    }
+    *ppixd = pixFinalAccumulate(pixsum, 0, 8);
+
+    pixDestroy(&pix1);
+    pixDestroy(&pixsum);
+    ptaDestroy(&ptac);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogTrainingFinished()
+ *
+ * \param[in]    precog       addr of recog
+ * \param[in]    modifyflag   1 to use recogModifyTemplate(); 0 otherwise
+ * \param[in]    minsize      set to -1 for default
+ * \param[in]    minfract     set to -1.0 for default
+ * \return  0 if OK, 1 on error (input recog will be destroyed)
+ *
+ * <pre>
+ * Notes:
+ *      (1) This must be called after all training samples have been added.
+ *      (2) If the templates are not good enough, the recog input is destroyed.
+ *      (3) Usually, %modifyflag == 1, because we want to apply
+ *          recogModifyTemplate() to generate the actual templates
+ *          that will be used.  The one exception is when reading a
+ *          serialized recog: there we want to put the same set of
+ *          templates in both the unscaled and modified pixaa.
+ *          See recogReadStream() to see why we do this.
+ *      (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
+ *      (5) The following things are done here:
+ *          (a) Allocate (or reallocate) storage for (possibly) modified
+ *              bitmaps, centroids, and fg areas.
+ *          (b) Generate the (possibly) modified bitmaps.
+ *          (c) Compute centroid and fg area data for both unscaled and
+ *              modified bitmaps.
+ *          (d) Truncate the pixaa, ptaa and numaa arrays down from
+ *              256 to the actual size.
+ *      (6) Putting these operations here makes it simple to recompute
+ *          the recog with different modifications on the bitmaps.
+ *      (7) Call recogShowContent() to display the templates, both
+ *          unscaled and modified.
+ * </pre>
+ */
+l_ok
+recogTrainingFinished(L_RECOG  **precog,
+                      l_int32    modifyflag,
+                      l_int32    minsize,
+                      l_float32  minfract)
+{  /* Validate the unscaled templates, rebuild the (possibly modified)
+    * template, centroid and fg-area arrays from recog->pixaa_u,
+    * truncate all arrays, and mark the recog as trained. */
+l_int32    ok, i, j, size, nc, ns, area;
+l_float32  xave, yave;
+PIX       *pix, *pixd;
+PIXA      *pixa;
+PIXAA     *paa;
+PTA       *pta;
+PTAA      *ptaa;
+L_RECOG   *recog;
+
+    if (!precog)
+        return ERROR_INT("&recog not defined", __func__, 1);
+    if ((recog = *precog) == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (recog->train_done) return 0;  /* already finalized: nothing to do */
+
+        /* Test the input templates.  The return value of
+         * recogTemplatesAreOK() is not examined; only %ok is used.
+         * If the set is rejected, the recog is destroyed through
+         * %precog and an error is returned. */
+    recogTemplatesAreOK(recog, minsize, minfract, &ok);
+    if (!ok) {
+        recogDestroy(precog);
+        return ERROR_INT("bad templates", __func__, 1);
+    }
+
+        /* Generate the storage for the possibly-scaled training bitmaps:
+         * a pixaa sized by recog->maxarraysize and filled by
+         * pixaaInitFull() from a freshly created pixa.  It replaces
+         * whatever recog->pixaa held before. */
+    size = recog->maxarraysize;
+    paa = pixaaCreate(size);
+    pixa = pixaCreate(1);
+    pixaaInitFull(paa, pixa);
+    pixaDestroy(&pixa);
+    pixaaDestroy(&recog->pixaa);
+    recog->pixaa = paa;
+
+        /* Generate the storage for the unscaled centroid training data */
+    ptaa = ptaaCreate(size);
+    pta = ptaCreate(0);  /* initializer pta, reused for both ptaa */
+    ptaaInitFull(ptaa, pta);
+    ptaaDestroy(&recog->ptaa_u);
+    recog->ptaa_u = ptaa;
+
+        /* Generate the storage for the possibly-scaled centroid data */
+    ptaa = ptaaCreate(size);
+    ptaaInitFull(ptaa, pta);
+    ptaDestroy(&pta);  /* initializer no longer needed */
+    ptaaDestroy(&recog->ptaa);
+    recog->ptaa = ptaa;
+
+        /* Generate the storage for the fg area data, replacing any
+         * existing numaa for both the unscaled and modified templates */
+    numaaDestroy(&recog->naasum_u);
+    numaaDestroy(&recog->naasum);
+    recog->naasum_u = numaaCreateFull(size, 0);
+    recog->naasum = numaaCreateFull(size, 0);
+
+    paa = recog->pixaa_u;  /* from here on, paa is the unscaled input set */
+    nc = recog->setsize;
+    for (i = 0; i < nc; i++) {
+        pixa = pixaaGetPixa(paa, i, L_CLONE);
+        ns = pixaGetCount(pixa);
+        for (j = 0; j < ns; j++) {
+                /* Save centroid and area data for the unscaled pix */
+            pix = pixaGetPix(pixa, j, L_CLONE);
+            pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave);
+            ptaaAddPt(recog->ptaa_u, i, xave, yave);
+            pixCountPixels(pix, &area, recog->sumtab);
+            numaaAddNumber(recog->naasum_u, i, area);  /* foreground */
+
+                /* Insert the (optionally) scaled character image, and
+                 * save centroid and area data for it.  With
+                 * %modifyflag == 1 the template comes from
+                 * recogModifyTemplate(); otherwise a clone of the
+                 * unscaled pix is used.  pixd is handed to recog->pixaa
+                 * with L_INSERT and is not destroyed in this loop. */
+            if (modifyflag == 1)
+                pixd = recogModifyTemplate(recog, pix);
+            else
+                pixd = pixClone(pix);
+            if (pixd) {
+                pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT);
+                pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave);
+                ptaaAddPt(recog->ptaa, i, xave, yave);
+                pixCountPixels(pixd, &area, recog->sumtab);
+                numaaAddNumber(recog->naasum, i, area);
+            } else {  /* sample is skipped in the modified arrays only */
+                L_ERROR("failed: modified template for class %d, sample %d\n",
+                        __func__, i, j);
+            }
+            pixDestroy(&pix);
+        }
+        pixaDestroy(&pixa);
+    }
+
+        /* Truncate the arrays to those with non-empty containers */
+    pixaaTruncate(recog->pixaa_u);
+    pixaaTruncate(recog->pixaa);
+    ptaaTruncate(recog->ptaa_u);
+    ptaaTruncate(recog->ptaa);
+    numaaTruncate(recog->naasum_u);
+    numaaTruncate(recog->naasum);
+
+    recog->train_done = TRUE;
+    return 0;
+}
+
+
+/*!
+ * \brief   recogTemplatesAreOK()
+ *
+ * \param[in]    recog
+ * \param[in]    minsize     min number of samples for a class to count
+ *                           as valid; any negative value selects
+ *                           DefaultMinSetSize
+ * \param[in]    minfract    min fraction of the charset that must be
+ *                           valid; any negative value selects
+ *                           DefaultMinSetFract
+ * \param[out]   pok         set to 1 if template set is valid; 0 otherwise
+ * \return  1 on error; 0 otherwise.  An invalid template set is not an error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is called by recogTrainingFinished(), which destroys
+ *          the recog when %pok comes back as 0.
+ *      (2) A class is counted as valid when the number of unscaled
+ *          templates it holds in recog->pixaa_u is at least %minsize.
+ *      (3) The template set is valid when the number of valid classes,
+ *          divided by recog->charset_size, is at least %minfract.
+ * </pre>
+ */
+static l_int32
+recogTemplatesAreOK(L_RECOG   *recog,
+                    l_int32    minsize,
+                    l_float32  minfract,
+                    l_int32   *pok)
+{
+l_int32    iclass, nclasses, nsamples, ngood;
+l_float32  fract;
+NUMA      *nacount;
+
+    if (!pok)
+        return ERROR_INT("&ok not defined", __func__, 1);
+    *pok = 0;
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* Substitute defaults for negative inputs */
+    if (minsize < 0)
+        minsize = DefaultMinSetSize;
+    if (minfract < 0)
+        minfract = DefaultMinSetFract;
+
+        /* Count the classes that hold enough samples */
+    nclasses = pixaaGetCount(recog->pixaa_u, &nacount);
+    ngood = 0;
+    for (iclass = 0; iclass < nclasses; iclass++) {
+        numaGetIValue(nacount, iclass, &nsamples);
+        if (nsamples >= minsize)
+            ngood++;
+    }
+    numaDestroy(&nacount);
+
+        /* %pok was initialized to 0 above; only the passing case is set */
+    fract = (l_float32)ngood / (l_float32)recog->charset_size;
+    if (fract >= minfract)
+        *pok = 1;
+    return 0;
+}
+
+
+/*!
+ * \brief   recogFilterPixaBySize()
+ *
+ * \param[in]   pixas         labeled templates
+ * \param[in]   setsize       size of character set (number of classes);
+ *                            passed through to recogSortPixaByClass()
+ * \param[in]   maxkeep       max number of templates to keep in a class
+ * \param[in]   max_ht_ratio  max allowed height ratio (see below)
+ * \param[out]  pna           [optional] debug output, giving the number
+ *                            in each class after filtering; use NULL to skip
+ * \return  pixa   filtered templates, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The underlying assumption is that the larger and more common
+ *          templates in a class are the ones of interest (e.g., page
+ *          numbers rather than small digits in tables).  The first
+ *          stage therefore removes templates that are much shorter
+ *          than the tall ones, and the second stage keeps those
+ *          nearest the middle of the height ordering.
+ *      (2) Per class: the templates are sorted by increasing height and
+ *          the height h90 at index (int)(0.9 * n) is taken as reference.
+ *          A template of height h survives when h90 / h <= %max_ht_ratio.
+ *          If more than %maxkeep survive, the centered run of %maxkeep
+ *          templates in the sorted order is selected.
+ *      (3) Classes with no templates are skipped, and contribute no
+ *          entry to %pna.
+ * </pre>
+ */
+PIXA *
+recogFilterPixaBySize(PIXA      *pixas,
+                      l_int32    setsize,
+                      l_int32    maxkeep,
+                      l_float32  max_ht_ratio,
+                      NUMA     **pna)
+{
+l_int32    iclass, k, nclasses, count, rank90, htref, ht, first, last;
+l_float32  htratio;
+NUMA      *nakept;
+PIX       *pix;
+PIXA      *pixaclass, *pixasorted, *pixatall, *pixakeep, *pixad;
+PIXAA     *paa;
+
+    if (pna) *pna = NULL;
+    if (!pixas)
+        return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
+
+    if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL)
+        return (PIXA *)ERROR_PTR("paa not made", __func__, NULL);
+    nclasses = pixaaGetCount(paa, NULL);
+
+        /* The per-class counts are only collected on request */
+    nakept = NULL;
+    if (pna) {
+        nakept = numaCreate(0);
+        *pna = nakept;
+    }
+
+    pixad = pixaCreate(0);
+    for (iclass = 0; iclass < nclasses; iclass++) {
+        pixaclass = pixaaGetPixa(paa, iclass, L_CLONE);
+        count = pixaGetCount(pixaclass);
+        if (count == 0) {
+            pixaDestroy(&pixaclass);
+            continue;
+        }
+
+            /* Stage 1: drop templates that are too short relative
+             * to the rank 0.9 height */
+        pixasorted = pixaSort(pixaclass, L_SORT_BY_HEIGHT, L_SORT_INCREASING,
+                              NULL, L_COPY);
+        rank90 = (l_int32)(0.9 * count);
+        pixaGetPixDimensions(pixasorted, rank90, NULL, &htref, NULL);
+        pixatall = pixaCreate(count);
+        for (k = 0; k < count; k++) {
+            pixaGetPixDimensions(pixasorted, k, NULL, &ht, NULL);
+            htratio = (l_float32)htref / (l_float32)ht;
+            if (htratio <= max_ht_ratio) {
+                pix = pixaGetPix(pixasorted, k, L_COPY);
+                pixaAddPix(pixatall, pix, L_INSERT);
+            }
+        }
+
+            /* Stage 2: keep at most %maxkeep, centered in the ordering */
+        count = pixaGetCount(pixatall);
+        if (count > maxkeep) {
+            first = (count - maxkeep) / 2;
+            last = first + maxkeep - 1;
+            pixakeep = pixaSelectRange(pixatall, first, last, L_CLONE);
+        } else {
+            pixakeep = pixaCopy(pixatall, L_CLONE);
+        }
+        if (nakept) numaAddNumber(nakept, pixaGetCount(pixakeep));
+        pixaJoin(pixad, pixakeep, 0, -1);
+
+        pixaDestroy(&pixaclass);
+        pixaDestroy(&pixasorted);
+        pixaDestroy(&pixatall);
+        pixaDestroy(&pixakeep);
+    }
+
+    pixaaDestroy(&paa);
+    return pixad;
+}
+
+
+/*!
+ * \brief   recogSortPixaByClass()
+ *
+ * \param[in]   pixa       labeled templates
+ * \param[in]   setsize    size of character set (number of classes);
+ *                         not referenced by this implementation
+ * \return  paa   pixaa where each pixa has templates for one class,
+ *                or null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A temporary recog is built from %pixa without finishing the
+ *          training.  Its pixaa of unscaled templates is detached and
+ *          returned, and the temporary recog is then destroyed.
+ * </pre>
+ */
+PIXAA *
+recogSortPixaByClass(PIXA    *pixa,
+                     l_int32  setsize)
+{
+PIXAA    *paaclass;
+L_RECOG  *recogtmp;
+
+    if (!pixa)
+        return (PIXAA *)ERROR_PTR("pixa not defined", __func__, NULL);
+
+    recogtmp = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0);
+    if (recogtmp == NULL)
+        return (PIXAA *)ERROR_PTR("recog not made", __func__, NULL);
+
+        /* Take over the unscaled templates; clearing the field keeps
+         * recogDestroy() from touching the pixaa being returned */
+    paaclass = recogtmp->pixaa_u;
+    recogtmp->pixaa_u = NULL;
+    recogDestroy(&recogtmp);
+    return paaclass;
+}
+
+
+/*!
+ * \brief   recogRemoveOutliers1()
+ *
+ * \param[in]   precog       addr of recog with unscaled labeled templates
+ * \param[in]   minscore     keep everything with at least this score
+ * \param[in]   mintarget    minimum desired number to retain if possible
+ * \param[in]   minsize      minimum number of samples required for a class
+ * \param[out]  ppixsave     [optional debug] saved templates, with scores
+ * \param[out]  ppixrem      [optional debug] removed templates, with scores
+ * \return  0 if OK, 1 on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) Convenience wrapper around pixaRemoveOutliers1(), which does
+ *          the filtering; see that function for the parameters.
+ *      (2) The unscaled templates are extracted and the input recog is
+ *          destroyed right away.  On success, %precog receives a new
+ *          recog made from the surviving templates with
+ *          recogCreateFromPixa(pixa, 0, 0, 0, 150, 1).  On any later
+ *          failure the input recog has already been destroyed.
+ * </pre>
+ */
+l_ok
+recogRemoveOutliers1(L_RECOG  **precog,
+                     l_float32  minscore,
+                     l_int32    mintarget,
+                     l_int32    minsize,
+                     PIX      **ppixsave,
+                     PIX      **ppixrem)
+{
+PIXA     *pixaall, *pixakept;
+L_RECOG  *recognew;
+
+    if (!precog)
+        return ERROR_INT("&recog not defined", __func__, 1);
+    if (*precog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* Pull out the unscaled templates; the old recog is done */
+    pixaall = recogExtractPixa(*precog);
+    recogDestroy(precog);
+
+        /* Filter them */
+    pixakept = pixaRemoveOutliers1(pixaall, minscore, mintarget, minsize,
+                                   ppixsave, ppixrem);
+    pixaDestroy(&pixaall);
+    if (pixakept == NULL)
+        return ERROR_INT("failure to remove outliers", __func__, 1);
+
+        /* Rebuild a recog from the survivors */
+    recognew = recogCreateFromPixa(pixakept, 0, 0, 0, 150, 1);
+    pixaDestroy(&pixakept);
+    if (recognew == NULL)
+        return ERROR_INT("failure to make recog from pixa sans outliers",
+                          __func__, 1);
+
+    *precog = recognew;
+    return 0;
+}
+
+
+/*!
+ * \brief   pixaRemoveOutliers1()
+ *
+ * \param[in]   pixas        unscaled labeled templates
+ * \param[in]   minscore     keep everything with at least this score;
+ *                           use -1.0 for default.
+ * \param[in]   mintarget    minimum desired number to retain if possible;
+ *                           use -1 for default.
+ * \param[in]   minsize      minimum number of samples required for a class;
+ *                           use -1 for default.
+ * \param[out]  ppixsave     [optional debug] saved templates, with scores
+ * \param[out]  ppixrem      [optional debug] removed templates, with scores
+ * \return  pixa   of unscaled templates to be kept, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Removing outliers is particularly important when recognition
+ *          goes against all the samples in the training set, as opposed
+ *          to the averages for each class.  The reason is that we get
+ *          an identification error if a mislabeled template is a best
+ *          match for an input sample.
+ *      (2) Because the score values depend strongly on the quality
+ *          of the character images, to avoid losing too many samples
+ *          we supplement a minimum score for retention with a score
+ *          necessary to acquire the minimum target number of templates.
+ *          To do this we are willing to use a lower threshold,
+ *          LowerScoreThreshold, on the score.  Consequently, with
+ *          poor quality templates, we may keep samples with a score
+ *          less than %minscore, but never less than LowerScoreThreshold.
+ *          And if the number of samples is less than %minsize, we do
+ *          not use any.
+ *      (3) This is meant to be used on a BAR, where the templates all
+ *          come from the same book; use minscore ~0.75.
+ *      (4) Method: make a scaled recog from the input %pixas.  Then,
+ *          for each class: generate the averages, match each
+ *          scaled template against the average, and save unscaled
+ *          templates that had a sufficiently good match.
+ *      (5) Input normalization: %minscore is capped at 1.0 and a value
+ *          <= 0.0 selects DefaultMinScore; %mintarget is capped at 3
+ *          and a value <= 0 selects DefaultMinTarget; a negative
+ *          %minsize selects DefaultMinSetSize.
+ *      (6) Debug output is enabled when either %ppixsave or %ppixrem
+ *          is requested, but the rejected templates are collected only
+ *          when %ppixrem is given.  Otherwise they are destroyed.
+ * </pre>
+ */
+PIXA *
+pixaRemoveOutliers1(PIXA      *pixas,
+                    l_float32  minscore,
+                    l_int32    mintarget,
+                    l_int32    minsize,
+                    PIX      **ppixsave,
+                    PIX      **ppixrem)
+{
+l_int32    i, j, debug, n, area1, area2;
+l_float32  x1, y1, x2, y2, minfract, score, rankscore, threshscore;
+NUMA      *nasum, *narem, *nasave, *nascore;
+PIX       *pix1, *pix2;
+PIXA      *pixa, *pixarem, *pixad;
+PTA       *pta;
+L_RECOG   *recog;
+
+    if (ppixsave) *ppixsave = NULL;
+    if (ppixrem) *ppixrem = NULL;
+    if (!pixas)
+        return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
+    minscore = L_MIN(minscore, 1.0);
+    if (minscore <= 0.0)
+        minscore = DefaultMinScore;
+    mintarget = L_MIN(mintarget, 3);
+    if (mintarget <= 0)
+        mintarget = DefaultMinTarget;
+    if (minsize < 0)
+        minsize = DefaultMinSetSize;
+
+        /* Make a special height-scaled recognizer with average templates */
+    debug = (ppixsave || ppixrem) ? 1 : 0;
+    recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
+    if (!recog)
+        return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL);
+    if (recogAverageSamples(recog, debug) != 0) {
+        recogDestroy(&recog);
+        return (PIXA *)ERROR_PTR("bad templates", __func__, NULL);
+    }
+
+        /* Debug accumulators exist only for the outputs requested */
+    nasave = (ppixsave) ? numaCreate(0) : NULL;
+    pixarem = (ppixrem) ? pixaCreate(0) : NULL;
+    narem = (ppixrem) ? numaCreate(0) : NULL;
+
+    pixad = pixaCreate(0);
+    for (i = 0; i < recog->setsize; i++) {
+            /* Access the average template and values for scaled
+             * images in this class */
+        pix1 = pixaGetPix(recog->pixa, i, L_CLONE);
+        ptaGetPt(recog->pta, i, &x1, &y1);
+        numaGetIValue(recog->nasum, i, &area1);
+
+            /* Get the scores for each sample in the class */
+        pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
+        pta = ptaaGetPta(recog->ptaa, i, L_CLONE);  /* centroids */
+        nasum = numaaGetNuma(recog->naasum, i, L_CLONE);  /* fg areas */
+        n = pixaGetCount(pixa);
+        nascore = numaCreate(n);
+        for (j = 0; j < n; j++) {
+            pix2 = pixaGetPix(pixa, j, L_CLONE);
+            ptaGetPt(pta, j, &x2, &y2);  /* centroid of this sample */
+            numaGetIValue(nasum, j, &area2);  /* fg sum of this sample */
+            pixCorrelationScoreSimple(pix1, pix2, area1, area2,
+                                      x1 - x2, y1 - y2, 5, 5,
+                                      recog->sumtab, &score);
+            numaAddNumber(nascore, score);
+            if (debug && score == 0.0)  /* typ. large size difference */
+                lept_stderr("Got 0 score for i = %d, j = %d\n", i, j);
+            pixDestroy(&pix2);
+        }
+        pixDestroy(&pix1);
+
+            /* Find the rankscore, corresponding to the 1.0 - minfract.
+             * To attempt to maintain the minfract of templates, use as a
+             * cutoff the minimum of minscore and the rank score.  However,
+             * no template is saved with an actual score less than
+             * LowerScoreThreshold. */
+        minfract = (l_float32)mintarget / (l_float32)n;
+        numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore);
+        threshscore = L_MAX(LowerScoreThreshold,
+                            L_MIN(minscore, rankscore));
+        if (debug) {
+            L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n",
+                   __func__, minscore, rankscore, threshscore);
+        }
+
+            /* Save templates that are at or above threshold.
+             * Toss any classes with less than %minsize templates.
+             * Rejected templates go to the removed set only when that
+             * set exists (i.e., %ppixrem was requested); testing %debug
+             * here would try to add to a NULL pixa when only %ppixsave
+             * was requested, and the copied pix would be leaked. */
+        for (j = 0; j < n; j++) {
+            numaGetFValue(nascore, j, &score);
+            pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
+            if (score >= threshscore && n >= minsize) {
+                pixaAddPix(pixad, pix1, L_INSERT);
+                if (nasave) numaAddNumber(nasave, score);
+            } else if (pixarem) {
+                pixaAddPix(pixarem, pix1, L_INSERT);
+                numaAddNumber(narem, score);
+            } else {
+                pixDestroy(&pix1);
+            }
+        }
+
+        pixaDestroy(&pixa);
+        ptaDestroy(&pta);
+        numaDestroy(&nasum);
+        numaDestroy(&nascore);
+    }
+
+    if (ppixsave) {
+        *ppixsave = pixDisplayOutliers(pixad, nasave);
+        numaDestroy(&nasave);
+    }
+    if (ppixrem) {
+        *ppixrem = pixDisplayOutliers(pixarem, narem);
+        pixaDestroy(&pixarem);
+        numaDestroy(&narem);
+    }
+    recogDestroy(&recog);
+    return pixad;
+}
+
+
+/*!
+ * \brief   recogRemoveOutliers2()
+ *
+ * \param[in]   precog      addr of recog with unscaled labeled templates
+ * \param[in]   minscore    keep everything with at least this score
+ * \param[in]   minsize     minimum number of samples required for a class
+ * \param[out]  ppixsave    [optional debug] saved templates, with scores
+ * \param[out]  ppixrem     [optional debug] removed templates, with scores
+ * \return  0 if OK, 1 on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) Convenience wrapper around pixaRemoveOutliers2(), which does
+ *          the filtering; see that function for the parameters.
+ *      (2) The unscaled templates are extracted and the input recog is
+ *          destroyed right away.  On success, %precog receives a new
+ *          recog made from the surviving templates with
+ *          recogCreateFromPixa(pixa, 0, 0, 0, 150, 1).  On any later
+ *          failure the input recog has already been destroyed.
+ * </pre>
+ */
+l_ok
+recogRemoveOutliers2(L_RECOG  **precog,
+                     l_float32  minscore,
+                     l_int32    minsize,
+                     PIX      **ppixsave,
+                     PIX      **ppixrem)
+{
+PIXA     *pixaall, *pixakept;
+L_RECOG  *recognew;
+
+    if (!precog)
+        return ERROR_INT("&recog not defined", __func__, 1);
+    if (*precog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* Pull out the unscaled templates; the old recog is done */
+    pixaall = recogExtractPixa(*precog);
+    recogDestroy(precog);
+
+        /* Filter them */
+    pixakept = pixaRemoveOutliers2(pixaall, minscore, minsize,
+                                   ppixsave, ppixrem);
+    pixaDestroy(&pixaall);
+    if (pixakept == NULL)
+        return ERROR_INT("failure to remove outliers", __func__, 1);
+
+        /* Rebuild a recog from the survivors */
+    recognew = recogCreateFromPixa(pixakept, 0, 0, 0, 150, 1);
+    pixaDestroy(&pixakept);
+    if (recognew == NULL)
+        return ERROR_INT("failure to make recog from pixa sans outliers",
+                          __func__, 1);
+
+    *precog = recognew;
+    return 0;
+}
+
+
+/*!
+ * \brief   pixaRemoveOutliers2()
+ *
+ * \param[in]   pixas       unscaled labeled templates
+ * \param[in]   minscore    keep everything with at least this score;
+ *                          use -1.0 for default.  Values above 1.0 are
+ *                          capped at 1.0; any value <= 0.0 selects
+ *                          DefaultMinScore.
+ * \param[in]   minsize     minimum number of samples required for a class;
+ *                          use -1 for default (any negative value selects
+ *                          DefaultMinSetSize).
+ * \param[out]  ppixsave    [optional debug] saved templates, with scores
+ * \param[out]  ppixrem     [optional debug] removed templates, with scores
+ * \return  pixa   of unscaled templates to be kept, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Removing outliers is particularly important when recognition
+ *          goes against all the samples in the training set, as opposed
+ *          to the averages for each class.  The reason is that we get
+ *          an identification error if a mislabeled template is a best
+ *          match for an input sample.
+ *      (2) This method compares each template against the average templates
+ *          of each class, and discards any template that has a higher
+ *          correlation to a class different from its own.  It also
+ *          sets a lower bound on correlation scores with its class average.
+ *      (3) This is meant to be used on a BAR, where the templates all
+ *          come from the same book; use minscore ~0.75.
+ *      (4) A template in class i is kept only when all three hold:
+ *          its best-scoring class average is that of class i, that
+ *          best score is at least %minscore, and class i has at
+ *          least %minsize templates.
+ *      (5) Debug mode for recogAverageSamples() is enabled when either
+ *          %ppixsave or %ppixrem is requested.  Rejected templates are
+ *          rendered with recogDisplayOutlier() only when %ppixrem
+ *          is requested.
+ * </pre>
+ */
+PIXA *
+pixaRemoveOutliers2(PIXA      *pixas,
+                    l_float32  minscore,
+                    l_int32    minsize,
+                    PIX      **ppixsave,
+                    PIX      **ppixrem)
+{
+l_int32    i, j, k, n, area1, area2, maxk, debug;
+l_float32  x1, y1, x2, y2, score, maxscore;
+NUMA      *nan, *nascore, *nasave;
+PIX       *pix1, *pix2, *pix3;
+PIXA      *pixarem, *pixad;
+L_RECOG   *recog;
+
+    if (ppixsave) *ppixsave = NULL;
+    if (ppixrem) *ppixrem = NULL;
+    if (!pixas)
+        return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
+    minscore = L_MIN(minscore, 1.0);
+    if (minscore <= 0.0)
+        minscore = DefaultMinScore;
+    if (minsize < 0)
+        minsize = DefaultMinSetSize;
+
+        /* Make a special height-scaled recognizer with average templates */
+    debug = (ppixsave || ppixrem) ? 1 : 0;
+    recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
+    if (!recog)
+        return (PIXA *)ERROR_PTR("bad pixas; recog not made", __func__, NULL);
+    if (recogAverageSamples(recog, debug) != 0) {
+        recogDestroy(&recog);
+        return (PIXA *)ERROR_PTR("bad templates", __func__, NULL);
+    }
+
+    nasave = (ppixsave) ? numaCreate(0) : NULL;  /* scores of kept templates */
+    pixarem = (ppixrem) ? pixaCreate(0) : NULL;  /* renderings of rejects */
+
+    pixad = pixaCreate(0);
+    pixaaGetCount(recog->pixaa, &nan);  /* number of templates in each class */
+    for (i = 0; i < recog->setsize; i++) {
+            /* Get the scores for each sample in the class, when comparing
+             * with averages from all the classes. */
+        numaGetIValue(nan, i, &n);
+        for (j = 0; j < n; j++) {
+            pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE);
+            ptaaGetPt(recog->ptaa, i, j, &x1, &y1);  /* centroid */
+            numaaGetValue(recog->naasum, i, j, NULL, &area1);  /* fg sum */
+            nascore = numaCreate(n);  /* receives one score per class average */
+            for (k = 0; k < recog->setsize; k++) {  /* average templates */
+                pix2 = pixaGetPix(recog->pixa, k, L_CLONE);
+                ptaGetPt(recog->pta, k, &x2, &y2);  /* average centroid */
+                numaGetIValue(recog->nasum, k, &area2);  /* average fg sum */
+                pixCorrelationScoreSimple(pix1, pix2, area1, area2,
+                                          x1 - x2, y1 - y2, 5, 5,
+                                          recog->sumtab, &score);
+                numaAddNumber(nascore, score);
+                pixDestroy(&pix2);
+            }
+
+                /* Save templates that are in the correct class and
+                 * at or above threshold.  Toss any classes with less
+                 * than %minsize templates.  The template saved is a
+                 * copy of the unscaled one from recog->pixaa_u, not
+                 * the scaled pix1 that was scored. */
+            numaGetMax(nascore, &maxscore, &maxk);  /* best class average */
+            if (maxk == i && maxscore >= minscore && n >= minsize) {
+                    /* save it */
+                pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
+                pixaAddPix(pixad, pix3, L_INSERT);
+                if (nasave) numaAddNumber(nasave, maxscore);
+            } else if (ppixrem) {  /* outlier */
+                pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore);
+                pixaAddPix(pixarem, pix3, L_INSERT);
+            }
+            numaDestroy(&nascore);
+            pixDestroy(&pix1);
+        }
+    }
+
+    if (ppixsave) {
+        *ppixsave = pixDisplayOutliers(pixad, nasave);
+        numaDestroy(&nasave);
+    }
+    if (ppixrem) {
+        *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2);
+        pixaDestroy(&pixarem);
+    }
+
+    numaDestroy(&nan);
+    recogDestroy(&recog);
+    return pixad;
+}
+
+
+/*------------------------------------------------------------------------*
+ *                       Training on unlabeled data                       *
+ *------------------------------------------------------------------------*/
+/*!
+ * \brief   recogTrainFromBoot()
+ *
+ * \param[in]    recogboot   labeled boot recognizer
+ * \param[in]    pixas       set of unlabeled input characters
+ * \param[in]    minscore    min score for accepting the example; e.g., 0.75
+ * \param[in]    threshold   for binarization, if needed
+ * \param[in]    debug       1 for debug output saved to recogboot; 0 otherwise
+ * \return  pixad   labeled version of input pixas, trained on a BSR,
+ *                  or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This takes %pixas of unscaled single characters and %recogboot,
+ *          a bootstrap recognizer (BSR) that has been set up with parameters
+ *            * scaleh: scale all templates to this height
+ *            * linew: width of normalized strokes, or 0 if using
+ *              the input image
+ *          It modifies the pix in %pixas accordingly and correlates
+ *          with the templates in the BSR.  It returns those input
+ *          images in %pixas whose best correlation with the BSR is at
+ *          or above %minscore.  The returned pix have added text labels
+ *          for the text string of the class to which the best
+ *          correlated template belongs.
+ *      (2) Identification occurs in scaled mode (typically with h = 40),
+ *          optionally using width-normalized line images derived
+ *          from those in %pixas.
+ *      (3) The returned pix are copies of the original (unscaled,
+ *          unconverted) pix in %pixas; only the text field is changed.
+ *      (4) With %debug set, the debug image from each identification is
+ *          added to recogboot->pixadb_boot, and a copy is added again
+ *          for each accepted sample.  Without %debug there is no debug
+ *          image and nothing is added.
+ * </pre>
+ */
+PIXA  *
+recogTrainFromBoot(L_RECOG   *recogboot,
+                   PIXA      *pixas,
+                   l_float32  minscore,
+                   l_int32    threshold,
+                   l_int32    debug)
+{
+char      *text;
+l_int32    i, n, same, maxd, scaleh, linew;
+l_float32  score;
+PIX       *pix1, *pix2, *pixdb = NULL;
+PIXA      *pixa1, *pixa2, *pixa3, *pixad;
+
+    if (!recogboot)
+        return (PIXA *)ERROR_PTR("recogboot not defined", __func__, NULL);
+    if (!pixas)
+        return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
+
+        /* Make sure all input pix are 1 bpp */
+    if ((n = pixaGetCount(pixas)) == 0)
+        return (PIXA *)ERROR_PTR("no pix in pixa", __func__, NULL);
+    pixaVerifyDepth(pixas, &same, &maxd);
+    if (maxd == 1) {
+        pixa1 = pixaCopy(pixas, L_COPY);
+    } else {
+        pixa1 = pixaCreate(n);
+        for (i = 0; i < n; i++) {
+            pix1 = pixaGetPix(pixas, i, L_CLONE);
+            pix2 = pixConvertTo1(pix1, threshold);
+            pixaAddPix(pixa1, pix2, L_INSERT);
+            pixDestroy(&pix1);
+        }
+    }
+
+        /* Scale the input images to match the BSR */
+    scaleh = recogboot->scaleh;
+    linew = recogboot->linew;
+    pixa2 = pixaCreate(n);
+    for (i = 0; i < n; i++) {
+        pix1 = pixaGetPix(pixa1, i, L_CLONE);
+        pix2 = pixScaleToSize(pix1, 0, scaleh);
+        pixaAddPix(pixa2, pix2, L_INSERT);
+        pixDestroy(&pix1);
+    }
+    pixaDestroy(&pixa1);
+
+        /* Optionally convert to width-normalized line */
+    if (linew > 0)
+        pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8);
+    else
+        pixa3 = pixaCopy(pixa2, L_CLONE);
+    pixaDestroy(&pixa2);
+
+        /* Identify using recogboot.
+         * NOTE(review): index i of the processed pixa3 is assumed to
+         * correspond to index i of %pixas; that holds only if every
+         * conversion and scaling step above succeeded -- confirm. */
+    n = pixaGetCount(pixa3);
+    pixad = pixaCreate(n);
+    for (i = 0; i < n; i++) {
+        pix1 = pixaGetPix(pixa3, i, L_COPY);
+        pixSetText(pix1, NULL);  /* remove any existing text or labelling */
+        if (!debug) {
+            recogIdentifyPix(recogboot, pix1, NULL);
+        } else {
+            recogIdentifyPix(recogboot, pix1, &pixdb);
+            pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT);
+        }
+        rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL);
+        if (score >= minscore) {
+                /* Label a copy of the original input pix */
+            pix2 = pixaGetPix(pixas, i, L_COPY);
+            pixSetText(pix2, text);
+            pixaAddPix(pixad, pix2, L_INSERT);
+                /* pixdb is only set in debug mode; without this guard
+                 * a NULL pix was handed to pixaAddPix() for every
+                 * accepted sample */
+            if (pixdb)
+                pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY);
+        }
+        LEPT_FREE(text);
+        pixDestroy(&pix1);
+    }
+    pixaDestroy(&pixa3);
+
+    return pixad;
+}
+
+
+/*------------------------------------------------------------------------*
+ *                     Padding the digit training set                     *
+ *------------------------------------------------------------------------*/
+/*!
+ * \brief   recogPadDigitTrainingSet()
+ *
+ * \param[in,out]   precog    trained; if padding is needed, it is replaced
+ *                            by a new padded recog
+ * \param[in]       scaleh    must be > 0; suggest ~40.
+ * \param[in]       linew     use 0 for original scanned images
+ * \return       0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a no-op if padding is not needed.  However,
+ *          if it is, this replaces the input recog with a new recog,
+ *          padded appropriately with templates from a boot recognizer,
+ *          and set up with correlation templates derived from
+ *          %scaleh and %linew.
+ *      (2) If %scaleh <= 0, a warning is issued and 40 is used.
+ *      (3) The new recog takes its threshold and maxyshift from the
+ *          input recog.
+ *      (4) If the padded recog cannot be created, the input recog has
+ *          nevertheless been destroyed: %precog is set to NULL and
+ *          1 is returned.
+ * </pre>
+ */
+l_ok
+recogPadDigitTrainingSet(L_RECOG  **precog,
+                         l_int32    scaleh,
+                         l_int32    linew)
+{
+PIXA     *pixa;
+L_RECOG  *recog1, *recog2;
+SARRAY   *sa;
+
+    if (!precog)
+        return ERROR_INT("&recog not defined", __func__, 1);
+    if ((recog1 = *precog) == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* A null sa means that no padding is required */
+    recogIsPaddingNeeded(recog1, &sa);
+    if (!sa) return 0;
+
+        /* Get a new pixa with the padding templates added */
+    pixa = recogAddDigitPadTemplates(recog1, sa);
+    sarrayDestroy(&sa);
+    if (!pixa)
+        return ERROR_INT("pixa not made", __func__, 1);
+
+        /* Need to use templates that are scaled to a fixed height. */
+    if (scaleh <= 0) {
+        L_WARNING("templates must be scaled to fixed height; using %d\n",
+                  __func__, 40);
+        scaleh = 40;
+    }
+
+        /* Create a hybrid recog, composed of templates from both
+         * the original and bootstrap sources.  The parameters are read
+         * from recog1 before it is destroyed. */
+    recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold,
+                                 recog1->maxyshift);
+    pixaDestroy(&pixa);
+    recogDestroy(precog);
+    *precog = recog2;
+
+        /* Report the failure instead of returning success with a
+         * null recog */
+    if (!recog2)
+        return ERROR_INT("padded recog not made", __func__, 1);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogIsPaddingNeeded()
+ *
+ * \param[in]    recog   trained
+ * \param[out]   psa     addr of returned sarray of class text strings
+ *                       that need padding; NULL if none are needed
+ * \return       1 on error; 0 if OK, whether or not additional padding
+ *               templates are required.
+ *
+ * <pre>
+ * Notes:
+ *      (1) No padding is needed, and %psa is returned as NULL, when
+ *          the number of classes in recog->pixaa_u equals
+ *          recog->charset_size and the smallest class holds at least
+ *          recog->min_nopad templates.
+ *      (2) Otherwise %psa receives the strings from
+ *          recogAddMissingClassStrings(), followed by the text string
+ *          of every existing class holding fewer than recog->min_nopad
+ *          templates.
+ *      (3) recogPadDigitTrainingSet() passes the returned sarray to
+ *          recogAddDigitPadTemplates().
+ * </pre>
+ */
+l_int32
+recogIsPaddingNeeded(L_RECOG  *recog,
+                     SARRAY  **psa)
+{
+char      *label;
+l_int32    iclass, count, minsamples, nclasses, complete;
+l_float32  mincount;
+NUMA      *nacount;
+SARRAY    *sapad;
+
+    if (!psa)
+        return ERROR_INT("&sa not defined", __func__, 1);
+    *psa = NULL;
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* Is every class of the charset present? */
+    nclasses = pixaaGetCount(recog->pixaa_u, &nacount);  /* unscaled */
+    complete = (nclasses == recog->charset_size);
+
+        /* If so, and the smallest class is big enough, we're done */
+    minsamples = recog->min_nopad;
+    numaGetMin(nacount, &mincount, NULL);
+    if (complete && mincount >= minsamples) {
+        numaDestroy(&nacount);
+        return 0;
+    }
+
+        /* Start with the classes that have no templates at all */
+    sapad = recogAddMissingClassStrings(recog);
+    *psa = sapad;
+
+        /* Then append each existing class that is too small */
+    for (iclass = 0; iclass < nclasses; iclass++) {
+        numaGetIValue(nacount, iclass, &count);
+        if (count >= minsamples)
+            continue;
+        label = sarrayGetString(recog->sa_text, iclass, L_COPY);
+        sarrayAddString(sapad, label, L_INSERT);
+    }
+    numaDestroy(&nacount);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogAddMissingClassStrings()
+ *
+ * \param[in]    recog   trained
+ * \return       sa  of class strings missing in %recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This returns an empty %sa if there is at least one template
+ *          in each class in %recog.
+ *      (2) Only the digit charset (charset_type == 1) is handled; for
+ *          any other charset type an empty %sa is returned.
+ *      (3) Each class string in recog->sa_text is mapped to a digit
+ *          through its first character.  Strings that are missing or
+ *          that do not start with a digit of the charset are ignored.
+ * </pre>
+ */
+static SARRAY  *
+recogAddMissingClassStrings(L_RECOG  *recog)
+{
+char    *text;
+char     str[4];
+l_int32  i, nclass, index, ival;
+NUMA    *na;
+SARRAY  *sa;
+
+    if (!recog)
+        return (SARRAY *)ERROR_PTR("recog not defined", __func__, NULL);
+
+        /* Only handling digits */
+    nclass = pixaaGetCount(recog->pixaa_u, NULL);  /* unscaled bitmaps */
+    if (recog->charset_type != 1 || nclass == 10)
+        return sarrayCreate(0);  /* empty */
+
+        /* Make an indicator array for missing classes: one entry per
+         * class in the charset, initially all marked as missing. */
+    na = numaCreate(0);
+    sa = sarrayCreate(0);
+    for (i = 0; i < recog->charset_size; i++)
+         numaAddNumber(na, 1);
+
+        /* Clear the indicator for every class that is present */
+    for (i = 0; i < nclass; i++) {
+        text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
+        if (!text)
+            continue;
+        index = text[0] - '0';
+        if (index < 0 || index >= recog->charset_size)
+            continue;  /* not a digit of this charset */
+        numaSetValue(na, index, 0);
+    }
+
+        /* Convert to string and add to output.  The scan must cover
+         * the whole charset, not just the first %nclass entries:
+         * a missing digit can have any index in the indicator array. */
+    for (i = 0; i < recog->charset_size; i++) {
+        numaGetIValue(na, i, &ival);
+        if (ival == 1) {
+            str[0] = '0' + i;
+            str[1] = '\0';
+            sarrayAddString(sa, str, L_COPY);
+        }
+    }
+    numaDestroy(&na);
+    return sa;
+}
+
+
+/*!
+ * \brief   recogAddDigitPadTemplates()
+ *
+ * \param[in]    recog   trained
+ * \param[in]    sa      set of text strings that need to be padded
+ * \return  pixa   of all templates from %recog and the additional pad
+ *                 templates from a boot recognizer; or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Call recogIsPaddingNeeded() first, which returns %sa of
+ *          template text strings for classes where more templates
+ *          are needed.
+ *      (2) A boot template is added when its text field is identical
+ *          to one of the strings in %sa.  Boot templates without a
+ *          text field are skipped.
+ *      (3) It is an error if either the boot templates or the
+ *          templates of %recog cannot be obtained.
+ * </pre>
+ */
+PIXA  *
+recogAddDigitPadTemplates(L_RECOG  *recog,
+                          SARRAY   *sa)
+{
+char    *str, *text;
+l_int32  i, j, n, nt;
+PIX     *pix;
+PIXA    *pixa1, *pixa2;
+
+    if (!recog)
+        return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL);
+    if (!sa)
+        return (PIXA *)ERROR_PTR("sa not defined", __func__, NULL);
+    if (recogCharsetAvailable(recog->charset_type) == FALSE)
+        return (PIXA *)ERROR_PTR("boot charset not available", __func__, NULL);
+
+        /* Make boot recog templates */
+    if ((pixa1 = recogMakeBootDigitTemplates(0, 0)) == NULL)
+        return (PIXA *)ERROR_PTR("boot templates not made", __func__, NULL);
+
+        /* Extract the unscaled templates from %recog */
+    if ((pixa2 = recogExtractPixa(recog)) == NULL) {
+        pixaDestroy(&pixa1);
+        return (PIXA *)ERROR_PTR("recog templates not extracted",
+                                 __func__, NULL);
+    }
+
+        /* Add selected boot recog templates based on the text strings in sa */
+    n = pixaGetCount(pixa1);
+    nt = sarrayGetCount(sa);
+    for (i = 0; i < n; i++) {
+        if ((pix = pixaGetPix(pixa1, i, L_CLONE)) == NULL)
+            continue;
+        text = pixGetText(pix);
+        if (text) {  /* strcmp() must not be given a NULL string */
+            for (j = 0; j < nt; j++) {
+                str = sarrayGetString(sa, j, L_NOCOPY);
+                if (str && !strcmp(text, str)) {
+                    pixaAddPix(pixa2, pix, L_COPY);
+                    break;
+                }
+            }
+        }
+        pixDestroy(&pix);
+    }
+
+    pixaDestroy(&pixa1);
+    return pixa2;
+}
+
+
+/*!
+ * \brief   recogCharsetAvailable()
+ *
+ * \param[in]    type     of charset for padding
+ * \return  1 if available; 0 if not.
+ *
+ * <pre>
+ * Notes:
+ *      (1) Only L_ARABIC_NUMERALS is reported as available.  The roman
+ *          numeral and alphabetic types are recognized but reported
+ *          as not available; any other value is reported as unknown.
+ * </pre>
+ */
+static l_int32
+recogCharsetAvailable(l_int32  type)
+{
+    if (type == L_ARABIC_NUMERALS)
+        return TRUE;
+
+    if (type == L_LC_ROMAN_NUMERALS || type == L_UC_ROMAN_NUMERALS ||
+        type == L_LC_ALPHA || type == L_UC_ALPHA) {
+        L_INFO("charset type %d not available\n", __func__, type);
+    } else {
+        L_INFO("charset type %d is unknown\n", __func__, type);
+    }
+    return FALSE;
+}
+
+
+/*------------------------------------------------------------------------*
+ *                      Making a boot digit recognizer                    *
+ *------------------------------------------------------------------------*/
+/*!
+ * \brief   recogMakeBootDigitRecog()
+ *
+ * \param[in]    nsamp       number of samples of each digit; or 0
+ * \param[in]    scaleh      scale all heights to this; typ. use 40
+ * \param[in]    linew       normalized line width; typ. use 5; 0 to skip
+ * \param[in]    maxyshift   from nominal centroid alignment; typically 0 or 1
+ * \param[in]    debug       1 for showing templates; 0 otherwise
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *     (1) The templates come from recogMakeBootDigitTemplates(), which
+ *         supplies pre-computed, labeled pixa of single digits.  A
+ *         recognizer is generated from them, where the templates can
+ *         be modified by:
+ *         - scaling (isotropically to fixed height)
+ *         - generating a skeleton and thickening so that all strokes
+ *           have the same width.
+ *     (2) To use the input bitmaps, set %linew = 0; otherwise, set
+ *         %linew to the desired line width.
+ *     (3) If %nsamp == 0, the templates are the joined and extended
+ *         output of l_bootnum_gen1, l_bootnum_gen2 and l_bootnum_gen3.
+ *         Otherwise they are taken from l_bootnum_gen4(%nsamp).
+ *     (4) The recognizer is created with a threshold of 128 and
+ *         without width scaling.
+ * </pre>
+ */
+L_RECOG  *
+recogMakeBootDigitRecog(l_int32  nsamp,
+                        l_int32  scaleh,
+                        l_int32  linew,
+                        l_int32  maxyshift,
+                        l_int32  debug)
+
+{
+PIXA     *pixat;
+L_RECOG  *bootrecog;
+
+        /* Build the labeled digit templates */
+    pixat = recogMakeBootDigitTemplates(nsamp, debug);
+
+        /* Turn them into a recog; the templates themselves are no
+         * longer needed afterwards. */
+    bootrecog = recogCreateFromPixa(pixat, 0, scaleh, linew, 128, maxyshift);
+    pixaDestroy(&pixat);
+
+    if (debug != 0)
+        recogShowContent(stderr, bootrecog, 0, 1);
+    return bootrecog;
+}
+
+
+/*!
+ * \brief   recogMakeBootDigitTemplates()
+ *
+ * \param[in]    nsamp     number of samples of each digit; or 0
+ * \param[in]    debug     1 for display of templates
+ * \return  pixa   of templates; or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *     (1) See recogMakeBootDigitRecog().
+ *     (2) With %nsamp > 0 the result of l_bootnum_gen4(%nsamp) is
+ *         returned unchanged.  Otherwise the three sets from
+ *         l_bootnum_gen1/2/3 are joined and then extended by
+ *         horizontal scaling with factors 0.9, 1.1 and 1.2.
+ * </pre>
+ */
+PIXA  *
+recogMakeBootDigitTemplates(l_int32  nsamp,
+                            l_int32  debug)
+{
+static const l_float32  hscale[3] = {0.9f, 1.1f, 1.2f};
+l_int32  i;
+NUMA    *nascale;
+PIX     *pixt;
+PIX     *pixtile[3];
+PIXA    *pixaout;
+PIXA    *pixagen[3];
+
+        /* A fixed number of samples per digit comes from one generator */
+    if (nsamp > 0) {
+        pixaout = l_bootnum_gen4(nsamp);
+        if (debug) {
+            pixt = pixaDisplayTiledWithText(pixaout, 1500, 1.0, 10,
+                                            2, 6, 0xff000000);
+            pixDisplay(pixt, 0, 0);
+            pixDestroy(&pixt);
+        }
+        return pixaout;
+    }
+
+        /* Otherwise start from the three standard generators */
+    pixagen[0] = l_bootnum_gen1();
+    pixagen[1] = l_bootnum_gen2();
+    pixagen[2] = l_bootnum_gen3();
+    if (debug) {
+            /* Show the three sets side by side, 600 pixels apart */
+        for (i = 0; i < 3; i++)
+            pixtile[i] = pixaDisplayTiledWithText(pixagen[i], 1500, 1.0, 10,
+                                                  2, 6, 0xff000000);
+        for (i = 0; i < 3; i++)
+            pixDisplay(pixtile[i], 600 * i, 0);
+        for (i = 0; i < 3; i++)
+            pixDestroy(&pixtile[i]);
+    }
+
+        /* Collect everything in the first pixa */
+    pixaJoin(pixagen[0], pixagen[1], 0, -1);
+    pixaJoin(pixagen[0], pixagen[2], 0, -1);
+    pixaDestroy(&pixagen[1]);
+    pixaDestroy(&pixagen[2]);
+
+        /* Extend by horizontal scaling */
+    nascale = numaCreate(4);
+    for (i = 0; i < 3; i++)
+        numaAddNumber(nascale, hscale[i]);
+    pixaout = pixaExtendByScaling(pixagen[0], nascale, L_HORIZ, 1);
+
+    pixaDestroy(&pixagen[0]);
+    numaDestroy(&nascale);
+    return pixaout;
+}
+
+
+/*------------------------------------------------------------------------*
+ *                               Debugging                                *
+ *------------------------------------------------------------------------*/
+/*!
+ * \brief   recogShowContent()
+ *
+ * \param[in]    fp       file stream
+ * \param[in]    recog
+ * \param[in]    index    for naming of output files of template images
+ * \param[in]    display  1 for showing template images; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A text summary of the recog parameters and of the number
+ *          of unscaled samples in each class is written to %fp.
+ *      (2) With %display != 0, the unscaled templates are written to
+ *          /tmp/lept/recog/templates_u.<index>.png and displayed.
+ *          If training is done, the templates in recog->pixaa are
+ *          likewise written to /tmp/lept/recog/templates.<index>.png
+ *          and displayed.
+ * </pre>
+ */
+l_ok
+recogShowContent(FILE     *fp,
+                 L_RECOG  *recog,
+                 l_int32   index,
+                 l_int32   display)
+{
+char     path[128];
+l_int32  iclass, charval, nsamp;
+NUMA    *nacount;
+PIX     *pixt;
+
+    if (!fp)
+        return ERROR_INT("stream not defined", __func__, 1);
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* General parameters */
+    fprintf(fp, "Debug print of recog contents\n");
+    fprintf(fp, "  Setsize: %d\n", recog->setsize);
+    fprintf(fp, "  Binarization threshold: %d\n", recog->threshold);
+    fprintf(fp, "  Maximum matching y-jiggle: %d\n", recog->maxyshift);
+    if (recog->linew > 0)
+        fprintf(fp, "  Using templates with fixed line width for matching\n");
+    else
+        fprintf(fp, "  Using image templates for matching\n");
+    if (recog->scalew != 0)
+        fprintf(fp, "  Template width scaled to %d\n", recog->scalew);
+    else
+        fprintf(fp, "  No width scaling of templates\n");
+    if (recog->scaleh != 0)
+        fprintf(fp, "  Template height scaled to %d\n", recog->scaleh);
+    else
+        fprintf(fp, "  No height scaling of templates\n");
+
+        /* Per-class sample counts; values below 128 are printed
+         * as characters, all others as numbers. */
+    fprintf(fp, "  Number of samples in each class:\n");
+    pixaaGetCount(recog->pixaa_u, &nacount);
+    for (iclass = 0; iclass < recog->setsize; iclass++) {
+        l_dnaGetIValue(recog->dna_tochar, iclass, &charval);
+        numaGetIValue(nacount, iclass, &nsamp);
+        if (charval < 128) {
+            fprintf(fp, "    class %d, char %c:   %d\n",
+                    iclass, charval, nsamp);
+        } else {
+            fprintf(fp, "    class %d, val %d:   %d\n",
+                    iclass, charval, nsamp);
+        }
+    }
+    numaDestroy(&nacount);
+
+    if (!display)
+        return 0;
+
+        /* Unscaled templates */
+    lept_mkdir("lept/recog");
+    pixt = pixaaDisplayByPixa(recog->pixaa_u, 50, 1.0, 20, 20, 0);
+    snprintf(path, sizeof(path), "/tmp/lept/recog/templates_u.%d.png", index);
+    pixWriteDebug(path, pixt, IFF_PNG);
+    pixDisplay(pixt, 0, 200 * index);
+    pixDestroy(&pixt);
+
+        /* Templates in recog->pixaa, once training is done */
+    if (recog->train_done) {
+        pixt = pixaaDisplayByPixa(recog->pixaa, 50, 1.0, 20, 20, 0);
+        snprintf(path, sizeof(path),
+                 "/tmp/lept/recog/templates.%d.png", index);
+        pixWriteDebug(path, pixt, IFF_PNG);
+        pixDisplay(pixt, 800, 200 * index);
+        pixDestroy(&pixt);
+    }
+    return 0;
+}
+
+
+/*!
+ * \brief   recogDebugAverages()
+ *
+ * \param[in]    recog
+ * \param[in]    debug     0 no output; 1 for images; 2 for text; 3 for both
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Each training image in recog->pixaa is run through
+ *          recogIdentifyPix(); the debug images it returns are tiled
+ *          into a single image that is stored in recog->pixdb_ave.
+ *      (2) If recog->pixa_tr does not exist yet, it is generated as
+ *          the flattened set of all training images; this can be
+ *          used, e.g., in recogShowMatchesInRange().
+ *      (3) Returns an error if recogAverageSamples() fails.
+ * </pre>
+ */
+l_ok
+recogDebugAverages(L_RECOG  *recog,
+                   l_int32   debug)
+{
+l_int32    iclass, isamp, nclass, nsamp, index;
+l_float32  score;
+PIX       *pixs, *pixdb, *pixb;
+PIXA      *pixarow, *pixaclass;
+PIXAA     *paasrc, *paadb;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* The class averages are required for identification */
+    if (recogAverageSamples(recog, 0) != 0)
+        return ERROR_INT("averaging failed", __func__, 1);
+
+        /* Keep a flat pixa of all the training examples */
+    paasrc = recog->pixaa;
+    if (recog->pixa_tr == NULL)
+        recog->pixa_tr = pixaaFlattenToPixa(paasrc, NULL, L_CLONE);
+
+        /* Throw away any previous result before building a new one */
+    pixDestroy(&recog->pixdb_ave);
+
+        /* One output pixa per class, holding the bordered debug image
+         * of every sample in that class. */
+    nclass = pixaaGetCount(paasrc, NULL);
+    paadb = pixaaCreate(nclass);
+    for (iclass = 0; iclass < nclass; iclass++) {
+        pixarow = pixaCreate(0);
+        pixaclass = pixaaGetPixa(paasrc, iclass, L_CLONE);
+        nsamp = pixaGetCount(pixaclass);
+        for (isamp = 0; isamp < nsamp; isamp++) {
+            pixs = pixaaGetPix(paasrc, iclass, isamp, L_CLONE);
+            recogIdentifyPix(recog, pixs, &pixdb);
+            rchExtract(recog->rch, &index, &score, NULL, NULL, NULL,
+                       NULL, NULL);
+            if (debug >= 2)
+                lept_stderr("index = %d, score = %7.3f\n", index, score);
+            pixb = pixAddBorder(pixdb, 2, 1);
+            pixaAddPix(pixarow, pixb, L_INSERT);
+            pixDestroy(&pixs);
+            pixDestroy(&pixdb);
+        }
+        pixaaAddPixa(paadb, pixarow, L_INSERT);
+        pixaDestroy(&pixaclass);
+    }
+
+        /* Tile the result; write and show it for odd %debug */
+    recog->pixdb_ave = pixaaDisplayByPixa(paadb, 50, 1.0, 20, 20, 0);
+    if (debug % 2) {
+        lept_mkdir("lept/recog");
+        pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave,
+                      IFF_PNG);
+        pixDisplay(recog->pixdb_ave, 100, 100);
+    }
+
+    pixaaDestroy(&paadb);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogTileTemplatesWithCentroid()
+ *
+ * \param[in]    pixas     average templates, one for each class
+ * \param[in]    ptas      centroids of the templates in %pixas
+ * \param[in]    pixmark   small 32 bpp marker to paint at each centroid
+ * \param[in]    size      number of classes to go through
+ * \return  pix   tiled display of the marked templates
+ *
+ * <pre>
+ * Notes:
+ *      (1) Helper for recogShowAverageTemplates().  Each template that
+ *          can be fetched is converted to 32 bpp, the 3x3 marker is
+ *          rasterop'd at its centroid, and all of them are tiled.
+ * </pre>
+ */
+static PIX  *
+recogTileTemplatesWithCentroid(PIXA    *pixas,
+                               PTA     *ptas,
+                               PIX     *pixmark,
+                               l_int32  size)
+{
+l_int32    i;
+l_float32  xc, yc;
+PIX       *pixt, *pix32, *pixtiled;
+PIXA      *pixamarked;
+
+    pixamarked = pixaCreate(size);
+    for (i = 0; i < size; i++) {
+        if ((pixt = pixaGetPix(pixas, i, L_CLONE)) == NULL)
+            continue;
+        pix32 = pixConvertTo32(pixt);
+        ptaGetPt(ptas, i, &xc, &yc);
+        pixRasterop(pix32, (l_int32)(xc - 0.5), (l_int32)(yc - 0.5), 3, 3,
+                    PIX_SRC, pixmark, 0, 0);
+        pixaAddPix(pixamarked, pix32, L_INSERT);
+        pixDestroy(&pixt);
+    }
+    pixtiled = pixaDisplayTiledInRows(pixamarked, 32, 3000, 1.0, 0, 20, 0);
+    pixaDestroy(&pixamarked);
+    return pixtiled;
+}
+
+
+/*!
+ * \brief   recogShowAverageTemplates()
+ *
+ * \param[in]    recog
+ * \return  0 on success, 1 on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This debug routine generates a display of the averaged templates,
+ *          both scaled and unscaled, with the centroid visible in red.
+ *      (2) The two displays are saved in recog->pixadb_ave, replacing
+ *          any previous content; the unscaled one comes first.
+ *      (3) The template size ranges and the split parameters of %recog
+ *          are printed to stderr.
+ * </pre>
+ */
+l_int32
+recogShowAverageTemplates(L_RECOG  *recog)
+{
+l_int32  nclass;
+PIX     *pixtiled, *pixred;
+PIXA    *pixaboth;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+    lept_stderr("min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n",
+                recog->minwidth_u, recog->maxwidth_u,
+                recog->minheight_u, recog->maxheight_u);
+    lept_stderr("min splitw = %d, max splith = %d\n",
+                recog->min_splitw, recog->max_splith);
+
+    pixaDestroy(&recog->pixadb_ave);
+
+        /* 3x3 red square that marks the centroid location */
+    pixred = pixCreate(3, 3, 32);
+    pixSetAllArbitrary(pixred, 0xff000000);
+    pixaboth = pixaCreate(2);
+    nclass = recog->setsize;
+
+        /* Unscaled bitmaps */
+    pixtiled = recogTileTemplatesWithCentroid(recog->pixa_u, recog->pta_u,
+                                              pixred, nclass);
+    pixaAddPix(pixaboth, pixtiled, L_INSERT);
+    pixDisplay(pixtiled, 100, 100);
+
+        /* Scaled bitmaps */
+    pixtiled = recogTileTemplatesWithCentroid(recog->pixa, recog->pta,
+                                              pixred, nclass);
+    pixaAddPix(pixaboth, pixtiled, L_INSERT);
+    pixDisplay(pixtiled, 100, 100);
+
+    pixDestroy(&pixred);
+    recog->pixadb_ave = pixaboth;
+    return 0;
+}
+
+
+/*!
+ * \brief   pixDisplayOutliers()
+ *
+ * \param[in]    pixas    unscaled labeled templates
+ * \param[in]    nas      scores of templates (against class averages)
+ * \return  pix    tiled pixa with text and scores, or NULL on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This debug routine takes the saved templates and their
+ *          scores as input; %pixas and %nas must have the same size.
+ *      (2) Each template gets a 25 pixel white border on the left and
+ *          right, and is labeled with its text field and its score.
+ *          A template without a text field is labeled with an empty
+ *          string.
+ * </pre>
+ */
+static PIX  *
+pixDisplayOutliers(PIXA  *pixas,
+                   NUMA  *nas)
+{
+char      *text;
+char       buf[64];  /* room for multi-character labels plus the score */
+l_int32    i, n;
+l_float32  fval;
+PIX       *pix1, *pix2;
+PIXA      *pixa1;
+
+    if (!pixas)
+        return (PIX *)ERROR_PTR("pixas not defined", __func__, NULL);
+    if (!nas)
+        return (PIX *)ERROR_PTR("nas not defined", __func__, NULL);
+    n = pixaGetCount(pixas);
+    if (numaGetCount(nas) != n)
+        return (PIX *)ERROR_PTR("pixas and nas sizes differ", __func__, NULL);
+
+    pixa1 = pixaCreate(n);
+    for (i = 0; i < n; i++) {
+        if ((pix1 = pixaGetPix(pixas, i, L_CLONE)) == NULL)
+            continue;
+        pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL);
+        text = pixGetText(pix1);
+        numaGetFValue(nas, i, &fval);
+            /* A NULL pointer must never be handed to "%s" */
+        snprintf(buf, sizeof(buf), "'%s': %5.2f", text ? text : "", fval);
+        pixSetText(pix2, buf);
+        pixaAddPix(pixa1, pix2, L_INSERT);
+        pixDestroy(&pix1);
+    }
+    pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000);
+    pixaDestroy(&pixa1);
+    return pix1;
+}
+
+
+/*!
+ * \brief   recogDisplayOutlier()
+ *
+ * \param[in]    recog
+ * \param[in]    iclass     sample is in this class
+ * \param[in]    jsamp      index of sample in class %iclass
+ * \param[in]    maxclass   index of class with closest average to sample
+ * \param[in]    maxscore   score of sample with average of class %maxclass
+ * \return  pix  sample and template images, with score, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This shows three templates, side-by-side:
+ *          - The outlier sample
+ *          - The average template from the same class
+ *          - The average class template that best matched the outlier sample
+ *      (2) A text block with %iclass, %maxclass and %maxscore is added
+ *          below the images, using recog->bmf.
+ * </pre>
+ */
+static PIX  *
+recogDisplayOutlier(L_RECOG   *recog,
+                    l_int32    iclass,
+                    l_int32    jsamp,
+                    l_int32    maxclass,
+                    l_float32  maxscore)
+{
+char   label[64];
+PIX   *pixsamp, *pixave, *pixbest, *pixrow, *pixd;
+PIXA  *pixatrio;
+
+    if (!recog)
+        return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
+
+        /* Fetch the sample and the two class averages */
+    pixsamp = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE);
+    pixave = pixaGetPix(recog->pixa, iclass, L_CLONE);
+    pixbest = pixaGetPix(recog->pixa, maxclass, L_CLONE);
+
+        /* Put them in a row; the pixa takes ownership of all three */
+    pixatrio = pixaCreate(3);
+    pixaAddPix(pixatrio, pixsamp, L_INSERT);
+    pixaAddPix(pixatrio, pixave, L_INSERT);
+    pixaAddPix(pixatrio, pixbest, L_INSERT);
+    pixrow = pixaDisplayTiledInRows(pixatrio, 32, 400, 2.0, 0, 12, 2);
+
+        /* Add the annotation underneath */
+    snprintf(label, sizeof(label), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass,
+             maxscore);
+    pixd = pixAddSingleTextblock(pixrow, recog->bmf, label, 0xff000000,
+                                 L_ADD_BELOW, NULL);
+
+    pixDestroy(&pixrow);
+    pixaDestroy(&pixatrio);
+    return pixd;
+}
+
+
+/*!
+ * \brief   recogShowMatchesInRange()
+ *
+ * \param[in]    recog
+ * \param[in]    pixa        of 1 bpp images to match
+ * \param[in]    minscore    min score to include output
+ * \param[in]    maxscore    max score to include output
+ * \param[in]    display     1 to display the result
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This gives a visual output of the best matches for a given
+ *          range of scores.  Each pair of images can optionally be
+ *          labeled with the index of the best match and the correlation.
+ *      (2) To use this, save a set of 1 bpp images (labeled or
+ *          unlabeled) that can be given to a recognizer in a pixa.
+ *          Then call this function with the pixa and parameters
+ *          to filter a range of scores.
+ *      (3) The result is stored in recog->pixdb_range, replacing any
+ *          previous image.  If no image scores within the range,
+ *          recog->pixdb_range is left NULL.
+ *      (4) Images that cannot be fetched or identified are skipped
+ *          with a warning.  The depth of the tiled output is taken
+ *          from the first image that is actually included.
+ * </pre>
+ */
+l_ok
+recogShowMatchesInRange(L_RECOG   *recog,
+                        PIXA      *pixa,
+                        l_float32  minscore,
+                        l_float32  maxscore,
+                        l_int32    display)
+{
+l_int32    i, n, nid, index, depth;
+l_float32  score;
+NUMA      *nascore, *naindex;
+PIX       *pix1, *pix2;
+PIXA      *pixa1, *pixa2;
+
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (!pixa)
+        return ERROR_INT("pixa not defined", __func__, 1);
+
+        /* Run the recognizer on the set of images.  The score, the
+         * index and the debug image of a sample are always stored
+         * together, so that the three arrays stay aligned even when
+         * some samples fail. */
+    n = pixaGetCount(pixa);
+    nascore = numaCreate(n);
+    naindex = numaCreate(n);
+    pixa1 = pixaCreate(n);
+    for (i = 0; i < n; i++) {
+        if ((pix1 = pixaGetPix(pixa, i, L_CLONE)) == NULL) {
+            L_WARNING("pix %d not found; skipping\n", __func__, i);
+            continue;
+        }
+        pix2 = NULL;
+        if (recogIdentifyPix(recog, pix1, &pix2) != 0 || !pix2) {
+            L_WARNING("pix %d not identified; skipping\n", __func__, i);
+            pixDestroy(&pix1);
+            pixDestroy(&pix2);
+            continue;
+        }
+        rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
+        numaAddNumber(nascore, score);
+        numaAddNumber(naindex, index);
+        pixaAddPix(pixa1, pix2, L_INSERT);
+        pixDestroy(&pix1);
+    }
+
+        /* Filter the set and optionally add text to each */
+    nid = pixaGetCount(pixa1);
+    pixa2 = pixaCreate(nid);
+    depth = 1;
+    for (i = 0; i < nid; i++) {
+        numaGetFValue(nascore, i, &score);
+        if (score < minscore || score > maxscore) continue;
+        pix1 = pixaGetPix(pixa1, i, L_CLONE);
+        numaGetIValue(naindex, i, &index);
+        pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score);
+        pixDestroy(&pix1);
+        if (!pix2) continue;
+
+            /* Use the depth of the first image that passes the
+             * filter, which is not necessarily the one at index 0. */
+        if (pixaGetCount(pixa2) == 0)
+            depth = pixGetDepth(pix2);
+        pixaAddPix(pixa2, pix2, L_INSERT);
+    }
+
+        /* Package it up */
+    pixDestroy(&recog->pixdb_range);
+    if (pixaGetCount(pixa2) > 0) {
+        recog->pixdb_range =
+            pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1);
+        if (display)
+            pixDisplay(recog->pixdb_range, 300, 100);
+    } else {
+        L_INFO("no character matches in the range of scores\n", __func__);
+    }
+
+    pixaDestroy(&pixa1);
+    pixaDestroy(&pixa2);
+    numaDestroy(&nascore);
+    numaDestroy(&naindex);
+    return 0;
+}
+
+
+/*!
+ * \brief   recogShowMatch()
+ *
+ * \param[in]    recog
+ * \param[in]    pix1    input pix; several possibilities
+ * \param[in]    pix2    [optional] matching template
+ * \param[in]    box     [optional] region in pix1 for which pix2 matches
+ * \param[in]    index   index of matching template; use -1 to disable printing
+ * \param[in]    score   score of match
+ * \return  pixd pair of images, showing input pix and best template,
+ *                    optionally with matching information, or NULL on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) pix1 can be one of these:
+ *          (a) The input pix alone, which can be either a single character
+ *              (box == NULL) or several characters that need to be
+ *              segmented.  If more than one character is present, the
+ *              box region is displayed with an outline.
+ *          (b) Both the input pix and the matching template.  In this case,
+ *              pix2 and box will both be null.
+ *      (2) If the bmf has been made (by a call to recogMakeBmf())
+ *          and the index >= 0, the text field, match score and index
+ *          will be rendered; otherwise their values will be ignored.
+ *      (3) If there is no %pix2, no %box and nothing to render, a
+ *          copy of %pix1 is returned.
+ * </pre>
+ */
+PIX *
+recogShowMatch(L_RECOG   *recog,
+               PIX       *pix1,
+               PIX       *pix2,
+               BOX       *box,
+               l_int32    index,
+               l_float32  score)
+{
+char    label[32];
+char   *classstr;
+L_BMF  *bmf;
+PIX    *pix32, *pixpair, *pixb, *pixd;
+PIXA   *pixa;
+
+    if (!recog)
+        return (PIX *)ERROR_PTR("recog not defined", __func__, NULL);
+    if (!pix1)
+        return (PIX *)ERROR_PTR("pix1 not defined", __func__, NULL);
+
+        /* Text is rendered only with a font and a valid index */
+    bmf = NULL;
+    if (index >= 0 && recog->bmf)
+        bmf = recog->bmf;
+    if (pix2 == NULL && box == NULL && bmf == NULL)  /* nothing to do */
+        return pixCopy(NULL, pix1);
+
+        /* Work on a 32 bpp version, with the box outlined in red */
+    pix32 = pixConvertTo32(pix1);
+    if (box)
+        pixRenderBoxArb(pix32, box, 1, 255, 0, 0);
+
+        /* Pair it up with the template, if there is one */
+    if (pix2 == NULL) {
+        pixpair = pixCopy(NULL, pix32);
+    } else {
+        pixa = pixaCreate(2);
+        pixaAddPix(pixa, pix32, L_CLONE);
+        pixaAddPix(pixa, pix2, L_CLONE);
+        pixpair = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0);
+        pixaDestroy(&pixa);
+    }
+    pixDestroy(&pix32);
+
+        /* Without a font we are done */
+    if (bmf == NULL) {
+        pixd = pixClone(pixpair);
+        pixDestroy(&pixpair);
+        return pixd;
+    }
+
+        /* Widen the image and put the match information below it */
+    pixb = pixAddBorderGeneral(pixpair, 55, 55, 0, 0, 0xffffff00);
+    recogGetClassString(recog, index, &classstr);
+    snprintf(label, sizeof(label), "C=%s, S=%4.3f, I=%d", classstr, score,
+             index);
+    pixd = pixAddSingleTextblock(pixb, bmf, label, 0xff000000,
+                                 L_ADD_BELOW, NULL);
+    pixDestroy(&pixb);
+    LEPT_FREE(classstr);
+    pixDestroy(&pixpair);
+
+    return pixd;
+}
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children