Python2/PyMuPDF: mupdf-source/thirdparty/leptonica/src/recogbasic.c comparison

comparison mupdf-source/thirdparty/leptonica/src/recogbasic.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children

comparison

equal deleted inserted replaced

-:1d09e1dec1d9
+:b50eed0cc0ef
+/*====================================================================*
+-  Copyright (C) 2001 Leptonica.  All rights reserved.
+-
+-  Redistribution and use in source and binary forms, with or without
+-  modification, are permitted provided that the following conditions
+-  are met:
+-  1. Redistributions of source code must retain the above copyright
+-     notice, this list of conditions and the following disclaimer.
+-  2. Redistributions in binary form must reproduce the above
+-     copyright notice, this list of conditions and the following
+-     disclaimer in the documentation and/or other materials
+-     provided with the distribution.
+-
+-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+-  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+-  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+-  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+-  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+-  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+-  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+-  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+-  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+-  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*====================================================================*/
+/*!
+* \file recogbasic.c
+* <pre>
+*
+*      Recog creation, destruction and access
+*         L_RECOG            *recogCreateFromRecog()
+*         L_RECOG            *recogCreateFromPixa()
+*         L_RECOG            *recogCreateFromPixaNoFinish()
+*         L_RECOG            *recogCreate()
+*         void                recogDestroy()
+*
+*      Recog accessors
+*         l_int32             recogGetCount()
+*         l_int32             recogSetParams()
+*         static l_int32      recogGetCharsetSize()
+*
+*      Character/index lookup
+*         l_int32             recogGetClassIndex()
+*         l_int32             recogStringToIndex()
+*         l_int32             recogGetClassString()
+*         l_int32             l_convertCharstrToInt()
+*
+*      Serialization
+*         L_RECOG            *recogRead()
+*         L_RECOG            *recogReadStream()
+*         L_RECOG            *recogReadMem()
+*         l_int32             recogWrite()
+*         l_int32             recogWriteStream()
+*         l_int32             recogWriteMem()
+*         PIXA               *recogExtractPixa()
+*         static l_int32      recogAddCharstrLabels()
+*         static l_int32      recogAddAllSamples()
+*
+*  The recognizer functionality is split into four files:
+*    recogbasic.c: create, destroy, access, serialize
+*    recogtrain.c: training on labeled and unlabeled data
+*    recogident.c: running the recognizer(s) on input
+*    recogdid.c:   running the recognizer(s) on input using a
+*                  document image decoding (DID) hidden markov model
+*
+*  This is a content-adapted (or book-adapted) recognizer (BAR) application.
+*  The recognizers here are typically assembled from data that has
+*  been labeled by a generic recognition system, such as Tesseract.
+*  The general procedure to create a recognizer (recog) from labeled data is
+*  to add the labeled character bitmaps, either one at a time or
+*  all together from a pixa with labeled pix.
+*
+*  The suggested use for a BAR that consists of labeled templates drawn
+*  from a single source (e.g., a book) is to identify unlabeled samples
+*  by using unscaled character templates in the BAR, picking the
+*  template closest to the unlabeled sample.
+*
+*  Outliers can be removed from a pixa of labeled pix.  This is one of
+*  two methods that use averaged templates (the other is greedy splitting
+*  of characters).  See recogtrain.c for a discussion and the implementation.
+*
+*  A special bootstrap recognizer (BSR) can be used to make a BAR from
+*  unlabeled book data.  This is done by comparing character images
+*  from the book with labeled templates in the BSR, where all images
+*  are scaled to h = 40.  The templates can be either the scanned images
+*  or images consisting of width-normalized strokes derived from
+*  the skeleton of the character bitmaps.
+*
+*  Two BARs of labeled character data, that have been made by
+*  different recognizers, can be joined by extracting a pixa of the
+*  labeled templates from each, joining the two pixa, and then
+*  regenerating a BAR from the joined set of templates.
+*  If all the labeled character data is from a single source (e.g., a book),
+*  identification can proceed using unscaled templates (either the input
+*  image or width-normalized lines).  But if the labeled data comes from
+*  more than one source, (a "hybrid" recognizer), the templates should
+*  be scaled, and we recommend scaling to a fixed height.
+*
+*  Suppose it is not possible to generate a BAR with a sufficient number
+*  of templates of each class taken from a single source.  In that case,
+*  templates from the BSR itself can be added.  This is the condition
+*  described above, where the labeled templates come from multiple
+*  sources, and it is necessary to do all character matches using
+*  templates that have been scaled to a fixed height (e.g., 40).
+*  Likewise, the samples to be identified using this hybrid recognizer
+*  must be modified in the same way.  See prog/recogtest3.c for an
+*  example of the steps that can be taken in the construction of a BAR
+*  using a BSR.
+*
+*  For training numeric input, an example set of calls that scales
+*  each training input to fixed h and will use the line templates of
+*  width linew for identifying unknown characters is:
+*         L_Recog  *rec = recogCreate(0, h, linew, 128, 1);
+*         for (i = 0; i < n; i++) {  // read in n training digits
+*             Pix *pix = ...
+*             recogTrainLabeled(rec, pix, NULL, text[i], 0);
+*         }
+*         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
+*
+*  It is an error if any function that computes averages, removes
+*  outliers or requests identification of an unlabeled character,
+*  such as:
+*     (1) computing the sample averages: recogAverageSamples()
+*     (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2()
+*     (3) requesting identification of an unlabeled character:
+*         recogIdentifyPix()
+*  is called before an explicit call to finish training.  Note that
+*  to do further training on a "finished" recognizer, you can set
+*         recog->train_done = FALSE;
+*  add the new training samples, and again call
+*         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
+*
+*  If not scaling, using the images directly for identification, and
+*  removing outliers, do something like this:
+*      L_Recog  *rec = recogCreate(0, 0, 0, 128, 1);
+*      for (i = 0; i < n; i++) {  // read in n training characters
+*          Pix *pix = ...
+*          recogTrainLabeled(rec, pix, NULL, text[i], 0);
+*      }
+*      recogTrainingFinished(&rec, 1, -1, -1.0);
+*      if (!rec) ... [return]
+*      // remove outliers
+*      recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL);
+*
+*  You can generate a recognizer from a pixa where the text field in
+*  each pix is the character string label for the pix.  For example,
+*  the following recognizer will store unscaled line images:
+*      L_Recog  *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1);
+*  and in use, it is fed unscaled line images to identify.
+*
+*  For the following, assume that you have a pixa of labeled templates.
+*  If it is likely that some of the input templates are mislabeled,
+*  there are several things that can be done to remove them.
+*  The first is to put a size and quantity filter on them; e.g.
+*       Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6);
+*  Then you can remove outliers; e.g.,
+*       Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL);
+*
+*  To this point, all templates are from a single source, so you
+*  can make a recognizer that uses the unscaled templates and optionally
+*  attempts to split touching characters:
+*       L_Recog *recog1 = recogCreateFromPixa(pixa3, ...);
+*  Alternatively, if you need more templates for some of the classes,
+*  you can pad with templates from a "bootstrap" recognizer (BSR).
+*  If you pad, it is necessary to scale the templates and input
+*  samples to a fixed height, and no attempt will be made to split
+*  the input sample connected components:
+*       L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0);
+*       recogPadDigitTrainingSet(&recog1, 40, 0);
+*
+*  A special case is a pure BSR, that contains images scaled to a fixed
+*  height (we use 40 in these examples).
+*  For this, use either the scanned bitmap:
+*      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1);
+*  or width-normalized lines (use width of 5 here):
+*      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1);
+*
+*  This can be used to train a new book-adapted recognizer (BAR) on
+*  unlabeled data from, e.g., a book.  To do this, the following is required:
+*   (1) the input images from the book must be scaled in the same
+*       way as those in the BSR, and
+*   (2) both the BSR and the input images must be set up to be either
+*       input scanned images or width-normalized lines.
+*
+* </pre>
+*/
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+#include <string.h>
+#include "allheaders.h"
+/* Stored in recog->maxarraysize by recogCreate(), which also passes it
+ * to pixaaCreate() when allocating the unscaled training pixaa. */
+static const l_int32    MaxExamplesInClass = 256;
+/* Default recog parameters that can be changed */
+/* The next four are the fallbacks chosen by recogSetParams() when the
+ * corresponding argument is outside its accepted range. */
+static const l_int32    DefaultCharsetType = L_ARABIC_NUMERALS;
+static const l_int32    DefaultMinNopad = 1;
+static const l_float32  DefaultMaxWHRatio = 3.0f;  /* max allowed w/h
+ratio for a component to be split  */
+static const l_float32  DefaultMaxHTRatio = 2.6f;  /* max allowed ratio of
+max/min unscaled averaged template heights  */
+/* The next two are the fallbacks chosen by recogCreate(): the threshold
+ * when the input is 0 or outside [0 ... 255], and the maxyshift when the
+ * input is outside [0 ... 2]. */
+static const l_int32    DefaultThreshold = 150;  /* for binarization */
+static const l_int32    DefaultMaxYShift = 1;  /* for identification */
+/* Static functions */
+/* recogGetCharsetSize() maps a charset type to its number of classes. */
+static l_int32 recogGetCharsetSize(l_int32 type);
+/* The two helpers below are listed under "Serialization" in the file
+ * header; recogAddAllSamples() is called by recogReadStream(). */
+static l_int32 recogAddCharstrLabels(L_RECOG *recog);
+static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug);
+/*------------------------------------------------------------------------*
+*                Recog: initialization and destruction                   *
+*------------------------------------------------------------------------*/
+/*!
+ * \brief   recogCreateFromRecog()
+ *
+ * \param[in]    recs        source recog; its templates are pulled out
+ *                           with recogExtractPixa()
+ * \param[in]    scalew      scale all widths to this; use 0 otherwise
+ * \param[in]    scaleh      scale all heights to this; use 0 otherwise
+ * \param[in]    linew       width of normalized strokes; use 0 to skip
+ * \param[in]    threshold   for binarization; typically ~128
+ * \param[in]    maxyshift   from nominal centroid alignment; default is 1
+ * \return  recd, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Convenience function: a new recog is built from the unscaled
+ *          training data of an existing one, by extracting a pixa from
+ *          %recs and handing it, with the remaining arguments, to
+ *          recogCreateFromPixa().
+ *      (2) Using %maxyshift = 1 (the default value) is recommended.
+ *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
+ * </pre>
+ */
+L_RECOG *
+recogCreateFromRecog(L_RECOG  *recs,
+                     l_int32   scalew,
+                     l_int32   scaleh,
+                     l_int32   linew,
+                     l_int32   threshold,
+                     l_int32   maxyshift)
+{
+    PIXA     *templates;
+    L_RECOG  *result;
+    if (recs == NULL)
+        return (L_RECOG *)ERROR_PTR("recs not defined", __func__, NULL);
+    /* The extracted pixa is only needed while the new recog is built */
+    templates = recogExtractPixa(recs);
+    result = recogCreateFromPixa(templates, scalew, scaleh, linew,
+                                 threshold, maxyshift);
+    pixaDestroy(&templates);
+    return result;
+}
+/*!
+ * \brief   recogCreateFromPixa()
+ *
+ * \param[in]    pixa         of labeled, 1 bpp images
+ * \param[in]    scalew       scale all widths to this; use 0 otherwise
+ * \param[in]    scaleh       scale all heights to this; use 0 otherwise
+ * \param[in]    linew        width of normalized strokes; use 0 to skip
+ * \param[in]    threshold    for binarization; typically ~150
+ * \param[in]    maxyshift    from nominal centroid alignment; default is 1
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Convenience function for training from labeled data: it
+ *          calls recogCreateFromPixaNoFinish() and then
+ *          recogTrainingFinished().  The pixa can be read from file.
+ *      (2) The pixa should contain the unscaled bitmaps used for training.
+ *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
+ *      (4) Using %maxyshift = 1 (the default value) is recommended.
+ *      (5) All examples in the same class (i.e., with the same character
+ *          label) should be similar.  They can be made similar by invoking
+ *          recogRemoveOutliers[1,2]() on %pixa before calling this function.
+ * </pre>
+ */
+L_RECOG *
+recogCreateFromPixa(PIXA    *pixa,
+                    l_int32  scalew,
+                    l_int32  scaleh,
+                    l_int32  linew,
+                    l_int32  threshold,
+                    l_int32  maxyshift)
+{
+    L_RECOG  *rec;
+    if (pixa == NULL)
+        return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL);
+    rec = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew,
+                                      threshold, maxyshift);
+    if (rec == NULL)
+        return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL);
+    /* The recog is passed by address; a NULL pointer afterwards is
+     * reported as bad templates. */
+    recogTrainingFinished(&rec, 1, -1, -1.0);
+    if (rec == NULL)
+        return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL);
+    return rec;
+}
+/*!
+ * \brief   recogCreateFromPixaNoFinish()
+ *
+ * \param[in]    pixa         of labeled, 1 bpp images
+ * \param[in]    scalew       scale all widths to this; use 0 otherwise
+ * \param[in]    scaleh       scale all heights to this; use 0 otherwise
+ * \param[in]    linew        width of normalized strokes; use 0 to skip
+ * \param[in]    threshold    for binarization; typically ~150
+ * \param[in]    maxyshift    from nominal centroid alignment; default is 1
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) See recogCreateFromPixa() for details.
+ *      (2) This is also used to generate a pixaa with templates
+ *          in each class within a pixa.  For that, all args except for
+ *          %pixa are ignored.
+ *      (3) It is an error if any pix is deeper than 1 bpp, if the pixa
+ *          is not full, or if no pix carries a text string.  A pix
+ *          without text is reported and skipped; the others are still
+ *          added with recogTrainLabeled().
+ * </pre>
+ */
+L_RECOG *
+recogCreateFromPixaNoFinish(PIXA    *pixa,
+                            l_int32  scalew,
+                            l_int32  scaleh,
+                            l_int32  linew,
+                            l_int32  threshold,
+                            l_int32  maxyshift)
+{
+    char     *label;
+    l_int32   isfull, npix, idx, nlabels, samedepth, maxdepth;
+    PIX      *pix;
+    L_RECOG  *rec;
+    if (pixa == NULL)
+        return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL);
+    /* Validate the input set before allocating anything */
+    pixaVerifyDepth(pixa, &samedepth, &maxdepth);
+    if (maxdepth > 1)
+        return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", __func__, NULL);
+    pixaIsFull(pixa, &isfull, NULL);
+    if (!isfull)
+        return (L_RECOG *)ERROR_PTR("not all pix are present", __func__, NULL);
+    npix = pixaGetCount(pixa);
+    pixaCountText(pixa, &nlabels);
+    if (nlabels == 0)
+        return (L_RECOG *)ERROR_PTR("no pix have text strings", __func__, NULL);
+    if (nlabels < npix)
+        L_ERROR("%d text strings < %d pix\n", __func__, nlabels, npix);
+    rec = recogCreate(scalew, scaleh, linew, threshold, maxyshift);
+    if (rec == NULL)
+        return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL);
+    /* Add every labeled pix; unlabeled ones are reported and skipped */
+    for (idx = 0; idx < npix; idx++) {
+        pix = pixaGetPix(pixa, idx, L_CLONE);
+        label = pixGetText(pix);
+        if (label && strlen(label) > 0) {
+            recogTrainLabeled(rec, pix, NULL, label, 0);
+        } else {
+            L_ERROR("pix[%d] has no text\n", __func__, idx);
+        }
+        pixDestroy(&pix);
+    }
+    return rec;
+}
+/*!
+ * \brief   recogCreate()
+ *
+ * \param[in]    scalew       scale all widths to this; use 0 otherwise
+ * \param[in]    scaleh       scale all heights to this; use 0 otherwise
+ * \param[in]    linew        width of normalized strokes; use 0 to skip
+ * \param[in]    threshold    for binarization; typically ~128; 0 for default
+ * \param[in]    maxyshift    from nominal centroid alignment; default is 1
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If %scalew == 0 and %scaleh == 0, no scaling is done.
+ *          If one of these is 0 and the other is > 0, scaling is isotropic
+ *          to the requested size.  We typically do not set both > 0.
+ *      (2) Use linew > 0 to convert the templates to images with fixed
+ *          width strokes.  linew == 0 skips the conversion.
+ *      (3) The only valid values for %maxyshift are 0, 1 and 2.
+ *          It is recommended to use %maxyshift == 1 (default value).
+ *          Using %maxyshift == 0 is much faster than %maxyshift == 1, but
+ *          it is much less likely to find the template with the best
+ *          correlation.  Use of anything but 1 results in a warning.
+ *      (4) Scaling is used for finding outliers and for training a
+ *          book-adapted recognizer (BAR) from a bootstrap recognizer (BSR).
+ *          Scaling the height to a fixed value and scaling the width
+ *          accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended.
+ *      (5) The storage for most of the arrays is allocated when training
+ *          is finished.
+ *      (6) It is an error if %scalew or %scaleh is negative, if
+ *          %linew > 10, or if the recog struct cannot be allocated.
+ *          An out-of-range %threshold or %maxyshift is replaced by its
+ *          default with a warning.
+ * </pre>
+ */
+L_RECOG *
+recogCreate(l_int32  scalew,
+            l_int32  scaleh,
+            l_int32  linew,
+            l_int32  threshold,
+            l_int32  maxyshift)
+{
+    L_RECOG  *recog;
+    if (scalew < 0 || scaleh < 0)
+        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", __func__, NULL);
+    if (linew > 10)
+        return (L_RECOG *)ERROR_PTR("invalid linew > 10", __func__, NULL);
+    if (threshold == 0) threshold = DefaultThreshold;
+    if (threshold < 0 || threshold > 255) {
+        L_WARNING("invalid threshold; using default\n", __func__);
+        threshold = DefaultThreshold;
+    }
+    if (maxyshift < 0 || maxyshift > 2) {
+        L_WARNING("invalid maxyshift; using default value\n", __func__);
+        maxyshift = DefaultMaxYShift;
+    } else if (maxyshift == 0) {
+        L_WARNING("Using maxyshift = 0; faster, worse correlation results\n",
+                  __func__);
+    } else if (maxyshift == 2) {
+        L_WARNING("Using maxyshift = 2; slower\n", __func__);
+    }
+    /* Every field is written through this pointer below, so an
+     * allocation failure must be caught before the first store. */
+    recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG));
+    if (!recog)
+        return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL);
+    recog->templ_use = L_USE_ALL_TEMPLATES;  /* default */
+    recog->threshold = threshold;
+    recog->scalew = scalew;
+    recog->scaleh = scaleh;
+    recog->linew = linew;
+    recog->maxyshift = maxyshift;
+    recogSetParams(recog, 1, -1, -1.0, -1.0);
+    recog->bmf = bmfCreate(NULL, 6);
+    recog->bmf_size = 6;
+    recog->maxarraysize = MaxExamplesInClass;
+    /* Generate the LUTs */
+    recog->centtab = makePixelCentroidTab8();
+    recog->sumtab = makePixelSumTab8();
+    recog->sa_text = sarrayCreate(0);
+    recog->dna_tochar = l_dnaCreate(0);
+    /* Input default values for min component size for splitting.
+     * These are overwritten when recogTrainingFinished() is called. */
+    recog->min_splitw = 6;
+    recog->max_splith = 60;
+    /* Allocate the paa for the unscaled training bitmaps */
+    recog->pixaa_u = pixaaCreate(recog->maxarraysize);
+    /* Generate the storage for debugging */
+    recog->pixadb_boot = pixaCreate(2);
+    recog->pixadb_split = pixaCreate(2);
+    return recog;
+}
+/*!
+ * \brief   recogDestroy()
+ *
+ * \param[in,out]   precog    will be set to null before returning
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) A null %precog produces a warning; a null *precog is
+ *          silently accepted.  In both cases nothing is freed.
+ * </pre>
+ */
+void
+recogDestroy(L_RECOG  **precog)
+{
+    L_RECOG  *rec;
+    if (precog == NULL) {
+        L_WARNING("ptr address is null\n", __func__);
+        return;
+    }
+    rec = *precog;
+    if (rec == NULL)
+        return;
+    /* Lookup tables and class labels */
+    LEPT_FREE(rec->centtab);
+    LEPT_FREE(rec->sumtab);
+    sarrayDestroy(&rec->sa_text);
+    l_dnaDestroy(&rec->dna_tochar);
+    /* Unscaled templates and their derived data */
+    pixaaDestroy(&rec->pixaa_u);
+    pixaDestroy(&rec->pixa_u);
+    ptaaDestroy(&rec->ptaa_u);
+    ptaDestroy(&rec->pta_u);
+    numaDestroy(&rec->nasum_u);
+    numaaDestroy(&rec->naasum_u);
+    /* Modified templates and their derived data */
+    pixaaDestroy(&rec->pixaa);
+    pixaDestroy(&rec->pixa);
+    ptaaDestroy(&rec->ptaa);
+    ptaDestroy(&rec->pta);
+    numaDestroy(&rec->nasum);
+    numaaDestroy(&rec->naasum);
+    /* Remaining pix/pixa fields */
+    pixaDestroy(&rec->pixa_tr);
+    pixaDestroy(&rec->pixadb_ave);
+    pixaDestroy(&rec->pixa_id);
+    pixDestroy(&rec->pixdb_ave);
+    pixDestroy(&rec->pixdb_range);
+    pixaDestroy(&rec->pixadb_boot);
+    pixaDestroy(&rec->pixadb_split);
+    /* Font, result holders and DID state */
+    bmfDestroy(&rec->bmf);
+    rchDestroy(&rec->rch);
+    rchaDestroy(&rec->rcha);
+    recogDestroyDid(rec);
+    LEPT_FREE(rec);
+    *precog = NULL;
+}
+/*------------------------------------------------------------------------*
+*                              Recog accessors                           *
+*------------------------------------------------------------------------*/
+/*!
+ * \brief   recogGetCount()
+ *
+ * \param[in]    recog
+ * \return  count of classes in recog (its setsize field);
+ *          0 if no recog or on error
+ */
+l_int32
+recogGetCount(L_RECOG  *recog)
+{
+    l_int32  nclasses;
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 0);
+    nclasses = recog->setsize;
+    return nclasses;
+}
+/*!
+ * \brief   recogSetParams()
+ *
+ * \param[in]    recog          to be padded, if necessary
+ * \param[in]    type           type of char set; -1 for default;
+ *                              see enum in recog.h
+ * \param[in]    min_nopad      min number in a class without padding;
+ *                              use -1 for default
+ * \param[in]    max_wh_ratio   max width/height ratio allowed for splitting;
+ *                              use -1.0 for default
+ * \param[in]    max_ht_ratio   max of max/min averaged template height ratio;
+ *                              use -1.0 for default
+ * \return       0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is called when a recog is created.
+ *      (2) Default %min_nopad value allows for some padding.
+ *          To disable padding, set %min_nopad = 0.  To pad only when
+ *          no samples are available for the class, set %min_nopad = 1.
+ *      (3) The %max_wh_ratio limits the width/height ratio for components
+ *          that we attempt to split.  Splitting long components is expensive.
+ *      (4) The %max_ht_ratio is a quality requirement on the training data.
+ *          The recognizer will not run if the averages are computed and
+ *          the templates do not satisfy it.
+ *      (5) The default is substituted for any negative %type or
+ *          %min_nopad, any %max_wh_ratio that is not > 0.0, and any
+ *          %max_ht_ratio that is not > 1.0.
+ * </pre>
+ */
+l_ok
+recogSetParams(L_RECOG   *recog,
+               l_int32    type,
+               l_int32    min_nopad,
+               l_float32  max_wh_ratio,
+               l_float32  max_ht_ratio)
+{
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+    /* Character set, and the class count that follows from it */
+    if (type >= 0)
+        recog->charset_type = type;
+    else
+        recog->charset_type = DefaultCharsetType;
+    recog->charset_size = recogGetCharsetSize(recog->charset_type);
+    if (min_nopad >= 0)
+        recog->min_nopad = min_nopad;
+    else
+        recog->min_nopad = DefaultMinNopad;
+    if (max_wh_ratio > 0.0)
+        recog->max_wh_ratio = max_wh_ratio;
+    else
+        recog->max_wh_ratio = DefaultMaxWHRatio;
+    if (max_ht_ratio > 1.0)
+        recog->max_ht_ratio = max_ht_ratio;
+    else
+        recog->max_ht_ratio = DefaultMaxHTRatio;
+    return 0;
+}
+/*!
+ * \brief   recogGetCharsetSize()
+ *
+ * \param[in]    type     of charset
+ * \return  size of charset, or 0 if unknown or on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) An unrecognized %type is reported as an error; L_UNKNOWN
+ *          gives 0 without a message.
+ * </pre>
+ */
+static l_int32
+recogGetCharsetSize(l_int32  type)
+{
+    l_int32  size = 0;
+    switch (type) {
+    case L_ARABIC_NUMERALS:
+        size = 10;
+        break;
+    case L_LC_ROMAN_NUMERALS:
+    case L_UC_ROMAN_NUMERALS:
+        size = 7;
+        break;
+    case L_LC_ALPHA:
+    case L_UC_ALPHA:
+        size = 26;
+        break;
+    case L_UNKNOWN:
+        break;
+    default:
+        L_ERROR("invalid charset_type %d\n", __func__, type);
+        break;
+    }
+    return size;
+}
+/*------------------------------------------------------------------------*
+*                         Character/index lookup                         *
+*------------------------------------------------------------------------*/
+/*!
+ * \brief   recogGetClassIndex()
+ *
+ * \param[in]    recog     with LUT's pre-computed
+ * \param[in]    val       integer value; can be up to 4 bytes for UTF-8
+ * \param[in]    text      text from which %val was derived; used if not found
+ * \param[out]   pindex    index into dna_tochar
+ * \return  0 if found; 1 if not found and added; 2 on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is used during training.  Each character class has one
+ *          entry in recog->dna_tochar (integer value, e.g., ascii) and
+ *          one in recog->sa_text (e.g., ascii letter in a string).
+ *      (2) The dna character array is searched for %val.  If it is
+ *          absent, the template belongs to a class not seen before:
+ *          setsize (the number of character classes) is incremented
+ *          and both the index (dna_tochar) and text (sa_text) arrays
+ *          are extended.
+ *      (3) Returns the index in &index, except on error, where it
+ *          is set to -1.
+ *      (4) Caller must check the function return value.
+ * </pre>
+ */
+l_int32
+recogGetClassIndex(L_RECOG  *recog,
+                   l_int32   val,
+                   char     *text,
+                   l_int32  *pindex)
+{
+    l_int32  idx, nclasses, stored;
+    if (pindex == NULL)
+        return ERROR_INT("&index not defined", __func__, 2);
+    *pindex = -1;
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 2);
+    if (text == NULL)
+        return ERROR_INT("text not defined", __func__, 2);
+    /* Look for an existing class with this value */
+    nclasses = l_dnaGetCount(recog->dna_tochar);
+    for (idx = 0; idx < nclasses; idx++) {
+        l_dnaGetIValue(recog->dna_tochar, idx, &stored);
+        if (stored != val)
+            continue;
+        *pindex = idx;
+        return 0;
+    }
+    /* New class: append to both arrays; its index is the old count */
+    l_dnaAddNumber(recog->dna_tochar, val);
+    sarrayAddString(recog->sa_text, text, L_COPY);
+    recog->setsize++;
+    *pindex = nclasses;
+    return 1;
+}
+/*!
+ * \brief   recogStringToIndex()
+ *
+ * \param[in]    recog
+ * \param[in]    text     text string for some class
+ * \param[out]   pindex   index for that class; -1 if not found
+ * \return  0 if OK, 1 on error; not finding the string is an error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The class strings are compared with %text in index order,
+ *          using strcmp(); the first exact match is returned.
+ * </pre>
+ */
+l_ok
+recogStringToIndex(L_RECOG  *recog,
+                   char     *text,
+                   l_int32  *pindex)
+{
+    char    *classstr;
+    l_int32  idx, nclasses, match;
+    if (pindex == NULL)
+        return ERROR_INT("&index not defined", __func__, 1);
+    *pindex = -1;
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (text == NULL)
+        return ERROR_INT("text not defined", __func__, 1);
+    /* Search existing characters */
+    nclasses = recog->setsize;
+    for (idx = 0; idx < nclasses; idx++) {
+        recogGetClassString(recog, idx, &classstr);
+        if (classstr == NULL) {
+            L_ERROR("string not found for index %d\n", __func__, idx);
+            continue;
+        }
+        /* The class string is a copy, so release it before deciding */
+        match = (strcmp(text, classstr) == 0);
+        LEPT_FREE(classstr);
+        if (match) {
+            *pindex = idx;
+            return 0;
+        }
+    }
+    return 1;  /* not found */
+}
+/*!
+ * \brief   recogGetClassString()
+ *
+ * \param[in]    recog
+ * \param[in]    index       into array of char types
+ * \param[out]   pcharstr    string representation;
+ *                           returns an empty string on error
+ * \return  0 if found, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Extracts a copy of the string from sa_text, which
+ *          the caller must free.
+ *      (2) When %recog is null or %index is out of range, *pcharstr is
+ *          set to a new empty string, which the caller must also free.
+ *      (3) Caller must check the function return value.
+ *      (4) Every error path returns 1, matching the documented contract.
+ * </pre>
+ */
+l_int32
+recogGetClassString(L_RECOG  *recog,
+                    l_int32   index,
+                    char    **pcharstr)
+{
+    if (!pcharstr)
+        return ERROR_INT("&charstr not defined", __func__, 1);
+    /* Placeholder returned to the caller on the error paths below */
+    *pcharstr = stringNew("");
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (index < 0 || index >= recog->setsize)
+        return ERROR_INT("invalid index", __func__, 1);
+    /* Success: swap the placeholder for a copy of the class string */
+    LEPT_FREE(*pcharstr);
+    *pcharstr = sarrayGetString(recog->sa_text, index, L_COPY);
+    return 0;
+}
+/*!
+ * \brief   l_convertCharstrToInt()
+ *
+ * \param[in]    str     input string representing one UTF-8 character;
+ *                       not more than 4 bytes
+ * \param[out]   pval    integer value for the input.  Think of it
+ *                       as a 1-to-1 hash code.
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The bytes are packed with the first byte most significant,
+ *          and the top bit of the 32-bit result is cleared.
+ *      (2) An empty string, or one longer than 4 bytes, is an error;
+ *          *pval is then 0.
+ * </pre>
+ */
+l_ok
+l_convertCharstrToInt(const char  *str,
+                      l_int32     *pval)
+{
+    l_int32   nbytes, k;
+    l_uint32  packed;
+    if (pval == NULL)
+        return ERROR_INT("&val not defined", __func__, 1);
+    *pval = 0;
+    if (str == NULL)
+        return ERROR_INT("str not defined", __func__, 1);
+    nbytes = strlen(str);
+    if (nbytes == 0)
+        return ERROR_INT("empty string", __func__, 1);
+    if (nbytes > 4)
+        return ERROR_INT("invalid string: > 4 bytes", __func__, 1);
+    /* Start from the first byte, then shift in each following byte */
+    packed = (l_uint8)str[0];
+    for (k = 1; k < nbytes; k++)
+        packed = (packed << 8) + (l_uint8)str[k];
+    *pval = (l_int32)(packed & 0x7fffffff);
+    return 0;
+}
+/*------------------------------------------------------------------------*
+*                             Serialization                              *
+*------------------------------------------------------------------------*/
+/*!
+ * \brief   recogRead()
+ *
+ * \param[in]    filename
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) When a recog is serialized, a pixaa of the templates that are
+ *          actually used for correlation is saved in the pixaa_u array
+ *          of the recog.  These can differ from the templates that
+ *          originally generated the recog, because those can have been
+ *          scaled and turned into normalized lines.  When recog1
+ *          is deserialized to recog2, these templates are put in both the
+ *          unscaled array (pixaa_u) and the modified array (pixaa) in recog2.
+ *      (2) Why not put them in only the unscaled array and let
+ *          recogTrainingFinished() regenerate the modified templates?
+ *          Because with normalized lines, the operation of
+ *          thinning to a skeleton and dilating back to a fixed width
+ *          is not idempotent.  Thinning to a skeleton saves pixels at
+ *          the end of a line segment, and thickening the skeleton puts
+ *          additional pixels at the end of the lines.  This tends to
+ *          close gaps.
+ * </pre>
+ */
+L_RECOG *
+recogRead(const char  *filename)
+{
+    FILE     *stream;
+    L_RECOG  *rec;
+    if (filename == NULL)
+        return (L_RECOG *)ERROR_PTR("filename not defined", __func__, NULL);
+    stream = fopenReadStream(filename);
+    if (stream == NULL)
+        return (L_RECOG *)ERROR_PTR_1("stream not opened",
+                                      filename, __func__, NULL);
+    /* The stream is closed whether or not the read succeeds */
+    rec = recogReadStream(stream);
+    fclose(stream);
+    if (rec == NULL)
+        return (L_RECOG *)ERROR_PTR_1("recog not read",
+                                      filename, __func__, NULL);
+    return rec;
+}
+/*!
+* \brief   recogReadStream()
+*
+* \param[in]    fp     file stream
+* \return  recog, or NULL on error
+*
+* <pre>
+* Notes:
+*      (1) The stream is parsed in a fixed order: version, character
+*          set size, binarization threshold, maxyshift, scale width,
+*          scale height and normalized line width; then the class
+*          labels (an l_dna followed by an sarray); then the pixaa
+*          of samples.
+*      (2) It is an error if the version differs from
+*          RECOG_VERSION_NUMBER, or if the number of pixa in the pixaa
+*          differs from the character set size given in the header.
+*      (3) The stream is not closed here.
+* </pre>
+*/
+L_RECOG *
+recogReadStream(FILE  *fp)
+{
+l_int32   version, setsize, threshold, scalew, scaleh, linew;
+l_int32   maxyshift, nc;
+L_DNA    *dna_tochar;
+PIXAA    *paa;
+L_RECOG  *recog;
+SARRAY   *sa_text;
+if (!fp)
+return (L_RECOG *)ERROR_PTR("stream not defined", __func__, NULL);
+/* Header: each field must convert exactly one integer */
+if (fscanf(fp, "\nRecog Version %d\n", &version) != 1)
+return (L_RECOG *)ERROR_PTR("not a recog file", __func__, NULL);
+if (version != RECOG_VERSION_NUMBER)
+return (L_RECOG *)ERROR_PTR("invalid recog version", __func__, NULL);
+if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1)
+return (L_RECOG *)ERROR_PTR("setsize not read", __func__, NULL);
+if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1)
+return (L_RECOG *)ERROR_PTR("binary thresh not read", __func__, NULL);
+if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1)
+return (L_RECOG *)ERROR_PTR("maxyshift not read", __func__, NULL);
+if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1)
+return (L_RECOG *)ERROR_PTR("width not read", __func__, NULL);
+if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1)
+return (L_RECOG *)ERROR_PTR("height not read", __func__, NULL);
+if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1)
+return (L_RECOG *)ERROR_PTR("line width not read", __func__, NULL);
+/* Nothing has been allocated before this point, so the early returns
+ * above need no cleanup; every failure below must destroy the recog. */
+if ((recog = recogCreate(scalew, scaleh, linew, threshold,
+maxyshift)) == NULL)
+return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL);
+/* This format has no conversion specifier, so the only failure
+ * tested is a -1 return from fscanf(). */
+if (fscanf(fp, "\nLabels for character set:\n") == -1) {
+recogDestroy(&recog);
+return (L_RECOG *)ERROR_PTR("label intro not read", __func__, NULL);
+}
+/* Replace the empty dna_tochar made by recogCreate() with the one
+ * read from the stream. */
+l_dnaDestroy(&recog->dna_tochar);
+if ((dna_tochar = l_dnaReadStream(fp)) == NULL) {
+recogDestroy(&recog);
+return (L_RECOG *)ERROR_PTR("dna_tochar not read", __func__, NULL);
+}
+recog->dna_tochar = dna_tochar;
+/* Likewise replace the empty sa_text made by recogCreate() */
+sarrayDestroy(&recog->sa_text);
+if ((sa_text = sarrayReadStream(fp)) == NULL) {
+recogDestroy(&recog);
+return (L_RECOG *)ERROR_PTR("sa_text not read", __func__, NULL);
+}
+recog->sa_text = sa_text;
+/* Again a literal-only format: only a -1 return is an error */
+if (fscanf(fp, "\nPixaa of all samples in the training set:\n") == -1) {
+recogDestroy(&recog);
+return (L_RECOG *)ERROR_PTR("pixaa intro not read", __func__, NULL);
+}
+if ((paa = pixaaReadStream(fp)) == NULL) {
+recogDestroy(&recog);
+return (L_RECOG *)ERROR_PTR("pixaa not read", __func__, NULL);
+}
+/* The header's class count must agree with the number of pixa read */
+recog->setsize = setsize;
+nc = pixaaGetCount(paa, NULL);
+if (nc != setsize) {
+recogDestroy(&recog);
+pixaaDestroy(&paa);
+L_ERROR("(setsize = %d) != (paa count = %d)\n", __func__,
+setsize, nc);
+return NULL;
+}
+/* The recog is passed by address; a NULL pointer afterwards is
+ * reported below as bad templates. */
+recogAddAllSamples(&recog, paa, 0);  /* this finishes */
+pixaaDestroy(&paa);
+if (!recog)
+return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL);
+return recog;
+}
+/*!
+ * \brief   recogReadMem()
+ *
+ * \param[in]    data    serialization of recog (not ascii)
+ * \param[in]    size    of data in bytes
+ * \return  recog, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The buffer is wrapped in a read stream, which is parsed by
+ *          recogReadStream() and closed before returning.
+ * </pre>
+ */
+L_RECOG *
+recogReadMem(const l_uint8  *data,
+             size_t          size)
+{
+    FILE     *stream;
+    L_RECOG  *result;
+
+    if (data == NULL)
+        return (L_RECOG *)ERROR_PTR("data not defined", __func__, NULL);
+
+    stream = fopenReadFromMemory(data, size);
+    if (stream == NULL)
+        return (L_RECOG *)ERROR_PTR("stream not opened", __func__, NULL);
+
+    result = recogReadStream(stream);
+    fclose(stream);
+    if (result == NULL)
+        L_ERROR("recog not read\n", __func__);
+    return result;
+}
+/*!
+ * \brief   recogWrite()
+ *
+ * \param[in]    filename
+ * \param[in]    recog
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The pixaa of templates that is written is the modified one
+ *          in the pixaa field.  It is the pixaa that is actually used
+ *          for correlation.  This is not the unscaled array of labeled
+ *          bitmaps, in pixaa_u, that was used to generate the recog in
+ *          the first place.  See the notes in recogRead() for the
+ *          rationale.
+ *      (2) The file is opened for "wb", serialized with
+ *          recogWriteStream(), and closed before the result is checked.
+ * </pre>
+ */
+l_ok
+recogWrite(const char  *filename,
+           L_RECOG     *recog)
+{
+    l_int32  status;
+    FILE    *stream;
+
+    if (filename == NULL)
+        return ERROR_INT("filename not defined", __func__, 1);
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+    stream = fopenWriteStream(filename, "wb");
+    if (stream == NULL)
+        return ERROR_INT_1("stream not opened", filename, __func__, 1);
+
+    status = recogWriteStream(stream, recog);
+    fclose(stream);
+    if (status != 0)
+        return ERROR_INT_1("recog not written to stream",
+                           filename, __func__, 1);
+    return 0;
+}
+/*!
+ * \brief   recogWriteStream()
+ *
+ * \param[in]    fp      file stream opened for "wb"
+ * \param[in]    recog
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Writes the version and scalar parameters, then the class
+ *          labels (dna_tochar and sa_text), then the pixaa field.
+ *      (2) A failure reported by any of the label or pixaa writers is
+ *          returned to the caller, so that recogWrite() and
+ *          recogWriteMem() do not report success for a serialization
+ *          that is incomplete.
+ * </pre>
+ */
+l_ok
+recogWriteStream(FILE     *fp,
+                 L_RECOG  *recog)
+{
+    if (!fp)
+        return ERROR_INT("stream not defined", __func__, 1);
+    if (!recog)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+    fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER);
+    fprintf(fp, "Size of character set = %d\n", recog->setsize);
+    fprintf(fp, "Binarization threshold = %d\n", recog->threshold);
+    fprintf(fp, "Maxyshift = %d\n", recog->maxyshift);
+    fprintf(fp, "Scale to width = %d\n", recog->scalew);
+    fprintf(fp, "Scale to height = %d\n", recog->scaleh);
+    fprintf(fp, "Normalized line width = %d\n", recog->linew);
+
+    fprintf(fp, "\nLabels for character set:\n");
+    if (l_dnaWriteStream(fp, recog->dna_tochar))
+        return ERROR_INT("dna_tochar not written", __func__, 1);
+    if (sarrayWriteStream(fp, recog->sa_text))
+        return ERROR_INT("sa_text not written", __func__, 1);
+
+    fprintf(fp, "\nPixaa of all samples in the training set:\n");
+    if (pixaaWriteStream(fp, recog->pixaa))
+        return ERROR_INT("pixaa not written", __func__, 1);
+    return 0;
+}
+/*!
+ * \brief   recogWriteMem()
+ *
+ * \param[out]   pdata    data of serialized recog (not ascii)
+ * \param[out]   psize    size of returned data
+ * \param[in]    recog
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Serializes a recog in memory and puts the result in a buffer.
+ *      (2) With HAVE_FMEMOPEN the recog is written to a memory stream;
+ *          otherwise it is written to a temp file that is read back.
+ * </pre>
+ */
+l_ok
+recogWriteMem(l_uint8  **pdata,
+              size_t    *psize,
+              L_RECOG   *recog)
+{
+    l_int32  status;
+    FILE    *stream;
+
+        /* Clear whichever outputs were supplied before validating */
+    if (pdata != NULL)
+        *pdata = NULL;
+    if (psize != NULL)
+        *psize = 0;
+    if (pdata == NULL)
+        return ERROR_INT("&data not defined", __func__, 1);
+    if (psize == NULL)
+        return ERROR_INT("&size not defined", __func__, 1);
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+#if HAVE_FMEMOPEN
+    stream = open_memstream((char **)pdata, psize);
+    if (stream == NULL)
+        return ERROR_INT("stream not opened", __func__, 1);
+    status = recogWriteStream(stream, recog);
+        /* A nul byte is appended to the buffer, and then removed
+         * from the size that is reported. */
+    fputc('\0', stream);
+    fclose(stream);
+    if (*psize > 0)
+        *psize = *psize - 1;
+#else
+    L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__);
+  #ifdef _WIN32
+    stream = fopenWriteWinTempfile();
+    if (stream == NULL)
+        return ERROR_INT("tmpfile stream not opened", __func__, 1);
+  #else
+    stream = tmpfile();
+    if (stream == NULL)
+        return ERROR_INT("tmpfile stream not opened", __func__, 1);
+  #endif  /* _WIN32 */
+    status = recogWriteStream(stream, recog);
+    rewind(stream);
+    *pdata = l_binaryReadStream(stream, psize);
+    fclose(stream);
+#endif  /* HAVE_FMEMOPEN */
+    return status;
+}
+/*!
+ * \brief   recogExtractPixa()
+ *
+ * \param[in]   recog
+ * \return  pixa if OK, NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This generates a pixa of all the unscaled images in the
+ *          recognizer, where each one has its character class label in
+ *          the pix text field, by flattening pixaa_u to a pixa.
+ *      (2) The labels are put on the pix in pixaa_u by
+ *          recogAddCharstrLabels() before the flattening.
+ * </pre>
+ */
+PIXA *
+recogExtractPixa(L_RECOG  *recog)
+{
+    PIXA  *flattened;
+
+    if (recog == NULL)
+        return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL);
+
+    recogAddCharstrLabels(recog);
+    flattened = pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
+    return flattened;
+}
+/*!
+ * \brief   recogAddCharstrLabels()
+ *
+ * \param[in]    recog
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) For each pixa i in pixaa_u, the i-th string in sa_text is
+ *          set as the text of every pix in that pixa.
+ * </pre>
+ */
+static l_int32
+recogAddCharstrLabels(L_RECOG  *recog)
+{
+    char    *label;
+    l_int32  iclass, isamp, nclasses, nsamples;
+    PIX     *pixs;
+    PIXA    *pixac;
+    PIXAA   *paau;
+
+    if (recog == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+
+        /* Add the labels to each unscaled pix */
+    paau = recog->pixaa_u;
+    nclasses = pixaaGetCount(paau, NULL);
+    for (iclass = 0; iclass < nclasses; iclass++) {
+        pixac = pixaaGetPixa(paau, iclass, L_CLONE);
+        label = sarrayGetString(recog->sa_text, iclass, L_NOCOPY);
+        nsamples = pixaGetCount(pixac);
+        isamp = 0;
+        while (isamp < nsamples) {
+            pixs = pixaGetPix(pixac, isamp, L_CLONE);
+            pixSetText(pixs, label);
+            pixDestroy(&pixs);
+            isamp++;
+        }
+        pixaDestroy(&pixac);
+    }
+
+    return 0;
+}
+/*!
+ * \brief   recogAddAllSamples()
+ *
+ * \param[in]    precog    addr of recog
+ * \param[in]    paa       pixaa from previously trained recog
+ * \param[in]    debug
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) On error, the input recog is destroyed and *precog is set
+ *          to NULL, so the caller is never left holding a pointer to
+ *          a destroyed recog.
+ *      (2) This is used with the serialization routine recogRead(),
+ *          where each pixa in the pixaa represents a set of characters
+ *          in a different class.  Before calling this function, we have
+ *          verified that the number of character classes, given by the
+ *          setsize field in %recog, equals the number of pixa in the paa.
+ *          The character labels for each set are in the sa_text field.
+ *      (3) Each pix is added to pixaa_u as a clone of the one in %paa.
+ * </pre>
+ */
+static l_int32
+recogAddAllSamples(L_RECOG  **precog,
+                   PIXAA     *paa,
+                   l_int32    debug)
+{
+    char     *text;
+    l_int32   i, j, nc, ns;
+    PIX      *pix;
+    PIXA     *pixa, *pixa1;
+    L_RECOG  *recog;
+
+    if (!precog)
+        return ERROR_INT("&recog not defined", __func__, 1);
+    if ((recog = *precog) == NULL)
+        return ERROR_INT("recog not defined", __func__, 1);
+    if (!paa) {
+        recogDestroy(&recog);
+        *precog = NULL;
+        return ERROR_INT("paa not defined", __func__, 1);
+    }
+
+    nc = pixaaGetCount(paa, NULL);
+    for (i = 0; i < nc; i++) {
+        pixa = pixaaGetPixa(paa, i, L_CLONE);
+        ns = pixaGetCount(pixa);
+        text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
+            /* Make the pixa for class i, then fill it through the pixaa */
+        pixa1 = pixaCreate(ns);
+        pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT);
+        for (j = 0; j < ns; j++) {
+            pix = pixaGetPix(pixa, j, L_CLONE);
+            if (debug) lept_stderr("pix[%d,%d]: text = %s\n", i, j, text);
+            pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT);
+        }
+        pixaDestroy(&pixa);
+    }
+
+        /* For the second parameter, see comment in recogRead() */
+    recogTrainingFinished(&recog, 0, -1, -1.0);
+
+        /* The call above works on the local copy of the pointer.  Write
+         * the result back, so that if the recog was destroyed the
+         * caller sees NULL and not the stale address. */
+    *precog = recog;
+    if (!recog)
+        return ERROR_INT("bad templates; recog destroyed", __func__, 1);
+    return 0;
+}

Mercurial > hgrepos > Python2 > PyMuPDF

comparison mupdf-source/thirdparty/leptonica/src/recogbasic.c @ 2:b50eed0cc0ef upstream