Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/recogbasic.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/recogbasic.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1197 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! 
+ * \file recogbasic.c + * <pre> + * + * Recog creation, destruction and access + * L_RECOG *recogCreateFromRecog() + * L_RECOG *recogCreateFromPixa() + * L_RECOG *recogCreateFromPixaNoFinish() + * L_RECOG *recogCreate() + * void recogDestroy() + * + * Recog accessors + * l_int32 recogGetCount() + * l_int32 recogSetParams() + * static l_int32 recogGetCharsetSize() + * + * Character/index lookup + * l_int32 recogGetClassIndex() + * l_int32 recogStringToIndex() + * l_int32 recogGetClassString() + * l_int32 l_convertCharstrToInt() + * + * Serialization + * L_RECOG *recogRead() + * L_RECOG *recogReadStream() + * L_RECOG *recogReadMem() + * l_int32 recogWrite() + * l_int32 recogWriteStream() + * l_int32 recogWriteMem() + * PIXA *recogExtractPixa() + * static l_int32 recogAddCharstrLabels() + * static l_int32 recogAddAllSamples() + * + * The recognizer functionality is split into four files: + * recogbasic.c: create, destroy, access, serialize + * recogtrain.c: training on labeled and unlabeled data + * recogident.c: running the recognizer(s) on input + * recogdid.c: running the recognizer(s) on input using a + * document image decoding (DID) hidden markov model + * + * This is a content-adapted (or book-adapted) recognizer (BAR) application. + * The recognizers here are typically assembled from data that has + * been labeled by a generic recognition system, such as Tesseract. + * The general procedure to create a recognizer (recog) from labeled data is + * to add the labeled character bitmaps, either one at a time or + * all together from a pixa with labeled pix. + * + * The suggested use for a BAR that consists of labeled templates drawn + * from a single source (e.g., a book) is to identify unlabeled samples + * by using unscaled character templates in the BAR, picking the + * template closest to the unlabeled sample. + * + * Outliers can be removed from a pixa of labeled pix. 
This is one of + * two methods that use averaged templates (the other is greedy splitting + * of characters). See recogtrain.c for a discussion and the implementation. + * + * A special bootstrap recognizer (BSR) can be used to make a BAR from + * unlabeled book data. This is done by comparing character images + * from the book with labeled templates in the BSR, where all images + * are scaled to h = 40. The templates can be either the scanned images + * or images consisting of width-normalized strokes derived from + * the skeleton of the character bitmaps. + * + * Two BARs of labeled character data, that have been made by + * different recognizers, can be joined by extracting a pixa of the + * labeled templates from each, joining the two pixa, and then + * regenerating a BAR from the joined set of templates. + * If all the labeled character data is from a single source (e.g., a book), + * identification can proceed using unscaled templates (either the input + * image or width-normalized lines). But if the labeled data comes from + * more than one source (a "hybrid" recognizer), the templates should + * be scaled, and we recommend scaling to a fixed height. + * + * Suppose it is not possible to generate a BAR with a sufficient number + * of templates of each class taken from a single source. In that case, + * templates from the BSR itself can be added. This is the condition + * described above, where the labeled templates come from multiple + * sources, and it is necessary to do all character matches using + * templates that have been scaled to a fixed height (e.g., 40). + * Likewise, the samples to be identified using this hybrid recognizer + * must be modified in the same way. See prog/recogtest3.c for an + * example of the steps that can be taken in the construction of a BAR + * using a BSR. 
+ * + * For training numeric input, an example set of calls that scales + * each training input to fixed h and will use the line templates of + * width linew for identifying unknown characters is: + * L_Recog *rec = recogCreate(0, h, linew, 128, 1); + * for (i = 0; i < n; i++) { // read in n training digits + * Pix *pix = ... + * recogTrainLabeled(rec, pix, NULL, text[i], 0); + * } + * recogTrainingFinished(&rec, 1, -1, -1.0); // required + * + * It is an error if any function that computes averages, removes + * outliers or requests identification of an unlabeled character, + * such as: + * (1) computing the sample averages: recogAverageSamples() + * (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2() + * (3) requesting identification of an unlabeled character: + * recogIdentifyPix() + * is called before an explicit call to finish training. Note that + * to do further training on a "finished" recognizer, you can set + * recog->train_done = FALSE; + * add the new training samples, and again call + * recogTrainingFinished(&rec, 1, -1, -1.0); // required + * + * If not scaling, using the images directly for identification, and + * removing outliers, do something like this: + * L_Recog *rec = recogCreate(0, 0, 0, 128, 1); + * for (i = 0; i < n; i++) { // read in n training characters + * Pix *pix = ... + * recogTrainLabeled(rec, pix, NULL, text[i], 0); + * } + * recogTrainingFinished(&rec, 1, -1, -1.0); + * if (!rec) ... [return] + * // remove outliers + * recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL); + * + * You can generate a recognizer from a pixa where the text field in + * each pix is the character string label for the pix. For example, + * the following recognizer will store unscaled line images: + * L_Recog *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1); + * and in use, it is fed unscaled line images to identify. + * + * For the following, assume that you have a pixa of labeled templates. 
+ * If it is likely that some of the input templates are mislabeled, + * there are several things that can be done to remove them. + * The first is to put a size and quantity filter on them; e.g. + * Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6); + * Then you can remove outliers; e.g., + * Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL); + * + * To this point, all templates are from a single source, so you + * can make a recognizer that uses the unscaled templates and optionally + * attempts to split touching characters: + * L_Recog *recog1 = recogCreateFromPixa(pixa3, ...); + * Alternatively, if you need more templates for some of the classes, + * you can pad with templates from a "bootstrap" recognizer (BSR). + * If you pad, it is necessary to scale the templates and input + * samples to a fixed height, and no attempt will be made to split + * the input sample connected components: + * L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0); + * recogPadDigitTrainingSet(&recog1, 40, 0); + * + * A special case is a pure BSR, that contains images scaled to a fixed + * height (we use 40 in these examples). + * For this, use either the scanned bitmap: + * L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1); + * or width-normalized lines (use width of 5 here): + * L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1); + * + * This can be used to train a new book-adapted recognizer (BAR), on + * unlabeled data from, e.g., a book. To do this, the following is required: + * (1) the input images from the book must be scaled in the same + * way as those in the BSR, and + * (2) both the BSR and the input images must be set up to be either + * input scanned images or width-normalized lines. 
+ * + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> +#include "allheaders.h" + +static const l_int32 MaxExamplesInClass = 256; /* stored in recog->maxarraysize and passed to pixaaCreate() for pixaa_u in recogCreate() */ + + /* Default recog parameters that can be changed */ +static const l_int32 DefaultCharsetType = L_ARABIC_NUMERALS; /* used by recogSetParams() when type < 0 */ +static const l_int32 DefaultMinNopad = 1; /* used by recogSetParams() when min_nopad < 0 */ +static const l_float32 DefaultMaxWHRatio = 3.0f; /* max allowed w/h + ratio for a component to be split */ +static const l_float32 DefaultMaxHTRatio = 2.6f; /* max allowed ratio of + max/min unscaled averaged template heights */ +static const l_int32 DefaultThreshold = 150; /* for binarization */ +static const l_int32 DefaultMaxYShift = 1; /* for identification */ + + /* Static functions */ +static l_int32 recogGetCharsetSize(l_int32 type); +static l_int32 recogAddCharstrLabels(L_RECOG *recog); +static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug); + + +/*------------------------------------------------------------------------* + * Recog: initialization and destruction * + *------------------------------------------------------------------------*/ +/*! + * \brief recogCreateFromRecog() + * + * \param[in] recs source recog with arbitrary input parameters + * \param[in] scalew scale all widths to this; use 0 otherwise + * \param[in] scaleh scale all heights to this; use 0 otherwise + * \param[in] linew width of normalized strokes; use 0 to skip + * \param[in] threshold for binarization; typically ~128 + * \param[in] maxyshift from nominal centroid alignment; default is 1 + * \return recd, or NULL on error + * + * <pre> + * Notes: + * (1) This is a convenience function that generates a recog using + * the unscaled training data in an existing recog. + * (2) It is recommended to use %maxyshift = 1 (the default value) + * (3) See recogCreate() for use of %scalew, %scaleh and %linew. 
+ * </pre> + */ +L_RECOG * +recogCreateFromRecog(L_RECOG *recs, + l_int32 scalew, + l_int32 scaleh, + l_int32 linew, + l_int32 threshold, + l_int32 maxyshift) +{ +L_RECOG *recd; +PIXA *pixa; + + if (!recs) + return (L_RECOG *)ERROR_PTR("recs not defined", __func__, NULL); + + pixa = recogExtractPixa(recs); + recd = recogCreateFromPixa(pixa, scalew, scaleh, linew, threshold, + maxyshift); + pixaDestroy(&pixa); + return recd; +} + + +/*! + * \brief recogCreateFromPixa() + * + * \param[in] pixa of labeled, 1 bpp images + * \param[in] scalew scale all widths to this; use 0 otherwise + * \param[in] scaleh scale all heights to this; use 0 otherwise + * \param[in] linew width of normalized strokes; use 0 to skip + * \param[in] threshold for binarization; typically ~150 + * \param[in] maxyshift from nominal centroid alignment; default is 1 + * \return recog, or NULL on error + * + * <pre> + * Notes: + * (1) This is a convenience function for training from labeled data. + * The pixa can be read from file. + * (2) The pixa should contain the unscaled bitmaps used for training. + * (3) See recogCreate() for use of %scalew, %scaleh and %linew. + * (4) It is recommended to use %maxyshift = 1 (the default value) + * (5) All examples in the same class (i.e., with the same character + * label) should be similar. They can be made similar by invoking + * recogRemoveOutliers[1,2]() on %pixa before calling this function. 
+ * </pre> + */ +L_RECOG * +recogCreateFromPixa(PIXA *pixa, + l_int32 scalew, + l_int32 scaleh, + l_int32 linew, + l_int32 threshold, + l_int32 maxyshift) +{ +L_RECOG *recog; + + if (!pixa) + return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL); + + recog = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew, + threshold, maxyshift); + if (!recog) + return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); + + recogTrainingFinished(&recog, 1, -1, -1.0); + if (!recog) + return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL); + return recog; +} + + +/*! + * \brief recogCreateFromPixaNoFinish() + * + * \param[in] pixa of labeled, 1 bpp images + * \param[in] scalew scale all widths to this; use 0 otherwise + * \param[in] scaleh scale all heights to this; use 0 otherwise + * \param[in] linew width of normalized strokes; use 0 to skip + * \param[in] threshold for binarization; typically ~150 + * \param[in] maxyshift from nominal centroid alignment; default is 1 + * \return recog, or NULL on error + * + * <pre> + * Notes: + * (1) See recogCreateFromPixa() for details. + * (2) This is also used to generate a pixaa with templates + * in each class within a pixa. For that, all args except for + * %pixa are ignored. 
+ * </pre> + */ +L_RECOG * +recogCreateFromPixaNoFinish(PIXA *pixa, + l_int32 scalew, + l_int32 scaleh, + l_int32 linew, + l_int32 threshold, + l_int32 maxyshift) +{ +char *text; +l_int32 full, n, i, ntext, same, maxd; +PIX *pix; +L_RECOG *recog; + + if (!pixa) + return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL); + pixaVerifyDepth(pixa, &same, &maxd); + if (maxd > 1) + return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", __func__, NULL); + + pixaIsFull(pixa, &full, NULL); + if (!full) + return (L_RECOG *)ERROR_PTR("not all pix are present", __func__, NULL); + + n = pixaGetCount(pixa); + pixaCountText(pixa, &ntext); + if (ntext == 0) + return (L_RECOG *)ERROR_PTR("no pix have text strings", __func__, NULL); + if (ntext < n) + L_ERROR("%d text strings < %d pix\n", __func__, ntext, n); + + recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift); + if (!recog) + return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); + for (i = 0; i < n; i++) { + pix = pixaGetPix(pixa, i, L_CLONE); + text = pixGetText(pix); + if (!text || strlen(text) == 0) { + L_ERROR("pix[%d] has no text\n", __func__, i); + pixDestroy(&pix); + continue; + } + recogTrainLabeled(recog, pix, NULL, text, 0); + pixDestroy(&pix); + } + + return recog; +} + + +/*! + * \brief recogCreate() + * + * \param[in] scalew scale all widths to this; use 0 otherwise + * \param[in] scaleh scale all heights to this; use 0 otherwise + * \param[in] linew width of normalized strokes; use 0 to skip + * \param[in] threshold for binarization; typically ~128; 0 for default + * \param[in] maxyshift from nominal centroid alignment; default is 1 + * \return recog, or NULL on error + * + * <pre> + * Notes: + * (1) If %scalew == 0 and %scaleh == 0, no scaling is done. + * If one of these is 0 and the other is > 0, scaling is isotropic + * to the requested size. We typically do not set both > 0. + * (2) Use linew > 0 to convert the templates to images with fixed + * width strokes. 
linew == 0 skips the conversion. + * (3) The only valid values for %maxyshift are 0, 1 and 2. + * It is recommended to use %maxyshift == 1 (default value). + * Using %maxyshift == 0 is much faster than %maxyshift == 1, but + * it is much less likely to find the template with the best + * correlation. Use of anything but 1 results in a warning. + * (4) Scaling is used for finding outliers and for training a + * book-adapted recognizer (BAR) from a bootstrap recognizer (BSR). + * Scaling the height to a fixed value and scaling the width + * accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended. + * (5) The storage for most of the arrays is allocated when training + * is finished. + * </pre> + */ +L_RECOG * +recogCreate(l_int32 scalew, + l_int32 scaleh, + l_int32 linew, + l_int32 threshold, + l_int32 maxyshift) +{ +L_RECOG *recog; + + if (scalew < 0 || scaleh < 0) + return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", __func__, NULL); + if (linew > 10) + return (L_RECOG *)ERROR_PTR("invalid linew > 10", __func__, NULL); + if (threshold == 0) threshold = DefaultThreshold; + if (threshold < 0 || threshold > 255) { + L_WARNING("invalid threshold; using default\n", __func__); + threshold = DefaultThreshold; + } + if (maxyshift < 0 || maxyshift > 2) { + L_WARNING("invalid maxyshift; using default value\n", __func__); + maxyshift = DefaultMaxYShift; + } else if (maxyshift == 0) { + L_WARNING("Using maxyshift = 0; faster, worse correlation results\n", + __func__); + } else if (maxyshift == 2) { + L_WARNING("Using maxyshift = 2; slower\n", __func__); + } + + recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG)); + recog->templ_use = L_USE_ALL_TEMPLATES; /* default */ + recog->threshold = threshold; + recog->scalew = scalew; + recog->scaleh = scaleh; + recog->linew = linew; + recog->maxyshift = maxyshift; + recogSetParams(recog, 1, -1, -1.0, -1.0); + recog->bmf = bmfCreate(NULL, 6); + recog->bmf_size = 6; + recog->maxarraysize = MaxExamplesInClass; + + /* Generate the 
LUTs */ + recog->centtab = makePixelCentroidTab8(); + recog->sumtab = makePixelSumTab8(); + recog->sa_text = sarrayCreate(0); + recog->dna_tochar = l_dnaCreate(0); + + /* Input default values for min component size for splitting. + * These are overwritten when pixTrainingFinished() is called. */ + recog->min_splitw = 6; + recog->max_splith = 60; + + /* Allocate the paa for the unscaled training bitmaps */ + recog->pixaa_u = pixaaCreate(recog->maxarraysize); + + /* Generate the storage for debugging */ + recog->pixadb_boot = pixaCreate(2); + recog->pixadb_split = pixaCreate(2); + return recog; +} + + +/*! + * \brief recogDestroy() + * + * \param[in,out] precog will be set to null before returning + * \return void + */ +void +recogDestroy(L_RECOG **precog) +{ +L_RECOG *recog; + + if (!precog) { + L_WARNING("ptr address is null\n", __func__); + return; + } + + if ((recog = *precog) == NULL) return; + + LEPT_FREE(recog->centtab); + LEPT_FREE(recog->sumtab); + sarrayDestroy(&recog->sa_text); + l_dnaDestroy(&recog->dna_tochar); + pixaaDestroy(&recog->pixaa_u); + pixaDestroy(&recog->pixa_u); + ptaaDestroy(&recog->ptaa_u); + ptaDestroy(&recog->pta_u); + numaDestroy(&recog->nasum_u); + numaaDestroy(&recog->naasum_u); + pixaaDestroy(&recog->pixaa); + pixaDestroy(&recog->pixa); + ptaaDestroy(&recog->ptaa); + ptaDestroy(&recog->pta); + numaDestroy(&recog->nasum); + numaaDestroy(&recog->naasum); + pixaDestroy(&recog->pixa_tr); + pixaDestroy(&recog->pixadb_ave); + pixaDestroy(&recog->pixa_id); + pixDestroy(&recog->pixdb_ave); + pixDestroy(&recog->pixdb_range); + pixaDestroy(&recog->pixadb_boot); + pixaDestroy(&recog->pixadb_split); + bmfDestroy(&recog->bmf); + rchDestroy(&recog->rch); + rchaDestroy(&recog->rcha); + recogDestroyDid(recog); + LEPT_FREE(recog); + *precog = NULL; +} + + +/*------------------------------------------------------------------------* + * Recog accessors * + *------------------------------------------------------------------------*/ +/*! 
+ * \brief recogGetCount() + * + * \param[in] recog + * \return count of classes in recog; 0 if no recog or on error + */ +l_int32 +recogGetCount(L_RECOG *recog) +{ + if (!recog) + return ERROR_INT("recog not defined", __func__, 0); + return recog->setsize; +} + + +/*! + * \brief recogSetParams() + * + * \param[in] recog to be padded, if necessary + * \param[in] type type of char set; -1 for default; + * see enum in recog.h + * \param[in] min_nopad min number in a class without padding; + * use -1 for default + * \param[in] max_wh_ratio max width/height ratio allowed for splitting; + * use -1.0 for default + * \param[in] max_ht_ratio max of max/min averaged template height ratio; + * use -1.0 for default + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) This is called when a recog is created. + * (2) Default %min_nopad value allows for some padding. + * To disable padding, set %min_nopad = 0. To pad only when + * no samples are available for the class, set %min_nopad = 1. + * (3) The %max_wh_ratio limits the width/height ratio for components + * that we attempt to split. Splitting long components is expensive. + * (4) The %max_ht_ratio is a quality requirement on the training data. + * The recognizer will not run if the averages are computed and + * the templates do not satisfy it. + * </pre> + */ +l_ok +recogSetParams(L_RECOG *recog, + l_int32 type, + l_int32 min_nopad, + l_float32 max_wh_ratio, + l_float32 max_ht_ratio) +{ + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + + recog->charset_type = (type >= 0) ? type : DefaultCharsetType; + recog->charset_size = recogGetCharsetSize(recog->charset_type); + recog->min_nopad = (min_nopad >= 0) ? min_nopad : DefaultMinNopad; + recog->max_wh_ratio = (max_wh_ratio > 0.0) ? max_wh_ratio : + DefaultMaxWHRatio; + recog->max_ht_ratio = (max_ht_ratio > 1.0) ? max_ht_ratio : + DefaultMaxHTRatio; + return 0; +} + + +/*! 
+ * \brief recogGetCharsetSize() + * + * \param[in] type of charset + * \return size of charset, or 0 if unknown or on error + */ +static l_int32 +recogGetCharsetSize(l_int32 type) +{ + switch (type) { + case L_UNKNOWN: + return 0; + case L_ARABIC_NUMERALS: + return 10; + case L_LC_ROMAN_NUMERALS: + return 7; + case L_UC_ROMAN_NUMERALS: + return 7; + case L_LC_ALPHA: + return 26; + case L_UC_ALPHA: + return 26; + default: + L_ERROR("invalid charset_type %d\n", __func__, type); + return 0; + } + return 0; /* shouldn't happen */ +} + + +/*------------------------------------------------------------------------* + * Character/index lookup * + *------------------------------------------------------------------------*/ +/*! + * \brief recogGetClassIndex() + * + * \param[in] recog with LUT's pre-computed + * \param[in] val integer value; can be up to 4 bytes for UTF-8 + * \param[in] text text from which %val was derived; used if not found + * \param[out] pindex index into dna_tochar + * \return 0 if found; 1 if not found and added; 2 on error. + * + * <pre> + * Notes: + * (1) This is used during training. There is one entry in + * recog->dna_tochar (integer value, e.g., ascii) and + * one in recog->sa_text (e.g, ascii letter in a string) + * for each character class. + * (2) This searches the dna character array for %val. If it is + * not found, the template represents a character class not + * already seen: it increments setsize (the number of character + * classes) by 1, and augments both the index (dna_tochar) + * and text (sa_text) arrays. + * (3) Returns the index in &index, except on error. + * (4) Caller must check the function return value. 
+ * </pre> + */ +l_int32 +recogGetClassIndex(L_RECOG *recog, + l_int32 val, + char *text, + l_int32 *pindex) +{ +l_int32 i, n, ival; + + if (!pindex) + return ERROR_INT("&index not defined", __func__, 2); + *pindex = -1; + if (!recog) + return ERROR_INT("recog not defined", __func__, 2); + if (!text) + return ERROR_INT("text not defined", __func__, 2); + + /* Search existing characters */ + n = l_dnaGetCount(recog->dna_tochar); + for (i = 0; i < n; i++) { + l_dnaGetIValue(recog->dna_tochar, i, &ival); + if (val == ival) { /* found */ + *pindex = i; + return 0; + } + } + + /* If not found... */ + l_dnaAddNumber(recog->dna_tochar, val); + sarrayAddString(recog->sa_text, text, L_COPY); + recog->setsize++; + *pindex = n; + return 1; +} + + +/*! + * \brief recogStringToIndex() + * + * \param[in] recog + * \param[in] text text string for some class + * \param[out] pindex index for that class; -1 if not found + * \return 0 if OK, 1 on error not finding the string is an error + */ +l_ok +recogStringToIndex(L_RECOG *recog, + char *text, + l_int32 *pindex) +{ +char *charstr; +l_int32 i, n, diff; + + if (!pindex) + return ERROR_INT("&index not defined", __func__, 1); + *pindex = -1; + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + if (!text) + return ERROR_INT("text not defined", __func__, 1); + + /* Search existing characters */ + n = recog->setsize; + for (i = 0; i < n; i++) { + recogGetClassString(recog, i, &charstr); + if (!charstr) { + L_ERROR("string not found for index %d\n", __func__, i); + continue; + } + diff = strcmp(text, charstr); + LEPT_FREE(charstr); + if (diff) continue; + *pindex = i; + return 0; + } + + return 1; /* not found */ +} + + +/*! 
+ * \brief recogGetClassString() + * + * \param[in] recog + * \param[in] index into array of char types + * \param[out] pcharstr string representation; + * returns an empty string on error + * \return 0 if found, 1 on error + * + * <pre> + * Notes: + * (1) Extracts a copy of the string from sa_text, which + * the caller must free. + * (2) Caller must check the function return value. + * </pre> + */ +l_int32 +recogGetClassString(L_RECOG *recog, + l_int32 index, + char **pcharstr) +{ + if (!pcharstr) + return ERROR_INT("&charstr not defined", __func__, 1); + *pcharstr = stringNew(""); + if (!recog) + return ERROR_INT("recog not defined", __func__, 2); + + if (index < 0 || index >= recog->setsize) + return ERROR_INT("invalid index", __func__, 1); + LEPT_FREE(*pcharstr); + *pcharstr = sarrayGetString(recog->sa_text, index, L_COPY); + return 0; +} + + +/*! + * \brief l_convertCharstrToInt() + * + * \param[in] str input string representing one UTF-8 character; + * not more than 4 bytes + * \param[out] pval integer value for the input. Think of it + * as a 1-to-1 hash code. + * \return 0 if OK, 1 on error + */ +l_ok +l_convertCharstrToInt(const char *str, + l_int32 *pval) +{ +l_int32 size; +l_uint32 val; + + if (!pval) + return ERROR_INT("&val not defined", __func__, 1); + *pval = 0; + if (!str) + return ERROR_INT("str not defined", __func__, 1); + size = strlen(str); + if (size == 0) + return ERROR_INT("empty string", __func__, 1); + if (size > 4) + return ERROR_INT("invalid string: > 4 bytes", __func__, 1); + + val = (l_uint8)str[0]; + if (size > 1) + val = (val << 8) + (l_uint8)str[1]; + if (size > 2) + val = (val << 8) + (l_uint8)str[2]; + if (size > 3) + val = (val << 8) + (l_uint8)str[3]; + *pval = (l_int32)(val & 0x7fffffff); + return 0; +} + + +/*------------------------------------------------------------------------* + * Serialization * + *------------------------------------------------------------------------*/ +/*! 
+ * \brief recogRead() + * + * \param[in] filename + * \return recog, or NULL on error + * + * <pre> + * Notes: + * (1) When a recog is serialized, a pixaa of the templates that are + * actually used for correlation is saved in the pixaa_u array + * of the recog. These can be different from the templates that + * were used to generate the recog, because those original templates + * can be scaled and turned into normalized lines. When recog1 + * is deserialized to recog2, these templates are put in both the + * unscaled array (pixaa_u) and the modified array (pixaa) in recog2. + * Why not put it in only the unscaled array and let + * recogTrainingFinalized() regenerate the modified templates? + * The reason is that with normalized lines, the operation of + * thinning to a skeleton and dilating back to a fixed width + * is not idempotent. Thinning to a skeleton saves pixels at + * the end of a line segment, and thickening the skeleton puts + * additional pixels at the end of the lines. This tends to + * close gaps. + * </pre> + */ +L_RECOG * +recogRead(const char *filename) +{ +FILE *fp; +L_RECOG *recog; + + if (!filename) + return (L_RECOG *)ERROR_PTR("filename not defined", __func__, NULL); + if ((fp = fopenReadStream(filename)) == NULL) + return (L_RECOG *)ERROR_PTR_1("stream not opened", + filename, __func__, NULL); + + if ((recog = recogReadStream(fp)) == NULL) { + fclose(fp); + return (L_RECOG *)ERROR_PTR_1("recog not read", + filename, __func__, NULL); + } + + fclose(fp); + return recog; +} + + +/*! 
+ * \brief recogReadStream() + * + * \param[in] fp file stream + * \return recog, or NULL on error + */ +L_RECOG * +recogReadStream(FILE *fp) +{ +l_int32 version, setsize, threshold, scalew, scaleh, linew; +l_int32 maxyshift, nc; +L_DNA *dna_tochar; +PIXAA *paa; +L_RECOG *recog; +SARRAY *sa_text; + + if (!fp) + return (L_RECOG *)ERROR_PTR("stream not defined", __func__, NULL); + + if (fscanf(fp, "\nRecog Version %d\n", &version) != 1) + return (L_RECOG *)ERROR_PTR("not a recog file", __func__, NULL); + if (version != RECOG_VERSION_NUMBER) + return (L_RECOG *)ERROR_PTR("invalid recog version", __func__, NULL); + if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1) + return (L_RECOG *)ERROR_PTR("setsize not read", __func__, NULL); + if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1) + return (L_RECOG *)ERROR_PTR("binary thresh not read", __func__, NULL); + if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1) + return (L_RECOG *)ERROR_PTR("maxyshift not read", __func__, NULL); + if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1) + return (L_RECOG *)ERROR_PTR("width not read", __func__, NULL); + if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1) + return (L_RECOG *)ERROR_PTR("height not read", __func__, NULL); + if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1) + return (L_RECOG *)ERROR_PTR("line width not read", __func__, NULL); + if ((recog = recogCreate(scalew, scaleh, linew, threshold, + maxyshift)) == NULL) + return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); + + if (fscanf(fp, "\nLabels for character set:\n") == -1) { + recogDestroy(&recog); + return (L_RECOG *)ERROR_PTR("label intro not read", __func__, NULL); + } + l_dnaDestroy(&recog->dna_tochar); + if ((dna_tochar = l_dnaReadStream(fp)) == NULL) { + recogDestroy(&recog); + return (L_RECOG *)ERROR_PTR("dna_tochar not read", __func__, NULL); + } + recog->dna_tochar = dna_tochar; + sarrayDestroy(&recog->sa_text); + if ((sa_text = sarrayReadStream(fp)) == 
NULL) { + recogDestroy(&recog); + return (L_RECOG *)ERROR_PTR("sa_text not read", __func__, NULL); + } + recog->sa_text = sa_text; + + if (fscanf(fp, "\nPixaa of all samples in the training set:\n") == -1) { + recogDestroy(&recog); + return (L_RECOG *)ERROR_PTR("pixaa intro not read", __func__, NULL); + } + if ((paa = pixaaReadStream(fp)) == NULL) { + recogDestroy(&recog); + return (L_RECOG *)ERROR_PTR("pixaa not read", __func__, NULL); + } + recog->setsize = setsize; + nc = pixaaGetCount(paa, NULL); + if (nc != setsize) { + recogDestroy(&recog); + pixaaDestroy(&paa); + L_ERROR("(setsize = %d) != (paa count = %d)\n", __func__, + setsize, nc); + return NULL; + } + + recogAddAllSamples(&recog, paa, 0); /* this finishes */ + pixaaDestroy(&paa); + if (!recog) + return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL); + return recog; +} + + +/*! + * \brief recogReadMem() + * + * \param[in] data serialization of recog (not ascii) + * \param[in] size of data in bytes + * \return recog, or NULL on error + */ +L_RECOG * +recogReadMem(const l_uint8 *data, + size_t size) +{ +FILE *fp; +L_RECOG *recog; + + if (!data) + return (L_RECOG *)ERROR_PTR("data not defined", __func__, NULL); + if ((fp = fopenReadFromMemory(data, size)) == NULL) + return (L_RECOG *)ERROR_PTR("stream not opened", __func__, NULL); + + recog = recogReadStream(fp); + fclose(fp); + if (!recog) L_ERROR("recog not read\n", __func__); + return recog; +} + + +/*! + * \brief recogWrite() + * + * \param[in] filename + * \param[in] recog + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) The pixaa of templates that is written is the modified one + * in the pixaa field. It is the pixaa that is actually used + * for correlation. This is not the unscaled array of labeled + * bitmaps, in pixaa_u, that was used to generate the recog in the + * first place. See the notes in recogRead() for the rationale. 
+ * </pre> + */ +l_ok +recogWrite(const char *filename, + L_RECOG *recog) +{ +l_int32 ret; +FILE *fp; + + if (!filename) + return ERROR_INT("filename not defined", __func__, 1); + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + + if ((fp = fopenWriteStream(filename, "wb")) == NULL) + return ERROR_INT_1("stream not opened", filename, __func__, 1); + ret = recogWriteStream(fp, recog); + fclose(fp); + if (ret) + return ERROR_INT_1("recog not written to stream", + filename, __func__, 1); + return 0; +} + + +/*! + * \brief recogWriteStream() + * + * \param[in] fp file stream opened for "wb" + * \param[in] recog + * \return 0 if OK, 1 on error + */ +l_ok +recogWriteStream(FILE *fp, + L_RECOG *recog) +{ + if (!fp) + return ERROR_INT("stream not defined", __func__, 1); + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + + fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER); + fprintf(fp, "Size of character set = %d\n", recog->setsize); + fprintf(fp, "Binarization threshold = %d\n", recog->threshold); + fprintf(fp, "Maxyshift = %d\n", recog->maxyshift); + fprintf(fp, "Scale to width = %d\n", recog->scalew); + fprintf(fp, "Scale to height = %d\n", recog->scaleh); + fprintf(fp, "Normalized line width = %d\n", recog->linew); + fprintf(fp, "\nLabels for character set:\n"); + l_dnaWriteStream(fp, recog->dna_tochar); + sarrayWriteStream(fp, recog->sa_text); + fprintf(fp, "\nPixaa of all samples in the training set:\n"); + pixaaWriteStream(fp, recog->pixaa); + + return 0; +} + + +/*! + * \brief recogWriteMem() + * + * \param[out] pdata data of serialized recog (not ascii) + * \param[out] psize size of returned data + * \param[in] recog + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) Serializes a recog in memory and puts the result in a buffer. 
+ * </pre> + */ +l_ok +recogWriteMem(l_uint8 **pdata, + size_t *psize, + L_RECOG *recog) +{ +l_int32 ret; +FILE *fp; + + if (pdata) *pdata = NULL; + if (psize) *psize = 0; + if (!pdata) + return ERROR_INT("&data not defined", __func__, 1); + if (!psize) + return ERROR_INT("&size not defined", __func__, 1); + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + +#if HAVE_FMEMOPEN + if ((fp = open_memstream((char **)pdata, psize)) == NULL) + return ERROR_INT("stream not opened", __func__, 1); + ret = recogWriteStream(fp, recog); + fputc('\0', fp); + fclose(fp); + if (*psize > 0) *psize = *psize - 1; +#else + L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__); + #ifdef _WIN32 + if ((fp = fopenWriteWinTempfile()) == NULL) + return ERROR_INT("tmpfile stream not opened", __func__, 1); + #else + if ((fp = tmpfile()) == NULL) + return ERROR_INT("tmpfile stream not opened", __func__, 1); + #endif /* _WIN32 */ + ret = recogWriteStream(fp, recog); + rewind(fp); + *pdata = l_binaryReadStream(fp, psize); + fclose(fp); +#endif /* HAVE_FMEMOPEN */ + return ret; +} + + +/*! + * \brief recogExtractPixa() + * + * \param[in] recog + * \return pixa if OK, NULL on error + * + * <pre> + * Notes: + * (1) This generates a pixa of all the unscaled images in the + * recognizer, where each one has its character class label in + * the pix text field, by flattening pixaa_u to a pixa. + * </pre> + */ +PIXA * +recogExtractPixa(L_RECOG *recog) +{ + if (!recog) + return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL); + + recogAddCharstrLabels(recog); + return pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE); +} + + +/*! 
+ * \brief recogAddCharstrLabels() + * + * \param[in] recog + * \return 0 if OK, 1 on error + */ +static l_int32 +recogAddCharstrLabels(L_RECOG *recog) +{ +char *text; +l_int32 i, j, n1, n2; +PIX *pix; +PIXA *pixa; +PIXAA *paa; + + if (!recog) + return ERROR_INT("recog not defined", __func__, 1); + + /* Add the labels to each unscaled pix */ + paa = recog->pixaa_u; + n1 = pixaaGetCount(paa, NULL); + for (i = 0; i < n1; i++) { + pixa = pixaaGetPixa(paa, i, L_CLONE); + text = sarrayGetString(recog->sa_text, i, L_NOCOPY); + n2 = pixaGetCount(pixa); + for (j = 0; j < n2; j++) { + pix = pixaGetPix(pixa, j, L_CLONE); + pixSetText(pix, text); + pixDestroy(&pix); + } + pixaDestroy(&pixa); + } + + return 0; +} + + +/*! + * \brief recogAddAllSamples() + * + * \param[in] precog addr of recog + * \param[in] paa pixaa from previously trained recog + * \param[in] debug + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) On error, the input recog is destroyed. + * (2) This is used with the serialization routine recogRead(), + * where each pixa in the pixaa represents a set of characters + * in a different class. Before calling this function, we have + * verified that the number of character classes, given by the + * setsize field in %recog, equals the number of pixa in the paa. + * The character labels for each set are in the sa_text field. 
+ * </pre> + */ +static l_int32 +recogAddAllSamples(L_RECOG **precog, + PIXAA *paa, + l_int32 debug) +{ +char *text; +l_int32 i, j, nc, ns; +PIX *pix; +PIXA *pixa, *pixa1; +L_RECOG *recog; + + if (!precog) + return ERROR_INT("&recog not defined", __func__, 1); + if ((recog = *precog) == NULL) + return ERROR_INT("recog not defined", __func__, 1); + if (!paa) { + recogDestroy(&recog); + *precog = NULL; + return ERROR_INT("paa not defined", __func__, 1); + } + + nc = pixaaGetCount(paa, NULL); + for (i = 0; i < nc; i++) { + pixa = pixaaGetPixa(paa, i, L_CLONE); + ns = pixaGetCount(pixa); + text = sarrayGetString(recog->sa_text, i, L_NOCOPY); + pixa1 = pixaCreate(ns); + pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT); + for (j = 0; j < ns; j++) { + pix = pixaGetPix(pixa, j, L_CLONE); + if (debug) lept_stderr("pix[%d,%d]: text = %s\n", i, j, text); + pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT); + } + pixaDestroy(&pixa); + } + + recogTrainingFinished(&recog, 0, -1, -1.0); /* For second parameter, + see comment in recogRead() */ + if (!recog) + return ERROR_INT("bad templates; recog destroyed", __func__, 1); + return 0; +}
