Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/recogbasic.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file recogbasic.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Recog creation, destruction and access | |
| 32 * L_RECOG *recogCreateFromRecog() | |
| 33 * L_RECOG *recogCreateFromPixa() | |
| 34 * L_RECOG *recogCreateFromPixaNoFinish() | |
| 35 * L_RECOG *recogCreate() | |
| 36 * void recogDestroy() | |
| 37 * | |
| 38 * Recog accessors | |
| 39 * l_int32 recogGetCount() | |
| 40 * l_int32 recogSetParams() | |
| 41 * static l_int32 recogGetCharsetSize() | |
| 42 * | |
| 43 * Character/index lookup | |
| 44 * l_int32 recogGetClassIndex() | |
| 45 * l_int32 recogStringToIndex() | |
| 46 * l_int32 recogGetClassString() | |
| 47 * l_int32 l_convertCharstrToInt() | |
| 48 * | |
| 49 * Serialization | |
| 50 * L_RECOG *recogRead() | |
| 51 * L_RECOG *recogReadStream() | |
| 52 * L_RECOG *recogReadMem() | |
| 53 * l_int32 recogWrite() | |
| 54 * l_int32 recogWriteStream() | |
| 55 * l_int32 recogWriteMem() | |
| 56 * PIXA *recogExtractPixa() | |
| 57 * static l_int32 recogAddCharstrLabels() | |
| 58 * static l_int32 recogAddAllSamples() | |
| 59 * | |
| 60 * The recognizer functionality is split into four files: | |
| 61 * recogbasic.c: create, destroy, access, serialize | |
| 62 * recogtrain.c: training on labeled and unlabeled data | |
| 63 * recogident.c: running the recognizer(s) on input | |
| 64 * recogdid.c: running the recognizer(s) on input using a | |
| 65 * document image decoding (DID) hidden markov model | |
| 66 * | |
| 67 * This is a content-adapted (or book-adapted) recognizer (BAR) application. | |
| 68 * The recognizers here are typically assembled from data that has | |
| 69 * been labeled by a generic recognition system, such as Tesseract. | |
| 70 * The general procedure to create a recognizer (recog) from labeled data is | |
| 71 * to add the labeled character bitmaps, either one at a time or | |
| 72 * all together from a pixa with labeled pix. | |
| 73 * | |
| 74 * The suggested use for a BAR that consists of labeled templates drawn | |
| 75 * from a single source (e.g., a book) is to identify unlabeled samples | |
| 76 * by using unscaled character templates in the BAR, picking the | |
| 77 * template closest to the unlabeled sample. | |
| 78 * | |
| 79 * Outliers can be removed from a pixa of labeled pix. This is one of | |
| 80 * two methods that use averaged templates (the other is greedy splitting | |
| 81 * of characters). See recogtrain.c for a discussion and the implementation. | |
| 82 * | |
| 83 * A special bootstrap recognizer (BSR) can be used to make a BAR from | |
| 84 * unlabeled book data. This is done by comparing character images | |
| 85 * from the book with labeled templates in the BSR, where all images | |
| 86 * are scaled to h = 40. The templates can be either the scanned images | |
| 87 * or images consisting of width-normalized strokes derived from | |
| 88 * the skeleton of the character bitmaps. | |
| 89 * | |
| 90 * Two BARs of labeled character data, that have been made by | |
| 91 * different recognizers, can be joined by extracting a pixa of the | |
| 92 * labeled templates from each, joining the two pixa, and then | |
| 93 * regenerating a BAR from the joined set of templates. | |
| 94 * If all the labeled character data is from a single source (e.g., a book), | |
| 95 * identification can proceed using unscaled templates (either the input | |
| 96 * image or width-normalized lines). But if the labeled data comes from | |
| 97 * more than one source, (a "hybrid" recognizer), the templates should | |
| 98 * be scaled, and we recommend scaling to a fixed height. | |
| 99 * | |
| 100 * Suppose it is not possible to generate a BAR with a sufficient number | |
| 101 * of templates of each class taken from a single source. In that case, | |
| 102 * templates from the BSR itself can be added. This is the condition | |
| 103 * described above, where the labeled templates come from multiple | |
| 104 * sources, and it is necessary to do all character matches using | |
| 105 * templates that have been scaled to a fixed height (e.g., 40). | |
| 106 * Likewise, the samples to be identified using this hybrid recognizer | |
| 107 * must be modified in the same way. See prog/recogtest3.c for an | |
| 108 * example of the steps that can be taken in the construction of a BAR | |
| 109 * using a BSR. | |
| 110 * | |
| 111 * For training numeric input, an example set of calls that scales | |
| 112 * each training input to fixed h and will use the line templates of | |
| 113 * width linew for identifying unknown characters is: | |
| 114 * L_Recog *rec = recogCreate(0, h, linew, 128, 1); | |
| 115 * for (i = 0; i < n; i++) { // read in n training digits | |
| 116 * Pix *pix = ... | |
| 117 * recogTrainLabeled(rec, pix, NULL, text[i], 0); | |
| 118 * } | |
| 119 * recogTrainingFinished(&rec, 1, -1, -1.0); // required | |
| 120 * | |
| 121 * It is an error if any function that computes averages, removes | |
| 122 * outliers or requests identification of an unlabeled character, | |
| 123 * such as: | |
| 124 * (1) computing the sample averages: recogAverageSamples() | |
| 125 * (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2() | |
| 126 * (3) requesting identification of an unlabeled character: | |
| 127 * recogIdentifyPix() | |
| 128 * is called before an explicit call to finish training. Note that | |
| 129 * to do further training on a "finished" recognizer, you can set | |
| 130 * recog->train_done = FALSE; | |
| 131 * add the new training samples, and again call | |
| 132 * recogTrainingFinished(&rec, 1, -1, -1.0); // required | |
| 133 * | |
| 134 * If not scaling, using the images directly for identification, and | |
| 135 * removing outliers, do something like this: | |
| 136 * L_Recog *rec = recogCreate(0, 0, 0, 128, 1); | |
| 137 * for (i = 0; i < n; i++) { // read in n training characters | |
| 138 * Pix *pix = ... | |
| 139 * recogTrainLabeled(rec, pix, NULL, text[i], 0); | |
| 140 * } | |
| 141 * recogTrainingFinished(&rec, 1, -1, -1.0); | |
| 142 * if (!rec) ... [return] | |
| 143 * // remove outliers | |
| 144 * recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL); | |
| 145 * | |
| 146 * You can generate a recognizer from a pixa where the text field in | |
| 147 * each pix is the character string label for the pix. For example, | |
| 148 * the following recognizer will store unscaled line images: | |
| 149 * L_Recog *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1); | |
| 150 * and in use, it is fed unscaled line images to identify. | |
| 151 * | |
| 152 * For the following, assume that you have a pixa of labeled templates. | |
| 153 * If it is likely that some of the input templates are mislabeled, | |
| 154 * there are several things that can be done to remove them. | |
| 155 * The first is to put a size and quantity filter on them; e.g. | |
| 156 * Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6); | |
| 157 * Then you can remove outliers; e.g., | |
| 158 * Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL); | |
| 159 * | |
| 160 * To this point, all templates are from a single source, so you | |
| 161 * can make a recognizer that uses the unscaled templates and optionally | |
| 162 * attempts to split touching characters: | |
| 163 * L_Recog *recog1 = recogCreateFromPixa(pixa3, ...); | |
| 164 * Alternatively, if you need more templates for some of the classes, | |
| 165 * you can pad with templates from a "bootstrap" recognizer (BSR). | |
| 166 * If you pad, it is necessary to scale the templates and input | |
| 167 * samples to a fixed height, and no attempt will be made to split | |
| 168 * the input sample connected components: | |
| 169 * L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0); | |
| 170 * recogPadDigitTrainingSet(&recog1, 40, 0); | |
| 171 * | |
| 172 * A special case is a pure BSR, that contains images scaled to a fixed | |
| 173 * height (we use 40 in these examples). | |
| 174 * For this, use either the scanned bitmap: | |
| 175 * L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1); | |
| 176 * or width-normalized lines (use width of 5 here): | |
| 177 * L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1); | |
| 178 * | |
| 179 * This can be used to train a new book-adapted recognizer (BAR), on | |
| 180 * unlabeled data from, e.g., a book. To do this, the following is required: | |
| 181 * (1) the input images from the book must be scaled in the same | |
| 182 * way as those in the BSR, and | |
| 183 * (2) both the BSR and the input images must be set up to be either | |
| 184 * input scanned images or width-normalized lines. | |
| 185 * | |
| 186 * </pre> | |
| 187 */ | |
| 188 | |
| 189 #ifdef HAVE_CONFIG_H | |
| 190 #include <config_auto.h> | |
| 191 #endif /* HAVE_CONFIG_H */ | |
| 192 | |
| 193 #include <string.h> | |
| 194 #include "allheaders.h" | |
| 195 | |
| 196 static const l_int32 MaxExamplesInClass = 256; | |
| 197 | |
| 198 /* Default recog parameters that can be changed */ | |
| 199 static const l_int32 DefaultCharsetType = L_ARABIC_NUMERALS; | |
| 200 static const l_int32 DefaultMinNopad = 1; | |
| 201 static const l_float32 DefaultMaxWHRatio = 3.0f; /* max allowed w/h | |
| 202 ratio for a component to be split */ | |
| 203 static const l_float32 DefaultMaxHTRatio = 2.6f; /* max allowed ratio of | |
| 204 max/min unscaled averaged template heights */ | |
| 205 static const l_int32 DefaultThreshold = 150; /* for binarization */ | |
| 206 static const l_int32 DefaultMaxYShift = 1; /* for identification */ | |
| 207 | |
| 208 /* Static functions */ | |
| 209 static l_int32 recogGetCharsetSize(l_int32 type); | |
| 210 static l_int32 recogAddCharstrLabels(L_RECOG *recog); | |
| 211 static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug); | |
| 212 | |
| 213 | |
| 214 /*------------------------------------------------------------------------* | |
| 215 * Recog: initialization and destruction * | |
| 216 *------------------------------------------------------------------------*/ | |
| 217 /*! | |
| 218 * \brief recogCreateFromRecog() | |
| 219 * | |
| 220 * \param[in] recs source recog with arbitrary input parameters | |
| 221 * \param[in] scalew scale all widths to this; use 0 otherwise | |
| 222 * \param[in] scaleh scale all heights to this; use 0 otherwise | |
| 223 * \param[in] linew width of normalized strokes; use 0 to skip | |
| 224 * \param[in] threshold for binarization; typically ~128 | |
| 225 * \param[in] maxyshift from nominal centroid alignment; default is 1 | |
| 226 * \return recd, or NULL on error | |
| 227 * | |
| 228 * <pre> | |
| 229 * Notes: | |
| 230 * (1) This is a convenience function that generates a recog using | |
| 231 * the unscaled training data in an existing recog. | |
| 232 * (2) It is recommended to use %maxyshift = 1 (the default value) | |
| 233 * (3) See recogCreate() for use of %scalew, %scaleh and %linew. | |
| 234 * </pre> | |
| 235 */ | |
| 236 L_RECOG * | |
| 237 recogCreateFromRecog(L_RECOG *recs, | |
| 238 l_int32 scalew, | |
| 239 l_int32 scaleh, | |
| 240 l_int32 linew, | |
| 241 l_int32 threshold, | |
| 242 l_int32 maxyshift) | |
| 243 { | |
| 244 L_RECOG *recd; | |
| 245 PIXA *pixa; | |
| 246 | |
| 247 if (!recs) | |
| 248 return (L_RECOG *)ERROR_PTR("recs not defined", __func__, NULL); | |
| 249 | |
| 250 pixa = recogExtractPixa(recs); | |
| 251 recd = recogCreateFromPixa(pixa, scalew, scaleh, linew, threshold, | |
| 252 maxyshift); | |
| 253 pixaDestroy(&pixa); | |
| 254 return recd; | |
| 255 } | |
| 256 | |
| 257 | |
| 258 /*! | |
| 259 * \brief recogCreateFromPixa() | |
| 260 * | |
| 261 * \param[in] pixa of labeled, 1 bpp images | |
| 262 * \param[in] scalew scale all widths to this; use 0 otherwise | |
| 263 * \param[in] scaleh scale all heights to this; use 0 otherwise | |
| 264 * \param[in] linew width of normalized strokes; use 0 to skip | |
| 265 * \param[in] threshold for binarization; typically ~150 | |
| 266 * \param[in] maxyshift from nominal centroid alignment; default is 1 | |
| 267 * \return recog, or NULL on error | |
| 268 * | |
| 269 * <pre> | |
| 270 * Notes: | |
| 271 * (1) This is a convenience function for training from labeled data. | |
| 272 * The pixa can be read from file. | |
| 273 * (2) The pixa should contain the unscaled bitmaps used for training. | |
| 274 * (3) See recogCreate() for use of %scalew, %scaleh and %linew. | |
| 275 * (4) It is recommended to use %maxyshift = 1 (the default value) | |
| 276 * (5) All examples in the same class (i.e., with the same character | |
| 277 * label) should be similar. They can be made similar by invoking | |
| 278 * recogRemoveOutliers[1,2]() on %pixa before calling this function. | |
| 279 * </pre> | |
| 280 */ | |
| 281 L_RECOG * | |
| 282 recogCreateFromPixa(PIXA *pixa, | |
| 283 l_int32 scalew, | |
| 284 l_int32 scaleh, | |
| 285 l_int32 linew, | |
| 286 l_int32 threshold, | |
| 287 l_int32 maxyshift) | |
| 288 { | |
| 289 L_RECOG *recog; | |
| 290 | |
| 291 if (!pixa) | |
| 292 return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL); | |
| 293 | |
| 294 recog = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew, | |
| 295 threshold, maxyshift); | |
| 296 if (!recog) | |
| 297 return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); | |
| 298 | |
| 299 recogTrainingFinished(&recog, 1, -1, -1.0); | |
| 300 if (!recog) | |
| 301 return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL); | |
| 302 return recog; | |
| 303 } | |
| 304 | |
| 305 | |
| 306 /*! | |
| 307 * \brief recogCreateFromPixaNoFinish() | |
| 308 * | |
| 309 * \param[in] pixa of labeled, 1 bpp images | |
| 310 * \param[in] scalew scale all widths to this; use 0 otherwise | |
| 311 * \param[in] scaleh scale all heights to this; use 0 otherwise | |
| 312 * \param[in] linew width of normalized strokes; use 0 to skip | |
| 313 * \param[in] threshold for binarization; typically ~150 | |
| 314 * \param[in] maxyshift from nominal centroid alignment; default is 1 | |
| 315 * \return recog, or NULL on error | |
| 316 * | |
| 317 * <pre> | |
| 318 * Notes: | |
| 319 * (1) See recogCreateFromPixa() for details. | |
| 320 * (2) This is also used to generate a pixaa with templates | |
| 321 * in each class within a pixa. For that, all args except for | |
| 322 * %pixa are ignored. | |
| 323 * </pre> | |
| 324 */ | |
| 325 L_RECOG * | |
| 326 recogCreateFromPixaNoFinish(PIXA *pixa, | |
| 327 l_int32 scalew, | |
| 328 l_int32 scaleh, | |
| 329 l_int32 linew, | |
| 330 l_int32 threshold, | |
| 331 l_int32 maxyshift) | |
| 332 { | |
| 333 char *text; | |
| 334 l_int32 full, n, i, ntext, same, maxd; | |
| 335 PIX *pix; | |
| 336 L_RECOG *recog; | |
| 337 | |
| 338 if (!pixa) | |
| 339 return (L_RECOG *)ERROR_PTR("pixa not defined", __func__, NULL); | |
| 340 pixaVerifyDepth(pixa, &same, &maxd); | |
| 341 if (maxd > 1) | |
| 342 return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", __func__, NULL); | |
| 343 | |
| 344 pixaIsFull(pixa, &full, NULL); | |
| 345 if (!full) | |
| 346 return (L_RECOG *)ERROR_PTR("not all pix are present", __func__, NULL); | |
| 347 | |
| 348 n = pixaGetCount(pixa); | |
| 349 pixaCountText(pixa, &ntext); | |
| 350 if (ntext == 0) | |
| 351 return (L_RECOG *)ERROR_PTR("no pix have text strings", __func__, NULL); | |
| 352 if (ntext < n) | |
| 353 L_ERROR("%d text strings < %d pix\n", __func__, ntext, n); | |
| 354 | |
| 355 recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift); | |
| 356 if (!recog) | |
| 357 return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); | |
| 358 for (i = 0; i < n; i++) { | |
| 359 pix = pixaGetPix(pixa, i, L_CLONE); | |
| 360 text = pixGetText(pix); | |
| 361 if (!text || strlen(text) == 0) { | |
| 362 L_ERROR("pix[%d] has no text\n", __func__, i); | |
| 363 pixDestroy(&pix); | |
| 364 continue; | |
| 365 } | |
| 366 recogTrainLabeled(recog, pix, NULL, text, 0); | |
| 367 pixDestroy(&pix); | |
| 368 } | |
| 369 | |
| 370 return recog; | |
| 371 } | |
| 372 | |
| 373 | |
| 374 /*! | |
| 375 * \brief recogCreate() | |
| 376 * | |
| 377 * \param[in] scalew scale all widths to this; use 0 otherwise | |
| 378 * \param[in] scaleh scale all heights to this; use 0 otherwise | |
| 379 * \param[in] linew width of normalized strokes; use 0 to skip | |
| 380 * \param[in] threshold for binarization; typically ~128; 0 for default | |
| 381 * \param[in] maxyshift from nominal centroid alignment; default is 1 | |
| 382 * \return recog, or NULL on error | |
| 383 * | |
| 384 * <pre> | |
| 385 * Notes: | |
| 386 * (1) If %scalew == 0 and %scaleh == 0, no scaling is done. | |
| 387 * If one of these is 0 and the other is > 0, scaling is isotropic | |
| 388 * to the requested size. We typically do not set both > 0. | |
| 389 * (2) Use linew > 0 to convert the templates to images with fixed | |
| 390 * width strokes. linew == 0 skips the conversion. | |
| 391 * (3) The only valid values for %maxyshift are 0, 1 and 2. | |
| 392 * It is recommended to use %maxyshift == 1 (default value). | |
| 393 * Using %maxyshift == 0 is much faster than %maxyshift == 1, but | |
| 394 * it is much less likely to find the template with the best | |
| 395 * correlation. Use of anything but 1 results in a warning. | |
| 396 * (4) Scaling is used for finding outliers and for training a | |
| 397 * book-adapted recognizer (BAR) from a bootstrap recognizer (BSR). | |
| 398 * Scaling the height to a fixed value and scaling the width | |
| 399 * accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended. | |
| 400 * (5) The storage for most of the arrays is allocated when training | |
| 401 * is finished. | |
| 402 * </pre> | |
| 403 */ | |
| 404 L_RECOG * | |
| 405 recogCreate(l_int32 scalew, | |
| 406 l_int32 scaleh, | |
| 407 l_int32 linew, | |
| 408 l_int32 threshold, | |
| 409 l_int32 maxyshift) | |
| 410 { | |
| 411 L_RECOG *recog; | |
| 412 | |
| 413 if (scalew < 0 || scaleh < 0) | |
| 414 return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", __func__, NULL); | |
| 415 if (linew > 10) | |
| 416 return (L_RECOG *)ERROR_PTR("invalid linew > 10", __func__, NULL); | |
| 417 if (threshold == 0) threshold = DefaultThreshold; | |
| 418 if (threshold < 0 || threshold > 255) { | |
| 419 L_WARNING("invalid threshold; using default\n", __func__); | |
| 420 threshold = DefaultThreshold; | |
| 421 } | |
| 422 if (maxyshift < 0 || maxyshift > 2) { | |
| 423 L_WARNING("invalid maxyshift; using default value\n", __func__); | |
| 424 maxyshift = DefaultMaxYShift; | |
| 425 } else if (maxyshift == 0) { | |
| 426 L_WARNING("Using maxyshift = 0; faster, worse correlation results\n", | |
| 427 __func__); | |
| 428 } else if (maxyshift == 2) { | |
| 429 L_WARNING("Using maxyshift = 2; slower\n", __func__); | |
| 430 } | |
| 431 | |
| 432 recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG)); | |
| 433 recog->templ_use = L_USE_ALL_TEMPLATES; /* default */ | |
| 434 recog->threshold = threshold; | |
| 435 recog->scalew = scalew; | |
| 436 recog->scaleh = scaleh; | |
| 437 recog->linew = linew; | |
| 438 recog->maxyshift = maxyshift; | |
| 439 recogSetParams(recog, 1, -1, -1.0, -1.0); | |
| 440 recog->bmf = bmfCreate(NULL, 6); | |
| 441 recog->bmf_size = 6; | |
| 442 recog->maxarraysize = MaxExamplesInClass; | |
| 443 | |
| 444 /* Generate the LUTs */ | |
| 445 recog->centtab = makePixelCentroidTab8(); | |
| 446 recog->sumtab = makePixelSumTab8(); | |
| 447 recog->sa_text = sarrayCreate(0); | |
| 448 recog->dna_tochar = l_dnaCreate(0); | |
| 449 | |
| 450 /* Input default values for min component size for splitting. | |
| 451 * These are overwritten when pixTrainingFinished() is called. */ | |
| 452 recog->min_splitw = 6; | |
| 453 recog->max_splith = 60; | |
| 454 | |
| 455 /* Allocate the paa for the unscaled training bitmaps */ | |
| 456 recog->pixaa_u = pixaaCreate(recog->maxarraysize); | |
| 457 | |
| 458 /* Generate the storage for debugging */ | |
| 459 recog->pixadb_boot = pixaCreate(2); | |
| 460 recog->pixadb_split = pixaCreate(2); | |
| 461 return recog; | |
| 462 } | |
| 463 | |
| 464 | |
| 465 /*! | |
| 466 * \brief recogDestroy() | |
| 467 * | |
| 468 * \param[in,out] precog will be set to null before returning | |
| 469 * \return void | |
| 470 */ | |
| 471 void | |
| 472 recogDestroy(L_RECOG **precog) | |
| 473 { | |
| 474 L_RECOG *recog; | |
| 475 | |
| 476 if (!precog) { | |
| 477 L_WARNING("ptr address is null\n", __func__); | |
| 478 return; | |
| 479 } | |
| 480 | |
| 481 if ((recog = *precog) == NULL) return; | |
| 482 | |
| 483 LEPT_FREE(recog->centtab); | |
| 484 LEPT_FREE(recog->sumtab); | |
| 485 sarrayDestroy(&recog->sa_text); | |
| 486 l_dnaDestroy(&recog->dna_tochar); | |
| 487 pixaaDestroy(&recog->pixaa_u); | |
| 488 pixaDestroy(&recog->pixa_u); | |
| 489 ptaaDestroy(&recog->ptaa_u); | |
| 490 ptaDestroy(&recog->pta_u); | |
| 491 numaDestroy(&recog->nasum_u); | |
| 492 numaaDestroy(&recog->naasum_u); | |
| 493 pixaaDestroy(&recog->pixaa); | |
| 494 pixaDestroy(&recog->pixa); | |
| 495 ptaaDestroy(&recog->ptaa); | |
| 496 ptaDestroy(&recog->pta); | |
| 497 numaDestroy(&recog->nasum); | |
| 498 numaaDestroy(&recog->naasum); | |
| 499 pixaDestroy(&recog->pixa_tr); | |
| 500 pixaDestroy(&recog->pixadb_ave); | |
| 501 pixaDestroy(&recog->pixa_id); | |
| 502 pixDestroy(&recog->pixdb_ave); | |
| 503 pixDestroy(&recog->pixdb_range); | |
| 504 pixaDestroy(&recog->pixadb_boot); | |
| 505 pixaDestroy(&recog->pixadb_split); | |
| 506 bmfDestroy(&recog->bmf); | |
| 507 rchDestroy(&recog->rch); | |
| 508 rchaDestroy(&recog->rcha); | |
| 509 recogDestroyDid(recog); | |
| 510 LEPT_FREE(recog); | |
| 511 *precog = NULL; | |
| 512 } | |
| 513 | |
| 514 | |
| 515 /*------------------------------------------------------------------------* | |
| 516 * Recog accessors * | |
| 517 *------------------------------------------------------------------------*/ | |
| 518 /*! | |
| 519 * \brief recogGetCount() | |
| 520 * | |
| 521 * \param[in] recog | |
| 522 * \return count of classes in recog; 0 if no recog or on error | |
| 523 */ | |
| 524 l_int32 | |
| 525 recogGetCount(L_RECOG *recog) | |
| 526 { | |
| 527 if (!recog) | |
| 528 return ERROR_INT("recog not defined", __func__, 0); | |
| 529 return recog->setsize; | |
| 530 } | |
| 531 | |
| 532 | |
| 533 /*! | |
| 534 * \brief recogSetParams() | |
| 535 * | |
| 536 * \param[in] recog to be padded, if necessary | |
| 537 * \param[in] type type of char set; -1 for default; | |
| 538 * see enum in recog.h | |
| 539 * \param[in] min_nopad min number in a class without padding; | |
| 540 * use -1 for default | |
| 541 * \param[in] max_wh_ratio max width/height ratio allowed for splitting; | |
| 542 * use -1.0 for default | |
| 543 * \param[in] max_ht_ratio max of max/min averaged template height ratio; | |
| 544 * use -1.0 for default | |
| 545 * \return 0 if OK, 1 on error | |
| 546 * | |
| 547 * <pre> | |
| 548 * Notes: | |
| 549 * (1) This is called when a recog is created. | |
| 550 * (2) Default %min_nopad value allows for some padding. | |
| 551 * To disable padding, set %min_nopad = 0. To pad only when | |
| 552 * no samples are available for the class, set %min_nopad = 1. | |
| 553 * (3) The %max_wh_ratio limits the width/height ratio for components | |
| 554 * that we attempt to split. Splitting long components is expensive. | |
| 555 * (4) The %max_ht_ratio is a quality requirement on the training data. | |
| 556 * The recognizer will not run if the averages are computed and | |
| 557 * the templates do not satisfy it. | |
| 558 * </pre> | |
| 559 */ | |
| 560 l_ok | |
| 561 recogSetParams(L_RECOG *recog, | |
| 562 l_int32 type, | |
| 563 l_int32 min_nopad, | |
| 564 l_float32 max_wh_ratio, | |
| 565 l_float32 max_ht_ratio) | |
| 566 { | |
| 567 if (!recog) | |
| 568 return ERROR_INT("recog not defined", __func__, 1); | |
| 569 | |
| 570 recog->charset_type = (type >= 0) ? type : DefaultCharsetType; | |
| 571 recog->charset_size = recogGetCharsetSize(recog->charset_type); | |
| 572 recog->min_nopad = (min_nopad >= 0) ? min_nopad : DefaultMinNopad; | |
| 573 recog->max_wh_ratio = (max_wh_ratio > 0.0) ? max_wh_ratio : | |
| 574 DefaultMaxWHRatio; | |
| 575 recog->max_ht_ratio = (max_ht_ratio > 1.0) ? max_ht_ratio : | |
| 576 DefaultMaxHTRatio; | |
| 577 return 0; | |
| 578 } | |
| 579 | |
| 580 | |
| 581 /*! | |
| 582 * \brief recogGetCharsetSize() | |
| 583 * | |
| 584 * \param[in] type of charset | |
| 585 * \return size of charset, or 0 if unknown or on error | |
| 586 */ | |
| 587 static l_int32 | |
| 588 recogGetCharsetSize(l_int32 type) | |
| 589 { | |
| 590 switch (type) { | |
| 591 case L_UNKNOWN: | |
| 592 return 0; | |
| 593 case L_ARABIC_NUMERALS: | |
| 594 return 10; | |
| 595 case L_LC_ROMAN_NUMERALS: | |
| 596 return 7; | |
| 597 case L_UC_ROMAN_NUMERALS: | |
| 598 return 7; | |
| 599 case L_LC_ALPHA: | |
| 600 return 26; | |
| 601 case L_UC_ALPHA: | |
| 602 return 26; | |
| 603 default: | |
| 604 L_ERROR("invalid charset_type %d\n", __func__, type); | |
| 605 return 0; | |
| 606 } | |
| 607 return 0; /* shouldn't happen */ | |
| 608 } | |
| 609 | |
| 610 | |
| 611 /*------------------------------------------------------------------------* | |
| 612 * Character/index lookup * | |
| 613 *------------------------------------------------------------------------*/ | |
| 614 /*! | |
| 615 * \brief recogGetClassIndex() | |
| 616 * | |
| 617 * \param[in] recog with LUT's pre-computed | |
| 618 * \param[in] val integer value; can be up to 4 bytes for UTF-8 | |
| 619 * \param[in] text text from which %val was derived; used if not found | |
| 620 * \param[out] pindex index into dna_tochar | |
| 621 * \return 0 if found; 1 if not found and added; 2 on error. | |
| 622 * | |
| 623 * <pre> | |
| 624 * Notes: | |
| 625 * (1) This is used during training. There is one entry in | |
| 626 * recog->dna_tochar (integer value, e.g., ascii) and | |
| 627 * one in recog->sa_text (e.g, ascii letter in a string) | |
| 628 * for each character class. | |
| 629 * (2) This searches the dna character array for %val. If it is | |
| 630 * not found, the template represents a character class not | |
| 631 * already seen: it increments setsize (the number of character | |
| 632 * classes) by 1, and augments both the index (dna_tochar) | |
| 633 * and text (sa_text) arrays. | |
| 634 * (3) Returns the index in &index, except on error. | |
| 635 * (4) Caller must check the function return value. | |
| 636 * </pre> | |
| 637 */ | |
| 638 l_int32 | |
| 639 recogGetClassIndex(L_RECOG *recog, | |
| 640 l_int32 val, | |
| 641 char *text, | |
| 642 l_int32 *pindex) | |
| 643 { | |
| 644 l_int32 i, n, ival; | |
| 645 | |
| 646 if (!pindex) | |
| 647 return ERROR_INT("&index not defined", __func__, 2); | |
| 648 *pindex = -1; | |
| 649 if (!recog) | |
| 650 return ERROR_INT("recog not defined", __func__, 2); | |
| 651 if (!text) | |
| 652 return ERROR_INT("text not defined", __func__, 2); | |
| 653 | |
| 654 /* Search existing characters */ | |
| 655 n = l_dnaGetCount(recog->dna_tochar); | |
| 656 for (i = 0; i < n; i++) { | |
| 657 l_dnaGetIValue(recog->dna_tochar, i, &ival); | |
| 658 if (val == ival) { /* found */ | |
| 659 *pindex = i; | |
| 660 return 0; | |
| 661 } | |
| 662 } | |
| 663 | |
| 664 /* If not found... */ | |
| 665 l_dnaAddNumber(recog->dna_tochar, val); | |
| 666 sarrayAddString(recog->sa_text, text, L_COPY); | |
| 667 recog->setsize++; | |
| 668 *pindex = n; | |
| 669 return 1; | |
| 670 } | |
| 671 | |
| 672 | |
| 673 /*! | |
| 674 * \brief recogStringToIndex() | |
| 675 * | |
| 676 * \param[in] recog | |
| 677 * \param[in] text text string for some class | |
| 678 * \param[out] pindex index for that class; -1 if not found | |
| 679 * \return 0 if OK, 1 on error not finding the string is an error | |
| 680 */ | |
| 681 l_ok | |
| 682 recogStringToIndex(L_RECOG *recog, | |
| 683 char *text, | |
| 684 l_int32 *pindex) | |
| 685 { | |
| 686 char *charstr; | |
| 687 l_int32 i, n, diff; | |
| 688 | |
| 689 if (!pindex) | |
| 690 return ERROR_INT("&index not defined", __func__, 1); | |
| 691 *pindex = -1; | |
| 692 if (!recog) | |
| 693 return ERROR_INT("recog not defined", __func__, 1); | |
| 694 if (!text) | |
| 695 return ERROR_INT("text not defined", __func__, 1); | |
| 696 | |
| 697 /* Search existing characters */ | |
| 698 n = recog->setsize; | |
| 699 for (i = 0; i < n; i++) { | |
| 700 recogGetClassString(recog, i, &charstr); | |
| 701 if (!charstr) { | |
| 702 L_ERROR("string not found for index %d\n", __func__, i); | |
| 703 continue; | |
| 704 } | |
| 705 diff = strcmp(text, charstr); | |
| 706 LEPT_FREE(charstr); | |
| 707 if (diff) continue; | |
| 708 *pindex = i; | |
| 709 return 0; | |
| 710 } | |
| 711 | |
| 712 return 1; /* not found */ | |
| 713 } | |
| 714 | |
| 715 | |
| 716 /*! | |
| 717 * \brief recogGetClassString() | |
| 718 * | |
| 719 * \param[in] recog | |
| 720 * \param[in] index into array of char types | |
| 721 * \param[out] pcharstr string representation; | |
| 722 * returns an empty string on error | |
| 723 * \return 0 if found, 1 on error | |
| 724 * | |
| 725 * <pre> | |
| 726 * Notes: | |
| 727 * (1) Extracts a copy of the string from sa_text, which | |
| 728 * the caller must free. | |
| 729 * (2) Caller must check the function return value. | |
| 730 * </pre> | |
| 731 */ | |
| 732 l_int32 | |
| 733 recogGetClassString(L_RECOG *recog, | |
| 734 l_int32 index, | |
| 735 char **pcharstr) | |
| 736 { | |
| 737 if (!pcharstr) | |
| 738 return ERROR_INT("&charstr not defined", __func__, 1); | |
| 739 *pcharstr = stringNew(""); | |
| 740 if (!recog) | |
| 741 return ERROR_INT("recog not defined", __func__, 2); | |
| 742 | |
| 743 if (index < 0 || index >= recog->setsize) | |
| 744 return ERROR_INT("invalid index", __func__, 1); | |
| 745 LEPT_FREE(*pcharstr); | |
| 746 *pcharstr = sarrayGetString(recog->sa_text, index, L_COPY); | |
| 747 return 0; | |
| 748 } | |
| 749 | |
| 750 | |
| 751 /*! | |
| 752 * \brief l_convertCharstrToInt() | |
| 753 * | |
| 754 * \param[in] str input string representing one UTF-8 character; | |
| 755 * not more than 4 bytes | |
| 756 * \param[out] pval integer value for the input. Think of it | |
| 757 * as a 1-to-1 hash code. | |
| 758 * \return 0 if OK, 1 on error | |
| 759 */ | |
| 760 l_ok | |
| 761 l_convertCharstrToInt(const char *str, | |
| 762 l_int32 *pval) | |
| 763 { | |
| 764 l_int32 size; | |
| 765 l_uint32 val; | |
| 766 | |
| 767 if (!pval) | |
| 768 return ERROR_INT("&val not defined", __func__, 1); | |
| 769 *pval = 0; | |
| 770 if (!str) | |
| 771 return ERROR_INT("str not defined", __func__, 1); | |
| 772 size = strlen(str); | |
| 773 if (size == 0) | |
| 774 return ERROR_INT("empty string", __func__, 1); | |
| 775 if (size > 4) | |
| 776 return ERROR_INT("invalid string: > 4 bytes", __func__, 1); | |
| 777 | |
| 778 val = (l_uint8)str[0]; | |
| 779 if (size > 1) | |
| 780 val = (val << 8) + (l_uint8)str[1]; | |
| 781 if (size > 2) | |
| 782 val = (val << 8) + (l_uint8)str[2]; | |
| 783 if (size > 3) | |
| 784 val = (val << 8) + (l_uint8)str[3]; | |
| 785 *pval = (l_int32)(val & 0x7fffffff); | |
| 786 return 0; | |
| 787 } | |
| 788 | |
| 789 | |
| 790 /*------------------------------------------------------------------------* | |
| 791 * Serialization * | |
| 792 *------------------------------------------------------------------------*/ | |
| 793 /*! | |
| 794 * \brief recogRead() | |
| 795 * | |
| 796 * \param[in] filename | |
| 797 * \return recog, or NULL on error | |
| 798 * | |
| 799 * <pre> | |
| 800 * Notes: | |
| 801 * (1) When a recog is serialized, a pixaa of the templates that are | |
| 802 * actually used for correlation is saved in the pixaa_u array | |
| 803 * of the recog. These can be different from the templates that | |
| 804 * were used to generate the recog, because those original templates | |
| 805 * can be scaled and turned into normalized lines. When recog1 | |
| 806 * is deserialized to recog2, these templates are put in both the | |
| 807 * unscaled array (pixaa_u) and the modified array (pixaa) in recog2. | |
| 808 * Why not put it in only the unscaled array and let | |
| 809 * recogTrainingFinalized() regenerate the modified templates? | |
| 810 * The reason is that with normalized lines, the operation of | |
| 811 * thinning to a skeleton and dilating back to a fixed width | |
| 812 * is not idempotent. Thinning to a skeleton saves pixels at | |
| 813 * the end of a line segment, and thickening the skeleton puts | |
| 814 * additional pixels at the end of the lines. This tends to | |
| 815 * close gaps. | |
| 816 * </pre> | |
| 817 */ | |
| 818 L_RECOG * | |
| 819 recogRead(const char *filename) | |
| 820 { | |
| 821 FILE *fp; | |
| 822 L_RECOG *recog; | |
| 823 | |
| 824 if (!filename) | |
| 825 return (L_RECOG *)ERROR_PTR("filename not defined", __func__, NULL); | |
| 826 if ((fp = fopenReadStream(filename)) == NULL) | |
| 827 return (L_RECOG *)ERROR_PTR_1("stream not opened", | |
| 828 filename, __func__, NULL); | |
| 829 | |
| 830 if ((recog = recogReadStream(fp)) == NULL) { | |
| 831 fclose(fp); | |
| 832 return (L_RECOG *)ERROR_PTR_1("recog not read", | |
| 833 filename, __func__, NULL); | |
| 834 } | |
| 835 | |
| 836 fclose(fp); | |
| 837 return recog; | |
| 838 } | |
| 839 | |
| 840 | |
| 841 /*! | |
| 842 * \brief recogReadStream() | |
| 843 * | |
| 844 * \param[in] fp file stream | |
| 845 * \return recog, or NULL on error | |
| 846 */ | |
| 847 L_RECOG * | |
| 848 recogReadStream(FILE *fp) | |
| 849 { | |
| 850 l_int32 version, setsize, threshold, scalew, scaleh, linew; | |
| 851 l_int32 maxyshift, nc; | |
| 852 L_DNA *dna_tochar; | |
| 853 PIXAA *paa; | |
| 854 L_RECOG *recog; | |
| 855 SARRAY *sa_text; | |
| 856 | |
| 857 if (!fp) | |
| 858 return (L_RECOG *)ERROR_PTR("stream not defined", __func__, NULL); | |
| 859 | |
| 860 if (fscanf(fp, "\nRecog Version %d\n", &version) != 1) | |
| 861 return (L_RECOG *)ERROR_PTR("not a recog file", __func__, NULL); | |
| 862 if (version != RECOG_VERSION_NUMBER) | |
| 863 return (L_RECOG *)ERROR_PTR("invalid recog version", __func__, NULL); | |
| 864 if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1) | |
| 865 return (L_RECOG *)ERROR_PTR("setsize not read", __func__, NULL); | |
| 866 if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1) | |
| 867 return (L_RECOG *)ERROR_PTR("binary thresh not read", __func__, NULL); | |
| 868 if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1) | |
| 869 return (L_RECOG *)ERROR_PTR("maxyshift not read", __func__, NULL); | |
| 870 if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1) | |
| 871 return (L_RECOG *)ERROR_PTR("width not read", __func__, NULL); | |
| 872 if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1) | |
| 873 return (L_RECOG *)ERROR_PTR("height not read", __func__, NULL); | |
| 874 if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1) | |
| 875 return (L_RECOG *)ERROR_PTR("line width not read", __func__, NULL); | |
| 876 if ((recog = recogCreate(scalew, scaleh, linew, threshold, | |
| 877 maxyshift)) == NULL) | |
| 878 return (L_RECOG *)ERROR_PTR("recog not made", __func__, NULL); | |
| 879 | |
| 880 if (fscanf(fp, "\nLabels for character set:\n") == -1) { | |
| 881 recogDestroy(&recog); | |
| 882 return (L_RECOG *)ERROR_PTR("label intro not read", __func__, NULL); | |
| 883 } | |
| 884 l_dnaDestroy(&recog->dna_tochar); | |
| 885 if ((dna_tochar = l_dnaReadStream(fp)) == NULL) { | |
| 886 recogDestroy(&recog); | |
| 887 return (L_RECOG *)ERROR_PTR("dna_tochar not read", __func__, NULL); | |
| 888 } | |
| 889 recog->dna_tochar = dna_tochar; | |
| 890 sarrayDestroy(&recog->sa_text); | |
| 891 if ((sa_text = sarrayReadStream(fp)) == NULL) { | |
| 892 recogDestroy(&recog); | |
| 893 return (L_RECOG *)ERROR_PTR("sa_text not read", __func__, NULL); | |
| 894 } | |
| 895 recog->sa_text = sa_text; | |
| 896 | |
| 897 if (fscanf(fp, "\nPixaa of all samples in the training set:\n") == -1) { | |
| 898 recogDestroy(&recog); | |
| 899 return (L_RECOG *)ERROR_PTR("pixaa intro not read", __func__, NULL); | |
| 900 } | |
| 901 if ((paa = pixaaReadStream(fp)) == NULL) { | |
| 902 recogDestroy(&recog); | |
| 903 return (L_RECOG *)ERROR_PTR("pixaa not read", __func__, NULL); | |
| 904 } | |
| 905 recog->setsize = setsize; | |
| 906 nc = pixaaGetCount(paa, NULL); | |
| 907 if (nc != setsize) { | |
| 908 recogDestroy(&recog); | |
| 909 pixaaDestroy(&paa); | |
| 910 L_ERROR("(setsize = %d) != (paa count = %d)\n", __func__, | |
| 911 setsize, nc); | |
| 912 return NULL; | |
| 913 } | |
| 914 | |
| 915 recogAddAllSamples(&recog, paa, 0); /* this finishes */ | |
| 916 pixaaDestroy(&paa); | |
| 917 if (!recog) | |
| 918 return (L_RECOG *)ERROR_PTR("bad templates", __func__, NULL); | |
| 919 return recog; | |
| 920 } | |
| 921 | |
| 922 | |
| 923 /*! | |
| 924 * \brief recogReadMem() | |
| 925 * | |
| 926 * \param[in] data serialization of recog (not ascii) | |
| 927 * \param[in] size of data in bytes | |
| 928 * \return recog, or NULL on error | |
| 929 */ | |
| 930 L_RECOG * | |
| 931 recogReadMem(const l_uint8 *data, | |
| 932 size_t size) | |
| 933 { | |
| 934 FILE *fp; | |
| 935 L_RECOG *recog; | |
| 936 | |
| 937 if (!data) | |
| 938 return (L_RECOG *)ERROR_PTR("data not defined", __func__, NULL); | |
| 939 if ((fp = fopenReadFromMemory(data, size)) == NULL) | |
| 940 return (L_RECOG *)ERROR_PTR("stream not opened", __func__, NULL); | |
| 941 | |
| 942 recog = recogReadStream(fp); | |
| 943 fclose(fp); | |
| 944 if (!recog) L_ERROR("recog not read\n", __func__); | |
| 945 return recog; | |
| 946 } | |
| 947 | |
| 948 | |
| 949 /*! | |
| 950 * \brief recogWrite() | |
| 951 * | |
| 952 * \param[in] filename | |
| 953 * \param[in] recog | |
| 954 * \return 0 if OK, 1 on error | |
| 955 * | |
| 956 * <pre> | |
| 957 * Notes: | |
| 958 * (1) The pixaa of templates that is written is the modified one | |
| 959 * in the pixaa field. It is the pixaa that is actually used | |
| 960 * for correlation. This is not the unscaled array of labeled | |
| 961 * bitmaps, in pixaa_u, that was used to generate the recog in the | |
| 962 * first place. See the notes in recogRead() for the rationale. | |
| 963 * </pre> | |
| 964 */ | |
| 965 l_ok | |
| 966 recogWrite(const char *filename, | |
| 967 L_RECOG *recog) | |
| 968 { | |
| 969 l_int32 ret; | |
| 970 FILE *fp; | |
| 971 | |
| 972 if (!filename) | |
| 973 return ERROR_INT("filename not defined", __func__, 1); | |
| 974 if (!recog) | |
| 975 return ERROR_INT("recog not defined", __func__, 1); | |
| 976 | |
| 977 if ((fp = fopenWriteStream(filename, "wb")) == NULL) | |
| 978 return ERROR_INT_1("stream not opened", filename, __func__, 1); | |
| 979 ret = recogWriteStream(fp, recog); | |
| 980 fclose(fp); | |
| 981 if (ret) | |
| 982 return ERROR_INT_1("recog not written to stream", | |
| 983 filename, __func__, 1); | |
| 984 return 0; | |
| 985 } | |
| 986 | |
| 987 | |
| 988 /*! | |
| 989 * \brief recogWriteStream() | |
| 990 * | |
| 991 * \param[in] fp file stream opened for "wb" | |
| 992 * \param[in] recog | |
| 993 * \return 0 if OK, 1 on error | |
| 994 */ | |
| 995 l_ok | |
| 996 recogWriteStream(FILE *fp, | |
| 997 L_RECOG *recog) | |
| 998 { | |
| 999 if (!fp) | |
| 1000 return ERROR_INT("stream not defined", __func__, 1); | |
| 1001 if (!recog) | |
| 1002 return ERROR_INT("recog not defined", __func__, 1); | |
| 1003 | |
| 1004 fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER); | |
| 1005 fprintf(fp, "Size of character set = %d\n", recog->setsize); | |
| 1006 fprintf(fp, "Binarization threshold = %d\n", recog->threshold); | |
| 1007 fprintf(fp, "Maxyshift = %d\n", recog->maxyshift); | |
| 1008 fprintf(fp, "Scale to width = %d\n", recog->scalew); | |
| 1009 fprintf(fp, "Scale to height = %d\n", recog->scaleh); | |
| 1010 fprintf(fp, "Normalized line width = %d\n", recog->linew); | |
| 1011 fprintf(fp, "\nLabels for character set:\n"); | |
| 1012 l_dnaWriteStream(fp, recog->dna_tochar); | |
| 1013 sarrayWriteStream(fp, recog->sa_text); | |
| 1014 fprintf(fp, "\nPixaa of all samples in the training set:\n"); | |
| 1015 pixaaWriteStream(fp, recog->pixaa); | |
| 1016 | |
| 1017 return 0; | |
| 1018 } | |
| 1019 | |
| 1020 | |
| 1021 /*! | |
| 1022 * \brief recogWriteMem() | |
| 1023 * | |
| 1024 * \param[out] pdata data of serialized recog (not ascii) | |
| 1025 * \param[out] psize size of returned data | |
| 1026 * \param[in] recog | |
| 1027 * \return 0 if OK, 1 on error | |
| 1028 * | |
| 1029 * <pre> | |
| 1030 * Notes: | |
| 1031 * (1) Serializes a recog in memory and puts the result in a buffer. | |
| 1032 * </pre> | |
| 1033 */ | |
| 1034 l_ok | |
| 1035 recogWriteMem(l_uint8 **pdata, | |
| 1036 size_t *psize, | |
| 1037 L_RECOG *recog) | |
| 1038 { | |
| 1039 l_int32 ret; | |
| 1040 FILE *fp; | |
| 1041 | |
| 1042 if (pdata) *pdata = NULL; | |
| 1043 if (psize) *psize = 0; | |
| 1044 if (!pdata) | |
| 1045 return ERROR_INT("&data not defined", __func__, 1); | |
| 1046 if (!psize) | |
| 1047 return ERROR_INT("&size not defined", __func__, 1); | |
| 1048 if (!recog) | |
| 1049 return ERROR_INT("recog not defined", __func__, 1); | |
| 1050 | |
| 1051 #if HAVE_FMEMOPEN | |
| 1052 if ((fp = open_memstream((char **)pdata, psize)) == NULL) | |
| 1053 return ERROR_INT("stream not opened", __func__, 1); | |
| 1054 ret = recogWriteStream(fp, recog); | |
| 1055 fputc('\0', fp); | |
| 1056 fclose(fp); | |
| 1057 if (*psize > 0) *psize = *psize - 1; | |
| 1058 #else | |
| 1059 L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__); | |
| 1060 #ifdef _WIN32 | |
| 1061 if ((fp = fopenWriteWinTempfile()) == NULL) | |
| 1062 return ERROR_INT("tmpfile stream not opened", __func__, 1); | |
| 1063 #else | |
| 1064 if ((fp = tmpfile()) == NULL) | |
| 1065 return ERROR_INT("tmpfile stream not opened", __func__, 1); | |
| 1066 #endif /* _WIN32 */ | |
| 1067 ret = recogWriteStream(fp, recog); | |
| 1068 rewind(fp); | |
| 1069 *pdata = l_binaryReadStream(fp, psize); | |
| 1070 fclose(fp); | |
| 1071 #endif /* HAVE_FMEMOPEN */ | |
| 1072 return ret; | |
| 1073 } | |
| 1074 | |
| 1075 | |
| 1076 /*! | |
| 1077 * \brief recogExtractPixa() | |
| 1078 * | |
| 1079 * \param[in] recog | |
| 1080 * \return pixa if OK, NULL on error | |
| 1081 * | |
| 1082 * <pre> | |
| 1083 * Notes: | |
| 1084 * (1) This generates a pixa of all the unscaled images in the | |
| 1085 * recognizer, where each one has its character class label in | |
| 1086 * the pix text field, by flattening pixaa_u to a pixa. | |
| 1087 * </pre> | |
| 1088 */ | |
| 1089 PIXA * | |
| 1090 recogExtractPixa(L_RECOG *recog) | |
| 1091 { | |
| 1092 if (!recog) | |
| 1093 return (PIXA *)ERROR_PTR("recog not defined", __func__, NULL); | |
| 1094 | |
| 1095 recogAddCharstrLabels(recog); | |
| 1096 return pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE); | |
| 1097 } | |
| 1098 | |
| 1099 | |
| 1100 /*! | |
| 1101 * \brief recogAddCharstrLabels() | |
| 1102 * | |
| 1103 * \param[in] recog | |
| 1104 * \return 0 if OK, 1 on error | |
| 1105 */ | |
| 1106 static l_int32 | |
| 1107 recogAddCharstrLabels(L_RECOG *recog) | |
| 1108 { | |
| 1109 char *text; | |
| 1110 l_int32 i, j, n1, n2; | |
| 1111 PIX *pix; | |
| 1112 PIXA *pixa; | |
| 1113 PIXAA *paa; | |
| 1114 | |
| 1115 if (!recog) | |
| 1116 return ERROR_INT("recog not defined", __func__, 1); | |
| 1117 | |
| 1118 /* Add the labels to each unscaled pix */ | |
| 1119 paa = recog->pixaa_u; | |
| 1120 n1 = pixaaGetCount(paa, NULL); | |
| 1121 for (i = 0; i < n1; i++) { | |
| 1122 pixa = pixaaGetPixa(paa, i, L_CLONE); | |
| 1123 text = sarrayGetString(recog->sa_text, i, L_NOCOPY); | |
| 1124 n2 = pixaGetCount(pixa); | |
| 1125 for (j = 0; j < n2; j++) { | |
| 1126 pix = pixaGetPix(pixa, j, L_CLONE); | |
| 1127 pixSetText(pix, text); | |
| 1128 pixDestroy(&pix); | |
| 1129 } | |
| 1130 pixaDestroy(&pixa); | |
| 1131 } | |
| 1132 | |
| 1133 return 0; | |
| 1134 } | |
| 1135 | |
| 1136 | |
| 1137 /*! | |
| 1138 * \brief recogAddAllSamples() | |
| 1139 * | |
| 1140 * \param[in] precog addr of recog | |
| 1141 * \param[in] paa pixaa from previously trained recog | |
| 1142 * \param[in] debug | |
| 1143 * \return 0 if OK, 1 on error | |
| 1144 * | |
| 1145 * <pre> | |
| 1146 * Notes: | |
| 1147 * (1) On error, the input recog is destroyed. | |
| 1148 * (2) This is used with the serialization routine recogRead(), | |
| 1149 * where each pixa in the pixaa represents a set of characters | |
| 1150 * in a different class. Before calling this function, we have | |
| 1151 * verified that the number of character classes, given by the | |
| 1152 * setsize field in %recog, equals the number of pixa in the paa. | |
| 1153 * The character labels for each set are in the sa_text field. | |
| 1154 * </pre> | |
| 1155 */ | |
| 1156 static l_int32 | |
| 1157 recogAddAllSamples(L_RECOG **precog, | |
| 1158 PIXAA *paa, | |
| 1159 l_int32 debug) | |
| 1160 { | |
| 1161 char *text; | |
| 1162 l_int32 i, j, nc, ns; | |
| 1163 PIX *pix; | |
| 1164 PIXA *pixa, *pixa1; | |
| 1165 L_RECOG *recog; | |
| 1166 | |
| 1167 if (!precog) | |
| 1168 return ERROR_INT("&recog not defined", __func__, 1); | |
| 1169 if ((recog = *precog) == NULL) | |
| 1170 return ERROR_INT("recog not defined", __func__, 1); | |
| 1171 if (!paa) { | |
| 1172 recogDestroy(&recog); | |
| 1173 *precog = NULL; | |
| 1174 return ERROR_INT("paa not defined", __func__, 1); | |
| 1175 } | |
| 1176 | |
| 1177 nc = pixaaGetCount(paa, NULL); | |
| 1178 for (i = 0; i < nc; i++) { | |
| 1179 pixa = pixaaGetPixa(paa, i, L_CLONE); | |
| 1180 ns = pixaGetCount(pixa); | |
| 1181 text = sarrayGetString(recog->sa_text, i, L_NOCOPY); | |
| 1182 pixa1 = pixaCreate(ns); | |
| 1183 pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT); | |
| 1184 for (j = 0; j < ns; j++) { | |
| 1185 pix = pixaGetPix(pixa, j, L_CLONE); | |
| 1186 if (debug) lept_stderr("pix[%d,%d]: text = %s\n", i, j, text); | |
| 1187 pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT); | |
| 1188 } | |
| 1189 pixaDestroy(&pixa); | |
| 1190 } | |
| 1191 | |
| 1192 recogTrainingFinished(&recog, 0, -1, -1.0); /* For second parameter, | |
| 1193 see comment in recogRead() */ | |
| 1194 if (!recog) | |
| 1195 return ERROR_INT("bad templates; recog destroyed", __func__, 1); | |
| 1196 return 0; | |
| 1197 } |
