Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/jbclass.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /* | |
| 28 * jbclass.c | |
| 29 * | |
| 30 * These are functions for unsupervised classification of | |
| 31 * collections of connected components -- either characters or | |
| 32 * words -- in binary images. They can be used as image | |
| 33 * processing steps in jbig2 compression. | |
| 34 * | |
| 35 * Initialization | |
| 36 * | |
| 37 * JBCLASSER *jbRankHausInit() [rank hausdorff encoder] | |
| 38 * JBCLASSER *jbCorrelationInit() [correlation encoder] | |
| 39 * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto] | |
| 40 * static JBCLASSER *jbCorrelationInitInternal() | |
| 41 * | |
| 42 * Classify the pages | |
| 43 * | |
| 44 * l_int32 jbAddPages() | |
| 45 * l_int32 jbAddPage() | |
| 46 * l_int32 jbAddPageComponents() | |
| 47 * | |
| 48 * Rank hausdorff classifier | |
| 49 * | |
| 50 * l_int32 jbClassifyRankHaus() | |
| 51 * l_int32 pixHaustest() | |
| 52 * l_int32 pixRankHaustest() | |
| 53 * | |
| 54 * Binary correlation classifier | |
| 55 * | |
| 56 * l_int32 jbClassifyCorrelation() | |
| 57 * | |
| 58 * Determine the image components we start with | |
| 59 * | |
| 60 * l_int32 jbGetComponents() | |
| 61 * l_int32 pixWordMaskByDilation() | |
| 62 * l_int32 pixWordBoxesByDilation() | |
| 63 * | |
| 64 * Build grayscale composites (templates) | |
| 65 * | |
| 66 * PIXA *jbAccumulateComposites | |
| 67 * PIXA *jbTemplatesFromComposites | |
| 68 * | |
| 69 * Utility functions for Classer | |
| 70 * | |
| 71 * JBCLASSER *jbClasserCreate() | |
| 72 * void jbClasserDestroy() | |
| 73 * | |
| 74 * Utility functions for Data | |
| 75 * | |
| 76 * JBDATA *jbDataSave() | |
| 77 * void jbDataDestroy() | |
| 78 * l_int32 jbDataWrite() | |
| 79 * JBDATA *jbDataRead() | |
| 80 * PIXA *jbDataRender() | |
| 81 * l_int32 jbGetULCorners() | |
| 82 * l_int32 jbGetLLCorners() | |
| 83 * | |
| 84 * Static helpers | |
| 85 * | |
| 86 * static JBFINDCTX *findSimilarSizedTemplatesInit() | |
| 87 * static l_int32 findSimilarSizedTemplatesNext() | |
| 88 * static void findSimilarSizedTemplatesDestroy() | |
| 89 * static l_int32 finalPositioningForAlignment() | |
| 90 * | |
| 91 * Note: this is NOT an implementation of the JPEG jbig2 | |
| 92 * proposed standard encoder, the specifications for which | |
| 93 * can be found at http://www.jpeg.org/jbigpt2.html. | |
| 94 * (See below for a full implementation.) | |
| 95 * It is an implementation of the lower-level part of an encoder that: | |
| 96 * | |
| 97 * (1) identifies connected components that are going to be used | |
| 98 * (2) puts them in similarity classes (this is an unsupervised | |
| 99 * classifier), and | |
| 100 * (3) stores the result in a simple file format (2 files, | |
| 101 * one for templates and one for page/coordinate/template-index | |
| 102 * quartets). | |
| 103 * | |
| 104 * An actual implementation of the official jbig2 encoder could | |
| 105 * start with parts (1) and (2), and would then compress the quartets | |
| 106 * according to the standards requirements (e.g., Huffman or | |
| 107 * arithmetic coding of coordinate differences and image templates). | |
| 108 * | |
| 109 * The low-level part of the encoder provided here has the | |
| 110 * following useful features: | |
| 111 * | |
| 112 * ~ It is accurate in the identification of templates | |
| 113 * and classes because it uses a windowed hausdorff | |
| 114 * distance metric. | |
| 115 * ~ It is accurate in the placement of the connected | |
| 116 * components, doing a two step process of first aligning | |
| 117 * the the centroids of the template with those of each instance, | |
| 118 * and then making a further correction of up to +- 1 pixel | |
| 119 * in each direction to best align the templates. | |
| 120 * ~ It is fast because it uses a morphologically based | |
| 121 * matching algorithm to implement the hausdorff criterion, | |
| 122 * and it selects the patterns that are possible matches | |
| 123 * based on their size. | |
| 124 * | |
| 125 * We provide two different matching functions, one using Hausdorff | |
| 126 * distance and one using a simple image correlation. | |
| 127 * The Hausdorff method sometimes produces better results for the | |
| 128 * same number of classes, because it gives a relatively small | |
| 129 * effective weight to foreground pixels near the boundary, | |
| 130 * and a relatively large weight to foreground pixels that are | |
| 131 * not near the boundary. By effectively ignoring these boundary | |
| 132 * pixels, Hausdorff weighting corresponds better to the expected | |
| 133 * probabilities of the pixel values in a scanned image, where the | |
| 134 * variations in instances of the same printed character are much | |
| 135 * more likely to be in pixels near the boundary. By contrast, | |
| 136 * the correlation method gives equal weight to all foreground pixels. | |
| 137 * | |
| 138 * For best results, use the correlation method. Correlation takes | |
| 139 * the number of fg pixels in the AND of instance and template, | |
| 140 * divided by the product of the number of fg pixels in instance | |
| 141 * and template. It compares this with a threshold that, in | |
| 142 * general, depends on the fractional coverage of the template. | |
| 143 * For heavy text, the threshold is raised above that for light | |
| 144 * text, By using both these parameters (basic threshold and | |
| 145 * adjustment factor for text weight), one has more flexibility | |
| 146 * and can arrive at the fewest substitution errors, although | |
| 147 * this comes at the price of more templates. | |
| 148 * | |
| 149 * The strict Hausdorff scoring is not a rank weighting, because a | |
| 150 * single pixel beyond the given distance will cause a match | |
| 151 * failure. A rank Hausdorff is more robust to non-boundary noise, | |
| 152 * but it is also more susceptible to confusing components that | |
| 153 * should be in different classes. For implementing a jbig2 | |
| 154 * application for visually lossless binary image compression, | |
| 155 * you have two choices: | |
| 156 * | |
| 157 * (1) use a 3x3 structuring element (size = 3) and a strict | |
| 158 * Hausdorff comparison (rank = 1.0 in the rank Hausdorff | |
| 159 * function). This will result in a minimal number of classes, | |
| 160 * but confusion of small characters, such as italic and | |
| 161 * non-italic lower-case 'o', can still occur. | |
| 162 * (2) use the correlation method with a threshold of 0.85 | |
| 163 * and a weighting factor of about 0.7. This will result in | |
| 164 * a larger number of classes, but should not be confused | |
| 165 * either by similar small characters or by extremely | |
| 166 * thick sans serif characters, such as in prog/cootoots.png. | |
| 167 * | |
| 168 * As mentioned above, if visual substitution errors must be | |
| 169 * avoided, you should use the correlation method. | |
| 170 * | |
| 171 * We provide executables that show how to do the encoding: | |
| 172 * prog/jbrankhaus.c | |
| 173 * prog/jbcorrelation.c | |
| 174 * | |
| 175 * The basic flow for correlation classification goes as follows, | |
| 176 * where specific choices have been made for parameters (Hausdorff | |
| 177 * is the same except for initialization): | |
| 178 * | |
| 179 * // Initialize and save data in the classer | |
| 180 * JBCLASSER *classer = | |
| 181 * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7); | |
| 182 * SARRAY *safiles = getSortedPathnamesInDirectory(directory, | |
| 183 * NULL, 0, 0); | |
| 184 * jbAddPages(classer, safiles); | |
| 185 * | |
| 186 * // Save the data in a data structure for serialization, | |
| 187 * // and write it into two files. | |
| 188 * JBDATA *data = jbDataSave(classer); | |
| 189 * jbDataWrite(rootname, data); | |
| 190 * | |
| 191 * // Reconstruct (render) the pages from the encoded data. | |
| 192 * PIXA *pixa = jbDataRender(data, FALSE); | |
| 193 * | |
| 194 * Adam Langley has built a jbig2 standards-compliant encoder, the | |
| 195 * first one to appear in open source. You can get this encoder at: | |
| 196 * http://www.imperialviolet.org/jbig2.html | |
| 197 * | |
| 198 * It uses arithmetic encoding throughout. It encodes binary images | |
| 199 * losslessly with a single arithmetic coding over the full image. | |
| 200 * It also does both lossy and lossless encoding from connected | |
| 201 * components, using leptonica to generate the templates representing | |
| 202 * each cluster. | |
| 203 */ | |
| 204 | |
| 205 #ifdef HAVE_CONFIG_H | |
| 206 #include <config_auto.h> | |
| 207 #endif /* HAVE_CONFIG_H */ | |
| 208 | |
| 209 #include <string.h> | |
| 210 #include <math.h> | |
| 211 #include "allheaders.h" | |
| 212 #include "array_internal.h" | |
| 213 | |
| 214 #define L_BUF_SIZE 512 | |
| 215 | |
| 216 /* For jbClassifyRankHaus(): size of border added around | |
| 217 * pix of each c.c., to allow further processing. This | |
| 218 * should be at least the sum of the MAX_DIFF_HEIGHT | |
| 219 * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */ | |
| 220 static const l_int32 JB_ADDED_PIXELS = 6; | |
| 221 | |
| 222 /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore(): | |
| 223 * choose these to be 2 or greater */ | |
| 224 static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */ | |
| 225 static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */ | |
| 226 | |
| 227 /* In initialization, you have the option to discard components | |
| 228 * (cc, characters or words) that have either width or height larger | |
| 229 * than a given size. This is convenient for jbDataSave(), because | |
| 230 * the components are placed onto a regular lattice with cell | |
| 231 * dimension equal to the maximum component size. The default | |
| 232 * values are given here. If you want to save all components, | |
| 233 * use a sufficiently large set of dimensions. */ | |
| 234 static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */ | |
| 235 static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */ | |
| 236 static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */ | |
| 237 static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */ | |
| 238 | |
| 239 /* This stores the state of a state machine which fetches | |
| 240 * similar sized templates */ | |
| 241 struct JbFindTemplatesState | |
| 242 { | |
| 243 JBCLASSER *classer; /* classer */ | |
| 244 l_int32 w; /* desired width */ | |
| 245 l_int32 h; /* desired height */ | |
| 246 l_int32 i; /* index into two_by_two step array */ | |
| 247 L_DNA *dna; /* current number array */ | |
| 248 l_int32 n; /* current element of dna */ | |
| 249 }; | |
| 250 typedef struct JbFindTemplatesState JBFINDCTX; | |
| 251 | |
| 252 /* Static initialization function */ | |
| 253 static JBCLASSER * jbCorrelationInitInternal(l_int32 components, | |
| 254 l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, | |
| 255 l_float32 weightfactor, l_int32 keep_components); | |
| 256 | |
| 257 /* Static helper functions */ | |
| 258 static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs); | |
| 259 static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context); | |
| 260 static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext); | |
| 261 static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, | |
| 262 l_int32 idelx, l_int32 idely, PIX *pixt, | |
| 263 l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy); | |
| 264 | |
| 265 #ifndef NO_CONSOLE_IO | |
| 266 #define DEBUG_CORRELATION_SCORE 0 | |
| 267 #endif /* ~NO_CONSOLE_IO */ | |
| 268 | |
| 269 /*----------------------------------------------------------------------* | |
| 270 * Initialization * | |
| 271 *----------------------------------------------------------------------*/ | |
| 272 /*! | |
| 273 * \brief jbRankHausInit() | |
| 274 * | |
| 275 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS | |
| 276 * \param[in] maxwidth of component; use 0 for default | |
| 277 * \param[in] maxheight of component; use 0 for default | |
| 278 * \param[in] size of square structuring element; 2, representing | |
| 279 * 2x2 sel, is necessary for reasonable accuracy of | |
| 280 * small components; combine this with rank ~ 0.97 | |
| 281 * to avoid undue class expansion | |
| 282 * \param[in] rank rank val of match, each way; in [0.5 - 1.0]; | |
| 283 * when using size = 2, 0.97 is a reasonable value | |
| 284 * \return jbclasser if OK; NULL on error | |
| 285 */ | |
| 286 JBCLASSER * | |
| 287 jbRankHausInit(l_int32 components, | |
| 288 l_int32 maxwidth, | |
| 289 l_int32 maxheight, | |
| 290 l_int32 size, | |
| 291 l_float32 rank) | |
| 292 { | |
| 293 JBCLASSER *classer; | |
| 294 | |
| 295 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && | |
| 296 components != JB_WORDS) | |
| 297 return (JBCLASSER *)ERROR_PTR("invalid components", __func__, NULL); | |
| 298 if (size < 1 || size > 10) | |
| 299 return (JBCLASSER *)ERROR_PTR("size not reasonable", __func__, NULL); | |
| 300 if (rank < 0.5 || rank > 1.0) | |
| 301 return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", __func__, NULL); | |
| 302 if (maxwidth == 0) { | |
| 303 if (components == JB_CONN_COMPS) | |
| 304 maxwidth = MAX_CONN_COMP_WIDTH; | |
| 305 else if (components == JB_CHARACTERS) | |
| 306 maxwidth = MAX_CHAR_COMP_WIDTH; | |
| 307 else /* JB_WORDS */ | |
| 308 maxwidth = MAX_WORD_COMP_WIDTH; | |
| 309 } | |
| 310 if (maxheight == 0) | |
| 311 maxheight = MAX_COMP_HEIGHT; | |
| 312 | |
| 313 if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL) | |
| 314 return (JBCLASSER *)ERROR_PTR("classer not made", __func__, NULL); | |
| 315 classer->maxwidth = maxwidth; | |
| 316 classer->maxheight = maxheight; | |
| 317 classer->sizehaus = size; | |
| 318 classer->rankhaus = rank; | |
| 319 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ | |
| 320 classer->keep_pixaa = 1; /* keep all components in pixaa */ | |
| 321 return classer; | |
| 322 } | |
| 323 | |
| 324 | |
| 325 /*! | |
| 326 * \brief jbCorrelationInit() | |
| 327 * | |
| 328 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS | |
| 329 * \param[in] maxwidth of component; use 0 for default | |
| 330 * \param[in] maxheight of component; use 0 for default | |
| 331 * \param[in] thresh value for correlation score: in [0.4 - 0.98] | |
| 332 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] | |
| 333 * \return jbclasser if OK; NULL on error | |
| 334 * | |
| 335 * <pre> | |
| 336 * Notes: | |
| 337 * (1) For scanned text, suggested input values are: | |
| 338 * thresh ~ [0.8 - 0.85] | |
| 339 * weightfactor ~ [0.5 - 0.6] | |
| 340 * (2) For electronically generated fonts (e.g., rasterized pdf), | |
| 341 * a very high thresh (e.g., 0.95) will not cause a significant | |
| 342 * increase in the number of classes. | |
| 343 * </pre> | |
| 344 */ | |
| 345 JBCLASSER * | |
| 346 jbCorrelationInit(l_int32 components, | |
| 347 l_int32 maxwidth, | |
| 348 l_int32 maxheight, | |
| 349 l_float32 thresh, | |
| 350 l_float32 weightfactor) | |
| 351 { | |
| 352 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, | |
| 353 weightfactor, 1); | |
| 354 } | |
| 355 | |
| 356 /*! | |
| 357 * \brief jbCorrelationInitWithoutComponents() | |
| 358 * | |
| 359 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS | |
| 360 * \param[in] maxwidth of component; use 0 for default | |
| 361 * \param[in] maxheight of component; use 0 for default | |
| 362 * \param[in] thresh value for correlation score: in [0.4 - 0.98] | |
| 363 * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] | |
| 364 * \return jbclasser if OK; NULL on error | |
| 365 * | |
| 366 * <pre> | |
| 367 * Notes: | |
| 368 * Acts the same as jbCorrelationInit(), but the resulting | |
| 369 * object doesn't keep a list of all the components. | |
| 370 * </pre> | |
| 371 */ | |
| 372 JBCLASSER * | |
| 373 jbCorrelationInitWithoutComponents(l_int32 components, | |
| 374 l_int32 maxwidth, | |
| 375 l_int32 maxheight, | |
| 376 l_float32 thresh, | |
| 377 l_float32 weightfactor) | |
| 378 { | |
| 379 return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, | |
| 380 weightfactor, 0); | |
| 381 } | |
| 382 | |
| 383 | |
| 384 static JBCLASSER * | |
| 385 jbCorrelationInitInternal(l_int32 components, | |
| 386 l_int32 maxwidth, | |
| 387 l_int32 maxheight, | |
| 388 l_float32 thresh, | |
| 389 l_float32 weightfactor, | |
| 390 l_int32 keep_components) | |
| 391 { | |
| 392 JBCLASSER *classer; | |
| 393 | |
| 394 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && | |
| 395 components != JB_WORDS) | |
| 396 return (JBCLASSER *)ERROR_PTR("invalid components", __func__, NULL); | |
| 397 if (thresh < 0.4 || thresh > 0.98) | |
| 398 return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]", | |
| 399 __func__, NULL); | |
| 400 if (weightfactor < 0.0 || weightfactor > 1.0) | |
| 401 return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]", | |
| 402 __func__, NULL); | |
| 403 if (maxwidth == 0) { | |
| 404 if (components == JB_CONN_COMPS) | |
| 405 maxwidth = MAX_CONN_COMP_WIDTH; | |
| 406 else if (components == JB_CHARACTERS) | |
| 407 maxwidth = MAX_CHAR_COMP_WIDTH; | |
| 408 else /* JB_WORDS */ | |
| 409 maxwidth = MAX_WORD_COMP_WIDTH; | |
| 410 } | |
| 411 if (maxheight == 0) | |
| 412 maxheight = MAX_COMP_HEIGHT; | |
| 413 | |
| 414 | |
| 415 if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL) | |
| 416 return (JBCLASSER *)ERROR_PTR("classer not made", __func__, NULL); | |
| 417 classer->maxwidth = maxwidth; | |
| 418 classer->maxheight = maxheight; | |
| 419 classer->thresh = thresh; | |
| 420 classer->weightfactor = weightfactor; | |
| 421 classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ | |
| 422 classer->keep_pixaa = keep_components; | |
| 423 return classer; | |
| 424 } | |
| 425 | |
| 426 | |
| 427 /*----------------------------------------------------------------------* | |
| 428 * Classify the pages * | |
| 429 *----------------------------------------------------------------------*/ | |
| 430 /*! | |
| 431 * \brief jbAddPages() | |
| 432 * | |
| 433 * \param[in] jbclasser | |
| 434 * \param[in] safiles of page image file names | |
| 435 * \return 0 if OK; 1 on error | |
| 436 * | |
| 437 * <pre> | |
| 438 * Notes: | |
| 439 * (1) jbclasser makes a copy of the array of file names. | |
| 440 * (2) The caller is still responsible for destroying the input array. | |
| 441 * </pre> | |
| 442 */ | |
| 443 l_ok | |
| 444 jbAddPages(JBCLASSER *classer, | |
| 445 SARRAY *safiles) | |
| 446 { | |
| 447 l_int32 i, nfiles; | |
| 448 char *fname; | |
| 449 PIX *pix; | |
| 450 | |
| 451 if (!classer) | |
| 452 return ERROR_INT("classer not defined", __func__, 1); | |
| 453 if (!safiles) | |
| 454 return ERROR_INT("safiles not defined", __func__, 1); | |
| 455 | |
| 456 classer->safiles = sarrayCopy(safiles); | |
| 457 nfiles = sarrayGetCount(safiles); | |
| 458 for (i = 0; i < nfiles; i++) { | |
| 459 fname = sarrayGetString(safiles, i, L_NOCOPY); | |
| 460 if ((pix = pixRead(fname)) == NULL) { | |
| 461 L_WARNING("image file %d not read\n", __func__, i); | |
| 462 continue; | |
| 463 } | |
| 464 if (pixGetDepth(pix) != 1) { | |
| 465 L_WARNING("image file %d not 1 bpp\n", __func__, i); | |
| 466 continue; | |
| 467 } | |
| 468 jbAddPage(classer, pix); | |
| 469 pixDestroy(&pix); | |
| 470 } | |
| 471 | |
| 472 return 0; | |
| 473 } | |
| 474 | |
| 475 | |
| 476 /*! | |
| 477 * \brief jbAddPage() | |
| 478 * | |
| 479 * \param[in] jbclasser | |
| 480 * \param[in] pixs input page | |
| 481 * \return 0 if OK; 1 on error | |
| 482 */ | |
| 483 l_ok | |
| 484 jbAddPage(JBCLASSER *classer, | |
| 485 PIX *pixs) | |
| 486 { | |
| 487 BOXA *boxas; | |
| 488 PIXA *pixas; | |
| 489 | |
| 490 if (!classer) | |
| 491 return ERROR_INT("classer not defined", __func__, 1); | |
| 492 if (!pixs || pixGetDepth(pixs) != 1) | |
| 493 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 494 | |
| 495 classer->w = pixGetWidth(pixs); | |
| 496 classer->h = pixGetHeight(pixs); | |
| 497 | |
| 498 /* Get the appropriate components and their bounding boxes */ | |
| 499 if (jbGetComponents(pixs, classer->components, classer->maxwidth, | |
| 500 classer->maxheight, &boxas, &pixas)) { | |
| 501 return ERROR_INT("components not made", __func__, 1); | |
| 502 } | |
| 503 | |
| 504 jbAddPageComponents(classer, pixs, boxas, pixas); | |
| 505 boxaDestroy(&boxas); | |
| 506 pixaDestroy(&pixas); | |
| 507 return 0; | |
| 508 } | |
| 509 | |
| 510 | |
| 511 /*! | |
| 512 * \brief jbAddPageComponents() | |
| 513 * | |
| 514 * \param[in] jbclasser | |
| 515 * \param[in] pixs input page | |
| 516 * \param[in] boxas b.b. of components for this page | |
| 517 * \param[in] pixas components for this page | |
| 518 * \return 0 if OK; 1 on error | |
| 519 * | |
| 520 * <pre> | |
| 521 * Notes: | |
| 522 * (1) If there are no components on the page, we don't require input | |
| 523 * of empty boxas or pixas, although that's the typical situation. | |
| 524 * </pre> | |
| 525 */ | |
| 526 l_ok | |
| 527 jbAddPageComponents(JBCLASSER *classer, | |
| 528 PIX *pixs, | |
| 529 BOXA *boxas, | |
| 530 PIXA *pixas) | |
| 531 { | |
| 532 l_int32 n; | |
| 533 | |
| 534 if (!classer) | |
| 535 return ERROR_INT("classer not defined", __func__, 1); | |
| 536 if (!pixs) | |
| 537 return ERROR_INT("pix not defined", __func__, 1); | |
| 538 | |
| 539 /* Test for no components on the current page. Always update the | |
| 540 * number of pages processed, even if nothing is on it. */ | |
| 541 if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) { | |
| 542 classer->npages++; | |
| 543 return 0; | |
| 544 } | |
| 545 | |
| 546 /* Get classes. For hausdorff, it uses a specified size of | |
| 547 * structuring element and specified rank. For correlation, | |
| 548 * it uses a specified threshold. */ | |
| 549 if (classer->method == JB_RANKHAUS) { | |
| 550 if (jbClassifyRankHaus(classer, boxas, pixas)) | |
| 551 return ERROR_INT("rankhaus classification failed", __func__, 1); | |
| 552 } else { /* classer->method == JB_CORRELATION */ | |
| 553 if (jbClassifyCorrelation(classer, boxas, pixas)) | |
| 554 return ERROR_INT("correlation classification failed", __func__, 1); | |
| 555 } | |
| 556 | |
| 557 /* Find the global UL corners, adjusted for each instance so | |
| 558 * that the class template and instance will have their | |
| 559 * centroids in the same place. Then the template can be | |
| 560 * used to replace the instance. */ | |
| 561 if (jbGetULCorners(classer, pixs, boxas)) | |
| 562 return ERROR_INT("UL corners not found", __func__, 1); | |
| 563 | |
| 564 /* Update total component counts and number of pages processed. */ | |
| 565 n = boxaGetCount(boxas); | |
| 566 classer->baseindex += n; | |
| 567 numaAddNumber(classer->nacomps, (l_float32)n); | |
| 568 classer->npages++; | |
| 569 return 0; | |
| 570 } | |
| 571 | |
| 572 | |
| 573 /*----------------------------------------------------------------------* | |
| 574 * Classification using windowed rank hausdorff metric * | |
| 575 *----------------------------------------------------------------------*/ | |
| 576 /*! | |
| 577 * \brief jbClassifyRankHaus() | |
| 578 * | |
| 579 * \param[in] jbclasser | |
| 580 * \param[in] boxa new components for classification | |
| 581 * \param[in] pixas new components for classification | |
| 582 * \return 0 if OK; 1 on error | |
| 583 */ | |
| 584 l_ok | |
| 585 jbClassifyRankHaus(JBCLASSER *classer, | |
| 586 BOXA *boxa, | |
| 587 PIXA *pixas) | |
| 588 { | |
| 589 l_int32 n, nt, i, wt, ht, iclass, size, found, testval; | |
| 590 l_int32 npages, area1, area3; | |
| 591 l_int32 *tab8; | |
| 592 l_float32 rank, x1, y1, x2, y2; | |
| 593 BOX *box; | |
| 594 NUMA *naclass, *napage; | |
| 595 NUMA *nafg; /* fg area of all instances */ | |
| 596 NUMA *nafgt; /* fg area of all templates */ | |
| 597 JBFINDCTX *findcontext; | |
| 598 L_DNAHASH *dahash; | |
| 599 PIX *pix, *pix1, *pix2, *pix3, *pix4; | |
| 600 PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd; | |
| 601 PIXAA *pixaa; | |
| 602 PTA *pta, *ptac, *ptact; | |
| 603 SEL *sel; | |
| 604 | |
| 605 if (!classer) | |
| 606 return ERROR_INT("classer not defined", __func__, 1); | |
| 607 if (!boxa) | |
| 608 return ERROR_INT("boxa not defined", __func__, 1); | |
| 609 if (!pixas) | |
| 610 return ERROR_INT("pixas not defined", __func__, 1); | |
| 611 if ((n = pixaGetCount(pixas)) == 0) | |
| 612 return ERROR_INT("pixas is empty", __func__, 1); | |
| 613 if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */ | |
| 614 return ERROR_INT("fg counting failed", __func__, 1); | |
| 615 | |
| 616 npages = classer->npages; | |
| 617 size = classer->sizehaus; | |
| 618 sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT); | |
| 619 | |
| 620 /* Generate the bordered pixa, with and without dilation. | |
| 621 * pixa1 and pixa2 contain all the input components. */ | |
| 622 pixa1 = pixaCreate(n); | |
| 623 pixa2 = pixaCreate(n); | |
| 624 for (i = 0; i < n; i++) { | |
| 625 pix = pixaGetPix(pixas, i, L_CLONE); | |
| 626 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, | |
| 627 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); | |
| 628 pix2 = pixDilate(NULL, pix1, sel); | |
| 629 pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */ | |
| 630 pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */ | |
| 631 pixDestroy(&pix); | |
| 632 } | |
| 633 | |
| 634 /* Get the centroids of all the bordered images. | |
| 635 * These are relative to the UL corner of each (bordered) pix. */ | |
| 636 pta = pixaCentroids(pixa1); /* centroids for this page; use here */ | |
| 637 ptac = classer->ptac; /* holds centroids of components up to this page */ | |
| 638 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ | |
| 639 ptact = classer->ptact; /* holds centroids of templates */ | |
| 640 | |
| 641 /* Use these to save the class and page of each component. */ | |
| 642 naclass = classer->naclass; | |
| 643 napage = classer->napage; | |
| 644 | |
| 645 /* Store the unbordered pix in a pixaa, in a hierarchical | |
| 646 * set of arrays. There is one pixa for each class, | |
| 647 * and the pix in each pixa are all the instances found | |
| 648 * of that class. This is actually more than one would need | |
| 649 * for a jbig2 encoder, but there are two reasons to keep | |
| 650 * them around: (1) the set of instances for each class | |
| 651 * can be used to make an improved binary (or, better, | |
| 652 * a grayscale) template, rather than simply using the first | |
| 653 * one in the set; (2) we can investigate the failures | |
| 654 * of the classifier. This pixaa grows as we process | |
| 655 * successive pages. */ | |
| 656 pixaa = classer->pixaa; | |
| 657 | |
| 658 /* arrays to store class exemplars (templates) */ | |
| 659 pixat = classer->pixat; /* un-dilated */ | |
| 660 pixatd = classer->pixatd; /* dilated */ | |
| 661 | |
| 662 /* Fill up the pixaa tree with the template exemplars as | |
| 663 * the first pix in each pixa. As we add each pix, | |
| 664 * we also add the associated box to the pixa. | |
| 665 * We also keep track of the centroid of each pix, | |
| 666 * and use the difference between centroids (of the | |
| 667 * pix with the exemplar we are checking it with) | |
| 668 * to align the two when checking that the Hausdorff | |
| 669 * distance does not exceed a threshold. | |
| 670 * The threshold is set by the Sel used for dilating. | |
| 671 * For example, a 3x3 brick, sel_3, corresponds to a | |
| 672 * Hausdorff distance of 1. In general, for an NxN brick, | |
| 673 * with N odd, corresponds to a Hausdorff distance of (N - 1)/2. | |
| 674 * It turns out that we actually need to use a sel of size 2x2 | |
| 675 * to avoid small bad components when there is a halftone image | |
| 676 * from which components can be chosen. | |
| 677 * The larger the Sel you use, the fewer the number of classes, | |
| 678 * and the greater the likelihood of putting semantically | |
| 679 * different objects in the same class. For simplicity, | |
| 680 * we do this separately for the case of rank == 1.0 (exact | |
| 681 * match within the Hausdorff distance) and rank < 1.0. */ | |
| 682 rank = classer->rankhaus; | |
| 683 dahash = classer->dahash; | |
| 684 if (rank == 1.0) { | |
| 685 for (i = 0; i < n; i++) { | |
| 686 pix1 = pixaGetPix(pixa1, i, L_CLONE); | |
| 687 pix2 = pixaGetPix(pixa2, i, L_CLONE); | |
| 688 ptaGetPt(pta, i, &x1, &y1); | |
| 689 nt = pixaGetCount(pixat); /* number of templates */ | |
| 690 found = FALSE; | |
| 691 findcontext = findSimilarSizedTemplatesInit(classer, pix1); | |
| 692 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { | |
| 693 /* Find score for this template */ | |
| 694 pix3 = pixaGetPix(pixat, iclass, L_CLONE); | |
| 695 pix4 = pixaGetPix(pixatd, iclass, L_CLONE); | |
| 696 ptaGetPt(ptact, iclass, &x2, &y2); | |
| 697 testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, | |
| 698 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT); | |
| 699 pixDestroy(&pix3); | |
| 700 pixDestroy(&pix4); | |
| 701 if (testval == 1) { | |
| 702 found = TRUE; | |
| 703 numaAddNumber(naclass, (l_float32)iclass); | |
| 704 numaAddNumber(napage, (l_float32)npages); | |
| 705 if (classer->keep_pixaa) { | |
| 706 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); | |
| 707 pix = pixaGetPix(pixas, i, L_CLONE); | |
| 708 pixaAddPix(pixa, pix, L_INSERT); | |
| 709 box = boxaGetBox(boxa, i, L_CLONE); | |
| 710 pixaAddBox(pixa, box, L_INSERT); | |
| 711 pixaDestroy(&pixa); | |
| 712 } | |
| 713 break; | |
| 714 } | |
| 715 } | |
| 716 findSimilarSizedTemplatesDestroy(&findcontext); | |
| 717 if (found == FALSE) { /* new class */ | |
| 718 numaAddNumber(naclass, (l_float32)nt); | |
| 719 numaAddNumber(napage, (l_float32)npages); | |
| 720 pixa = pixaCreate(0); | |
| 721 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ | |
| 722 pixaAddPix(pixa, pix, L_INSERT); | |
| 723 wt = pixGetWidth(pix); | |
| 724 ht = pixGetHeight(pix); | |
| 725 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); | |
| 726 box = boxaGetBox(boxa, i, L_CLONE); | |
| 727 pixaAddBox(pixa, box, L_INSERT); | |
| 728 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ | |
| 729 ptaAddPt(ptact, x1, y1); | |
| 730 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ | |
| 731 pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */ | |
| 732 } else { /* don't save them */ | |
| 733 pixDestroy(&pix1); | |
| 734 pixDestroy(&pix2); | |
| 735 } | |
| 736 } | |
| 737 } else { /* rank < 1.0 */ | |
| 738 nafgt = classer->nafgt; | |
| 739 tab8 = makePixelSumTab8(); | |
| 740 for (i = 0; i < n; i++) { /* all instances on this page */ | |
| 741 pix1 = pixaGetPix(pixa1, i, L_CLONE); | |
| 742 numaGetIValue(nafg, i, &area1); | |
| 743 pix2 = pixaGetPix(pixa2, i, L_CLONE); | |
| 744 ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */ | |
| 745 nt = pixaGetCount(pixat); /* number of templates */ | |
| 746 found = FALSE; | |
| 747 findcontext = findSimilarSizedTemplatesInit(classer, pix1); | |
| 748 while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { | |
| 749 /* Find score for this template */ | |
| 750 pix3 = pixaGetPix(pixat, iclass, L_CLONE); | |
| 751 numaGetIValue(nafgt, iclass, &area3); | |
| 752 pix4 = pixaGetPix(pixatd, iclass, L_CLONE); | |
| 753 ptaGetPt(ptact, iclass, &x2, &y2); | |
| 754 testval = pixRankHaustest(pix1, pix2, pix3, pix4, | |
| 755 x1 - x2, y1 - y2, | |
| 756 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, | |
| 757 area1, area3, rank, tab8); | |
| 758 pixDestroy(&pix3); | |
| 759 pixDestroy(&pix4); | |
| 760 if (testval == 1) { /* greedy match; take the first */ | |
| 761 found = TRUE; | |
| 762 numaAddNumber(naclass, (l_float32)iclass); | |
| 763 numaAddNumber(napage, (l_float32)npages); | |
| 764 if (classer->keep_pixaa) { | |
| 765 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); | |
| 766 pix = pixaGetPix(pixas, i, L_CLONE); | |
| 767 pixaAddPix(pixa, pix, L_INSERT); | |
| 768 box = boxaGetBox(boxa, i, L_CLONE); | |
| 769 pixaAddBox(pixa, box, L_INSERT); | |
| 770 pixaDestroy(&pixa); | |
| 771 } | |
| 772 break; | |
| 773 } | |
| 774 } | |
| 775 findSimilarSizedTemplatesDestroy(&findcontext); | |
| 776 if (found == FALSE) { /* new class */ | |
| 777 numaAddNumber(naclass, (l_float32)nt); | |
| 778 numaAddNumber(napage, (l_float32)npages); | |
| 779 pixa = pixaCreate(0); | |
| 780 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ | |
| 781 pixaAddPix(pixa, pix, L_INSERT); | |
| 782 wt = pixGetWidth(pix); | |
| 783 ht = pixGetHeight(pix); | |
| 784 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); | |
| 785 box = boxaGetBox(boxa, i, L_CLONE); | |
| 786 pixaAddBox(pixa, box, L_INSERT); | |
| 787 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ | |
| 788 ptaAddPt(ptact, x1, y1); | |
| 789 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ | |
| 790 pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */ | |
| 791 numaAddNumber(nafgt, (l_float32)area1); | |
| 792 } else { /* don't save them */ | |
| 793 pixDestroy(&pix1); | |
| 794 pixDestroy(&pix2); | |
| 795 } | |
| 796 } | |
| 797 LEPT_FREE(tab8); | |
| 798 } | |
| 799 classer->nclass = pixaGetCount(pixat); | |
| 800 | |
| 801 numaDestroy(&nafg); | |
| 802 ptaDestroy(&pta); | |
| 803 pixaDestroy(&pixa1); | |
| 804 pixaDestroy(&pixa2); | |
| 805 selDestroy(&sel); | |
| 806 return 0; | |
| 807 } | |
| 808 | |
| 809 | |
| 810 /*! | |
| 811 * \brief pixHaustest() | |
| 812 * | |
| 813 * \param[in] pix1 new pix, not dilated | |
| 814 * \param[in] pix2 new pix, dilated | |
| 815 * \param[in] pix3 exemplar pix, not dilated | |
| 816 * \param[in] pix4 exemplar pix, dilated | |
| 817 * \param[in] delx x comp of centroid difference | |
| 818 * \param[in] dely y comp of centroid difference | |
| 819 * \param[in] maxdiffw max width difference of pix1 and pix2 | |
| 820 * \param[in] maxdiffh max height difference of pix1 and pix2 | |
| 821 * \return 0 FALSE) if no match, 1 (TRUE if the new | |
| 822 * pix is in the same class as the exemplar. | |
| 823 * | |
| 824 * <pre> | |
| 825 * Notes: | |
| 826 * We check first that the two pix are roughly | |
| 827 * the same size. Only if they meet that criterion do | |
| 828 * we compare the bitmaps. The Hausdorff is a 2-way | |
| 829 * check. The centroid difference is used to align the two | |
| 830 * images to the nearest integer for each of the checks. | |
| 831 * These check that the dilated image of one contains | |
| 832 * ALL the pixels of the undilated image of the other. | |
| 833 * Checks are done in both direction. A single pixel not | |
| 834 * contained in either direction results in failure of the test. | |
| 835 * </pre> | |
| 836 */ | |
| 837 l_int32 | |
| 838 pixHaustest(PIX *pix1, | |
| 839 PIX *pix2, | |
| 840 PIX *pix3, | |
| 841 PIX *pix4, | |
| 842 l_float32 delx, /* x(1) - x(3) */ | |
| 843 l_float32 dely, /* y(1) - y(3) */ | |
| 844 l_int32 maxdiffw, | |
| 845 l_int32 maxdiffh) | |
| 846 { | |
| 847 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; | |
| 848 PIX *pixt; | |
| 849 | |
| 850 /* Eliminate possible matches based on size difference */ | |
| 851 wi = pixGetWidth(pix1); | |
| 852 hi = pixGetHeight(pix1); | |
| 853 wt = pixGetWidth(pix3); | |
| 854 ht = pixGetHeight(pix3); | |
| 855 delw = L_ABS(wi - wt); | |
| 856 if (delw > maxdiffw) | |
| 857 return FALSE; | |
| 858 delh = L_ABS(hi - ht); | |
| 859 if (delh > maxdiffh) | |
| 860 return FALSE; | |
| 861 | |
| 862 /* Round difference in centroid location to nearest integer; | |
| 863 * use this as a shift when doing the matching. */ | |
| 864 if (delx >= 0) | |
| 865 idelx = (l_int32)(delx + 0.5); | |
| 866 else | |
| 867 idelx = (l_int32)(delx - 0.5); | |
| 868 if (dely >= 0) | |
| 869 idely = (l_int32)(dely + 0.5); | |
| 870 else | |
| 871 idely = (l_int32)(dely - 0.5); | |
| 872 | |
| 873 /* Do 1-direction hausdorff, checking that every pixel in pix1 | |
| 874 * is within a dilation distance of some pixel in pix3. Namely, | |
| 875 * that pix4 entirely covers pix1: | |
| 876 * pixt = pixSubtract(NULL, pix1, pix4), including shift | |
| 877 * where pixt has no ON pixels. */ | |
| 878 pixt = pixCreateTemplate(pix1); | |
| 879 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); | |
| 880 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), | |
| 881 pix4, 0, 0); | |
| 882 pixZero(pixt, &boolmatch); | |
| 883 if (boolmatch == 0) { | |
| 884 pixDestroy(&pixt); | |
| 885 return FALSE; | |
| 886 } | |
| 887 | |
| 888 /* Do 1-direction hausdorff, checking that every pixel in pix3 | |
| 889 * is within a dilation distance of some pixel in pix1. Namely, | |
| 890 * that pix2 entirely covers pix3: | |
| 891 * pixSubtract(pixt, pix3, pix2), including shift | |
| 892 * where pixt has no ON pixels. */ | |
| 893 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); | |
| 894 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); | |
| 895 pixZero(pixt, &boolmatch); | |
| 896 pixDestroy(&pixt); | |
| 897 return boolmatch; | |
| 898 } | |
| 899 | |
| 900 | |
| 901 /*! | |
| 902 * \brief pixRankHaustest() | |
| 903 * | |
| 904 * \param[in] pix1 new pix, not dilated | |
| 905 * \param[in] pix2 new pix, dilated | |
| 906 * \param[in] pix3 exemplar pix, not dilated | |
| 907 * \param[in] pix4 exemplar pix, dilated | |
| 908 * \param[in] delx x comp of centroid difference | |
| 909 * \param[in] dely y comp of centroid difference | |
| 910 * \param[in] maxdiffw max width difference of pix1 and pix2 | |
| 911 * \param[in] maxdiffh max height difference of pix1 and pix2 | |
| 912 * \param[in] area1 fg pixels in pix1 | |
| 913 * \param[in] area3 fg pixels in pix3 | |
| 914 * \param[in] rank rank value of test, each way | |
| 915 * \param[in] tab8 table of pixel sums for byte | |
| 916 * \return 0 FALSE) if no match, 1 (TRUE if the new | |
| 917 * pix is in the same class as the exemplar. | |
| 918 * | |
| 919 * <pre> | |
| 920 * Notes: | |
| 921 * We check first that the two pix are roughly | |
| 922 * the same size. Only if they meet that criterion do | |
| 923 * we compare the bitmaps. We convert the rank value to | |
| 924 * a number of pixels by multiplying the rank fraction by the number | |
| 925 * of pixels in the undilated image. The Hausdorff is a 2-way | |
| 926 * check. The centroid difference is used to align the two | |
| 927 * images to the nearest integer for each of the checks. | |
| 928 * The rank hausdorff checks that the dilated image of one | |
| 929 * contains the rank fraction of the pixels of the undilated | |
| 930 * image of the other. Checks are done in both direction. | |
| 931 * Failure of the test in either direction results in failure | |
| 932 * of the test. | |
| 933 * </pre> | |
| 934 */ | |
| 935 l_int32 | |
| 936 pixRankHaustest(PIX *pix1, | |
| 937 PIX *pix2, | |
| 938 PIX *pix3, | |
| 939 PIX *pix4, | |
| 940 l_float32 delx, /* x(1) - x(3) */ | |
| 941 l_float32 dely, /* y(1) - y(3) */ | |
| 942 l_int32 maxdiffw, | |
| 943 l_int32 maxdiffh, | |
| 944 l_int32 area1, | |
| 945 l_int32 area3, | |
| 946 l_float32 rank, | |
| 947 l_int32 *tab8) | |
| 948 { | |
| 949 l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; | |
| 950 l_int32 thresh1, thresh3; | |
| 951 PIX *pixt; | |
| 952 | |
| 953 /* Eliminate possible matches based on size difference */ | |
| 954 wi = pixGetWidth(pix1); | |
| 955 hi = pixGetHeight(pix1); | |
| 956 wt = pixGetWidth(pix3); | |
| 957 ht = pixGetHeight(pix3); | |
| 958 delw = L_ABS(wi - wt); | |
| 959 if (delw > maxdiffw) | |
| 960 return FALSE; | |
| 961 delh = L_ABS(hi - ht); | |
| 962 if (delh > maxdiffh) | |
| 963 return FALSE; | |
| 964 | |
| 965 /* Upper bounds in remaining pixels for allowable match */ | |
| 966 thresh1 = (l_int32)(area1 * (1. - rank) + 0.5); | |
| 967 thresh3 = (l_int32)(area3 * (1. - rank) + 0.5); | |
| 968 | |
| 969 /* Round difference in centroid location to nearest integer; | |
| 970 * use this as a shift when doing the matching. */ | |
| 971 if (delx >= 0) | |
| 972 idelx = (l_int32)(delx + 0.5); | |
| 973 else | |
| 974 idelx = (l_int32)(delx - 0.5); | |
| 975 if (dely >= 0) | |
| 976 idely = (l_int32)(dely + 0.5); | |
| 977 else | |
| 978 idely = (l_int32)(dely - 0.5); | |
| 979 | |
| 980 /* Do 1-direction hausdorff, checking that every pixel in pix1 | |
| 981 * is within a dilation distance of some pixel in pix3. Namely, | |
| 982 * that pix4 entirely covers pix1: | |
| 983 * pixt = pixSubtract(NULL, pix1, pix4), including shift | |
| 984 * where pixt has no ON pixels. */ | |
| 985 pixt = pixCreateTemplate(pix1); | |
| 986 pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); | |
| 987 pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), | |
| 988 pix4, 0, 0); | |
| 989 pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8); | |
| 990 if (boolmatch == 1) { /* above thresh1 */ | |
| 991 pixDestroy(&pixt); | |
| 992 return FALSE; | |
| 993 } | |
| 994 | |
| 995 /* Do 1-direction hausdorff, checking that every pixel in pix3 | |
| 996 * is within a dilation distance of some pixel in pix1. Namely, | |
| 997 * that pix2 entirely covers pix3: | |
| 998 * pixSubtract(pixt, pix3, pix2), including shift | |
| 999 * where pixt has no ON pixels. */ | |
| 1000 pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); | |
| 1001 pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); | |
| 1002 pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8); | |
| 1003 pixDestroy(&pixt); | |
| 1004 if (boolmatch == 1) /* above thresh3 */ | |
| 1005 return FALSE; | |
| 1006 else | |
| 1007 return TRUE; | |
| 1008 } | |
| 1009 | |
| 1010 | |
| 1011 /*----------------------------------------------------------------------* | |
| 1012 * Classification using windowed correlation score * | |
| 1013 *----------------------------------------------------------------------*/ | |
| 1014 /*! | |
| 1015 * \brief jbClassifyCorrelation() | |
| 1016 * | |
| 1017 * \param[in] jbclasser | |
| 1018 * \param[in] boxa new components for classification | |
| 1019 * \param[in] pixas new components for classification | |
| 1020 * \return 0 if OK; 1 on error | |
| 1021 */ | |
| 1022 l_ok | |
| 1023 jbClassifyCorrelation(JBCLASSER *classer, | |
| 1024 BOXA *boxa, | |
| 1025 PIXA *pixas) | |
| 1026 { | |
| 1027 l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages, | |
| 1028 overthreshold; | |
| 1029 l_int32 *sumtab, *centtab; | |
| 1030 l_uint32 *row, word; | |
| 1031 l_float32 x1, y1, x2, y2, xsum, ysum; | |
| 1032 l_float32 thresh, weight, threshold; | |
| 1033 BOX *box; | |
| 1034 NUMA *naclass, *napage; | |
| 1035 NUMA *nafgt; /* fg area of all templates */ | |
| 1036 NUMA *naarea; /* w * h area of all templates */ | |
| 1037 JBFINDCTX *findcontext; | |
| 1038 L_DNAHASH *dahash; | |
| 1039 PIX *pix, *pix1, *pix2; | |
| 1040 PIXA *pixa, *pixa1, *pixat; | |
| 1041 PIXAA *pixaa; | |
| 1042 PTA *pta, *ptac, *ptact; | |
| 1043 l_int32 *pixcts; /* pixel counts of each pixa */ | |
| 1044 l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */ | |
| 1045 l_int32 x, y, rowcount, downcount, wpl; | |
| 1046 l_uint8 byte; | |
| 1047 | |
| 1048 if (!classer) | |
| 1049 return ERROR_INT("classer not found", __func__, 1); | |
| 1050 if (!boxa) | |
| 1051 return ERROR_INT("boxa not found", __func__, 1); | |
| 1052 if (!pixas) | |
| 1053 return ERROR_INT("pixas not found", __func__, 1); | |
| 1054 | |
| 1055 npages = classer->npages; | |
| 1056 | |
| 1057 /* Generate the bordered pixa, which contains all the the | |
| 1058 * input components. This will not be saved. */ | |
| 1059 if ((n = pixaGetCount(pixas)) == 0) { | |
| 1060 L_WARNING("pixas is empty\n", __func__); | |
| 1061 return 0; | |
| 1062 } | |
| 1063 pixa1 = pixaCreate(n); | |
| 1064 for (i = 0; i < n; i++) { | |
| 1065 pix = pixaGetPix(pixas, i, L_CLONE); | |
| 1066 pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, | |
| 1067 JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); | |
| 1068 pixaAddPix(pixa1, pix1, L_INSERT); | |
| 1069 pixDestroy(&pix); | |
| 1070 } | |
| 1071 | |
| 1072 /* Use these to save the class and page of each component. */ | |
| 1073 naclass = classer->naclass; | |
| 1074 napage = classer->napage; | |
| 1075 | |
| 1076 /* Get the number of fg pixels in each component. */ | |
| 1077 nafgt = classer->nafgt; /* holds fg areas of the templates */ | |
| 1078 sumtab = makePixelSumTab8(); | |
| 1079 | |
| 1080 pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts)); | |
| 1081 pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts)); | |
| 1082 centtab = makePixelCentroidTab8(); | |
| 1083 | |
| 1084 /* Count the "1" pixels in each row of the pix in pixa1; this | |
| 1085 * allows pixCorrelationScoreThresholded to abort early if a match | |
| 1086 * is impossible. This loop merges three calculations: the total | |
| 1087 * number of "1" pixels, the number of "1" pixels in each row, and | |
| 1088 * the centroid. The centroids are relative to the UL corner of | |
| 1089 * each (bordered) pix. The pixrowcts[i][y] are the total number | |
| 1090 * of fg pixels in pixa[i] below row y. */ | |
| 1091 pta = ptaCreate(n); | |
| 1092 for (i = 0; i < n; i++) { | |
| 1093 pix = pixaGetPix(pixa1, i, L_CLONE); | |
| 1094 pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix), | |
| 1095 sizeof(**pixrowcts)); | |
| 1096 xsum = 0; | |
| 1097 ysum = 0; | |
| 1098 wpl = pixGetWpl(pix); | |
| 1099 row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl; | |
| 1100 downcount = 0; | |
| 1101 for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) { | |
| 1102 pixrowcts[i][y] = downcount; | |
| 1103 rowcount = 0; | |
| 1104 for (x = 0; x < wpl; x++) { | |
| 1105 word = row[x]; | |
| 1106 byte = word & 0xff; | |
| 1107 rowcount += sumtab[byte]; | |
| 1108 xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte]; | |
| 1109 byte = (word >> 8) & 0xff; | |
| 1110 rowcount += sumtab[byte]; | |
| 1111 xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte]; | |
| 1112 byte = (word >> 16) & 0xff; | |
| 1113 rowcount += sumtab[byte]; | |
| 1114 xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte]; | |
| 1115 byte = (word >> 24) & 0xff; | |
| 1116 rowcount += sumtab[byte]; | |
| 1117 xsum += centtab[byte] + x * 32 * sumtab[byte]; | |
| 1118 } | |
| 1119 downcount += rowcount; | |
| 1120 ysum += rowcount * y; | |
| 1121 } | |
| 1122 pixcts[i] = downcount; | |
| 1123 if (downcount > 0) { | |
| 1124 ptaAddPt(pta, | |
| 1125 xsum / (l_float32)downcount, ysum / (l_float32)downcount); | |
| 1126 } else { /* no pixels; shouldn't happen */ | |
| 1127 L_ERROR("downcount == 0 !\n", __func__); | |
| 1128 ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2); | |
| 1129 } | |
| 1130 pixDestroy(&pix); | |
| 1131 } | |
| 1132 | |
| 1133 ptac = classer->ptac; /* holds centroids of components up to this page */ | |
| 1134 ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ | |
| 1135 ptact = classer->ptact; /* holds centroids of templates */ | |
| 1136 | |
| 1137 /* Store the unbordered pix in a pixaa, in a hierarchical | |
| 1138 * set of arrays. There is one pixa for each class, | |
| 1139 * and the pix in each pixa are all the instances found | |
| 1140 * of that class. This is actually more than one would need | |
| 1141 * for a jbig2 encoder, but there are two reasons to keep | |
| 1142 * them around: (1) the set of instances for each class | |
| 1143 * can be used to make an improved binary (or, better, | |
| 1144 * a grayscale) template, rather than simply using the first | |
| 1145 * one in the set; (2) we can investigate the failures | |
| 1146 * of the classifier. This pixaa grows as we process | |
| 1147 * successive pages. */ | |
| 1148 pixaa = classer->pixaa; | |
| 1149 | |
| 1150 /* Array to store class exemplars */ | |
| 1151 pixat = classer->pixat; | |
| 1152 | |
| 1153 /* Fill up the pixaa tree with the template exemplars as | |
| 1154 * the first pix in each pixa. As we add each pix, | |
| 1155 * we also add the associated box to the pixa. | |
| 1156 * We also keep track of the centroid of each pix, | |
| 1157 * and use the difference between centroids (of the | |
| 1158 * pix with the exemplar we are checking it with) | |
| 1159 * to align the two when checking that the correlation | |
| 1160 * score exceeds a threshold. The correlation score | |
| 1161 * is given by the square of the area of the AND | |
| 1162 * between aligned instance and template, divided by | |
| 1163 * the product of areas of each image. For identical | |
| 1164 * template and instance, the score is 1.0. | |
| 1165 * If the threshold is too small, non-equivalent instances | |
| 1166 * will be placed in the same class; if too large, there will | |
| 1167 * be an unnecessary division of classes representing the | |
| 1168 * same character. The weightfactor adds in some of the | |
| 1169 * difference (1.0 - thresh), depending on the heaviness | |
| 1170 * of the template (measured as the fraction of fg pixels). */ | |
| 1171 thresh = classer->thresh; | |
| 1172 weight = classer->weightfactor; | |
| 1173 naarea = classer->naarea; | |
| 1174 dahash = classer->dahash; | |
| 1175 for (i = 0; i < n; i++) { | |
| 1176 pix1 = pixaGetPix(pixa1, i, L_CLONE); | |
| 1177 area1 = pixcts[i]; | |
| 1178 ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */ | |
| 1179 nt = pixaGetCount(pixat); | |
| 1180 found = FALSE; | |
| 1181 findcontext = findSimilarSizedTemplatesInit(classer, pix1); | |
| 1182 while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { | |
| 1183 /* Get the template */ | |
| 1184 pix2 = pixaGetPix(pixat, iclass, L_CLONE); | |
| 1185 numaGetIValue(nafgt, iclass, &area2); | |
| 1186 ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */ | |
| 1187 | |
| 1188 /* Find threshold for this template */ | |
| 1189 if (weight > 0.0) { | |
| 1190 numaGetIValue(naarea, iclass, &area); | |
| 1191 threshold = thresh + (1.f - thresh) * weight * area2 / area; | |
| 1192 } else { | |
| 1193 threshold = thresh; | |
| 1194 } | |
| 1195 | |
| 1196 /* Find score for this template */ | |
| 1197 overthreshold = pixCorrelationScoreThresholded(pix1, pix2, | |
| 1198 area1, area2, x1 - x2, y1 - y2, | |
| 1199 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, | |
| 1200 sumtab, pixrowcts[i], threshold); | |
| 1201 #if DEBUG_CORRELATION_SCORE | |
| 1202 { | |
| 1203 l_float32 score, testscore; | |
| 1204 l_int32 count, testcount; | |
| 1205 pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2, | |
| 1206 MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, | |
| 1207 sumtab, &score); | |
| 1208 | |
| 1209 pixCorrelationScoreSimple(pix1, pix2, area1, area2, | |
| 1210 x1 - x2, y1 - y2, MAX_DIFF_WIDTH, | |
| 1211 MAX_DIFF_HEIGHT, sumtab, &testscore); | |
| 1212 count = (l_int32)rint(sqrt(score * area1 * area2)); | |
| 1213 testcount = (l_int32)rint(sqrt(testscore * area1 * area2)); | |
| 1214 if ((score >= threshold) != (testscore >= threshold)) { | |
| 1215 lept_stderr("Correlation score mismatch: " | |
| 1216 "%d(%g,%d) vs %d(%g,%d) (%g)\n", | |
| 1217 count, score, score >= threshold, | |
| 1218 testcount, testscore, testscore >= threshold, | |
| 1219 score - testscore); | |
| 1220 } | |
| 1221 | |
| 1222 if ((score >= threshold) != overthreshold) { | |
| 1223 lept_stderr("Mismatch between correlation/threshold " | |
| 1224 "comparison: %g(%g,%d) >= %g(%g) vs %s\n", | |
| 1225 score, score*area1*area2, count, threshold, | |
| 1226 threshold*area1*area2, | |
| 1227 (overthreshold ? "true" : "false")); | |
| 1228 } | |
| 1229 } | |
| 1230 #endif /* DEBUG_CORRELATION_SCORE */ | |
| 1231 pixDestroy(&pix2); | |
| 1232 | |
| 1233 if (overthreshold) { /* greedy match */ | |
| 1234 found = TRUE; | |
| 1235 numaAddNumber(naclass, (l_float32)iclass); | |
| 1236 numaAddNumber(napage, (l_float32)npages); | |
| 1237 if (classer->keep_pixaa) { | |
| 1238 /* We are keeping a record of all components */ | |
| 1239 pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); | |
| 1240 pix = pixaGetPix(pixas, i, L_CLONE); | |
| 1241 pixaAddPix(pixa, pix, L_INSERT); | |
| 1242 box = boxaGetBox(boxa, i, L_CLONE); | |
| 1243 pixaAddBox(pixa, box, L_INSERT); | |
| 1244 pixaDestroy(&pixa); | |
| 1245 } | |
| 1246 break; | |
| 1247 } | |
| 1248 } | |
| 1249 findSimilarSizedTemplatesDestroy(&findcontext); | |
| 1250 if (found == FALSE) { /* new class */ | |
| 1251 numaAddNumber(naclass, (l_float32)nt); | |
| 1252 numaAddNumber(napage, (l_float32)npages); | |
| 1253 pixa = pixaCreate(0); | |
| 1254 pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ | |
| 1255 pixaAddPix(pixa, pix, L_INSERT); | |
| 1256 wt = pixGetWidth(pix); | |
| 1257 ht = pixGetHeight(pix); | |
| 1258 l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); | |
| 1259 box = boxaGetBox(boxa, i, L_CLONE); | |
| 1260 pixaAddBox(pixa, box, L_INSERT); | |
| 1261 pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ | |
| 1262 ptaAddPt(ptact, x1, y1); | |
| 1263 numaAddNumber(nafgt, (l_float32)area1); | |
| 1264 pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ | |
| 1265 area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) * | |
| 1266 (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS); | |
| 1267 numaAddNumber(naarea, (l_float32)area); | |
| 1268 } else { /* don't save it */ | |
| 1269 pixDestroy(&pix1); | |
| 1270 } | |
| 1271 } | |
| 1272 classer->nclass = pixaGetCount(pixat); | |
| 1273 | |
| 1274 LEPT_FREE(pixcts); | |
| 1275 LEPT_FREE(centtab); | |
| 1276 for (i = 0; i < n; i++) { | |
| 1277 LEPT_FREE(pixrowcts[i]); | |
| 1278 } | |
| 1279 LEPT_FREE(pixrowcts); | |
| 1280 | |
| 1281 LEPT_FREE(sumtab); | |
| 1282 ptaDestroy(&pta); | |
| 1283 pixaDestroy(&pixa1); | |
| 1284 return 0; | |
| 1285 } | |
| 1286 | |
| 1287 | |
| 1288 /*----------------------------------------------------------------------* | |
| 1289 * Determine the image components we start with * | |
| 1290 *----------------------------------------------------------------------*/ | |
| 1291 /*! | |
| 1292 * \brief jbGetComponents() | |
| 1293 * | |
| 1294 * \param[in] pixs 1 bpp | |
| 1295 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS | |
| 1296 * \param[in] maxwidth of saved components; larger are discarded | |
| 1297 * \param[in] maxheight of saved components; larger are discarded | |
| 1298 * \param[out] ppboxa b.b. of component items | |
| 1299 * \param[out] pppixa component items | |
| 1300 * \return 0 if OK, 1 on error | |
| 1301 */ | |
| 1302 l_ok | |
| 1303 jbGetComponents(PIX *pixs, | |
| 1304 l_int32 components, | |
| 1305 l_int32 maxwidth, | |
| 1306 l_int32 maxheight, | |
| 1307 BOXA **pboxad, | |
| 1308 PIXA **ppixad) | |
| 1309 { | |
| 1310 l_int32 empty, res, redfactor; | |
| 1311 BOXA *boxa; | |
| 1312 PIX *pix1, *pix2, *pix3; | |
| 1313 PIXA *pixa, *pixat; | |
| 1314 | |
| 1315 if (!pboxad) | |
| 1316 return ERROR_INT("&boxad not defined", __func__, 1); | |
| 1317 *pboxad = NULL; | |
| 1318 if (!ppixad) | |
| 1319 return ERROR_INT("&pixad not defined", __func__, 1); | |
| 1320 *ppixad = NULL; | |
| 1321 if (!pixs) | |
| 1322 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1323 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && | |
| 1324 components != JB_WORDS) | |
| 1325 return ERROR_INT("invalid components", __func__, 1); | |
| 1326 | |
| 1327 pixZero(pixs, &empty); | |
| 1328 if (empty) { | |
| 1329 *pboxad = boxaCreate(0); | |
| 1330 *ppixad = pixaCreate(0); | |
| 1331 return 0; | |
| 1332 } | |
| 1333 | |
| 1334 /* If required, preprocess input pixs. The method for both | |
| 1335 * characters and words is to generate a connected component | |
| 1336 * mask over the units that we want to aggregrate, which are, | |
| 1337 * in general, sets of related connected components in pixs. | |
| 1338 * For characters, we want to include the dots with | |
| 1339 * 'i', 'j' and '!', so we do a small vertical closing to | |
| 1340 * generate the mask. For words, we make a mask over all | |
| 1341 * characters in each word. This is a bit more tricky, because | |
| 1342 * the spacing between words is difficult to predict a priori, | |
| 1343 * and words can be typeset with variable spacing that can | |
| 1344 * in some cases be barely larger than the space between | |
| 1345 * characters. The first step is to generate the mask and | |
| 1346 * identify each of its connected components. */ | |
| 1347 if (components == JB_CONN_COMPS) { /* no preprocessing */ | |
| 1348 boxa = pixConnComp(pixs, &pixa, 8); | |
| 1349 } else if (components == JB_CHARACTERS) { | |
| 1350 pix1 = pixMorphSequence(pixs, "c1.6", 0); | |
| 1351 boxa = pixConnComp(pix1, &pixat, 8); | |
| 1352 pixa = pixaClipToPix(pixat, pixs); | |
| 1353 pixDestroy(&pix1); | |
| 1354 pixaDestroy(&pixat); | |
| 1355 } else { /* components == JB_WORDS */ | |
| 1356 | |
| 1357 /* Do the operations at about 150 ppi resolution. | |
| 1358 * It is much faster at 75 ppi, but the results are | |
| 1359 * more accurate at 150 ppi. This will segment the | |
| 1360 * words in body text. It can be expected that relatively | |
| 1361 * infrequent words in a larger font will be split. */ | |
| 1362 res = pixGetXRes(pixs); | |
| 1363 if (res <= 200) { | |
| 1364 redfactor = 1; | |
| 1365 pix1 = pixClone(pixs); | |
| 1366 } else if (res <= 400) { | |
| 1367 redfactor = 2; | |
| 1368 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); | |
| 1369 } else { | |
| 1370 redfactor = 4; | |
| 1371 pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); | |
| 1372 } | |
| 1373 | |
| 1374 /* Estimate the word mask, at approximately 150 ppi. | |
| 1375 * This has both very large and very small components left in. */ | |
| 1376 pixWordMaskByDilation(pix1, &pix2, NULL, NULL); | |
| 1377 | |
| 1378 /* Expand the optimally dilated word mask to full res. */ | |
| 1379 pix3 = pixExpandReplicate(pix2, redfactor); | |
| 1380 | |
| 1381 /* Pull out the pixels in pixs corresponding to the mask | |
| 1382 * components in pix3. Note that above we used threshold | |
| 1383 * levels in the reduction of 1 to insure that the resulting | |
| 1384 * mask fully covers the input pixs. The downside of using | |
| 1385 * a threshold of 1 is that very close characters from adjacent | |
| 1386 * lines can be joined. But with a level of 2 or greater, | |
| 1387 * it is necessary to use a seedfill, followed by a pixOr(): | |
| 1388 * pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8); | |
| 1389 * pixOr(pix3, pix3, pixt4); | |
| 1390 * to insure that the mask coverage is complete over pixs. */ | |
| 1391 boxa = pixConnComp(pix3, &pixat, 4); | |
| 1392 pixa = pixaClipToPix(pixat, pixs); | |
| 1393 pixaDestroy(&pixat); | |
| 1394 pixDestroy(&pix1); | |
| 1395 pixDestroy(&pix2); | |
| 1396 pixDestroy(&pix3); | |
| 1397 } | |
| 1398 | |
| 1399 /* Remove large components, and save the results. */ | |
| 1400 *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH, | |
| 1401 L_SELECT_IF_LTE, NULL); | |
| 1402 *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH, | |
| 1403 L_SELECT_IF_LTE, NULL); | |
| 1404 pixaDestroy(&pixa); | |
| 1405 boxaDestroy(&boxa); | |
| 1406 | |
| 1407 return 0; | |
| 1408 } | |
| 1409 | |
| 1410 | |
| 1411 /*! | |
| 1412 * \brief pixWordMaskByDilation() | |
| 1413 * | |
| 1414 * \param[in] pixs 1 bpp; typ. at 75 to 150 ppi | |
| 1415 * \param[out] pmask [optional] dilated word mask | |
| 1416 * \param[out] psize [optional] size of good horizontal dilation | |
| 1417 * \param[out] pixadb [optional] debug: pixa of intermediate steps | |
| 1418 * \return 0 if OK, 1 on error | |
| 1419 * | |
| 1420 * <pre> | |
| 1421 * Notes: | |
| 1422 * (1) This gives an estimate of the word masks. See | |
| 1423 * pixWordBoxesByDilation() for further filtering of the word boxes. | |
| 1424 * (2) The resolution should be between 75 and 150 ppi, and the optimal | |
| 1425 * dilation will be between 3 and 10. | |
| 1426 * (3) A good size for dilating to get word masks is optionally returned. | |
| 1427 * (4) Typically, the number of c.c. reduced with each successive | |
| 1428 * dilation (stored in nadiff) decreases quickly to a minimum | |
| 1429 * (where the characters in a word are joined), and then | |
| 1430 * increases again as the smaller number of words are joined. | |
| 1431 * For the typical case, you can then look for this minimum | |
| 1432 * and dilate to get the word mask. However, there are many | |
| 1433 * cases where the function is not so simple. For example, if the | |
| 1434 * pix has been upscaled 2x, the nadiff function oscillates, with | |
| 1435 * every other value being zero! And for some images it tails | |
| 1436 * off without a clear minimum to indicate where to break. | |
| 1437 * So a more simple and robust method is to find the dilation | |
| 1438 * where the initial number of c.c. has been reduced by some | |
| 1439 * fraction (we use a 70% reduction). | |
| 1440 * </pre> | |
| 1441 */ | |
| 1442 l_ok | |
| 1443 pixWordMaskByDilation(PIX *pixs, | |
| 1444 PIX **ppixm, | |
| 1445 l_int32 *psize, | |
| 1446 PIXA *pixadb) | |
| 1447 { | |
| 1448 l_int32 i, n, ndil, maxdiff, diff, ibest; | |
| 1449 l_int32 check, count, total, xres; | |
| 1450 l_int32 ncc[13]; /* max dilation + 1 */ | |
| 1451 l_int32 *diffa; | |
| 1452 BOXA *boxa; | |
| 1453 NUMA *nacc, *nadiff; | |
| 1454 PIX *pix1, *pix2; | |
| 1455 | |
| 1456 if (ppixm) *ppixm = NULL; | |
| 1457 if (psize) *psize = 0; | |
| 1458 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1459 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); | |
| 1460 if (!ppixm && !psize) | |
| 1461 return ERROR_INT("no output requested", __func__, 1); | |
| 1462 | |
| 1463 /* Find a good dilation to create the word mask, by successively | |
| 1464 * increasing dilation size and counting the connected components. */ | |
| 1465 pix1 = pixCopy(NULL, pixs); | |
| 1466 ndil = 12; /* appropriate for 75 to 150 ppi */ | |
| 1467 nacc = numaCreate(ndil + 1); | |
| 1468 nadiff = numaCreate(ndil + 1); | |
| 1469 for (i = 0; i <= ndil; i++) { | |
| 1470 if (i == 0) /* first one not dilated */ | |
| 1471 pix2 = pixCopy(NULL, pix1); | |
| 1472 else /* successive dilation by sel_2h */ | |
| 1473 pix2 = pixMorphSequence(pix1, "d2.1", 0); | |
| 1474 boxa = pixConnCompBB(pix2, 4); | |
| 1475 ncc[i] = boxaGetCount(boxa); | |
| 1476 numaAddNumber(nacc, (l_float32)ncc[i]); | |
| 1477 if (i == 0) total = ncc[0]; | |
| 1478 if (i > 0) { | |
| 1479 diff = ncc[i - 1] - ncc[i]; | |
| 1480 numaAddNumber(nadiff, (l_float32)diff); | |
| 1481 } | |
| 1482 pixDestroy(&pix1); | |
| 1483 pix1 = pix2; | |
| 1484 boxaDestroy(&boxa); | |
| 1485 } | |
| 1486 pixDestroy(&pix1); | |
| 1487 | |
| 1488 /* Find the dilation at which the c.c. count has reduced | |
| 1489 * to 30% of the initial value. Although 30% seems high, | |
| 1490 * it seems better to use this but add one to ibest. */ | |
| 1491 diffa = numaGetIArray(nadiff); | |
| 1492 n = numaGetCount(nadiff); | |
| 1493 maxdiff = 0; | |
| 1494 check = TRUE; | |
| 1495 ibest = 2; | |
| 1496 for (i = 1; i < n; i++) { | |
| 1497 numaGetIValue(nacc, i, &count); | |
| 1498 if (check && count < 0.3 * total) { | |
| 1499 ibest = i + 1; | |
| 1500 check = FALSE; | |
| 1501 } | |
| 1502 diff = diffa[i]; | |
| 1503 if (diff > maxdiff) | |
| 1504 maxdiff = diff; | |
| 1505 } | |
| 1506 LEPT_FREE(diffa); | |
| 1507 | |
| 1508 /* Add small compensation for higher resolution */ | |
| 1509 xres = pixGetXRes(pixs); | |
| 1510 if (xres == 0) xres = 150; | |
| 1511 if (xres > 110) ibest++; | |
| 1512 if (ibest < 2) { | |
| 1513 L_INFO("setting ibest to minimum allowed value of 2\n", __func__); | |
| 1514 ibest = 2; | |
| 1515 } | |
| 1516 | |
| 1517 if (pixadb) { | |
| 1518 lept_mkdir("lept/jb"); | |
| 1519 {NUMA *naseq; | |
| 1520 PIX *pix3, *pix4; | |
| 1521 L_INFO("Best dilation: %d\n", __func__, L_MAX(3, ibest + 1)); | |
| 1522 naseq = numaMakeSequence(1, 1, numaGetCount(nacc)); | |
| 1523 pix3 = gplotGeneralPix2(naseq, nacc, GPLOT_LINES, | |
| 1524 "/tmp/lept/jb/numcc", | |
| 1525 "Number of cc vs. horizontal dilation", | |
| 1526 "Sel horiz", "Number of cc"); | |
| 1527 pixaAddPix(pixadb, pix3, L_INSERT); | |
| 1528 numaDestroy(&naseq); | |
| 1529 naseq = numaMakeSequence(1, 1, numaGetCount(nadiff)); | |
| 1530 pix3 = gplotGeneralPix2(naseq, nadiff, GPLOT_LINES, | |
| 1531 "/tmp/lept/jb/diffcc", | |
| 1532 "Diff count of cc vs. horizontal dilation", | |
| 1533 "Sel horiz", "Diff in cc"); | |
| 1534 pixaAddPix(pixadb, pix3, L_INSERT); | |
| 1535 numaDestroy(&naseq); | |
| 1536 pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1); | |
| 1537 pix4 = pixScaleToSize(pix3, 600, 0); | |
| 1538 pixaAddPix(pixadb, pix4, L_INSERT); | |
| 1539 pixDestroy(&pix3); | |
| 1540 } | |
| 1541 } | |
| 1542 | |
| 1543 if (psize) *psize = ibest + 1; | |
| 1544 if (ppixm) | |
| 1545 *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1); | |
| 1546 | |
| 1547 numaDestroy(&nacc); | |
| 1548 numaDestroy(&nadiff); | |
| 1549 return 0; | |
| 1550 } | |
| 1551 | |
| 1552 | |
| 1553 /*! | |
| 1554 * \brief pixWordBoxesByDilation() | |
| 1555 * | |
| 1556 * \param[in] pixs 1 bpp; typ. 75 - 200 ppi | |
| 1557 * \param[in] minwidth saved components; smaller are discarded | |
| 1558 * \param[in] minheight saved components; smaller are discarded | |
| 1559 * \param[in] maxwidth saved components; larger are discarded | |
| 1560 * \param[in] maxheight saved components; larger are discarded | |
| 1561 * \param[out] pboxa of dilated word mask | |
| 1562 * \param[out] psize [optional] size of good horizontal dilation | |
| 1563 * \param[out] pixadb [optional] debug: pixa of intermediate steps | |
| 1564 * \return 0 if OK, 1 on error | |
| 1565 * | |
| 1566 * <pre> | |
| 1567 * Notes: | |
| 1568 * (1) Returns a pruned set of word boxes. | |
| 1569 * (2) See pixWordMaskByDilation(). | |
| 1570 * </pre> | |
| 1571 */ | |
| 1572 l_ok | |
| 1573 pixWordBoxesByDilation(PIX *pixs, | |
| 1574 l_int32 minwidth, | |
| 1575 l_int32 minheight, | |
| 1576 l_int32 maxwidth, | |
| 1577 l_int32 maxheight, | |
| 1578 BOXA **pboxa, | |
| 1579 l_int32 *psize, | |
| 1580 PIXA *pixadb) | |
| 1581 { | |
| 1582 BOXA *boxa1, *boxa2; | |
| 1583 PIX *pix1, *pix2; | |
| 1584 | |
| 1585 if (psize) *psize = 0; | |
| 1586 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1587 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); | |
| 1588 if (!pboxa) | |
| 1589 return ERROR_INT("&boxa not defined", __func__, 1); | |
| 1590 *pboxa = NULL; | |
| 1591 | |
| 1592 /* Make a first estimate of the word mask */ | |
| 1593 if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb)) | |
| 1594 return ERROR_INT("pixWordMaskByDilation() failed", __func__, 1); | |
| 1595 | |
| 1596 /* Prune the word mask. Get the bounding boxes of the words. | |
| 1597 * Remove the small ones, which can be due to punctuation | |
| 1598 * that was not joined to a word. Also remove the large ones, | |
| 1599 * which are not likely to be words. */ | |
| 1600 boxa1 = pixConnComp(pix1, NULL, 8); | |
| 1601 boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH, | |
| 1602 L_SELECT_IF_GTE, NULL); | |
| 1603 *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH, | |
| 1604 L_SELECT_IF_LTE, NULL); | |
| 1605 if (pixadb) { | |
| 1606 pix2 = pixUnpackBinary(pixs, 32, 1); | |
| 1607 pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0); | |
| 1608 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 1609 pix2 = pixUnpackBinary(pixs, 32, 1); | |
| 1610 pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0); | |
| 1611 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 1612 } | |
| 1613 boxaDestroy(&boxa1); | |
| 1614 boxaDestroy(&boxa2); | |
| 1615 pixDestroy(&pix1); | |
| 1616 return 0; | |
| 1617 } | |
| 1618 | |
| 1619 | |
| 1620 /*----------------------------------------------------------------------* | |
| 1621 * Build grayscale composites (templates) * | |
| 1622 *----------------------------------------------------------------------*/ | |
| 1623 /*! | |
| 1624 * \brief jbAccumulateComposites() | |
| 1625 * | |
| 1626 * \param[in] pixaa one pixa for each class | |
| 1627 * \param[out] ppna number of samples used to build each composite | |
| 1628 * \param[out] pptat centroids of bordered composites | |
| 1629 * \return pixad accumulated sum of samples in each class, or NULL on error | |
| 1630 * | |
| 1631 */ | |
| 1632 PIXA * | |
| 1633 jbAccumulateComposites(PIXAA *pixaa, | |
| 1634 NUMA **pna, | |
| 1635 PTA **pptat) | |
| 1636 { | |
| 1637 l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff; | |
| 1638 l_float32 x, y, xave, yave; | |
| 1639 NUMA *na; | |
| 1640 PIX *pix, *pixt1, *pixt2, *pixsum; | |
| 1641 PIXA *pixa, *pixad; | |
| 1642 PTA *ptat, *pta; | |
| 1643 | |
| 1644 if (!pptat) | |
| 1645 return (PIXA *)ERROR_PTR("&ptat not defined", __func__, NULL); | |
| 1646 *pptat = NULL; | |
| 1647 if (!pna) | |
| 1648 return (PIXA *)ERROR_PTR("&na not defined", __func__, NULL); | |
| 1649 *pna = NULL; | |
| 1650 if (!pixaa) | |
| 1651 return (PIXA *)ERROR_PTR("pixaa not defined", __func__, NULL); | |
| 1652 | |
| 1653 n = pixaaGetCount(pixaa, NULL); | |
| 1654 if ((ptat = ptaCreate(n)) == NULL) | |
| 1655 return (PIXA *)ERROR_PTR("ptat not made", __func__, NULL); | |
| 1656 *pptat = ptat; | |
| 1657 pixad = pixaCreate(n); | |
| 1658 na = numaCreate(n); | |
| 1659 *pna = na; | |
| 1660 | |
| 1661 for (i = 0; i < n; i++) { | |
| 1662 pixa = pixaaGetPixa(pixaa, i, L_CLONE); | |
| 1663 nt = pixaGetCount(pixa); | |
| 1664 numaAddNumber(na, nt); | |
| 1665 if (nt == 0) { | |
| 1666 L_WARNING("empty pixa found!\n", __func__); | |
| 1667 pixaDestroy(&pixa); | |
| 1668 continue; | |
| 1669 } | |
| 1670 pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh); | |
| 1671 pix = pixaGetPix(pixa, 0, L_CLONE); | |
| 1672 d = pixGetDepth(pix); | |
| 1673 pixDestroy(&pix); | |
| 1674 pixt1 = pixCreate(maxw, maxh, d); | |
| 1675 pixsum = pixInitAccumulate(maxw, maxh, 0); | |
| 1676 pta = pixaCentroids(pixa); | |
| 1677 | |
| 1678 /* Find the average value of the centroids ... */ | |
| 1679 xave = yave = 0; | |
| 1680 for (j = 0; j < nt; j++) { | |
| 1681 ptaGetPt(pta, j, &x, &y); | |
| 1682 xave += x; | |
| 1683 yave += y; | |
| 1684 } | |
| 1685 xave = xave / (l_float32)nt; | |
| 1686 yave = yave / (l_float32)nt; | |
| 1687 | |
| 1688 /* and place all centroids at their average value */ | |
| 1689 for (j = 0; j < nt; j++) { | |
| 1690 pixt2 = pixaGetPix(pixa, j, L_CLONE); | |
| 1691 ptaGetPt(pta, j, &x, &y); | |
| 1692 xdiff = (l_int32)(x - xave); | |
| 1693 ydiff = (l_int32)(y - yave); | |
| 1694 pixClearAll(pixt1); | |
| 1695 pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC, | |
| 1696 pixt2, 0, 0); | |
| 1697 pixAccumulate(pixsum, pixt1, L_ARITH_ADD); | |
| 1698 pixDestroy(&pixt2); | |
| 1699 } | |
| 1700 pixaAddPix(pixad, pixsum, L_INSERT); | |
| 1701 ptaAddPt(ptat, xave, yave); | |
| 1702 | |
| 1703 pixaDestroy(&pixa); | |
| 1704 pixDestroy(&pixt1); | |
| 1705 ptaDestroy(&pta); | |
| 1706 } | |
| 1707 | |
| 1708 return pixad; | |
| 1709 } | |
| 1710 | |
| 1711 | |
| 1712 /*! | |
| 1713 * \brief jbTemplatesFromComposites() | |
| 1714 * | |
| 1715 * \param[in] pixac one pix of composites for each class | |
| 1716 * \param[in] na number of samples used for each class composite | |
| 1717 * \return pixad 8 bpp templates for each class, or NULL on error | |
| 1718 * | |
| 1719 */ | |
| 1720 PIXA * | |
| 1721 jbTemplatesFromComposites(PIXA *pixac, | |
| 1722 NUMA *na) | |
| 1723 { | |
| 1724 l_int32 n, i; | |
| 1725 l_float32 nt; /* number of samples in the composite; always an integer */ | |
| 1726 l_float32 factor; | |
| 1727 PIX *pixsum; /* accumulated composite */ | |
| 1728 PIX *pixd; | |
| 1729 PIXA *pixad; | |
| 1730 | |
| 1731 if (!pixac) | |
| 1732 return (PIXA *)ERROR_PTR("pixac not defined", __func__, NULL); | |
| 1733 if (!na) | |
| 1734 return (PIXA *)ERROR_PTR("na not defined", __func__, NULL); | |
| 1735 | |
| 1736 n = pixaGetCount(pixac); | |
| 1737 pixad = pixaCreate(n); | |
| 1738 for (i = 0; i < n; i++) { | |
| 1739 pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */ | |
| 1740 numaGetFValue(na, i, &nt); | |
| 1741 factor = 255.f / nt; | |
| 1742 pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */ | |
| 1743 pixd = pixFinalAccumulate(pixsum, 0, 8); | |
| 1744 pixaAddPix(pixad, pixd, L_INSERT); | |
| 1745 pixDestroy(&pixsum); | |
| 1746 } | |
| 1747 | |
| 1748 return pixad; | |
| 1749 } | |
| 1750 | |
| 1751 | |
| 1752 | |
| 1753 /*----------------------------------------------------------------------* | |
| 1754 * jbig2 utility routines * | |
| 1755 *----------------------------------------------------------------------*/ | |
| 1756 /*! | |
| 1757 * \brief jbClasserCreate() | |
| 1758 * | |
| 1759 * \param[in] method JB_RANKHAUS, JB_CORRELATION | |
| 1760 * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS | |
| 1761 * \return jbclasser, or NULL on error | |
| 1762 */ | |
| 1763 JBCLASSER * | |
| 1764 jbClasserCreate(l_int32 method, | |
| 1765 l_int32 components) | |
| 1766 { | |
| 1767 JBCLASSER *classer; | |
| 1768 | |
| 1769 if (method != JB_RANKHAUS && method != JB_CORRELATION) | |
| 1770 return (JBCLASSER *)ERROR_PTR("invalid method", __func__, NULL); | |
| 1771 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && | |
| 1772 components != JB_WORDS) | |
| 1773 return (JBCLASSER *)ERROR_PTR("invalid component", __func__, NULL); | |
| 1774 | |
| 1775 classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER)); | |
| 1776 classer->method = method; | |
| 1777 classer->components = components; | |
| 1778 classer->nacomps = numaCreate(0); | |
| 1779 classer->pixaa = pixaaCreate(0); | |
| 1780 classer->pixat = pixaCreate(0); | |
| 1781 classer->pixatd = pixaCreate(0); | |
| 1782 classer->nafgt = numaCreate(0); | |
| 1783 classer->naarea = numaCreate(0); | |
| 1784 classer->ptac = ptaCreate(0); | |
| 1785 classer->ptact = ptaCreate(0); | |
| 1786 classer->naclass = numaCreate(0); | |
| 1787 classer->napage = numaCreate(0); | |
| 1788 classer->ptaul = ptaCreate(0); | |
| 1789 return classer; | |
| 1790 } | |
| 1791 | |
| 1792 | |
| 1793 /* | |
| 1794 * \brief jbClasserDestroy() | |
| 1795 * | |
| 1796 * \param[in,out] pclasser will be set to null before returning | |
| 1797 * \return void | |
| 1798 */ | |
| 1799 void | |
| 1800 jbClasserDestroy(JBCLASSER **pclasser) | |
| 1801 { | |
| 1802 JBCLASSER *classer; | |
| 1803 | |
| 1804 if (!pclasser) | |
| 1805 return; | |
| 1806 if ((classer = *pclasser) == NULL) | |
| 1807 return; | |
| 1808 | |
| 1809 sarrayDestroy(&classer->safiles); | |
| 1810 numaDestroy(&classer->nacomps); | |
| 1811 pixaaDestroy(&classer->pixaa); | |
| 1812 pixaDestroy(&classer->pixat); | |
| 1813 pixaDestroy(&classer->pixatd); | |
| 1814 l_dnaHashDestroy(&classer->dahash); | |
| 1815 numaDestroy(&classer->nafgt); | |
| 1816 numaDestroy(&classer->naarea); | |
| 1817 ptaDestroy(&classer->ptac); | |
| 1818 ptaDestroy(&classer->ptact); | |
| 1819 numaDestroy(&classer->naclass); | |
| 1820 numaDestroy(&classer->napage); | |
| 1821 ptaDestroy(&classer->ptaul); | |
| 1822 ptaDestroy(&classer->ptall); | |
| 1823 LEPT_FREE(classer); | |
| 1824 *pclasser = NULL; | |
| 1825 } | |
| 1826 | |
| 1827 | |
| 1828 /*! | |
| 1829 * \brief jbDataSave() | |
| 1830 * | |
| 1831 * \param[in] jbclasser | |
| 1832 * \param[in] latticew cell width used to store each connected | |
| 1833 * component in the composite | |
| 1834 * \param[in] latticeh ditto for cell height | |
| 1835 * \return jbdata, or NULL on error | |
| 1836 * | |
| 1837 * <pre> | |
| 1838 * Notes: | |
| 1839 * (1) This routine stores the jbig2-type data required for | |
| 1840 * generating a lossy jbig2 version of the image. | |
| 1841 * It can be losslessly written to (and read from) two files. | |
| 1842 * (2) It generates and stores the mosaic of templates. | |
| 1843 * (3) It clones the Numa and Pta arrays, so these must all | |
| 1844 * be destroyed by the caller. | |
| 1845 * (4) Input 0 to use the default values for latticew and/or latticeh, | |
| 1846 * </pre> | |
| 1847 */ | |
| 1848 JBDATA * | |
| 1849 jbDataSave(JBCLASSER *classer) | |
| 1850 { | |
| 1851 l_int32 maxw, maxh; | |
| 1852 JBDATA *data; | |
| 1853 PIX *pix; | |
| 1854 | |
| 1855 if (!classer) | |
| 1856 return (JBDATA *)ERROR_PTR("classer not defined", __func__, NULL); | |
| 1857 | |
| 1858 /* Write the templates into an array. */ | |
| 1859 pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh); | |
| 1860 pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1, | |
| 1861 NULL, NULL); | |
| 1862 if (!pix) | |
| 1863 return (JBDATA *)ERROR_PTR("data not made", __func__, NULL); | |
| 1864 | |
| 1865 data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); | |
| 1866 data->pix = pix; | |
| 1867 data->npages = classer->npages; | |
| 1868 data->w = classer->w; | |
| 1869 data->h = classer->h; | |
| 1870 data->nclass = classer->nclass; | |
| 1871 data->latticew = maxw + 1; | |
| 1872 data->latticeh = maxh + 1; | |
| 1873 data->naclass = numaClone(classer->naclass); | |
| 1874 data->napage = numaClone(classer->napage); | |
| 1875 data->ptaul = ptaClone(classer->ptaul); | |
| 1876 return data; | |
| 1877 } | |
| 1878 | |
| 1879 | |
| 1880 /* | |
| 1881 * \brief jbDataDestroy() | |
| 1882 * | |
| 1883 * \param[in,out] pdata will be set to null before returning | |
| 1884 * \return void | |
| 1885 */ | |
| 1886 void | |
| 1887 jbDataDestroy(JBDATA **pdata) | |
| 1888 { | |
| 1889 JBDATA *data; | |
| 1890 | |
| 1891 if (!pdata) | |
| 1892 return; | |
| 1893 if ((data = *pdata) == NULL) | |
| 1894 return; | |
| 1895 | |
| 1896 pixDestroy(&data->pix); | |
| 1897 numaDestroy(&data->naclass); | |
| 1898 numaDestroy(&data->napage); | |
| 1899 ptaDestroy(&data->ptaul); | |
| 1900 LEPT_FREE(data); | |
| 1901 *pdata = NULL; | |
| 1902 } | |
| 1903 | |
| 1904 | |
| 1905 /*! | |
| 1906 * \brief jbDataWrite() | |
| 1907 * | |
| 1908 * \param[in] rootname for output files; everything but the extension | |
| 1909 * \param[in] jbdata | |
| 1910 * \return 0 if OK, 1 on error | |
| 1911 * | |
| 1912 * <pre> | |
| 1913 * Notes: | |
| 1914 * (1) Serialization function that writes data in jbdata to file. | |
| 1915 * </pre> | |
| 1916 */ | |
| 1917 l_ok | |
| 1918 jbDataWrite(const char *rootout, | |
| 1919 JBDATA *jbdata) | |
| 1920 { | |
| 1921 char buf[L_BUF_SIZE]; | |
| 1922 l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage; | |
| 1923 NUMA *naclass, *napage; | |
| 1924 PTA *ptaul; | |
| 1925 PIX *pixt; | |
| 1926 FILE *fp; | |
| 1927 | |
| 1928 if (!rootout) | |
| 1929 return ERROR_INT("no rootout", __func__, 1); | |
| 1930 if (!jbdata) | |
| 1931 return ERROR_INT("no jbdata", __func__, 1); | |
| 1932 | |
| 1933 npages = jbdata->npages; | |
| 1934 w = jbdata->w; | |
| 1935 h = jbdata->h; | |
| 1936 pixt = jbdata->pix; | |
| 1937 nclass = jbdata->nclass; | |
| 1938 cellw = jbdata->latticew; | |
| 1939 cellh = jbdata->latticeh; | |
| 1940 naclass = jbdata->naclass; | |
| 1941 napage = jbdata->napage; | |
| 1942 ptaul = jbdata->ptaul; | |
| 1943 | |
| 1944 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT); | |
| 1945 pixWrite(buf, pixt, IFF_PNG); | |
| 1946 | |
| 1947 snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT); | |
| 1948 if ((fp = fopenWriteStream(buf, "wb")) == NULL) | |
| 1949 return ERROR_INT_1("stream not opened", buf, __func__, 1); | |
| 1950 ncomp = ptaGetCount(ptaul); | |
| 1951 fprintf(fp, "jb data file\n"); | |
| 1952 fprintf(fp, "num pages = %d\n", npages); | |
| 1953 fprintf(fp, "page size: w = %d, h = %d\n", w, h); | |
| 1954 fprintf(fp, "num components = %d\n", ncomp); | |
| 1955 fprintf(fp, "num classes = %d\n", nclass); | |
| 1956 fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh); | |
| 1957 for (i = 0; i < ncomp; i++) { | |
| 1958 numaGetIValue(napage, i, &ipage); | |
| 1959 numaGetIValue(naclass, i, &iclass); | |
| 1960 ptaGetIPt(ptaul, i, &x, &y); | |
| 1961 fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y); | |
| 1962 } | |
| 1963 fclose(fp); | |
| 1964 | |
| 1965 return 0; | |
| 1966 } | |
| 1967 | |
| 1968 | |
| 1969 /*! | |
| 1970 * \brief jbDataRead() | |
| 1971 * | |
| 1972 * \param[in] rootname for template and data files | |
| 1973 * \return jbdata, or NULL on error | |
| 1974 */ | |
| 1975 JBDATA * | |
| 1976 jbDataRead(const char *rootname) | |
| 1977 { | |
| 1978 char fname[L_BUF_SIZE]; | |
| 1979 char *linestr; | |
| 1980 l_uint8 *data; | |
| 1981 l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage; | |
| 1982 l_int32 npages, nclass, ncomp, ninit; | |
| 1983 size_t size; | |
| 1984 JBDATA *jbdata; | |
| 1985 NUMA *naclass, *napage; | |
| 1986 PIX *pixs; | |
| 1987 PTA *ptaul; | |
| 1988 SARRAY *sa; | |
| 1989 | |
| 1990 if (!rootname) | |
| 1991 return (JBDATA *)ERROR_PTR("rootname not defined", __func__, NULL); | |
| 1992 | |
| 1993 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT); | |
| 1994 if ((pixs = pixRead(fname)) == NULL) | |
| 1995 return (JBDATA *)ERROR_PTR("pix not read", __func__, NULL); | |
| 1996 | |
| 1997 snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT); | |
| 1998 if ((data = l_binaryRead(fname, &size)) == NULL) { | |
| 1999 pixDestroy(&pixs); | |
| 2000 return (JBDATA *)ERROR_PTR("data not read", __func__, NULL); | |
| 2001 } | |
| 2002 | |
| 2003 if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) { | |
| 2004 pixDestroy(&pixs); | |
| 2005 LEPT_FREE(data); | |
| 2006 return (JBDATA *)ERROR_PTR("sa not made", __func__, NULL); | |
| 2007 } | |
| 2008 nsa = sarrayGetCount(sa); /* number of cc + 6 */ | |
| 2009 linestr = sarrayGetString(sa, 0, L_NOCOPY); | |
| 2010 if (strcmp(linestr, "jb data file") != 0) { | |
| 2011 pixDestroy(&pixs); | |
| 2012 LEPT_FREE(data); | |
| 2013 sarrayDestroy(&sa); | |
| 2014 return (JBDATA *)ERROR_PTR("invalid jb data file", __func__, NULL); | |
| 2015 } | |
| 2016 linestr = sarrayGetString(sa, 1, L_NOCOPY); | |
| 2017 sscanf(linestr, "num pages = %d", &npages); | |
| 2018 linestr = sarrayGetString(sa, 2, L_NOCOPY); | |
| 2019 sscanf(linestr, "page size: w = %d, h = %d", &w, &h); | |
| 2020 linestr = sarrayGetString(sa, 3, L_NOCOPY); | |
| 2021 sscanf(linestr, "num components = %d", &ncomp); | |
| 2022 linestr = sarrayGetString(sa, 4, L_NOCOPY); | |
| 2023 sscanf(linestr, "num classes = %d\n", &nclass); | |
| 2024 linestr = sarrayGetString(sa, 5, L_NOCOPY); | |
| 2025 sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh); | |
| 2026 | |
| 2027 #if 1 | |
| 2028 lept_stderr("num pages = %d\n", npages); | |
| 2029 lept_stderr("page size: w = %d, h = %d\n", w, h); | |
| 2030 lept_stderr("num components = %d\n", ncomp); | |
| 2031 lept_stderr("num classes = %d\n", nclass); | |
| 2032 lept_stderr("template lattice size: w = %d, h = %d\n", cellw, cellh); | |
| 2033 #endif | |
| 2034 | |
| 2035 ninit = ncomp; | |
| 2036 if (ncomp > 1000000) { /* fuzz protection */ | |
| 2037 L_WARNING("ncomp > 1M\n", __func__); | |
| 2038 ninit = 1000000; | |
| 2039 } | |
| 2040 naclass = numaCreate(ninit); | |
| 2041 napage = numaCreate(ninit); | |
| 2042 ptaul = ptaCreate(ninit); | |
| 2043 for (i = 6; i < nsa; i++) { | |
| 2044 linestr = sarrayGetString(sa, i, L_NOCOPY); | |
| 2045 sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y); | |
| 2046 numaAddNumber(napage, (l_float32)ipage); | |
| 2047 numaAddNumber(naclass, (l_float32)iclass); | |
| 2048 ptaAddPt(ptaul, (l_float32)x, (l_float32)y); | |
| 2049 } | |
| 2050 | |
| 2051 jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); | |
| 2052 jbdata->pix = pixs; | |
| 2053 jbdata->npages = npages; | |
| 2054 jbdata->w = w; | |
| 2055 jbdata->h = h; | |
| 2056 jbdata->nclass = nclass; | |
| 2057 jbdata->latticew = cellw; | |
| 2058 jbdata->latticeh = cellh; | |
| 2059 jbdata->naclass = naclass; | |
| 2060 jbdata->napage = napage; | |
| 2061 jbdata->ptaul = ptaul; | |
| 2062 | |
| 2063 LEPT_FREE(data); | |
| 2064 sarrayDestroy(&sa); | |
| 2065 return jbdata; | |
| 2066 } | |
| 2067 | |
| 2068 | |
| 2069 /*! | |
| 2070 * \brief jbDataRender() | |
| 2071 * | |
| 2072 * \param[in] jbdata | |
| 2073 * \param[in] debugflag if TRUE, writes into 2 bpp pix and adds | |
| 2074 * component outlines in color | |
| 2075 * \return pixa reconstruction of original images, using templates or | |
| 2076 * NULL on error | |
| 2077 */ | |
| 2078 PIXA * | |
| 2079 jbDataRender(JBDATA *data, | |
| 2080 l_int32 debugflag) | |
| 2081 { | |
| 2082 l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage; | |
| 2083 l_int32 npages, nclass, ncomp, wp, hp; | |
| 2084 BOX *box; | |
| 2085 BOXA *boxa; | |
| 2086 BOXAA *baa; | |
| 2087 NUMA *naclass, *napage; | |
| 2088 PIX *pixt, *pix1, *pix2, *pixd; | |
| 2089 PIXA *pixat; /* pixa of templates */ | |
| 2090 PIXA *pixad; /* pixa of output images */ | |
| 2091 PIXCMAP *cmap; | |
| 2092 PTA *ptaul; | |
| 2093 | |
| 2094 if (!data) | |
| 2095 return (PIXA *)ERROR_PTR("data not defined", __func__, NULL); | |
| 2096 | |
| 2097 npages = data->npages; | |
| 2098 w = data->w; | |
| 2099 h = data->h; | |
| 2100 pixt = data->pix; | |
| 2101 nclass = data->nclass; | |
| 2102 cellw = data->latticew; | |
| 2103 cellh = data->latticeh; | |
| 2104 naclass = data->naclass; | |
| 2105 napage = data->napage; | |
| 2106 ptaul = data->ptaul; | |
| 2107 ncomp = numaGetCount(naclass); | |
| 2108 | |
| 2109 /* Reconstruct the original set of images from the templates | |
| 2110 * and the data associated with each component. First, | |
| 2111 * generate the output pixa as a set of empty pix. For debug, | |
| 2112 * where the bounding boxes of each component will be displayed | |
| 2113 * in red, use 2 bpp colormapped output pix. */ | |
| 2114 if ((pixad = pixaCreate(npages)) == NULL) | |
| 2115 return (PIXA *)ERROR_PTR("pixad not made", __func__, NULL); | |
| 2116 for (i = 0; i < npages; i++) { | |
| 2117 if (debugflag == FALSE) { | |
| 2118 pix1 = pixCreate(w, h, 1); | |
| 2119 } else { | |
| 2120 pix1 = pixCreate(w, h, 2); | |
| 2121 cmap = pixcmapCreate(2); | |
| 2122 pixcmapAddColor(cmap, 255, 255, 255); | |
| 2123 pixcmapAddColor(cmap, 0, 0, 0); | |
| 2124 pixSetColormap(pix1, cmap); | |
| 2125 } | |
| 2126 pixaAddPix(pixad, pix1, L_INSERT); | |
| 2127 } | |
| 2128 | |
| 2129 /* Put the class templates into a pixa. */ | |
| 2130 if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) { | |
| 2131 pixaDestroy(&pixad); | |
| 2132 return (PIXA *)ERROR_PTR("pixat not made", __func__, NULL); | |
| 2133 } | |
| 2134 | |
| 2135 /* Place each component in the right location on its page. | |
| 2136 * For debug, first generate the boxa of component bounding | |
| 2137 * boxes for each page, and save the results in a boxaa. | |
| 2138 * Nota bene. In general we cannot use rasterop on colormap | |
| 2139 * indices with operations like PIX_SRC | PIX_DST. So we must | |
| 2140 * do the rasterop of image components first, and then paint | |
| 2141 * the component bounding boxes later. We can use rasterop | |
| 2142 * on the 2 bpp pix here because the colormap has only two | |
| 2143 * index values, 0 and 1, * so doing a bit-or between pixels | |
| 2144 * only affects the lower-order bit and does not generate | |
| 2145 * spurious colormap indices. */ | |
| 2146 if (debugflag == TRUE) { | |
| 2147 baa = boxaaCreate(npages); | |
| 2148 boxa = boxaCreate(0); | |
| 2149 boxaaInitFull(baa, boxa); | |
| 2150 boxaDestroy(&boxa); | |
| 2151 } | |
| 2152 for (i = 0; i < ncomp; i++) { | |
| 2153 numaGetIValue(napage, i, &ipage); | |
| 2154 numaGetIValue(naclass, i, &iclass); | |
| 2155 pix1 = pixaGetPix(pixat, iclass, L_CLONE); /* the template */ | |
| 2156 wp = pixGetWidth(pix1); | |
| 2157 hp = pixGetHeight(pix1); | |
| 2158 ptaGetIPt(ptaul, i, &x, &y); | |
| 2159 pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */ | |
| 2160 if (debugflag == FALSE) { | |
| 2161 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix1, 0, 0); | |
| 2162 } else { | |
| 2163 pix2 = pixConvert1To2Cmap(pix1); | |
| 2164 pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix2, 0, 0); | |
| 2165 boxa = boxaaGetBoxa(baa, ipage, L_CLONE); | |
| 2166 box = boxCreate(x, y, wp, hp); | |
| 2167 boxaAddBox(boxa, box, L_INSERT); | |
| 2168 boxaDestroy(&boxa); /* clone */ | |
| 2169 pixDestroy(&pix2); /* clone */ | |
| 2170 } | |
| 2171 pixDestroy(&pix1); /* clone */ | |
| 2172 pixDestroy(&pixd); /* clone */ | |
| 2173 } | |
| 2174 | |
| 2175 /* For debug, for each page image, render the box outlines in red. | |
| 2176 * This adds a red colormap entry to each page. */ | |
| 2177 if (debugflag == TRUE) { | |
| 2178 for (i = 0; i < npages; i++) { | |
| 2179 pixd = pixaGetPix(pixad, i, L_CLONE); | |
| 2180 boxa = boxaaGetBoxa(baa, i, L_CLONE); | |
| 2181 pixRenderBoxaArb(pixd, boxa, 1, 255, 0, 0); | |
| 2182 pixDestroy(&pixd); /* clone */ | |
| 2183 boxaDestroy(&boxa); /* clone */ | |
| 2184 } | |
| 2185 boxaaDestroy(&baa); | |
| 2186 } | |
| 2187 | |
| 2188 pixaDestroy(&pixat); | |
| 2189 return pixad; | |
| 2190 } | |
| 2191 | |
| 2192 | |
| 2193 /*! | |
| 2194 * \brief jbGetULCorners() | |
| 2195 * | |
| 2196 * \param[in] jbclasser | |
| 2197 * \param[in] pixs full res image | |
| 2198 * \param[in] boxa of c.c. bounding rectangles for this page | |
| 2199 * \return 0 if OK, 1 on error | |
| 2200 * | |
| 2201 * <pre> | |
| 2202 * Notes: | |
| 2203 * (1) This computes the ptaul field, which has the global UL corners, | |
| 2204 * adjusted for each specific component, so that each component | |
| 2205 * can be replaced by the template for its class and have the | |
| 2206 * centroid in the template in the same position as the | |
| 2207 * centroid of the original connected component. It is important | |
| 2208 * that this be done properly to avoid a wavy baseline in the | |
| 2209 * result. | |
| 2210 * (2) The array fields ptac and ptact give the centroids of | |
| 2211 * those components relative to the UL corner of each component. | |
| 2212 * Here, we compute the difference in each component, round to | |
| 2213 * nearest integer, and correct the box->x and box->y by | |
| 2214 * the appropriate integral difference. | |
| 2215 * (3) The templates and stored instances are all bordered. | |
| 2216 * </pre> | |
| 2217 */ | |
| 2218 l_ok | |
| 2219 jbGetULCorners(JBCLASSER *classer, | |
| 2220 PIX *pixs, | |
| 2221 BOXA *boxa) | |
| 2222 { | |
| 2223 l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy; | |
| 2224 l_int32 *sumtab; | |
| 2225 l_float32 x1, x2, y1, y2, delx, dely; | |
| 2226 BOX *box; | |
| 2227 NUMA *naclass; | |
| 2228 PIX *pixt; | |
| 2229 PTA *ptac, *ptact, *ptaul; | |
| 2230 | |
| 2231 if (!classer) | |
| 2232 return ERROR_INT("classer not defined", __func__, 1); | |
| 2233 if (!pixs) | |
| 2234 return ERROR_INT("pixs not defined", __func__, 1); | |
| 2235 if (!boxa) | |
| 2236 return ERROR_INT("boxa not defined", __func__, 1); | |
| 2237 | |
| 2238 n = boxaGetCount(boxa); | |
| 2239 ptaul = classer->ptaul; | |
| 2240 naclass = classer->naclass; | |
| 2241 ptac = classer->ptac; | |
| 2242 ptact = classer->ptact; | |
| 2243 baseindex = classer->baseindex; /* num components before this page */ | |
| 2244 sumtab = makePixelSumTab8(); | |
| 2245 for (i = 0; i < n; i++) { | |
| 2246 index = baseindex + i; | |
| 2247 ptaGetPt(ptac, index, &x1, &y1); | |
| 2248 numaGetIValue(naclass, index, &iclass); | |
| 2249 ptaGetPt(ptact, iclass, &x2, &y2); | |
| 2250 delx = x2 - x1; | |
| 2251 dely = y2 - y1; | |
| 2252 if (delx >= 0) | |
| 2253 idelx = (l_int32)(delx + 0.5); | |
| 2254 else | |
| 2255 idelx = (l_int32)(delx - 0.5); | |
| 2256 if (dely >= 0) | |
| 2257 idely = (l_int32)(dely + 0.5); | |
| 2258 else | |
| 2259 idely = (l_int32)(dely - 0.5); | |
| 2260 if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) { | |
| 2261 LEPT_FREE(sumtab); | |
| 2262 return ERROR_INT("box not found", __func__, 1); | |
| 2263 } | |
| 2264 boxGetGeometry(box, &x, &y, NULL, NULL); | |
| 2265 | |
| 2266 /* Get final increments dx and dy for best alignment */ | |
| 2267 pixt = pixaGetPix(classer->pixat, iclass, L_CLONE); | |
| 2268 finalPositioningForAlignment(pixs, x, y, idelx, idely, | |
| 2269 pixt, sumtab, &dx, &dy); | |
| 2270 /* if (i % 20 == 0) | |
| 2271 lept_stderr("dx = %d, dy = %d\n", dx, dy); */ | |
| 2272 ptaAddPt(ptaul, (l_float32)(x - idelx + dx), (l_float32)(y - idely + dy)); | |
| 2273 boxDestroy(&box); | |
| 2274 pixDestroy(&pixt); | |
| 2275 } | |
| 2276 | |
| 2277 LEPT_FREE(sumtab); | |
| 2278 return 0; | |
| 2279 } | |
| 2280 | |
| 2281 | |
| 2282 /*! | |
| 2283 * \brief jbGetLLCorners() | |
| 2284 * | |
| 2285 * \param[in] jbclasser | |
| 2286 * \return 0 if OK, 1 on error | |
| 2287 * | |
| 2288 * <pre> | |
| 2289 * Notes: | |
| 2290 * (1) This computes the ptall field, which has the global LL corners, | |
| 2291 * adjusted for each specific component, so that each component | |
| 2292 * can be replaced by the template for its class and have the | |
| 2293 * centroid in the template in the same position as the | |
| 2294 * centroid of the original connected component. It is important | |
| 2295 * that this be done properly to avoid a wavy baseline in the result. | |
| 2296 * (2) It is computed here from the corresponding UL corners, where | |
| 2297 * the input templates and stored instances are all bordered. | |
| 2298 * This should be done after all pages have been processed. | |
| 2299 * (3) For proper substitution, the templates whose LL corners are | |
| 2300 * placed in these locations must be UN-bordered. | |
| 2301 * This is available for a realistic jbig2 encoder, which would | |
| 2302 * (1) encode each template without a border, and (2) encode | |
| 2303 * the position using the LL corner (rather than the UL | |
| 2304 * corner) because the difference between y-values | |
| 2305 * of successive instances is typically close to zero. | |
| 2306 * </pre> | |
| 2307 */ | |
| 2308 l_ok | |
| 2309 jbGetLLCorners(JBCLASSER *classer) | |
| 2310 { | |
| 2311 l_int32 i, iclass, n, x1, y1, h; | |
| 2312 NUMA *naclass; | |
| 2313 PIX *pix; | |
| 2314 PIXA *pixat; | |
| 2315 PTA *ptaul, *ptall; | |
| 2316 | |
| 2317 if (!classer) | |
| 2318 return ERROR_INT("classer not defined", __func__, 1); | |
| 2319 | |
| 2320 ptaul = classer->ptaul; | |
| 2321 naclass = classer->naclass; | |
| 2322 pixat = classer->pixat; | |
| 2323 | |
| 2324 ptaDestroy(&classer->ptall); | |
| 2325 n = ptaGetCount(ptaul); | |
| 2326 ptall = ptaCreate(n); | |
| 2327 classer->ptall = ptall; | |
| 2328 | |
| 2329 /* If the templates were bordered, we would add h - 1 to the UL | |
| 2330 * corner y-value. However, because the templates to be used | |
| 2331 * here have their borders removed, and the borders are | |
| 2332 * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS | |
| 2333 * to the UL corner y-value. */ | |
| 2334 for (i = 0; i < n; i++) { | |
| 2335 ptaGetIPt(ptaul, i, &x1, &y1); | |
| 2336 numaGetIValue(naclass, i, &iclass); | |
| 2337 pix = pixaGetPix(pixat, iclass, L_CLONE); | |
| 2338 h = pixGetHeight(pix); | |
| 2339 ptaAddPt(ptall, (l_float32)x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS); | |
| 2340 pixDestroy(&pix); | |
| 2341 } | |
| 2342 | |
| 2343 return 0; | |
| 2344 } | |
| 2345 | |
| 2346 | |
| 2347 /*----------------------------------------------------------------------* | |
| 2348 * Static helpers * | |
| 2349 *----------------------------------------------------------------------*/ | |
| 2350 /* When looking for similar matches we check templates whose size is +/- 2 in | |
| 2351 * each direction. This involves 25 possible sizes. This array contains the | |
| 2352 * offsets for each of those positions in a spiral pattern. There are 25 pairs | |
| 2353 * of numbers in this array: even positions are x values. */ | |
| 2354 static int two_by_two_walk[50] = { | |
| 2355 0, 0, | |
| 2356 0, 1, | |
| 2357 -1, 0, | |
| 2358 0, -1, | |
| 2359 1, 0, | |
| 2360 -1, 1, | |
| 2361 1, 1, | |
| 2362 -1, -1, | |
| 2363 1, -1, | |
| 2364 0, -2, | |
| 2365 2, 0, | |
| 2366 0, 2, | |
| 2367 -2, 0, | |
| 2368 -1, -2, | |
| 2369 1, -2, | |
| 2370 2, -1, | |
| 2371 2, 1, | |
| 2372 1, 2, | |
| 2373 -1, 2, | |
| 2374 -2, 1, | |
| 2375 -2, -1, | |
| 2376 -2, -2, | |
| 2377 2, -2, | |
| 2378 2, 2, | |
| 2379 -2, 2}; | |
| 2380 | |
| 2381 | |
| 2382 /*! | |
| 2383 * \brief findSimilarSizedTemplatesInit() | |
| 2384 * | |
| 2385 * \param[in] classer | |
| 2386 * \param[in] pixs instance to be matched | |
| 2387 * \return Allocated context to be used with findSimilar* | |
| 2388 */ | |
| 2389 static JBFINDCTX * | |
| 2390 findSimilarSizedTemplatesInit(JBCLASSER *classer, | |
| 2391 PIX *pixs) | |
| 2392 { | |
| 2393 JBFINDCTX *state; | |
| 2394 | |
| 2395 state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX)); | |
| 2396 state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS; | |
| 2397 state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS; | |
| 2398 state->classer = classer; | |
| 2399 return state; | |
| 2400 } | |
| 2401 | |
| 2402 | |
| 2403 static void | |
| 2404 findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate) | |
| 2405 { | |
| 2406 JBFINDCTX *state; | |
| 2407 | |
| 2408 if (pstate == NULL) { | |
| 2409 L_WARNING("ptr address is null\n", __func__); | |
| 2410 return; | |
| 2411 } | |
| 2412 if ((state = *pstate) == NULL) | |
| 2413 return; | |
| 2414 | |
| 2415 l_dnaDestroy(&state->dna); | |
| 2416 LEPT_FREE(state); | |
| 2417 *pstate = NULL; | |
| 2418 return; | |
| 2419 } | |
| 2420 | |
| 2421 | |
| 2422 /*! | |
| 2423 * \brief findSimilarSizedTemplatesNext() | |
| 2424 * | |
| 2425 * \param[in] state from findSimilarSizedTemplatesInit | |
| 2426 * \return next template number, or -1 when finished | |
| 2427 * | |
| 2428 * We have a dna hash table that maps template area to a list of template | |
| 2429 * numbers with that area. We wish to find similar sized templates, | |
| 2430 * so we first look for templates with the same width and height, and | |
| 2431 * then with width + 1, etc. This walk is guided by the | |
| 2432 * two_by_two_walk array, above. | |
| 2433 * | |
| 2434 * We don't want to have to collect the whole list of templates first, | |
| 2435 * because we hope to find a well-matching template quickly. So we | |
| 2436 * keep the context for this walk in an explictit state structure, | |
| 2437 * and this function acts like a generator. | |
| 2438 */ | |
| 2439 static l_int32 | |
| 2440 findSimilarSizedTemplatesNext(JBFINDCTX *state) | |
| 2441 { | |
| 2442 l_int32 desiredh, desiredw, size, templ; | |
| 2443 PIX *pixt; | |
| 2444 | |
| 2445 while(1) { /* Continue the walk over step 'i' */ | |
| 2446 if (state->i >= 25) { /* all done; didn't find a good match */ | |
| 2447 return -1; | |
| 2448 } | |
| 2449 | |
| 2450 desiredw = state->w + two_by_two_walk[2 * state->i]; | |
| 2451 desiredh = state->h + two_by_two_walk[2 * state->i + 1]; | |
| 2452 if (desiredh < 1 || desiredw < 1) { /* invalid size */ | |
| 2453 state->i++; | |
| 2454 continue; | |
| 2455 } | |
| 2456 | |
| 2457 if (!state->dna) { | |
| 2458 /* We have yet to start walking the array for the step 'i' */ | |
| 2459 state->dna = l_dnaHashGetDna(state->classer->dahash, | |
| 2460 (l_uint64)desiredh * desiredw, L_CLONE); | |
| 2461 if (!state->dna) { /* nothing there */ | |
| 2462 state->i++; | |
| 2463 continue; | |
| 2464 } | |
| 2465 | |
| 2466 state->n = 0; /* OK, we got a dna. */ | |
| 2467 } | |
| 2468 | |
| 2469 /* Continue working on this dna */ | |
| 2470 size = l_dnaGetCount(state->dna); | |
| 2471 for ( ; state->n < size; ) { | |
| 2472 templ = (l_int32)(state->dna->array[state->n++] + 0.5); | |
| 2473 pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE); | |
| 2474 if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw && | |
| 2475 pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) { | |
| 2476 pixDestroy(&pixt); | |
| 2477 return templ; | |
| 2478 } | |
| 2479 pixDestroy(&pixt); | |
| 2480 } | |
| 2481 | |
| 2482 /* Exhausted the dna (no match found); take another step and | |
| 2483 * try again. */ | |
| 2484 state->i++; | |
| 2485 l_dnaDestroy(&state->dna); | |
| 2486 continue; | |
| 2487 } | |
| 2488 } | |
| 2489 | |
| 2490 | |
| 2491 /*! | |
| 2492 * \brief finalPositioningForAlignment() | |
| 2493 * | |
| 2494 * \param[in] pixs input page image | |
| 2495 * \param[in] x, y location of UL corner of bb of component in pixs | |
| 2496 * \param[in] idelx, idely compensation to match centroids of component | |
| 2497 * and template | |
| 2498 * \param[in] pixt template, with JB_ADDED_PIXELS of padding | |
| 2499 * on all sides | |
| 2500 * \param[in] sumtab for summing fg pixels in an image | |
| 2501 * \param[in] pdx, pdy return delta on position for best match; each | |
| 2502 * one is in the set {-1, 0, 1} | |
| 2503 * \return 0 if OK, 1 on error | |
| 2504 * | |
| 2505 */ | |
| 2506 static l_int32 | |
| 2507 finalPositioningForAlignment(PIX *pixs, | |
| 2508 l_int32 x, | |
| 2509 l_int32 y, | |
| 2510 l_int32 idelx, | |
| 2511 l_int32 idely, | |
| 2512 PIX *pixt, | |
| 2513 l_int32 *sumtab, | |
| 2514 l_int32 *pdx, | |
| 2515 l_int32 *pdy) | |
| 2516 { | |
| 2517 l_int32 w, h, i, j, minx, miny, count, mincount; | |
| 2518 PIX *pixi; /* clipped from source pixs */ | |
| 2519 PIX *pixr; /* temporary storage */ | |
| 2520 BOX *box; | |
| 2521 | |
| 2522 if (!pixs) | |
| 2523 return ERROR_INT("pixs not defined", __func__, 1); | |
| 2524 if (!pixt) | |
| 2525 return ERROR_INT("pixt not defined", __func__, 1); | |
| 2526 if (!pdx || !pdy) | |
| 2527 return ERROR_INT("&dx and &dy not both defined", __func__, 1); | |
| 2528 if (!sumtab) | |
| 2529 return ERROR_INT("sumtab not defined", __func__, 1); | |
| 2530 *pdx = *pdy = 0; | |
| 2531 | |
| 2532 /* Use JB_ADDED_PIXELS pixels padding on each side */ | |
| 2533 pixGetDimensions(pixt, &w, &h, NULL); | |
| 2534 box = boxCreate(x - idelx - JB_ADDED_PIXELS, | |
| 2535 y - idely - JB_ADDED_PIXELS, w, h); | |
| 2536 pixi = pixClipRectangle(pixs, box, NULL); | |
| 2537 boxDestroy(&box); | |
| 2538 if (!pixi) | |
| 2539 return ERROR_INT("pixi not made", __func__, 1); | |
| 2540 | |
| 2541 pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1); | |
| 2542 mincount = 0x7fffffff; | |
| 2543 for (i = -1; i <= 1; i++) { | |
| 2544 for (j = -1; j <= 1; j++) { | |
| 2545 pixCopy(pixr, pixi); | |
| 2546 pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0); | |
| 2547 pixCountPixels(pixr, &count, sumtab); | |
| 2548 if (count < mincount) { | |
| 2549 minx = j; | |
| 2550 miny = i; | |
| 2551 mincount = count; | |
| 2552 } | |
| 2553 } | |
| 2554 } | |
| 2555 pixDestroy(&pixi); | |
| 2556 pixDestroy(&pixr); | |
| 2557 | |
| 2558 *pdx = minx; | |
| 2559 *pdy = miny; | |
| 2560 return 0; | |
| 2561 } |
