Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/pageseg.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file pageseg.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Top level page segmentation | |
| 32 * l_int32 pixGetRegionsBinary() | |
| 33 * | |
| 34 * Halftone region extraction | |
| 35 * PIX *pixGenHalftoneMask() **Deprecated wrapper** | |
| 36 * PIX *pixGenerateHalftoneMask() | |
| 37 * | |
| 38 * Textline extraction | |
| 39 * PIX *pixGenTextlineMask() | |
| 40 * | |
| 41 * Textblock extraction | |
| 42 * PIX *pixGenTextblockMask() | |
| 43 * | |
| 44 * Location and extraction of page foreground; cleaning pages | |
| 45 * PIX *pixCropImage() | |
| 46 * static l_int32 pixMaxCompAfterVClosing() | |
| 47 * static l_int32 pixFindPageInsideBlackBorder() | |
| 48 * static PIX *pixRescaleForCropping() | |
| 49 * PIX *pixCleanImage() | |
| 50 * BOX *pixFindPageForeground() | |
| 51 * | |
| 52 * Extraction of characters from image with only text | |
| 53 * l_int32 pixSplitIntoCharacters() | |
| 54 * BOXA *pixSplitComponentWithProfile() | |
| 55 * | |
| 56 * Extraction of lines of text | |
| 57 * PIXA *pixExtractTextlines() | |
| 58 * PIXA *pixExtractRawTextlines() | |
| 59 * | |
| 60 * How many text columns | |
| 61 * l_int32 pixCountTextColumns() | |
| 62 * | |
| 63 * Decision: text vs photo | |
| 64 * l_int32 pixDecideIfText() | |
| 65 * l_int32 pixFindThreshFgExtent() | |
| 66 * | |
| 67 * Decision: table vs text | |
| 68 * l_int32 pixDecideIfTable() | |
| 69 * Pix *pixPrepare1bpp() | |
| 70 * | |
| 71 * Estimate the grayscale background value | |
| 72 * l_int32 pixEstimateBackground() | |
| 73 * | |
| 74 * Largest white or black rectangles in an image | |
| 75 * l_int32 pixFindLargeRectangles() | |
| 76 * l_int32 pixFindLargestRectangle() | |
| 77 * | |
| 78 * Generate rectangle inside connected component | |
| 79 * BOX *pixFindRectangleInCC() | |
| 80 * | |
| 81 * Automatic photoinvert for OCR | |
| 82 * PIX *pixAutoPhotoinvert() | |
| 83 * </pre> | |
| 84 */ | |
| 85 | |
| 86 #ifdef HAVE_CONFIG_H | |
| 87 #include <config_auto.h> | |
| 88 #endif /* HAVE_CONFIG_H */ | |
| 89 | |
| 90 #include <math.h> | |
| 91 #include "allheaders.h" | |
| 92 #include "pix_internal.h" | |
| 93 | |
| 94 /* Minimum input dimensions, in pixels. These functions are not intended to work on very low-res images: the functions in this file that check these limits fail on any image narrower than MinWidth or shorter than MinHeight. */ | |
| 95 static const l_int32 MinWidth = 100; | |
| 96 static const l_int32 MinHeight = 100; | |
| 97 /* Forward declarations of the static helpers called by pixCropImage() */ | |
| 98 static l_ok pixMaxCompAfterVClosing(PIX *pixs, BOX **pbox); | |
| 99 static l_ok pixFindPageInsideBlackBorder(PIX *pixs, BOX **pbox); | |
| 100 static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h, | |
| 101 l_int32 lr_border, l_int32 tb_border, | |
| 102 l_float32 maxwiden, PIX **ppixsc); | |
| 103 | |
| 104 /*------------------------------------------------------------------* | |
| 105 * Top level page segmentation * | |
| 106 *------------------------------------------------------------------*/ | |
| 107 /*! | |
| 108 * \brief pixGetRegionsBinary() | |
| 109 * | |
| 110 * \param[in] pixs 1 bpp, assumed to be 300 to 400 ppi | |
| 111 * \param[out] ppixhm [optional] halftone mask | |
| 112 * \param[out] ppixtm [optional] textline mask | |
| 113 * \param[out] ppixtb [optional] textblock mask | |
| 114 * \param[in] pixadb input for collecting debug pix; use NULL to skip | |
| 115 * \return 0 if OK, 1 on error | |
| 116 * | |
| 117 * <pre> | |
| 118 * Notes: | |
| 119 * (1) It is best to deskew the image before segmenting. | |
| 120 * (2) Passing in %pixadb enables debug output. | |
| 121 * </pre> | |
| 122 */ | |
| 123 l_ok | |
| 124 pixGetRegionsBinary(PIX *pixs, | |
| 125 PIX **ppixhm, | |
| 126 PIX **ppixtm, | |
| 127 PIX **ppixtb, | |
| 128 PIXA *pixadb) | |
| 129 { | |
| 130 l_int32 w, h, htfound, tlfound; | |
| 131 PIX *pixr, *pix1, *pix2; | |
| 132 PIX *pixtext; /* text pixels only */ | |
| 133 PIX *pixhm2; /* halftone mask; 2x reduction */ | |
| 134 PIX *pixhm; /* halftone mask; */ | |
| 135 PIX *pixtm2; /* textline mask; 2x reduction */ | |
| 136 PIX *pixtm; /* textline mask */ | |
| 137 PIX *pixvws; /* vertical white space mask */ | |
| 138 PIX *pixtb2; /* textblock mask; 2x reduction */ | |
| 139 PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ | |
| 140 PIX *pixtb; /* textblock mask */ | |
| 141 | |
| 142 if (ppixhm) *ppixhm = NULL; | |
| 143 if (ppixtm) *ppixtm = NULL; | |
| 144 if (ppixtb) *ppixtb = NULL; | |
| 145 if (!pixs || pixGetDepth(pixs) != 1) | |
| 146 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); | |
| 147 pixGetDimensions(pixs, &w, &h, NULL); | |
| 148 if (w < MinWidth || h < MinHeight) { | |
| 149 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 150 return 1; | |
| 151 } | |
| 152 | |
| 153 /* 2x reduce, to 150 -200 ppi */ | |
| 154 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); | |
| 155 if (pixadb) pixaAddPix(pixadb, pixr, L_COPY); | |
| 156 | |
| 157 /* Get the halftone mask */ | |
| 158 pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb); | |
| 159 | |
| 160 /* Get the textline mask from the text pixels */ | |
| 161 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb); | |
| 162 | |
| 163 /* Get the textblock mask from the textline mask */ | |
| 164 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb); | |
| 165 pixDestroy(&pixr); | |
| 166 pixDestroy(&pixtext); | |
| 167 pixDestroy(&pixvws); | |
| 168 | |
| 169 /* Remove small components from the mask, where a small | |
| 170 * component is defined as one with both width and height < 60 */ | |
| 171 pixtbf2 = NULL; | |
| 172 if (pixtb2) { | |
| 173 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, | |
| 174 L_SELECT_IF_GTE, NULL); | |
| 175 pixDestroy(&pixtb2); | |
| 176 if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY); | |
| 177 } | |
| 178 | |
| 179 /* Expand all masks to full resolution, and do filling or | |
| 180 * small dilations for better coverage. */ | |
| 181 pixhm = pixExpandReplicate(pixhm2, 2); | |
| 182 pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); | |
| 183 pixOr(pixhm, pixhm, pix1); | |
| 184 pixDestroy(&pixhm2); | |
| 185 pixDestroy(&pix1); | |
| 186 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); | |
| 187 | |
| 188 pix1 = pixExpandReplicate(pixtm2, 2); | |
| 189 pixtm = pixDilateBrick(NULL, pix1, 3, 3); | |
| 190 pixDestroy(&pixtm2); | |
| 191 pixDestroy(&pix1); | |
| 192 if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY); | |
| 193 | |
| 194 if (pixtbf2) { | |
| 195 pix1 = pixExpandReplicate(pixtbf2, 2); | |
| 196 pixtb = pixDilateBrick(NULL, pix1, 3, 3); | |
| 197 pixDestroy(&pixtbf2); | |
| 198 pixDestroy(&pix1); | |
| 199 if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY); | |
| 200 } else { | |
| 201 pixtb = pixCreateTemplate(pixs); /* empty mask */ | |
| 202 } | |
| 203 | |
| 204 /* Debug: identify objects that are neither text nor halftone image */ | |
| 205 if (pixadb) { | |
| 206 pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ | |
| 207 pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */ | |
| 208 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 209 pixDestroy(&pix1); | |
| 210 } | |
| 211 | |
| 212 /* Debug: display textline components with random colors */ | |
| 213 if (pixadb) { | |
| 214 l_int32 w, h; | |
| 215 BOXA *boxa; | |
| 216 PIXA *pixa; | |
| 217 boxa = pixConnComp(pixtm, &pixa, 8); | |
| 218 pixGetDimensions(pixtm, &w, &h, NULL); | |
| 219 pix1 = pixaDisplayRandomCmap(pixa, w, h); | |
| 220 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 221 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 222 pixaDestroy(&pixa); | |
| 223 boxaDestroy(&boxa); | |
| 224 } | |
| 225 | |
| 226 /* Debug: identify the outlines of each textblock */ | |
| 227 if (pixadb) { | |
| 228 PIXCMAP *cmap; | |
| 229 PTAA *ptaa; | |
| 230 ptaa = pixGetOuterBordersPtaa(pixtb); | |
| 231 lept_mkdir("lept/pageseg"); | |
| 232 ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1); | |
| 233 pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); | |
| 234 cmap = pixGetColormap(pix1); | |
| 235 pixcmapResetColor(cmap, 0, 130, 130, 130); | |
| 236 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 237 ptaaDestroy(&ptaa); | |
| 238 } | |
| 239 | |
| 240 /* Debug: get b.b. for all mask components */ | |
| 241 if (pixadb) { | |
| 242 BOXA *bahm, *batm, *batb; | |
| 243 bahm = pixConnComp(pixhm, NULL, 4); | |
| 244 batm = pixConnComp(pixtm, NULL, 4); | |
| 245 batb = pixConnComp(pixtb, NULL, 4); | |
| 246 boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm); | |
| 247 boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm); | |
| 248 boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb); | |
| 249 boxaDestroy(&bahm); | |
| 250 boxaDestroy(&batm); | |
| 251 boxaDestroy(&batb); | |
| 252 } | |
| 253 if (pixadb) { | |
| 254 pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation", | |
| 255 "/tmp/lept/pageseg/debug.pdf"); | |
| 256 L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__); | |
| 257 } | |
| 258 | |
| 259 if (ppixhm) | |
| 260 *ppixhm = pixhm; | |
| 261 else | |
| 262 pixDestroy(&pixhm); | |
| 263 if (ppixtm) | |
| 264 *ppixtm = pixtm; | |
| 265 else | |
| 266 pixDestroy(&pixtm); | |
| 267 if (ppixtb) | |
| 268 *ppixtb = pixtb; | |
| 269 else | |
| 270 pixDestroy(&pixtb); | |
| 271 | |
| 272 return 0; | |
| 273 } | |
| 274 | |
| 275 | |
| 276 /*------------------------------------------------------------------* | |
| 277 * Halftone region extraction * | |
| 278 *------------------------------------------------------------------*/ | |
| 279 /*! | |
| 280 * \brief pixGenHalftoneMask() | |
| 281 * | |
| 282 * <pre> | |
| 283 * Deprecated: | |
| 284 * This wrapper avoids an ABI change with tesseract 3.0.4. | |
| 285 * It should be removed when we no longer need to support 3.0.4. | |
| 286 * The debug parameter is ignored (assumed 0). | |
| 287 * </pre> | |
| 288 */ | |
| 289 PIX * | |
| 290 pixGenHalftoneMask(PIX *pixs, | |
| 291 PIX **ppixtext, | |
| 292 l_int32 *phtfound, | |
| 293 l_int32 debug) | |
| 294 { | |
| 295 return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL); | |
| 296 } | |
| 297 | |
| 298 | |
| 299 /*! | |
| 300 * \brief pixGenerateHalftoneMask() | |
| 301 * | |
| 302 * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi | |
| 303 * \param[out] ppixtext [optional] text part of pixs | |
| 304 * \param[out] phtfound [optional] 1 if the mask is not empty | |
| 305 * \param[in] pixadb input for collecting debug pix; use NULL to skip | |
| 306 * \return pixd halftone mask, or NULL on error | |
| 307 * | |
| 308 * <pre> | |
| 309 * Notes: | |
| 310 * (1) This is not intended to work on small thumbnails. The | |
| 311 * dimensions of pixs must be at least MinWidth x MinHeight. | |
| 312 * </pre> | |
| 313 */ | |
| 314 PIX * | |
| 315 pixGenerateHalftoneMask(PIX *pixs, | |
| 316 PIX **ppixtext, | |
| 317 l_int32 *phtfound, | |
| 318 PIXA *pixadb) | |
| 319 { | |
| 320 l_int32 w, h, empty; | |
| 321 PIX *pix1, *pix2, *pixhs, *pixhm, *pixd; | |
| 322 | |
| 323 if (ppixtext) *ppixtext = NULL; | |
| 324 if (phtfound) *phtfound = 0; | |
| 325 if (!pixs || pixGetDepth(pixs) != 1) | |
| 326 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 327 pixGetDimensions(pixs, &w, &h, NULL); | |
| 328 if (w < MinWidth || h < MinHeight) { | |
| 329 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 330 return NULL; | |
| 331 } | |
| 332 | |
| 333 /* Compute seed for halftone parts at 8x reduction */ | |
| 334 pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0); | |
| 335 pix2 = pixOpenBrick(NULL, pix1, 5, 5); | |
| 336 pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */ | |
| 337 pixDestroy(&pix1); | |
| 338 pixDestroy(&pix2); | |
| 339 if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY); | |
| 340 | |
| 341 /* Compute mask for connected regions */ | |
| 342 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); | |
| 343 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY); | |
| 344 | |
| 345 /* Fill seed into mask to get halftone mask */ | |
| 346 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); | |
| 347 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); | |
| 348 | |
| 349 #if 0 | |
| 350 pixOpenBrick(pixd, pixd, 9, 9); | |
| 351 #endif | |
| 352 | |
| 353 /* Check if mask is empty */ | |
| 354 pixZero(pixd, &empty); | |
| 355 if (phtfound && !empty) | |
| 356 *phtfound = 1; | |
| 357 | |
| 358 /* Optionally, get all pixels that are not under the halftone mask */ | |
| 359 if (ppixtext) { | |
| 360 if (empty) | |
| 361 *ppixtext = pixCopy(NULL, pixs); | |
| 362 else | |
| 363 *ppixtext = pixSubtract(NULL, pixs, pixd); | |
| 364 if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY); | |
| 365 } | |
| 366 | |
| 367 pixDestroy(&pixhs); | |
| 368 pixDestroy(&pixhm); | |
| 369 return pixd; | |
| 370 } | |
| 371 | |
| 372 | |
| 373 /*------------------------------------------------------------------* | |
| 374 * Textline extraction * | |
| 375 *------------------------------------------------------------------*/ | |
| 376 /*! | |
| 377 * \brief pixGenTextlineMask() | |
| 378 * | |
| 379 * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi | |
| 380 * \param[out] ppixvws vertical whitespace mask | |
| 381 * \param[out] ptlfound [optional] 1 if the mask is not empty | |
| 382 * \param[in] pixadb input for collecting debug pix; use NULL to skip | |
| 383 * \return pixd textline mask, or NULL on error | |
| 384 * | |
| 385 * <pre> | |
| 386 * Notes: | |
| 387 * (1) The input pixs should be deskewed. | |
| 388 * (2) pixs should have no halftone pixels. | |
| 389 * (3) This is not intended to work on small thumbnails. The | |
| 390 * dimensions of pixs must be at least MinWidth x MinHeight. | |
| 391 * (4) Both the input image and the returned textline mask | |
| 392 * are at the same resolution. | |
| 393 * </pre> | |
| 394 */ | |
| 395 PIX * | |
| 396 pixGenTextlineMask(PIX *pixs, | |
| 397 PIX **ppixvws, | |
| 398 l_int32 *ptlfound, | |
| 399 PIXA *pixadb) | |
| 400 { | |
| 401 l_int32 w, h, empty; | |
| 402 PIX *pix1, *pix2, *pixvws, *pixd; | |
| 403 | |
| 404 if (ptlfound) *ptlfound = 0; | |
| 405 if (!ppixvws) | |
| 406 return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL); | |
| 407 *ppixvws = NULL; | |
| 408 if (!pixs || pixGetDepth(pixs) != 1) | |
| 409 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 410 pixGetDimensions(pixs, &w, &h, NULL); | |
| 411 if (w < MinWidth || h < MinHeight) { | |
| 412 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 413 return NULL; | |
| 414 } | |
| 415 | |
| 416 /* First we need a vertical whitespace mask. Invert the image. */ | |
| 417 pix1 = pixInvert(NULL, pixs); | |
| 418 | |
| 419 /* The whitespace mask will break textlines where there | |
| 420 * is a large amount of white space below or above. | |
| 421 * This can be prevented by identifying regions of the | |
| 422 * inverted image that have large horizontal extent (bigger than | |
| 423 * the separation between columns) and significant | |
| 424 * vertical extent (bigger than the separation between | |
| 425 * textlines), and subtracting this from the bg. */ | |
| 426 pix2 = pixMorphCompSequence(pix1, "o80.60", 0); | |
| 427 pixSubtract(pix1, pix1, pix2); | |
| 428 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 429 pixDestroy(&pix2); | |
| 430 | |
| 431 /* Identify vertical whitespace by opening the remaining bg. | |
| 432 * o5.1 removes thin vertical bg lines and o1.200 extracts | |
| 433 * long vertical bg lines. */ | |
| 434 pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0); | |
| 435 *ppixvws = pixvws; | |
| 436 if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY); | |
| 437 pixDestroy(&pix1); | |
| 438 | |
| 439 /* Three steps to getting text line mask: | |
| 440 * (1) close the characters and words in the textlines | |
| 441 * (2) open the vertical whitespace corridors back up | |
| 442 * (3) small opening to remove noise */ | |
| 443 pix1 = pixMorphSequence(pixs, "c30.1", 0); | |
| 444 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 445 pixd = pixSubtract(NULL, pix1, pixvws); | |
| 446 pixOpenBrick(pixd, pixd, 3, 3); | |
| 447 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); | |
| 448 pixDestroy(&pix1); | |
| 449 | |
| 450 /* Check if text line mask is empty */ | |
| 451 if (ptlfound) { | |
| 452 pixZero(pixd, &empty); | |
| 453 if (!empty) | |
| 454 *ptlfound = 1; | |
| 455 } | |
| 456 | |
| 457 return pixd; | |
| 458 } | |
| 459 | |
| 460 | |
| 461 /*------------------------------------------------------------------* | |
| 462 * Textblock extraction * | |
| 463 *------------------------------------------------------------------*/ | |
| 464 /*! | |
| 465 * \brief pixGenTextblockMask() | |
| 466 * | |
| 467 * \param[in] pixs 1 bpp, textline mask, assumed to be 150 to 200 ppi | |
| 468 * \param[in] pixvws vertical white space mask | |
| 469 * \param[in] pixadb input for collecting debug pix; use NULL to skip | |
| 470 * \return pixd textblock mask, or NULL if empty or on error | |
| 471 * | |
| 472 * <pre> | |
| 473 * Notes: | |
| 474 * (1) Both the input masks (textline and vertical white space) and | |
| 475 * the returned textblock mask are at the same resolution. | |
| 476 * (2) This is not intended to work on small thumbnails. The | |
| 477 * dimensions of pixs must be at least MinWidth x MinHeight. | |
| 478 * (3) The result is somewhat noisy, in that small "blocks" of | |
| 479 * text may be included. These can be removed by post-processing, | |
| 480 * using, e.g., | |
| 481 * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, | |
| 482 * L_SELECT_IF_GTE, NULL); | |
| 483 * </pre> | |
| 484 */ | |
| 485 PIX * | |
| 486 pixGenTextblockMask(PIX *pixs, | |
| 487 PIX *pixvws, | |
| 488 PIXA *pixadb) | |
| 489 { | |
| 490 l_int32 w, h, empty; | |
| 491 PIX *pix1, *pix2, *pix3, *pixd; | |
| 492 | |
| 493 if (!pixs || pixGetDepth(pixs) != 1) | |
| 494 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 495 pixGetDimensions(pixs, &w, &h, NULL); | |
| 496 if (w < MinWidth || h < MinHeight) { | |
| 497 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 498 return NULL; | |
| 499 } | |
| 500 if (!pixvws) | |
| 501 return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL); | |
| 502 | |
| 503 /* Join pixels vertically to make a textblock mask */ | |
| 504 pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); | |
| 505 pixZero(pix1, &empty); | |
| 506 if (empty) { | |
| 507 pixDestroy(&pix1); | |
| 508 L_INFO("no fg pixels in textblock mask\n", __func__); | |
| 509 return NULL; | |
| 510 } | |
| 511 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 512 | |
| 513 /* Solidify the textblock mask and remove noise: | |
| 514 * (1) For each cc, close the blocks and dilate slightly | |
| 515 * to form a solid mask. | |
| 516 * (2) Small horizontal closing between components. | |
| 517 * (3) Open the white space between columns, again. | |
| 518 * (4) Remove small components. */ | |
| 519 pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL); | |
| 520 pixCloseSafeBrick(pix2, pix2, 10, 1); | |
| 521 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 522 pix3 = pixSubtract(NULL, pix2, pixvws); | |
| 523 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); | |
| 524 pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH, | |
| 525 L_SELECT_IF_GTE, NULL); | |
| 526 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY); | |
| 527 | |
| 528 pixDestroy(&pix1); | |
| 529 pixDestroy(&pix2); | |
| 530 pixDestroy(&pix3); | |
| 531 return pixd; | |
| 532 } | |
| 533 | |
| 534 | |
| 535 /*------------------------------------------------------------------* | |
| 536 * Location and extraction of page foreground; cleaning pages * | |
| 537 *------------------------------------------------------------------*/ | |
| 538 /*! | |
| 539 * \brief pixCropImage() | |
| 540 * | |
| 541 * \param[in] pixs full resolution (any type or depth) | |
| 542 * \param[in] lr_clear full res pixels cleared at left and right sides | |
| 543 * \param[in] tb_clear full res pixels cleared at top and bottom sides | |
| 544 * \param[in] edgeclean parameter for removing edge noise (-1 to 15) | |
| 545 * default = 0 (no removal); | |
| 546 * 15 is maximally aggressive for random noise | |
| 547 * -1 for aggressively removing side noise | |
| 548 * -2 to extract page embedded in black background | |
| 549 * \param[in] lr_border full res final "added" pixels on left and right | |
| 550 * \param[in] tb_border full res final "added" pixels on top and bottom | |
| 551 * \param[in] maxwiden max fractional horizontal stretch allowed | |
| 552 * \param[in] printwiden 0 to skip, 1 for 8.5x11, 2 for A4 | |
| 553 * \param[in] *debugfile [optional] usually is NULL | |
| 554 * \param[out] *pcropbox [optional] crop box at full resolution | |
| 555 * \return cropped pix, or NULL on error | |
| 556 * | |
| 557 * <pre> | |
| 558 * Notes: | |
| 559 * (1) This binarizes and crops a page image. | |
| 560 * (a) Binarizes if necessary and does 2x reduction. | |
| 561 * (b) Clears near the border by %lr_clear and %tb_clear full | |
| 562 * resolution pixels. (This is done at 2x reduction.) | |
| 563 * (c) If %edgeclean > 0, it removes isolated sets of pixels, | |
| 564 * using a close/open operation of size %edgeclean + 1. | |
| 565 * If %edgeclean == -1, it uses a large vertical morphological | |
| 566 * close/open and the extraction of either the largest | |
| 567 * resulting connected component (or the largest two components | |
| 568 * if the page has 2 columns), to eliminate noise on left | |
| 569 * and right sides. | |
| 570 * If %edgeclean == -2, it extracts the page region from a | |
| 571 * possible exterior black surround. | |
| 572 * (d) Find the bounding box of remaining fg pixels and scales | |
| 573 * the box up 2x back to full resolution. | |
| 574 * (e) Crops the binarized image to the bounding box. | |
| 575 * (f) Slightly thickens long horizontal lines. | |
| 576 * (g) Rescales this image to fit within the original image, | |
| 577 * less lr_border on the sides and tb_border above and below. | |
| 578 * The rescaling is done isomorphically with a (possible) | |
| 579 * optional additional widening. Suggest the additional | |
| 580 * widening factor not exceed 1.15. | |
| 581 * (h) Optionally do additional horizontal stretch if needed to | |
| 582 * better fill a printed page. Default is 0 to skip; 1 to | |
| 583 * widen for 8.5x11 page, 2 for A4 page. | |
| 584 * Note that (b) - (d) are done at 2x reduction for efficiency. | |
| 585 * (2) Side clearing must not exceed 1/6 of the dimension on that side. | |
| 586 * (3) The clear and border pixel parameters must be >= 0. | |
| 587 * (4) The "clear" parameters act on the input image, whereas the | |
| 588 * "border" parameters act to give a white border to the final | |
| 589 * image. They are not literally added, because the input and final | |
| 590 * images are the same size. If the resulting images are to be | |
| 591 * printed, it is useful to have border pixel parameters of at | |
| 592 * least 60 at 300 ppi, to avoid losing content at the edges. | |
| 593 * (5) This is not intended to work on small thumbnails. The | |
| 594 * dimensions of pixs must be at least MinWidth x MinHeight. | |
| 595 * (6) Step (f) above helps with orthographically-produced music notation, | |
| 596 * where the horizontal staff lines can be very thin and thus | |
| 597 * subject to printer alias. | |
| 598 * (7) If you are not concerned with printing on paper, use the | |
| 599 * default value 0 for %printwiden. Widening only takes place | |
| 600 * if the ratio h/w exceeds the specified paper size by 3%, | |
| 601 * and the horizontal scaling factor will not exceed 1.25. | |
| 602 * </pre> | |
| 603 */ | |
| 604 PIX * | |
| 605 pixCropImage(PIX *pixs, | |
| 606 l_int32 lr_clear, | |
| 607 l_int32 tb_clear, | |
| 608 l_int32 edgeclean, | |
| 609 l_int32 lr_border, | |
| 610 l_int32 tb_border, | |
| 611 l_float32 maxwiden, | |
| 612 l_int32 printwiden, | |
| 613 const char *debugfile, | |
| 614 BOX **pcropbox) | |
| 615 { | |
| 616 char cmd[64]; | |
| 617 l_int32 w, h, val, ret; | |
| 618 l_float32 r1, r2; | |
| 619 BOX *box1, *box2; | |
| 620 PIX *pix1, *pix2, *pix3, *pix4; | |
| 621 PIXA *pixa1; | |
| 622 | |
| 623 if (pcropbox) *pcropbox = NULL; | |
| 624 if (!pixs) | |
| 625 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 626 if (edgeclean > 15) { | |
| 627 L_WARNING("edgeclean > 15; setting to 15\n", __func__); | |
| 628 edgeclean = 15; | |
| 629 } | |
| 630 if (edgeclean < -1) { | |
| 631 lept_stderr("Using edgeclean = -2\n"); | |
| 632 edgeclean = -2; | |
| 633 } | |
| 634 pixGetDimensions(pixs, &w, &h, NULL); | |
| 635 if (w < MinWidth || h < MinHeight) { | |
| 636 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 637 return NULL; | |
| 638 } | |
| 639 if (lr_clear < 0) lr_clear = 0; | |
| 640 if (tb_clear < 0) tb_clear = 0; | |
| 641 if (lr_border < 0) lr_border = 0; | |
| 642 if (tb_border < 0) tb_border = 0; | |
| 643 if (lr_clear > w / 6 || tb_clear > h / 6) { | |
| 644 L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n", | |
| 645 __func__, w / 6, h / 6); | |
| 646 return NULL; | |
| 647 } | |
| 648 if (maxwiden > 1.15) | |
| 649 L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", | |
| 650 __func__, maxwiden); | |
| 651 if (printwiden < 0 || printwiden > 2) printwiden = 0; | |
| 652 pixa1 = (debugfile) ? pixaCreate(5) : NULL; | |
| 653 if (pixa1) pixaAddPix(pixa1, pixs, L_COPY); | |
| 654 | |
| 655 /* Binarize if necessary and 2x reduction */ | |
| 656 pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1); | |
| 657 pix2 = pixReduceRankBinary2(pix1, 2, NULL); | |
| 658 | |
| 659 /* Clear out pixels near the image edges */ | |
| 660 pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2, | |
| 661 tb_clear / 2, PIX_CLR); | |
| 662 if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT); | |
| 663 | |
| 664 /* Choose one of three methods for extracting foreground pixels: | |
| 665 * (1) Include all foreground pixels | |
| 666 * (2) Do a morphological close/open to remove noise throughout | |
| 667 * the image before finding a b.b. for remaining f.g. pixels | |
| 668 * (3) Do a large vertical closing and choose the largest (by area) | |
| 669 * component to avoid foreground noise on left and right sides */ | |
| 670 if (edgeclean == 0) { | |
| 671 ret = pixClipToForeground(pix2, NULL, &box1); | |
| 672 } else if (edgeclean > 0) { | |
| 673 val = edgeclean + 1; | |
| 674 snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val); | |
| 675 pix3 = pixMorphSequence(pix2, cmd, 0); | |
| 676 ret = pixClipToForeground(pix3, NULL, &box1); | |
| 677 pixDestroy(&pix3); | |
| 678 } else if (edgeclean == -1) { | |
| 679 ret = pixMaxCompAfterVClosing(pix2, &box1); | |
| 680 } else { /* edgeclean == -2 */ | |
| 681 ret = pixFindPageInsideBlackBorder(pix2, &box1); | |
| 682 } | |
| 683 pixDestroy(&pix2); | |
| 684 if (ret) { | |
| 685 L_ERROR("no returned b.b. for foreground\n", __func__); | |
| 686 boxDestroy(&box1); | |
| 687 pixDestroy(&pix1); | |
| 688 pixaDestroy(&pixa1); | |
| 689 return NULL; | |
| 690 } | |
| 691 | |
| 692 /* Transform to full resolution */ | |
| 693 box2 = boxTransform(box1, 0, 0, 2.0, 2.0); /* full res */ | |
| 694 boxDestroy(&box1); | |
| 695 if (pixa1) { | |
| 696 pix2 = pixCopy(NULL, pix1); | |
| 697 pixRenderBoxArb(pix2, box2, 5, 255, 0, 0); | |
| 698 pixaAddPix(pixa1, pix2, L_INSERT); | |
| 699 } | |
| 700 | |
| 701 /* Grab the foreground region */ | |
| 702 pix2 = pixClipRectangle(pix1, box2, NULL); | |
| 703 pixDestroy(&pix1); | |
| 704 | |
| 705 /* Slightly thicken long horizontal lines. This prevents loss of | |
| 706 * printed thin music staff lines due to aliasing. */ | |
| 707 pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0); | |
| 708 pixOr(pix2, pix2, pix3); | |
| 709 pixDestroy(&pix3); | |
| 710 | |
| 711 /* Rescale the fg and paste into the input-sized image */ | |
| 712 pix3 = pixRescaleForCropping(pix2, w, h, lr_border, tb_border, | |
| 713 maxwiden, NULL); | |
| 714 pixDestroy(&pix2); | |
| 715 if (pixa1) { | |
| 716 pix2 = pixCopy(NULL, pix3); | |
| 717 pixaAddPix(pixa1, pix2, L_INSERT); | |
| 718 } | |
| 719 | |
| 720 /* Optionally widen image if possible, for printing on 8.5 x 11 inch | |
| 721 * or A4 paper. Specifically, widen the image if the h/w asperity | |
| 722 * ratio of the input image exceeds that of the selected paper by | |
| 723 * more than 3%. Do not widen by more than 20%. */ | |
| 724 r1 = (l_float32)h / (l_float32)w; | |
| 725 r2 = 0.0; /* for default case */ | |
| 726 if (printwiden == 1) /* standard */ | |
| 727 r2 = r1 / 1.294; | |
| 728 else if (printwiden == 2) /* A4 */ | |
| 729 r2 = r1 / 1.414; | |
| 730 if (r2 > 1.03) { | |
| 731 r2 = L_MIN(r2, 1.20); | |
| 732 lept_stderr("oversize h/w ratio by factor %6.3f\n", r2); | |
| 733 pix4 = pixScale(pix3, r2, 1.0); | |
| 734 } else { | |
| 735 pix4 = pixClone(pix3); | |
| 736 } | |
| 737 pixDestroy(&pix3); | |
| 738 | |
| 739 if (pcropbox) | |
| 740 *pcropbox = box2; | |
| 741 else | |
| 742 boxDestroy(&box2); | |
| 743 if (pixa1) { | |
| 744 pixaAddPix(pixa1, pix4, L_COPY); | |
| 745 lept_stderr("Writing debug file: %s\n", debugfile); | |
| 746 pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile); | |
| 747 pixaDestroy(&pixa1); | |
| 748 } | |
| 749 return pix4; | |
| 750 } | |
| 751 | |
| 752 | |
| 753 /*! | |
| 754 * \brief pixMaxCompAfterVClosing() | |
| 755 * | |
| 756 * \param[in] pixs 1 bpp (input at 2x reduction) | |
| 757 * \param[out] **pbox main region at input resolution (2x reduction) | |
| 758 * \return 0 if OK, 1 on error | |
| 759 * | |
| 760 * <pre> | |
| 761 * Notes: | |
| 762 * (1) This removes foreground noise along left and right edges, | |
| 763 * returning a bounding box for the remaining foreground pixels | |
| 764 * at the input resolution. | |
| 765 * (2) The input %pixs should be at a resolution 100 - 150 ppi. | |
| 766 * (3) It does two 2x level1 rank binary reductions, followed | |
| 767 * by a large vertical close/open, with a very small horizontal | |
| 768 * close/oopen, and then a 4x expansion back to the input resolution. | |
| 769 * (4) To work properly with 2-column layout, if the largest and | |
| 770 * second-largest regions are comparable in size, both are included. | |
| 771 * (5) This is used as an option to pixCropImage(), when given | |
| 772 * an %edgecrop parameter of -1. | |
| 773 * </pre> | |
| 774 */ | |
| 775 static l_ok | |
| 776 pixMaxCompAfterVClosing(PIX *pixs, | |
| 777 BOX **pbox) | |
| 778 { | |
| 779 l_int32 w1, h1, w2, h2, n, empty; | |
| 780 BOX *box1, *box2; | |
| 781 BOXA *boxa1, *boxa2; | |
| 782 PIX *pix1; | |
| 783 | |
| 784 if (!pbox) | |
| 785 return ERROR_INT("pbox not defined", __func__, 1); | |
| 786 *pbox = NULL; | |
| 787 if (!pixs || pixGetDepth(pixs) != 1) | |
| 788 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); | |
| 789 | |
| 790 /* Strong vertical closing */ | |
| 791 pix1 = pixMorphSequence(pixs, "r11 + c3.80 + o3.80 + x4", 0); | |
| 792 pixZero(pix1, &empty); | |
| 793 if (empty) { | |
| 794 pixDestroy(&pix1); | |
| 795 return ERROR_INT("pix1 is empty", __func__, 1); | |
| 796 } | |
| 797 | |
| 798 /* Find the two c.c. with largest area. If they are not comparable | |
| 799 * in area, return the bounding box of the largest; otherwise, | |
| 800 * return the bounding box of both regions. */ | |
| 801 boxa1 = pixConnCompBB(pix1, 8); | |
| 802 pixDestroy(&pix1); | |
| 803 boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); | |
| 804 if ((n = boxaGetCount(boxa2)) == 1) { | |
| 805 *pbox = boxaGetBox(boxa2, 0, L_COPY); | |
| 806 } else { /* 2 or more */ | |
| 807 box1 = boxaGetBox(boxa2, 0, L_COPY); | |
| 808 box2 = boxaGetBox(boxa2, 1, L_COPY); | |
| 809 boxGetGeometry(box1, NULL, NULL, &w1, &h1); | |
| 810 boxGetGeometry(box2, NULL, NULL, &w2, &h2); | |
| 811 if (((l_float32)(w2 * h2) / (l_float32)(w1 * h1)) > 0.7) { | |
| 812 *pbox = boxBoundingRegion(box1, box2); | |
| 813 boxDestroy(&box1); | |
| 814 } else { | |
| 815 *pbox = box1; | |
| 816 } | |
| 817 boxDestroy(&box2); | |
| 818 } | |
| 819 boxaDestroy(&boxa1); | |
| 820 boxaDestroy(&boxa2); | |
| 821 return 0; | |
| 822 } | |
| 823 | |
| 824 | |
| 825 /*! | |
| 826 * \brief pixFindPageInsideBlackBorder() | |
| 827 * | |
| 828 * \param[in] pixs 1 bpp (input at 2x reduction) | |
| 829 * \param[out] **pbox page region at input resolution (2x reduction) | |
| 830 * \return 0 if OK, 1 on error | |
| 831 * | |
| 832 * <pre> | |
| 833 * Notes: | |
| 834 * (1) This extracts the page region from the image. It is designed | |
| 835 * to work when the page is within a fairly solid black border. | |
| 836 * (2) It returns a bounding box for the page region at the input res. | |
| 837 * (3) The input %pixs is expected to be at a resolution 100 - 150 ppi. | |
| 838 * (4) This is used as an option to pixCropImage(), when given an | |
| 839 * %edgecrop parameter of -2. | |
| 840 * </pre> | |
| 841 */ | |
| 842 static l_ok | |
| 843 pixFindPageInsideBlackBorder(PIX *pixs, | |
| 844 BOX **pbox) | |
| 845 { | |
| 846 l_int32 empty; | |
| 847 BOX *box1; | |
| 848 BOXA *boxa1, *boxa2; | |
| 849 PIX *pix1, *pix2; | |
| 850 | |
| 851 if (!pbox) | |
| 852 return ERROR_INT("pbox not defined", __func__, 1); | |
| 853 *pbox = NULL; | |
| 854 if (!pixs || pixGetDepth(pixs) != 1) | |
| 855 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1); | |
| 856 | |
| 857 /* Reduce 4x and remove some remaining small foreground */ | |
| 858 pix1 = pixMorphSequence(pixs, "r22 + c5.5 + o7.7", 0); | |
| 859 pixZero(pix1, &empty); | |
| 860 if (empty) { | |
| 861 pixDestroy(&pix1); | |
| 862 return ERROR_INT("pix1 is empty", __func__, 1); | |
| 863 } | |
| 864 | |
| 865 /* Photoinvert image and Find the c.c. with largest area. */ | |
| 866 pixInvert(pix1, pix1); | |
| 867 pix2 = pixMorphSequence(pix1, "c11.11 + o11.11", 0); | |
| 868 pixDestroy(&pix1); | |
| 869 boxa1 = pixConnCompBB(pix2, 8); | |
| 870 pixDestroy(&pix2); | |
| 871 boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); | |
| 872 box1 = boxaGetBox(boxa2, 0, L_COPY); /* largest by area */ | |
| 873 boxAdjustSides(box1, box1, 5, -5, 5, -5); | |
| 874 *pbox = boxTransform(box1, 0, 0, 4.0, 4.0); | |
| 875 boxaDestroy(&boxa1); | |
| 876 boxaDestroy(&boxa2); | |
| 877 boxDestroy(&box1); | |
| 878 return 0; | |
| 879 } | |
| 880 | |
| 881 | |
| 882 /*! | |
| 883 * \brief pixRescaleForCropping() | |
| 884 * | |
| 885 * \param[in] pixs 1 bpp | |
| 886 * \param[in] w width of output lmage | |
| 887 * \param[in] h height of output lmage | |
| 888 * \param[in] lr_border cleared final border pixels on left and right | |
| 889 * \param[in] tb_border cleared final border pixels on top and bottom | |
| 890 * \param[in] maxwiden max fractional horizontal stretch allowed; >= 1.0 | |
| 891 * \param[out] *ppixsc [optional] rescaled foreground region | |
| 892 * \return pixd output image, or NULL on error | |
| 893 * | |
| 894 * <pre> | |
| 895 * Notes: | |
| 896 * (1) This rescales %pixs to fit maximally within an image of | |
| 897 * size (w x h), under two conditions: | |
| 898 * (a) the final image has cleared border regions given by the | |
| 899 * input parameters %lr_border and %tb_border, and | |
| 900 * (b) the input image is first isotropically scaled to fit | |
| 901 * maximally within the allowed final region, and then further | |
| 902 * maxiximally widened, subject to the constraints of the | |
| 903 * cleared border and the %maxwiden parameter. | |
| 904 * (2) The cleared border pixel parameters must be >= 0. | |
| 905 * (3) If there is extra horizontal stretching by a factor | |
| 906 * %maxwiden larger than about 1.15, the appearance may be | |
| 907 * unpleasingly distorted; hence the suggestion not to exceed it. | |
| 908 * </pre> | |
| 909 */ | |
| 910 static PIX * | |
| 911 pixRescaleForCropping(PIX *pixs, | |
| 912 l_int32 w, | |
| 913 l_int32 h, | |
| 914 l_int32 lr_border, | |
| 915 l_int32 tb_border, | |
| 916 l_float32 maxwiden, | |
| 917 PIX **ppixsc) | |
| 918 { | |
| 919 static l_int32 first_time = TRUE; | |
| 920 l_int32 wi, hi, wmax, hmax, wn, wf, hf, xf; | |
| 921 l_float32 ratio, scaleh, scalew, scalewid; | |
| 922 PIX *pix1, *pixd; | |
| 923 | |
| 924 if (ppixsc) *ppixsc = NULL; | |
| 925 if (!pixs || pixGetDepth(pixs) != 1) | |
| 926 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 927 if (lr_border < 0) lr_border = 0; | |
| 928 if (tb_border < 0) tb_border = 0; | |
| 929 maxwiden = L_MAX(1.0, maxwiden); | |
| 930 if (maxwiden > 1.15) | |
| 931 L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n", | |
| 932 __func__, maxwiden); | |
| 933 | |
| 934 /* Rescale the foreground region. | |
| 935 * First, decide if scaling is to full width or full height. | |
| 936 * If scaling to full height, determine how much additional | |
| 937 * width widening is possible, given the maxwiden constraint. | |
| 938 * If scaling to full width, both width and height are | |
| 939 * scaled isotropically. Scaling is done so that the resulting | |
| 940 * foreground is maximally widened, so it can be horizontally | |
| 941 * centered in an image of size (w x h), less %lr_border | |
| 942 * on each side. */ | |
| 943 pixGetDimensions(pixs, &wi, &hi, NULL); | |
| 944 wmax = w - 2 * lr_border; | |
| 945 hmax = h - 2 * tb_border; | |
| 946 ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi); | |
| 947 if (ratio >= 1) { /* width can be widened after isotropic scaling */ | |
| 948 scaleh = (l_float32)hmax / (l_float32)hi; | |
| 949 wn = scaleh * wi; /* scaled but not widened */ | |
| 950 scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn); | |
| 951 scalew = scaleh * scalewid; | |
| 952 wf = scalew * wi; | |
| 953 hf = hmax; /* scale to full height */ | |
| 954 pix1 = pixScale(pixs, scalew, scaleh); | |
| 955 if (first_time == TRUE) { | |
| 956 lept_stderr("Width stretched by factor %5.3f\n", scalewid); | |
| 957 first_time = FALSE; | |
| 958 } | |
| 959 xf = (w - wf) / 2.0; | |
| 960 } else { /* width cannot be widened after isotropic scaling */ | |
| 961 scalew = (l_float32)wmax / (l_float32)wi; | |
| 962 pix1 = pixScale(pixs, scalew, scalew); | |
| 963 wf = wmax; /* scale to full width */ | |
| 964 hf = scalew * hi; /* no extra vertical stretching allowed */ | |
| 965 xf = lr_border; | |
| 966 } | |
| 967 | |
| 968 /* Paste it, horizontally centered and vertically placed as | |
| 969 * high as allowed (by %tb_border) into the final page image. */ | |
| 970 pixd = pixCreate(w, h, 1); | |
| 971 pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0); | |
| 972 | |
| 973 if (ppixsc) | |
| 974 *ppixsc = pix1; | |
| 975 else | |
| 976 pixDestroy(&pix1); | |
| 977 return pixd; | |
| 978 } | |
| 979 | |
| 980 | |
| 981 /*! | |
| 982 * \brief pixCleanImage() | |
| 983 * | |
| 984 * \param[in] pixs full resolution (any type or depth) | |
| 985 * \param[in] contrast vary contrast: 1 = lightest; 10 = darkest; | |
| 986 * suggest 1 unless light features are being lost | |
| 987 * \param[in] rotation cw by 90 degrees: {0,1,2,3} represent | |
| 988 * 0, 90, 180 and 270 degree cw rotations | |
| 989 * \param[in] scale 1 (no scaling) or 2 (2x upscaling) | |
| 990 * \param[in] opensize opening size of structuring element for noise | |
| 991 * removal: {0 or 1 to skip; 2, 3 for opening} | |
| 992 * \return cleaned pix, or NULL on error | |
| 993 * | |
| 994 * <pre> | |
| 995 * Notes: | |
| 996 * (1) This deskews, optionally rotates and darkens, cleans background | |
| 997 * to white, binarizes and optionally removes small noise. | |
| 998 * (2) For color and grayscale input, local background normalization is | |
| 999 * done to 200, and a threshold of 180 sets the maximum foreground | |
| 1000 * value in the normalized image. | |
| 1001 * (3) The %contrast parameter adjusts the binarization to avoid losing | |
| 1002 * lighter input pixels. Contrast is increased as %contrast increases | |
| 1003 * from 1 to 10. | |
| 1004 * (4) The %scale parameter controls the thresholding to 1 bpp. Two values: | |
| 1005 * 1 = threshold | |
| 1006 * 2 = linear interpolated 2x upscaling before threshold. | |
| 1007 * (5) The #opensize parameter is the size of a square SEL used with | |
| 1008 * opening to remove small speckle noise. Allowed open sizes are 2,3. | |
| 1009 * If this is to be used, try 2 before 3. | |
| 1010 * (6) This does the image processing for cleanTo1bppFilesToPdf() and | |
| 1011 * prog/cleanpdf.c. | |
| 1012 * </pre> | |
| 1013 */ | |
| 1014 PIX * | |
| 1015 pixCleanImage(PIX *pixs, | |
| 1016 l_int32 contrast, | |
| 1017 l_int32 rotation, | |
| 1018 l_int32 scale, | |
| 1019 l_int32 opensize) | |
| 1020 { | |
| 1021 char sequence[32]; | |
| 1022 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 1023 | |
| 1024 if (!pixs) | |
| 1025 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1026 if (rotation < 0 || rotation > 3) { | |
| 1027 L_ERROR("invalid rotation = %d; rotation must be in {0,1,2,3}\n", | |
| 1028 __func__, rotation); | |
| 1029 return NULL; | |
| 1030 } | |
| 1031 if (contrast < 1 || contrast > 10) { | |
| 1032 L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n", | |
| 1033 __func__, contrast); | |
| 1034 return NULL; | |
| 1035 } | |
| 1036 if (scale != 1 && scale != 2) { | |
| 1037 L_ERROR("invalid scale = %d; scale must be 1 or 2\n", | |
| 1038 __func__, opensize); | |
| 1039 return NULL; | |
| 1040 } | |
| 1041 if (opensize > 3) { | |
| 1042 L_ERROR("invalid opensize = %d; opensize must be <= 3\n", | |
| 1043 __func__, opensize); | |
| 1044 return NULL; | |
| 1045 } | |
| 1046 | |
| 1047 if (pixGetDepth(pixs) == 1) { | |
| 1048 if (rotation > 0) | |
| 1049 pix1 = pixRotateOrth(pixs, rotation); | |
| 1050 else | |
| 1051 pix1 = pixClone(pixs); | |
| 1052 pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL); | |
| 1053 if (scale == 2) | |
| 1054 pix4 = pixExpandBinaryReplicate(pix2, 2, 2); | |
| 1055 else /* scale == 1 */ | |
| 1056 pix4 = pixClone(pix2); | |
| 1057 } else { | |
| 1058 pix1 = pixConvertTo8MinMax(pixs); | |
| 1059 if (rotation > 0) | |
| 1060 pix2 = pixRotateOrth(pix1, rotation); | |
| 1061 else | |
| 1062 pix2 = pixClone(pix1); | |
| 1063 pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL); | |
| 1064 pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale); | |
| 1065 pixDestroy(&pix3); | |
| 1066 } | |
| 1067 | |
| 1068 if (opensize == 2 || opensize == 3) { | |
| 1069 snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize); | |
| 1070 pix5 = pixMorphSequence(pix4, sequence, 0); | |
| 1071 } else { | |
| 1072 pix5 = pixClone(pix4); | |
| 1073 } | |
| 1074 | |
| 1075 pixDestroy(&pix1); | |
| 1076 pixDestroy(&pix2); | |
| 1077 pixDestroy(&pix4); | |
| 1078 return pix5; | |
| 1079 } | |
| 1080 | |
| 1081 | |
| 1082 /*! | |
| 1083 * \brief pixFindPageForeground() | |
| 1084 * | |
| 1085 * \param[in] pixs full resolution (any type or depth) | |
| 1086 * \param[in] threshold for binarization; typically about 128 | |
| 1087 * \param[in] mindist min distance of text from border to allow | |
| 1088 * cleaning near border; at 2x reduction, this | |
| 1089 * should be larger than 50; typically about 70 | |
| 1090 * \param[in] erasedist when conditions are satisfied, erase anything | |
| 1091 * within this distance of the edge; | |
| 1092 * typically 20-30 at 2x reduction | |
| 1093 * \param[in] showmorph debug: set to a negative integer to show steps | |
| 1094 * in generating masks; this is typically used | |
| 1095 * for debugging region extraction | |
| 1096 * \param[in] pixac debug: allocate outside and pass this in to | |
| 1097 * accumulate results of each call to this function, | |
| 1098 * which can be displayed in a mosaic or a pdf. | |
| 1099 * \return box region including foreground, with some pixel noise | |
| 1100 * removed, or NULL if not found | |
| 1101 * | |
| 1102 * <pre> | |
| 1103 * Notes: | |
| 1104 * (1) This doesn't simply crop to the fg. It attempts to remove | |
| 1105 * pixel noise and junk at the edge of the image before cropping. | |
| 1106 * The input %threshold is used if pixs is not 1 bpp. | |
| 1107 * (2) This is not intended to work on small thumbnails. The | |
| 1108 * dimensions of pixs must be at least MinWidth x MinHeight. | |
| 1109 * (3) Debug: set showmorph to display the intermediate image in | |
| 1110 * the morphological operations on this page. | |
| 1111 * (4) Debug: to get pdf output of results when called repeatedly, | |
| 1112 * call with an existing pixac, which will add an image of this page, | |
| 1113 * with the fg outlined. If no foreground is found, there is | |
| 1114 * no output for this page image. | |
| 1115 * </pre> | |
| 1116 */ | |
| 1117 BOX * | |
| 1118 pixFindPageForeground(PIX *pixs, | |
| 1119 l_int32 threshold, | |
| 1120 l_int32 mindist, | |
| 1121 l_int32 erasedist, | |
| 1122 l_int32 showmorph, | |
| 1123 PIXAC *pixac) | |
| 1124 { | |
| 1125 l_int32 flag, nbox, intersects; | |
| 1126 l_int32 w, h, bx, by, bw, bh, left, right, top, bottom; | |
| 1127 PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2; | |
| 1128 BOX *box, *boxfg, *boxin, *boxd; | |
| 1129 BOXA *ba1, *ba2; | |
| 1130 | |
| 1131 if (!pixs) | |
| 1132 return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1133 pixGetDimensions(pixs, &w, &h, NULL); | |
| 1134 if (w < MinWidth || h < MinHeight) { | |
| 1135 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h); | |
| 1136 return NULL; | |
| 1137 } | |
| 1138 | |
| 1139 /* Binarize, downscale by 0.5, remove the noise to generate a seed, | |
| 1140 * and do a seedfill back from the seed into those 8-connected | |
| 1141 * components of the binarized image for which there was at least | |
| 1142 * one seed pixel. */ | |
| 1143 flag = (showmorph) ? 100 : 0; | |
| 1144 pixb = pixConvertTo1(pixs, threshold); | |
| 1145 pixb2 = pixScale(pixb, 0.5, 0.5); | |
| 1146 pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag); | |
| 1147 pix1 = pixMorphSequence(pixb2, "o50.1", 0); | |
| 1148 pixOr(pixseed, pixseed, pix1); | |
| 1149 pixDestroy(&pix1); | |
| 1150 pix1 = pixMorphSequence(pixb2, "o1.50", 0); | |
| 1151 pixOr(pixseed, pixseed, pix1); | |
| 1152 pixDestroy(&pix1); | |
| 1153 pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8); | |
| 1154 pixm = pixRemoveBorderConnComps(pixsf, 8); | |
| 1155 | |
| 1156 /* Now, where is the main block of text? We want to remove noise near | |
| 1157 * the edge of the image, but to do that, we have to be convinced that | |
| 1158 * (1) there is noise and (2) it is far enough from the text block | |
| 1159 * and close enough to the edge. For each edge, if the block | |
| 1160 * is more than mindist from that edge, then clean 'erasedist' | |
| 1161 * pixels from the edge. */ | |
| 1162 pix1 = pixMorphSequence(pixm, "c50.50", flag); | |
| 1163 ba1 = pixConnComp(pix1, NULL, 8); | |
| 1164 ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); | |
| 1165 pixGetDimensions(pix1, &w, &h, NULL); | |
| 1166 nbox = boxaGetCount(ba2); | |
| 1167 if (nbox > 1) { | |
| 1168 box = boxaGetBox(ba2, 0, L_CLONE); | |
| 1169 boxGetGeometry(box, &bx, &by, &bw, &bh); | |
| 1170 left = (bx > mindist) ? erasedist : 0; | |
| 1171 right = (w - bx - bw > mindist) ? erasedist : 0; | |
| 1172 top = (by > mindist) ? erasedist : 0; | |
| 1173 bottom = (h - by - bh > mindist) ? erasedist : 0; | |
| 1174 pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR); | |
| 1175 boxDestroy(&box); | |
| 1176 } | |
| 1177 pixDestroy(&pix1); | |
| 1178 boxaDestroy(&ba1); | |
| 1179 boxaDestroy(&ba2); | |
| 1180 | |
| 1181 /* Locate the foreground region; don't bother cropping */ | |
| 1182 pixClipToForeground(pixm, NULL, &boxfg); | |
| 1183 | |
| 1184 /* Sanity check the fg region. Make sure it's not confined | |
| 1185 * to a thin boundary on the left and right sides of the image, | |
| 1186 * in which case it is likely to be noise. */ | |
| 1187 if (boxfg) { | |
| 1188 boxin = boxCreate(0.1 * w, 0, 0.8 * w, h); | |
| 1189 boxIntersects(boxfg, boxin, &intersects); | |
| 1190 boxDestroy(&boxin); | |
| 1191 if (!intersects) boxDestroy(&boxfg); | |
| 1192 } | |
| 1193 | |
| 1194 boxd = NULL; | |
| 1195 if (boxfg) { | |
| 1196 boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */ | |
| 1197 boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0); | |
| 1198 | |
| 1199 /* Save the debug image showing the box for this page */ | |
| 1200 if (pixac) { | |
| 1201 pixg2 = pixConvert1To4Cmap(pixb); | |
| 1202 pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0); | |
| 1203 pixacompAddPix(pixac, pixg2, IFF_DEFAULT); | |
| 1204 pixDestroy(&pixg2); | |
| 1205 } | |
| 1206 } | |
| 1207 | |
| 1208 pixDestroy(&pixb); | |
| 1209 pixDestroy(&pixb2); | |
| 1210 pixDestroy(&pixseed); | |
| 1211 pixDestroy(&pixsf); | |
| 1212 pixDestroy(&pixm); | |
| 1213 boxDestroy(&boxfg); | |
| 1214 return boxd; | |
| 1215 } | |
| 1216 | |
| 1217 | |
| 1218 /*------------------------------------------------------------------* | |
| 1219 * Extraction of characters from image with only text * | |
| 1220 *------------------------------------------------------------------*/ | |
| 1221 /*! | |
| 1222 * \brief pixSplitIntoCharacters() | |
| 1223 * | |
| 1224 * \param[in] pixs 1 bpp, contains only deskewed text | |
| 1225 * \param[in] minw min component width for initial filtering; typ. 4 | |
| 1226 * \param[in] minh min component height for initial filtering; typ. 4 | |
| 1227 * \param[out] pboxa [optional] character bounding boxes | |
| 1228 * \param[out] ppixa [optional] character images | |
| 1229 * \param[out] ppixdebug [optional] showing splittings | |
| 1230 * | |
| 1231 * \return 0 if OK, 1 on error | |
| 1232 * | |
| 1233 * <pre> | |
| 1234 * Notes: | |
| 1235 * (1) This is a simple function that attempts to find split points | |
| 1236 * based on vertical pixel profiles. | |
| 1237 * (2) It should be given an image that has an arbitrary number | |
| 1238 * of text characters. | |
| 1239 * (3) The returned pixa includes the boxes from which the | |
| 1240 * (possibly split) components are extracted. | |
| 1241 * </pre> | |
| 1242 */ | |
| 1243 l_ok | |
| 1244 pixSplitIntoCharacters(PIX *pixs, | |
| 1245 l_int32 minw, | |
| 1246 l_int32 minh, | |
| 1247 BOXA **pboxa, | |
| 1248 PIXA **ppixa, | |
| 1249 PIX **ppixdebug) | |
| 1250 { | |
| 1251 l_int32 ncomp, i, xoff, yoff; | |
| 1252 BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad; | |
| 1253 BOXAA *baa; | |
| 1254 PIX *pix, *pix1, *pix2, *pixdb; | |
| 1255 PIXA *pixa1, *pixadb; | |
| 1256 | |
| 1257 if (pboxa) *pboxa = NULL; | |
| 1258 if (ppixa) *ppixa = NULL; | |
| 1259 if (ppixdebug) *ppixdebug = NULL; | |
| 1260 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1261 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 1262 | |
| 1263 /* Remove the small stuff */ | |
| 1264 pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH, | |
| 1265 L_SELECT_IF_GT, NULL); | |
| 1266 | |
| 1267 /* Small vertical close for consolidation */ | |
| 1268 pix2 = pixMorphSequence(pix1, "c1.10", 0); | |
| 1269 pixDestroy(&pix1); | |
| 1270 | |
| 1271 /* Get the 8-connected components */ | |
| 1272 boxa1 = pixConnComp(pix2, &pixa1, 8); | |
| 1273 pixDestroy(&pix2); | |
| 1274 boxaDestroy(&boxa1); | |
| 1275 | |
| 1276 /* Split the components if obvious */ | |
| 1277 ncomp = pixaGetCount(pixa1); | |
| 1278 boxa2 = boxaCreate(ncomp); | |
| 1279 pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL; | |
| 1280 for (i = 0; i < ncomp; i++) { | |
| 1281 pix = pixaGetPix(pixa1, i, L_CLONE); | |
| 1282 if (ppixdebug) { | |
| 1283 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb); | |
| 1284 if (pixdb) | |
| 1285 pixaAddPix(pixadb, pixdb, L_INSERT); | |
| 1286 } else { | |
| 1287 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL); | |
| 1288 } | |
| 1289 pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL); | |
| 1290 boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0); | |
| 1291 boxaJoin(boxa2, boxat2, 0, -1); | |
| 1292 pixDestroy(&pix); | |
| 1293 boxaDestroy(&boxat1); | |
| 1294 boxaDestroy(&boxat2); | |
| 1295 } | |
| 1296 pixaDestroy(&pixa1); | |
| 1297 | |
| 1298 /* Generate the debug image */ | |
| 1299 if (ppixdebug) { | |
| 1300 if (pixaGetCount(pixadb) > 0) { | |
| 1301 *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500, | |
| 1302 1.0, 0, 20, 1); | |
| 1303 } | |
| 1304 pixaDestroy(&pixadb); | |
| 1305 } | |
| 1306 | |
| 1307 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */ | |
| 1308 baa = boxaSort2d(boxa2, NULL, 0, 0, 5); | |
| 1309 boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE); | |
| 1310 boxaaDestroy(&baa); | |
| 1311 boxaDestroy(&boxa2); | |
| 1312 | |
| 1313 /* Optionally extract the pieces from the input image */ | |
| 1314 if (ppixa) | |
| 1315 *ppixa = pixClipRectangles(pixs, boxad); | |
| 1316 if (pboxa) | |
| 1317 *pboxa = boxad; | |
| 1318 else | |
| 1319 boxaDestroy(&boxad); | |
| 1320 return 0; | |
| 1321 } | |
| 1322 | |
| 1323 | |
| 1324 /*! | |
| 1325 * \brief pixSplitComponentWithProfile() | |
| 1326 * | |
| 1327 * \param[in] pixs 1 bpp, exactly one connected component | |
| 1328 * \param[in] delta distance used in extrema finding in a numa; typ. 10 | |
| 1329 * \param[in] mindel minimum required difference between profile | |
| 1330 * minimum and profile values +2 and -2 away; typ. 7 | |
| 1331 * \param[out] ppixdebug [optional] debug image of splitting | |
| 1332 * \return boxa of c.c. after splitting, or NULL on error | |
| 1333 * | |
| 1334 * <pre> | |
| 1335 * Notes: | |
| 1336 * (1) This will split the most obvious cases of touching characters. | |
| 1337 * The split points it is searching for are narrow and deep | |
| 1338 * minimima in the vertical pixel projection profile, after a | |
| 1339 * large vertical closing has been applied to the component. | |
| 1340 * </pre> | |
| 1341 */ | |
| 1342 BOXA * | |
| 1343 pixSplitComponentWithProfile(PIX *pixs, | |
| 1344 l_int32 delta, | |
| 1345 l_int32 mindel, | |
| 1346 PIX **ppixdebug) | |
| 1347 { | |
| 1348 l_int32 w, h, n2, i, firstmin, xmin, xshift; | |
| 1349 l_int32 nmin, nleft, nright, nsplit, isplit, ncomp; | |
| 1350 l_int32 *array1, *array2; | |
| 1351 BOX *box; | |
| 1352 BOXA *boxad; | |
| 1353 NUMA *na1, *na2, *nasplit; | |
| 1354 PIX *pix1, *pixdb; | |
| 1355 | |
| 1356 if (ppixdebug) *ppixdebug = NULL; | |
| 1357 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1358 return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL); | |
| 1359 pixGetDimensions(pixs, &w, &h, NULL); | |
| 1360 | |
| 1361 /* Closing to consolidate characters vertically */ | |
| 1362 pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100); | |
| 1363 | |
| 1364 /* Get extrema of column projections */ | |
| 1365 boxad = boxaCreate(2); | |
| 1366 na1 = pixCountPixelsByColumn(pix1); /* w elements */ | |
| 1367 pixDestroy(&pix1); | |
| 1368 na2 = numaFindExtrema(na1, delta, NULL); | |
| 1369 n2 = numaGetCount(na2); | |
| 1370 if (n2 < 3) { /* no split possible */ | |
| 1371 box = boxCreate(0, 0, w, h); | |
| 1372 boxaAddBox(boxad, box, L_INSERT); | |
| 1373 numaDestroy(&na1); | |
| 1374 numaDestroy(&na2); | |
| 1375 return boxad; | |
| 1376 } | |
| 1377 | |
| 1378 /* Look for sufficiently deep and narrow minima. | |
| 1379 * All minima of of interest must be surrounded by max on each | |
| 1380 * side. firstmin is the index of first possible minimum. */ | |
| 1381 array1 = numaGetIArray(na1); | |
| 1382 array2 = numaGetIArray(na2); | |
| 1383 if (ppixdebug) numaWriteStderr(na2); | |
| 1384 firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2; | |
| 1385 nasplit = numaCreate(n2); /* will hold split locations */ | |
| 1386 for (i = firstmin; i < n2 - 1; i+= 2) { | |
| 1387 xmin = array2[i]; | |
| 1388 nmin = array1[xmin]; | |
| 1389 if (xmin + 2 >= w) break; /* no more splits possible */ | |
| 1390 nleft = array1[xmin - 2]; | |
| 1391 nright = array1[xmin + 2]; | |
| 1392 if (ppixdebug) { | |
| 1393 lept_stderr( | |
| 1394 "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n", | |
| 1395 xmin, w, nleft, nmin, nright); | |
| 1396 } | |
| 1397 if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */ | |
| 1398 numaAddNumber(nasplit, xmin); | |
| 1399 } | |
| 1400 nsplit = numaGetCount(nasplit); | |
| 1401 | |
| 1402 #if 0 | |
| 1403 if (ppixdebug && nsplit > 0) { | |
| 1404 lept_mkdir("lept/split"); | |
| 1405 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL); | |
| 1406 } | |
| 1407 #endif | |
| 1408 | |
| 1409 numaDestroy(&na1); | |
| 1410 numaDestroy(&na2); | |
| 1411 LEPT_FREE(array1); | |
| 1412 LEPT_FREE(array2); | |
| 1413 | |
| 1414 if (nsplit == 0) { /* no splitting */ | |
| 1415 numaDestroy(&nasplit); | |
| 1416 box = boxCreate(0, 0, w, h); | |
| 1417 boxaAddBox(boxad, box, L_INSERT); | |
| 1418 return boxad; | |
| 1419 } | |
| 1420 | |
| 1421 /* Use split points to generate b.b. after splitting */ | |
| 1422 for (i = 0, xshift = 0; i < nsplit; i++) { | |
| 1423 numaGetIValue(nasplit, i, &isplit); | |
| 1424 box = boxCreate(xshift, 0, isplit - xshift, h); | |
| 1425 boxaAddBox(boxad, box, L_INSERT); | |
| 1426 xshift = isplit + 1; | |
| 1427 } | |
| 1428 box = boxCreate(xshift, 0, w - xshift, h); | |
| 1429 boxaAddBox(boxad, box, L_INSERT); | |
| 1430 numaDestroy(&nasplit); | |
| 1431 | |
| 1432 if (ppixdebug) { | |
| 1433 pixdb = pixConvertTo32(pixs); | |
| 1434 ncomp = boxaGetCount(boxad); | |
| 1435 for (i = 0; i < ncomp; i++) { | |
| 1436 box = boxaGetBox(boxad, i, L_CLONE); | |
| 1437 pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5); | |
| 1438 boxDestroy(&box); | |
| 1439 } | |
| 1440 *ppixdebug = pixdb; | |
| 1441 } | |
| 1442 | |
| 1443 return boxad; | |
| 1444 } | |
| 1445 | |
| 1446 | |
| 1447 /*------------------------------------------------------------------* | |
| 1448 * Extraction of lines of text * | |
| 1449 *------------------------------------------------------------------*/ | |
| 1450 /*! | |
| 1451 * \brief pixExtractTextlines() | |
| 1452 * | |
| 1453 * \param[in] pixs any depth, assumed to have nearly horizontal text | |
| 1454 * \param[in] maxw, maxh initial filtering: remove any components in pixs | |
| 1455 * with components larger than maxw or maxh | |
| 1456 * \param[in] minw, minh final filtering: remove extracted 'lines' | |
| 1457 * with sizes smaller than minw or minh; use | |
| 1458 * 0 for default. | |
| 1459 * \param[in] adjw, adjh final adjustment of boxes representing each | |
| 1460 * text line. If > 0, these increase the box | |
| 1461 * size at each edge by this amount. | |
| 1462 * \param[in] pixadb pixa for saving intermediate steps; NULL to omit | |
| 1463 * \return pixa of textline images, including bounding boxes, or | |
| 1464 * NULL on error | |
| 1465 * | |
| 1466 * <pre> | |
| 1467 * Notes: | |
| 1468 * (1) This function assumes that textline fragments have sufficient | |
| 1469 * vertical separation and small enough skew so that a | |
| 1470 * horizontal dilation sufficient to join words will not join | |
| 1471 * textlines. It does not guarantee that horizontally adjacent | |
| 1472 * textline fragments on the same line will be joined. | |
| 1473 * (2) For images with multiple columns, it attempts to avoid joining | |
| 1474 * textlines across the space between columns. If that is not | |
| 1475 * a concern, you can also use pixExtractRawTextlines(), | |
| 1476 * which will join them with alacrity. | |
| 1477 * (3) This first removes components from pixs that are either | |
| 1478 * wide (> %maxw) or tall (> %maxh). | |
| 1479 * (4) A final filtering operation removes small components, such | |
| 1480 * that width < %minw or height < %minh. | |
| 1481 * (5) For reasonable accuracy, the resolution of pixs should be | |
| 1482 * at least 100 ppi. For reasonable efficiency, the resolution | |
| 1483 * should not exceed 600 ppi. | |
| 1484 * (6) This can be used to determine if some region of a scanned | |
| 1485 * image is horizontal text. | |
| 1486 * (7) As an example, for a pix with resolution 300 ppi, a reasonable | |
| 1487 * set of parameters is: | |
| 1488 * pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL); | |
| 1489 * The defaults minw and minh for 300 ppi are about 36 and 20, | |
| 1490 * so the same result is obtained with: | |
| 1491 * pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL); | |
| 1492 * (8) The output pixa is composed of subimages, one for each textline, | |
| 1493 * and the boxa in the pixa tells where in %pixs each textline goes. | |
| 1494 * </pre> | |
| 1495 */ | |
| 1496 PIXA * | |
| 1497 pixExtractTextlines(PIX *pixs, | |
| 1498 l_int32 maxw, | |
| 1499 l_int32 maxh, | |
| 1500 l_int32 minw, | |
| 1501 l_int32 minh, | |
| 1502 l_int32 adjw, | |
| 1503 l_int32 adjh, | |
| 1504 PIXA *pixadb) | |
| 1505 { | |
| 1506 char buf[64]; | |
| 1507 l_int32 res, csize, empty; | |
| 1508 BOXA *boxa1, *boxa2, *boxa3; | |
| 1509 PIX *pix1, *pix2, *pix3; | |
| 1510 PIXA *pixa1, *pixa2, *pixa3; | |
| 1511 | |
| 1512 if (!pixs) | |
| 1513 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1514 | |
| 1515 /* Binarize carefully, if necessary */ | |
| 1516 if (pixGetDepth(pixs) > 1) { | |
| 1517 pix2 = pixConvertTo8(pixs, FALSE); | |
| 1518 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); | |
| 1519 pix1 = pixThresholdToBinary(pix3, 150); | |
| 1520 pixDestroy(&pix2); | |
| 1521 pixDestroy(&pix3); | |
| 1522 } else { | |
| 1523 pix1 = pixClone(pixs); | |
| 1524 } | |
| 1525 pixZero(pix1, &empty); | |
| 1526 if (empty) { | |
| 1527 pixDestroy(&pix1); | |
| 1528 L_INFO("no fg pixels in input image\n", __func__); | |
| 1529 return NULL; | |
| 1530 } | |
| 1531 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 1532 | |
| 1533 /* Remove any very tall or very wide connected components */ | |
| 1534 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, | |
| 1535 L_SELECT_IF_LT, NULL); | |
| 1536 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 1537 pixDestroy(&pix1); | |
| 1538 | |
| 1539 /* Filter to solidify the text lines within the x-height region. | |
| 1540 * The closing (csize) bridges gaps between words. The opening | |
| 1541 * removes isolated bridges between textlines. */ | |
| 1542 if ((res = pixGetXRes(pixs)) == 0) { | |
| 1543 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); | |
| 1544 res = 300; | |
| 1545 } | |
| 1546 csize = L_MIN(120., 60.0 * res / 300.0); | |
| 1547 snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3); | |
| 1548 pix3 = pixMorphCompSequence(pix2, buf, 0); | |
| 1549 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); | |
| 1550 | |
| 1551 /* Extract the connected components. These should be dilated lines */ | |
| 1552 boxa1 = pixConnComp(pix3, &pixa1, 4); | |
| 1553 if (pixadb) { | |
| 1554 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); | |
| 1555 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 1556 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1557 } | |
| 1558 | |
| 1559 /* Set minw, minh if default is requested */ | |
| 1560 minw = (minw != 0) ? minw : (l_int32)(0.12 * res); | |
| 1561 minh = (minh != 0) ? minh : (l_int32)(0.07 * res); | |
| 1562 | |
| 1563 /* Remove line components that are too small */ | |
| 1564 pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH, | |
| 1565 L_SELECT_IF_GTE, NULL); | |
| 1566 if (pixadb) { | |
| 1567 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); | |
| 1568 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 1569 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1570 pix1 = pixConvertTo32(pix2); | |
| 1571 pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0); | |
| 1572 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1573 } | |
| 1574 | |
| 1575 /* Selectively AND with the version before dilation, and save */ | |
| 1576 boxa2 = pixaGetBoxa(pixa2, L_CLONE); | |
| 1577 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); | |
| 1578 pixa3 = pixClipRectangles(pix2, boxa3); | |
| 1579 if (pixadb) { | |
| 1580 pix1 = pixaDisplayRandomCmap(pixa3, 0, 0); | |
| 1581 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 1582 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1583 } | |
| 1584 | |
| 1585 pixDestroy(&pix2); | |
| 1586 pixDestroy(&pix3); | |
| 1587 pixaDestroy(&pixa1); | |
| 1588 pixaDestroy(&pixa2); | |
| 1589 boxaDestroy(&boxa1); | |
| 1590 boxaDestroy(&boxa2); | |
| 1591 boxaDestroy(&boxa3); | |
| 1592 return pixa3; | |
| 1593 } | |
| 1594 | |
| 1595 | |
| 1596 /*! | |
| 1597 * \brief pixExtractRawTextlines() | |
| 1598 * | |
| 1599 * \param[in] pixs any depth, assumed to have nearly horizontal text | |
| 1600 * \param[in] maxw, maxh initial filtering: remove any components in pixs | |
| 1601 * with components larger than maxw or maxh; | |
| 1602 * use 0 for default values. | |
| 1603 * \param[in] adjw, adjh final adjustment of boxes representing each | |
| 1604 * text line. If > 0, these increase the box | |
| 1605 * size at each edge by this amount. | |
| 1606 * \param[in] pixadb pixa for saving intermediate steps; NULL to omit | |
| 1607 * \return pixa of textline images, including bounding boxes, or | |
| 1608 * NULL on error | |
| 1609 * | |
| 1610 * <pre> | |
| 1611 * Notes: | |
| 1612 * (1) This function assumes that textlines have sufficient | |
| 1613 * vertical separation and small enough skew so that a | |
| 1614 * horizontal dilation sufficient to join words will not join | |
| 1615 * textlines. It aggressively joins textlines across multiple | |
| 1616 * columns, so if that is not desired, you must either (a) make | |
| 1617 * sure that %pixs is a single column of text or (b) use instead | |
| 1618 * pixExtractTextlines(), which is more conservative | |
| 1619 * about joining text fragments that have vertical overlap. | |
| 1620 * (2) This first removes components from pixs that are either | |
| 1621 * very wide (> %maxw) or very tall (> %maxh). | |
| 1622 * (3) For reasonable accuracy, the resolution of pixs should be | |
| 1623 * at least 100 ppi. For reasonable efficiency, the resolution | |
| 1624 * should not exceed 600 ppi. | |
| 1625 * (4) This can be used to determine if some region of a scanned | |
| 1626 * image is horizontal text. | |
| 1627 * (5) As an example, for a pix with resolution 300 ppi, a reasonable | |
| 1628 * set of parameters is: | |
| 1629 * pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL); | |
| 1630 * (6) The output pixa is composed of subimages, one for each textline, | |
| 1631 * and the boxa in the pixa tells where in %pixs each textline goes. | |
| 1632 * </pre> | |
| 1633 */ | |
| 1634 PIXA * | |
| 1635 pixExtractRawTextlines(PIX *pixs, | |
| 1636 l_int32 maxw, | |
| 1637 l_int32 maxh, | |
| 1638 l_int32 adjw, | |
| 1639 l_int32 adjh, | |
| 1640 PIXA *pixadb) | |
| 1641 { | |
| 1642 char buf[64]; | |
| 1643 l_int32 res, csize, empty; | |
| 1644 BOXA *boxa1, *boxa2, *boxa3; | |
| 1645 BOXAA *baa1; | |
| 1646 PIX *pix1, *pix2, *pix3; | |
| 1647 PIXA *pixa1, *pixa2; | |
| 1648 | |
| 1649 if (!pixs) | |
| 1650 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 1651 | |
| 1652 /* Set maxw, maxh if default is requested */ | |
| 1653 if ((res = pixGetXRes(pixs)) == 0) { | |
| 1654 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__); | |
| 1655 res = 300; | |
| 1656 } | |
| 1657 maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res); | |
| 1658 maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res); | |
| 1659 | |
| 1660 /* Binarize carefully, if necessary */ | |
| 1661 if (pixGetDepth(pixs) > 1) { | |
| 1662 pix2 = pixConvertTo8(pixs, FALSE); | |
| 1663 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190); | |
| 1664 pix1 = pixThresholdToBinary(pix3, 150); | |
| 1665 pixDestroy(&pix2); | |
| 1666 pixDestroy(&pix3); | |
| 1667 } else { | |
| 1668 pix1 = pixClone(pixs); | |
| 1669 } | |
| 1670 pixZero(pix1, &empty); | |
| 1671 if (empty) { | |
| 1672 pixDestroy(&pix1); | |
| 1673 L_INFO("no fg pixels in input image\n", __func__); | |
| 1674 return NULL; | |
| 1675 } | |
| 1676 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 1677 | |
| 1678 /* Remove any very tall or very wide connected components */ | |
| 1679 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH, | |
| 1680 L_SELECT_IF_LT, NULL); | |
| 1681 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 1682 pixDestroy(&pix1); | |
| 1683 | |
| 1684 /* Filter to solidify the text lines within the x-height region. | |
| 1685 * The closing (csize) bridges gaps between words. */ | |
| 1686 csize = L_MIN(120., 60.0 * res / 300.0); | |
| 1687 snprintf(buf, sizeof(buf), "c%d.1", csize); | |
| 1688 pix3 = pixMorphCompSequence(pix2, buf, 0); | |
| 1689 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); | |
| 1690 | |
| 1691 /* Extract the connected components. These should be dilated lines */ | |
| 1692 boxa1 = pixConnComp(pix3, &pixa1, 4); | |
| 1693 if (pixadb) { | |
| 1694 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0); | |
| 1695 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 1696 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1697 } | |
| 1698 | |
| 1699 /* Do a 2-d sort, and generate a bounding box for each set of text | |
| 1700 * line segments that is aligned horizontally (i.e., has vertical | |
| 1701 * overlap) into a box representing a single text line. */ | |
| 1702 baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5); | |
| 1703 boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2); | |
| 1704 if (pixadb) { | |
| 1705 pix1 = pixConvertTo32(pix2); | |
| 1706 pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0); | |
| 1707 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1708 } | |
| 1709 | |
| 1710 /* Optionally adjust the sides of each text line box, and then | |
| 1711 * use the boxes to generate a pixa of the text lines. */ | |
| 1712 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh); | |
| 1713 pixa2 = pixClipRectangles(pix2, boxa3); | |
| 1714 if (pixadb) { | |
| 1715 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0); | |
| 1716 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255); | |
| 1717 pixaAddPix(pixadb, pix1, L_INSERT); | |
| 1718 } | |
| 1719 | |
| 1720 pixDestroy(&pix2); | |
| 1721 pixDestroy(&pix3); | |
| 1722 pixaDestroy(&pixa1); | |
| 1723 boxaDestroy(&boxa1); | |
| 1724 boxaDestroy(&boxa2); | |
| 1725 boxaDestroy(&boxa3); | |
| 1726 boxaaDestroy(&baa1); | |
| 1727 return pixa2; | |
| 1728 } | |
| 1729 | |
| 1730 | |
| 1731 /*------------------------------------------------------------------* | |
| 1732 * How many text columns * | |
| 1733 *------------------------------------------------------------------*/ | |
| 1734 /*! | |
| 1735 * \brief pixCountTextColumns() | |
| 1736 * | |
| 1737 * \param[in] pixs 1 bpp | |
| 1738 * \param[in] deltafract fraction of (max - min) to be used in the delta | |
| 1739 * for extrema finding; typ 0.3 | |
| 1740 * \param[in] peakfract fraction of (max - min) to be used to threshold | |
| 1741 * the peak value; typ. 0.5 | |
| 1742 * \param[in] clipfract fraction of image dimension removed on each side; | |
| 1743 * typ. 0.1, which leaves w and h reduced by 0.8 | |
| 1744 * \param[out] pncols number of columns; -1 if not determined | |
| 1745 * \param[in] pixadb [optional] pre-allocated, for showing | |
| 1746 * intermediate computation; use null to skip | |
| 1747 * \return 0 if OK, 1 on error | |
| 1748 * | |
| 1749 * <pre> | |
| 1750 * Notes: | |
| 1751 * (1) It is assumed that pixs has the correct resolution set. | |
| 1752 * If the resolution is 0, we set to 300 and issue a warning. | |
| 1753 * (2) If necessary, the image is scaled to between 37 and 75 ppi; | |
| 1754 * most of the processing is done at this resolution. | |
| 1755 * (3) If no text is found (essentially a blank page), | |
| 1756 * this returns ncols = 0. | |
| 1757 * (4) For debug output, input a pre-allocated pixa. | |
| 1758 * </pre> | |
| 1759 */ | |
| 1760 l_ok | |
| 1761 pixCountTextColumns(PIX *pixs, | |
| 1762 l_float32 deltafract, | |
| 1763 l_float32 peakfract, | |
| 1764 l_float32 clipfract, | |
| 1765 l_int32 *pncols, | |
| 1766 PIXA *pixadb) | |
| 1767 { | |
| 1768 l_int32 w, h, res, i, n, npeak; | |
| 1769 l_float32 scalefact, redfact, minval, maxval, val4, val5, fract; | |
| 1770 BOX *box; | |
| 1771 NUMA *na1, *na2, *na3, *na4, *na5; | |
| 1772 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 1773 | |
| 1774 if (!pncols) | |
| 1775 return ERROR_INT("&ncols not defined", __func__, 1); | |
| 1776 *pncols = -1; /* init */ | |
| 1777 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1778 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 1779 if (deltafract < 0.15 || deltafract > 0.75) | |
| 1780 L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__); | |
| 1781 if (peakfract < 0.25 || peakfract > 0.9) | |
| 1782 L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__); | |
| 1783 if (clipfract < 0.0 || clipfract >= 0.5) | |
| 1784 return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1); | |
| 1785 if (pixadb) pixaAddPix(pixadb, pixs, L_COPY); | |
| 1786 | |
| 1787 /* Scale to between 37.5 and 75 ppi */ | |
| 1788 if ((res = pixGetXRes(pixs)) == 0) { | |
| 1789 L_WARNING("resolution undefined; set to 300\n", __func__); | |
| 1790 pixSetResolution(pixs, 300, 300); | |
| 1791 res = 300; | |
| 1792 } | |
| 1793 if (res < 37) { | |
| 1794 L_WARNING("resolution %d very low\n", __func__, res); | |
| 1795 scalefact = 37.5 / res; | |
| 1796 pix1 = pixScale(pixs, scalefact, scalefact); | |
| 1797 } else { | |
| 1798 redfact = (l_float32)res / 37.5; | |
| 1799 if (redfact < 2.0) | |
| 1800 pix1 = pixClone(pixs); | |
| 1801 else if (redfact < 4.0) | |
| 1802 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); | |
| 1803 else if (redfact < 8.0) | |
| 1804 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0); | |
| 1805 else if (redfact < 16.0) | |
| 1806 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0); | |
| 1807 else | |
| 1808 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2); | |
| 1809 } | |
| 1810 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 1811 | |
| 1812 /* Crop inner 80% of image */ | |
| 1813 pixGetDimensions(pix1, &w, &h, NULL); | |
| 1814 box = boxCreate(clipfract * w, clipfract * h, | |
| 1815 (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h); | |
| 1816 pix2 = pixClipRectangle(pix1, box, NULL); | |
| 1817 pixGetDimensions(pix2, &w, &h, NULL); | |
| 1818 boxDestroy(&box); | |
| 1819 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 1820 | |
| 1821 /* Deskew */ | |
| 1822 pix3 = pixDeskew(pix2, 0); | |
| 1823 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY); | |
| 1824 | |
| 1825 /* Close to increase column counts for text */ | |
| 1826 pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21); | |
| 1827 if (pixadb) pixaAddPix(pixadb, pix4, L_COPY); | |
| 1828 pixInvert(pix4, pix4); | |
| 1829 na1 = pixCountByColumn(pix4, NULL); | |
| 1830 | |
| 1831 if (pixadb) { | |
| 1832 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL); | |
| 1833 pix5 = pixRead("/tmp/lept/plot.png"); | |
| 1834 pixaAddPix(pixadb, pix5, L_INSERT); | |
| 1835 } | |
| 1836 | |
| 1837 /* Analyze the column counts. na4 gives the locations of | |
| 1838 * the extrema in normalized units (0.0 to 1.0) across the | |
| 1839 * cropped image. na5 gives the magnitude of the | |
| 1840 * extrema, normalized to the dynamic range. The peaks | |
| 1841 * are values that are at least peakfract of (max - min). */ | |
| 1842 numaGetMax(na1, &maxval, NULL); | |
| 1843 numaGetMin(na1, &minval, NULL); | |
| 1844 fract = (l_float32)(maxval - minval) / h; /* is there much at all? */ | |
| 1845 if (fract < 0.05) { | |
| 1846 L_INFO("very little content on page; 0 text columns\n", __func__); | |
| 1847 *pncols = 0; | |
| 1848 } else { | |
| 1849 na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3); | |
| 1850 na4 = numaTransform(na2, 0, 1.0 / w); | |
| 1851 na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval)); | |
| 1852 n = numaGetCount(na4); | |
| 1853 for (i = 0, npeak = 0; i < n; i++) { | |
| 1854 numaGetFValue(na4, i, &val4); | |
| 1855 numaGetFValue(na5, i, &val5); | |
| 1856 if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) { | |
| 1857 npeak++; | |
| 1858 L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5); | |
| 1859 } | |
| 1860 } | |
| 1861 *pncols = npeak + 1; | |
| 1862 numaDestroy(&na2); | |
| 1863 numaDestroy(&na3); | |
| 1864 numaDestroy(&na4); | |
| 1865 numaDestroy(&na5); | |
| 1866 } | |
| 1867 | |
| 1868 pixDestroy(&pix1); | |
| 1869 pixDestroy(&pix2); | |
| 1870 pixDestroy(&pix3); | |
| 1871 pixDestroy(&pix4); | |
| 1872 numaDestroy(&na1); | |
| 1873 return 0; | |
| 1874 } | |
| 1875 | |
| 1876 | |
| 1877 /*------------------------------------------------------------------* | |
| 1878 * Decision text vs photo * | |
| 1879 *------------------------------------------------------------------*/ | |
| 1880 /*! | |
| 1881 * \brief pixDecideIfText() | |
| 1882 * | |
| 1883 * \param[in] pixs any depth | |
| 1884 * \param[in] box [optional] if null, use entire pixs | |
| 1885 * \param[out] pistext 1 if text; 0 if photo; -1 if not determined or empty | |
| 1886 * \param[in] pixadb [optional] pre-allocated, for showing intermediate | |
| 1887 * computation; use NULL to skip | |
| 1888 * \return 0 if OK, 1 on error | |
| 1889 * | |
| 1890 * <pre> | |
| 1891 * Notes: | |
| 1892 * (1) It is assumed that pixs has the correct resolution set. | |
| 1893 * If the resolution is 0, we set to 300 and issue a warning. | |
| 1894 * (2) If necessary, the image is scaled to 300 ppi; most of the | |
| 1895 * processing is done at this resolution. | |
| 1896 * (3) Text is assumed to be in horizontal lines. | |
| 1897 * (4) Because thin vertical lines are removed before filtering for | |
| 1898 * text lines, this should identify tables as text. | |
| 1899 * (5) If %box is null and pixs contains both text lines and line art, | |
| 1900 * this function might return %istext == true. | |
| 1901 * (6) If the input pixs is empty, or for some other reason the | |
| 1902 * result can not be determined, return -1. | |
| 1903 * (7) For debug output, input a pre-allocated pixa. | |
| 1904 * </pre> | |
| 1905 */ | |
| 1906 l_ok | |
| 1907 pixDecideIfText(PIX *pixs, | |
| 1908 BOX *box, | |
| 1909 l_int32 *pistext, | |
| 1910 PIXA *pixadb) | |
| 1911 { | |
| 1912 l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp; | |
| 1913 l_float32 ratio1, ratio2; | |
| 1914 L_BMF *bmf; | |
| 1915 BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5; | |
| 1916 PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7; | |
| 1917 PIXA *pixa1; | |
| 1918 SEL *sel1; | |
| 1919 | |
| 1920 if (!pistext) | |
| 1921 return ERROR_INT("&istext not defined", __func__, 1); | |
| 1922 *pistext = -1; | |
| 1923 if (!pixs) | |
| 1924 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1925 | |
| 1926 /* Crop, convert to 1 bpp, 300 ppi */ | |
| 1927 if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL) | |
| 1928 return ERROR_INT("pix1 not made", __func__, 1); | |
| 1929 | |
| 1930 pixZero(pix1, &empty); | |
| 1931 if (empty) { | |
| 1932 pixDestroy(&pix1); | |
| 1933 L_INFO("pix is empty\n", __func__); | |
| 1934 return 0; | |
| 1935 } | |
| 1936 w = pixGetWidth(pix1); | |
| 1937 | |
| 1938 /* Identify and remove tall, thin vertical lines (as found in tables) | |
| 1939 * that are up to 9 pixels wide. Make a hit-miss sel with an | |
| 1940 * 81 pixel vertical set of hits and with 3 pairs of misses that | |
| 1941 * are 10 pixels apart horizontally. It is necessary to use a | |
| 1942 * hit-miss transform; if we only opened with a vertical line of | |
| 1943 * hits, we would remove solid regions of pixels that are not | |
| 1944 * text or vertical lines. */ | |
| 1945 pix2 = pixCreate(11, 81, 1); | |
| 1946 for (i = 0; i < 81; i++) | |
| 1947 pixSetPixel(pix2, 5, i, 1); | |
| 1948 sel1 = selCreateFromPix(pix2, 40, 5, NULL); | |
| 1949 selSetElement(sel1, 20, 0, SEL_MISS); | |
| 1950 selSetElement(sel1, 20, 10, SEL_MISS); | |
| 1951 selSetElement(sel1, 40, 0, SEL_MISS); | |
| 1952 selSetElement(sel1, 40, 10, SEL_MISS); | |
| 1953 selSetElement(sel1, 60, 0, SEL_MISS); | |
| 1954 selSetElement(sel1, 60, 10, SEL_MISS); | |
| 1955 pix3 = pixHMT(NULL, pix1, sel1); | |
| 1956 pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000); | |
| 1957 pix5 = pixXor(NULL, pix1, pix4); | |
| 1958 pixDestroy(&pix2); | |
| 1959 selDestroy(&sel1); | |
| 1960 | |
| 1961 /* Convert the text lines to separate long horizontal components */ | |
| 1962 pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0); | |
| 1963 | |
| 1964 /* Estimate the distance to the bottom of the significant region */ | |
| 1965 if (box) { /* use full height */ | |
| 1966 pixGetDimensions(pix6, NULL, &h, NULL); | |
| 1967 } else { /* use height of region that has text lines */ | |
| 1968 pixFindThreshFgExtent(pix6, 400, NULL, &h); | |
| 1969 } | |
| 1970 | |
| 1971 if (pixadb) { | |
| 1972 bmf = bmfCreate(NULL, 6); | |
| 1973 pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary", | |
| 1974 0x0000ff00, L_ADD_BELOW); | |
| 1975 pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line", | |
| 1976 0x0000ff00, L_ADD_BELOW); | |
| 1977 pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill", | |
| 1978 0x0000ff00, L_ADD_BELOW); | |
| 1979 pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor", | |
| 1980 0x0000ff00, L_ADD_BELOW); | |
| 1981 pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components", | |
| 1982 0x0000ff00, L_ADD_BELOW); | |
| 1983 } | |
| 1984 | |
| 1985 /* Extract the connected components */ | |
| 1986 if (pixadb) { | |
| 1987 boxa1 = pixConnComp(pix6, &pixa1, 8); | |
| 1988 pix7 = pixaDisplayRandomCmap(pixa1, 0, 0); | |
| 1989 pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255); | |
| 1990 pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components", | |
| 1991 0x0000ff00, L_ADD_BELOW); | |
| 1992 pixDestroy(&pix7); | |
| 1993 pixaDestroy(&pixa1); | |
| 1994 bmfDestroy(&bmf); | |
| 1995 } else { | |
| 1996 boxa1 = pixConnComp(pix6, NULL, 8); | |
| 1997 } | |
| 1998 | |
| 1999 /* Analyze the connected components. The following conditions | |
| 2000 * at 300 ppi must be satisfied if the image is text: | |
| 2001 * (1) There are no components that are wider than 400 pixels and | |
| 2002 * taller than 175 pixels. | |
| 2003 * (2) The second longest component is at least 60% of the | |
| 2004 * (possibly cropped) image width. This catches images | |
| 2005 * that don't have any significant content. | |
| 2006 * (3) Of the components that are at least 40% of the length | |
| 2007 * of the longest (n2), at least 80% of them must not exceed | |
| 2008 * 60 pixels in height. | |
| 2009 * (4) The number of those long, thin components (n3) must | |
| 2010 * equal or exceed a minimum that scales linearly with the | |
| 2011 * image height. | |
| 2012 * Most images that are not text fail more than one of these | |
| 2013 * conditions. */ | |
| 2014 boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL); | |
| 2015 boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */ | |
| 2016 boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH, | |
| 2017 L_SELECT_IF_GTE, NULL); | |
| 2018 boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT, | |
| 2019 L_SELECT_IF_LTE, NULL); | |
| 2020 boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH, | |
| 2021 L_SELECT_IF_GT, NULL); | |
| 2022 big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1; | |
| 2023 n1 = boxaGetCount(boxa1); | |
| 2024 n2 = boxaGetCount(boxa3); | |
| 2025 n3 = boxaGetCount(boxa4); | |
| 2026 ratio1 = (l_float32)maxw / (l_float32)w; | |
| 2027 ratio2 = (l_float32)n3 / (l_float32)n2; | |
| 2028 minlines = L_MAX(2, h / 125); | |
| 2029 if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines) | |
| 2030 *pistext = 0; | |
| 2031 else | |
| 2032 *pistext = 1; | |
| 2033 if (pixadb) { | |
| 2034 if (*pistext == 1) { | |
| 2035 L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, " | |
| 2036 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " | |
| 2037 "big_comp = %d\n", __func__, n1, n2, n3, minlines, | |
| 2038 maxw, ratio1, h, big_comp); | |
| 2039 } else { | |
| 2040 L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, " | |
| 2041 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, " | |
| 2042 "big_comp = %d\n", __func__, n1, n2, n3, minlines, | |
| 2043 maxw, ratio1, h, big_comp); | |
| 2044 } | |
| 2045 } | |
| 2046 | |
| 2047 boxaDestroy(&boxa1); | |
| 2048 boxaDestroy(&boxa2); | |
| 2049 boxaDestroy(&boxa3); | |
| 2050 boxaDestroy(&boxa4); | |
| 2051 boxaDestroy(&boxa5); | |
| 2052 pixDestroy(&pix1); | |
| 2053 pixDestroy(&pix3); | |
| 2054 pixDestroy(&pix4); | |
| 2055 pixDestroy(&pix5); | |
| 2056 pixDestroy(&pix6); | |
| 2057 return 0; | |
| 2058 } | |
| 2059 | |
| 2060 | |
| 2061 /*! | |
| 2062 * \brief pixFindThreshFgExtent() | |
| 2063 * | |
| 2064 * \param[in] pixs 1 bpp | |
| 2065 * \param[in] thresh threshold number of pixels in row | |
| 2066 * \param[out] ptop [optional] location of top of region | |
| 2067 * \param[out] pbot [optional] location of bottom of region | |
| 2068 * \return 0 if OK, 1 on error | |
| 2069 */ | |
| 2070 l_ok | |
| 2071 pixFindThreshFgExtent(PIX *pixs, | |
| 2072 l_int32 thresh, | |
| 2073 l_int32 *ptop, | |
| 2074 l_int32 *pbot) | |
| 2075 { | |
| 2076 l_int32 i, n; | |
| 2077 l_int32 *array; | |
| 2078 NUMA *na; | |
| 2079 | |
| 2080 if (ptop) *ptop = 0; | |
| 2081 if (pbot) *pbot = 0; | |
| 2082 if (!ptop && !pbot) | |
| 2083 return ERROR_INT("nothing to determine", __func__, 1); | |
| 2084 if (!pixs || pixGetDepth(pixs) != 1) | |
| 2085 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 2086 | |
| 2087 na = pixCountPixelsByRow(pixs, NULL); | |
| 2088 n = numaGetCount(na); | |
| 2089 array = numaGetIArray(na); | |
| 2090 if (ptop) { | |
| 2091 for (i = 0; i < n; i++) { | |
| 2092 if (array[i] >= thresh) { | |
| 2093 *ptop = i; | |
| 2094 break; | |
| 2095 } | |
| 2096 } | |
| 2097 } | |
| 2098 if (pbot) { | |
| 2099 for (i = n - 1; i >= 0; i--) { | |
| 2100 if (array[i] >= thresh) { | |
| 2101 *pbot = i; | |
| 2102 break; | |
| 2103 } | |
| 2104 } | |
| 2105 } | |
| 2106 LEPT_FREE(array); | |
| 2107 numaDestroy(&na); | |
| 2108 return 0; | |
| 2109 } | |
| 2110 | |
| 2111 | |
| 2112 /*------------------------------------------------------------------* | |
| 2113 * Decision: table vs text * | |
| 2114 *------------------------------------------------------------------*/ | |
| 2115 /*! | |
| 2116 * \brief pixDecideIfTable() | |
| 2117 * | |
| 2118 * \param[in] pixs any depth, any resolution >= 75 ppi | |
| 2119 * \param[in] box [optional] if null, use entire pixs | |
| 2120 * \param[in] orient L_PORTRAIT_MODE, L_LANDSCAPE_MODE | |
| 2121 * \param[out] pscore 0 - 4; -1 if not determined | |
| 2122 * \param[in] pixadb [optional] pre-allocated, for showing intermediate | |
| 2123 * computation; use NULL to skip | |
| 2124 * \return 0 if OK, 1 on error | |
| 2125 * | |
| 2126 * <pre> | |
| 2127 * Notes: | |
| 2128 * (1) It is assumed that pixs has the correct resolution set. | |
| 2129 * If the resolution is 0, we assume it is 300 ppi and issue a warning. | |
| 2130 * (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees | |
| 2131 * clockwise before being analyzed. | |
| 2132 * (3) The interpretation of the returned score: | |
| 2133 * -1 undetermined | |
| 2134 * 0 no table | |
| 2135 * 1 unlikely to have a table | |
| 2136 * 2 likely to have a table | |
| 2137 * 3 even more likely to have a table | |
| 2138 * 4 extremely likely to have a table | |
| 2139 * * Setting the condition for finding a table at score >= 2 works | |
| 2140 * well, except for false positives on kanji and landscape text. | |
| 2141 * * These false positives can be removed by setting the condition | |
| 2142 * at score >= 3, but recall is lowered because it will not find | |
| 2143 * tables without either horizontal or vertical lines. | |
| 2144 * (4) Most of the processing takes place at 75 ppi. | |
| 2145 * (5) Internally, three numbers are determined, for horizontal and | |
| 2146 * vertical fg lines, and for vertical bg lines. From these, | |
| 2147 * four tests are made to decide if there is a table occupying | |
| 2148 * a significant part of the image. | |
| 2149 * (6) Images have arbitrary content and would be likely to trigger | |
| 2150 * this detector, so they are checked for first, and if found, | |
| 2151 * return with a 0 (no table) score. | |
| 2152 * (7) Musical scores (tablature) are likely to trigger the detector. | |
| 2153 * (8) Tables of content with more than 2 columns are likely to | |
| 2154 * trigger the detector. | |
| 2155 * (9) For debug output, input a pre-allocated pixa. | |
| 2156 * </pre> | |
| 2157 */ | |
| 2158 l_ok | |
| 2159 pixDecideIfTable(PIX *pixs, | |
| 2160 BOX *box, | |
| 2161 l_int32 orient, | |
| 2162 l_int32 *pscore, | |
| 2163 PIXA *pixadb) | |
| 2164 { | |
| 2165 l_int32 empty, nhb, nvb, nvw, score, htfound; | |
| 2166 PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9; | |
| 2167 | |
| 2168 if (!pscore) | |
| 2169 return ERROR_INT("&score not defined", __func__, 1); | |
| 2170 *pscore = -1; | |
| 2171 if (!pixs) | |
| 2172 return ERROR_INT("pixs not defined", __func__, 1); | |
| 2173 | |
| 2174 /* Check if there is an image region. First convert to 1 bpp | |
| 2175 * at 175 ppi. If an image is found, assume there is no table. */ | |
| 2176 pix1 = pixPrepare1bpp(pixs, box, 0.1f, 175); | |
| 2177 pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL); | |
| 2178 if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 2179 pixDestroy(&pix1); | |
| 2180 pixDestroy(&pix2); | |
| 2181 if (htfound) { | |
| 2182 *pscore = 0; | |
| 2183 L_INFO("pix has an image region\n", __func__); | |
| 2184 return 0; | |
| 2185 } | |
| 2186 | |
| 2187 /* Crop, convert to 1 bpp, 75 ppi */ | |
| 2188 if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL) | |
| 2189 return ERROR_INT("pix1 not made", __func__, 1); | |
| 2190 | |
| 2191 pixZero(pix1, &empty); | |
| 2192 if (empty) { | |
| 2193 *pscore = 0; | |
| 2194 pixDestroy(&pix1); | |
| 2195 L_INFO("pix is empty\n", __func__); | |
| 2196 return 0; | |
| 2197 } | |
| 2198 | |
| 2199 /* The 2x2 dilation on 75 ppi makes these two approaches very similar: | |
| 2200 * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution | |
| 2201 * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); | |
| 2202 * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution | |
| 2203 * pix2 = pixDilateBrick(NULL, pix1, 2, 2); | |
| 2204 * But (2) is more efficient if the input image to pixPrepare1bpp() | |
| 2205 * is not at 300 ppi. */ | |
| 2206 pix2 = pixDilateBrick(NULL, pix1, 2, 2); | |
| 2207 | |
| 2208 /* Deskew both horizontally and vertically; rotate by 90 | |
| 2209 * degrees if in landscape mode. */ | |
| 2210 pix3 = pixDeskewBoth(pix2, 1); | |
| 2211 if (pixadb) { | |
| 2212 pixaAddPix(pixadb, pix2, L_COPY); | |
| 2213 pixaAddPix(pixadb, pix3, L_COPY); | |
| 2214 } | |
| 2215 if (orient == L_LANDSCAPE_MODE) | |
| 2216 pix4 = pixRotate90(pix3, 1); | |
| 2217 else | |
| 2218 pix4 = pixClone(pix3); | |
| 2219 pixDestroy(&pix1); | |
| 2220 pixDestroy(&pix2); | |
| 2221 pixDestroy(&pix3); | |
| 2222 pix1 = pixClone(pix4); | |
| 2223 pixDestroy(&pix4); | |
| 2224 | |
| 2225 /* Look for horizontal and vertical lines */ | |
| 2226 pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0); | |
| 2227 pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8); | |
| 2228 pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0); | |
| 2229 pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8); | |
| 2230 pix6 = pixOr(NULL, pix3, pix5); | |
| 2231 if (pixadb) { | |
| 2232 pixaAddPix(pixadb, pix2, L_COPY); | |
| 2233 pixaAddPix(pixadb, pix4, L_COPY); | |
| 2234 pixaAddPix(pixadb, pix3, L_COPY); | |
| 2235 pixaAddPix(pixadb, pix5, L_COPY); | |
| 2236 pixaAddPix(pixadb, pix6, L_COPY); | |
| 2237 } | |
| 2238 pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */ | |
| 2239 pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */ | |
| 2240 | |
| 2241 /* Remove the lines */ | |
| 2242 pixSubtract(pix1, pix1, pix6); | |
| 2243 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 2244 | |
| 2245 /* Remove noise pixels */ | |
| 2246 pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0); | |
| 2247 if (pixadb) pixaAddPix(pixadb, pix7, L_COPY); | |
| 2248 | |
| 2249 /* Look for vertical white space. Invert to convert white bg | |
| 2250 * to fg. Use a single rank-1 2x reduction, which closes small | |
| 2251 * fg holes, for the final processing at 37.5 ppi. | |
| 2252 * The vertical opening is then about 3 inches on a 300 ppi image. | |
| 2253 * We also remove vertical whitespace that is less than 5 pixels | |
| 2254 * wide at this resolution (about 0.1 inches) */ | |
| 2255 pixInvert(pix7, pix7); | |
| 2256 pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0); | |
| 2257 pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH, | |
| 2258 L_SELECT_IF_GTE, NULL); | |
| 2259 pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */ | |
| 2260 if (pixadb) { | |
| 2261 pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT); | |
| 2262 pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT); | |
| 2263 } | |
| 2264 | |
| 2265 /* Require at least 2 of the following 4 conditions for a table. | |
| 2266 * Some tables do not have black (fg) lines, and for those we | |
| 2267 * require more than 6 long vertical whitespace (bg) lines. */ | |
| 2268 score = 0; | |
| 2269 if (nhb > 1) score++; | |
| 2270 if (nvb > 2) score++; | |
| 2271 if (nvw > 3) score++; | |
| 2272 if (nvw > 6) score++; | |
| 2273 *pscore = score; | |
| 2274 | |
| 2275 pixDestroy(&pix1); | |
| 2276 pixDestroy(&pix2); | |
| 2277 pixDestroy(&pix3); | |
| 2278 pixDestroy(&pix4); | |
| 2279 pixDestroy(&pix5); | |
| 2280 pixDestroy(&pix6); | |
| 2281 pixDestroy(&pix7); | |
| 2282 pixDestroy(&pix8); | |
| 2283 pixDestroy(&pix9); | |
| 2284 return 0; | |
| 2285 } | |
| 2286 | |
| 2287 | |
| 2288 /*! | |
| 2289 * \brief pixPrepare1bpp() | |
| 2290 * | |
| 2291 * \param[in] pixs any depth | |
| 2292 * \param[in] box [optional] if null, use entire pixs | |
| 2293 * \param[in] cropfract fraction to be removed from the boundary; | |
| 2294 * use 0.0 to retain the entire image | |
| 2295 * \param[in] outres desired resolution of output image; if the | |
| 2296 * input image resolution is not set, assume | |
| 2297 * 300 ppi; use 0 to skip scaling. | |
| 2298 * \return pixd if OK, NULL on error | |
| 2299 * | |
| 2300 * <pre> | |
| 2301 * Notes: | |
| 2302 * (1) This handles some common pre-processing operations, | |
| 2303 * where the page segmentation algorithm takes a 1 bpp image. | |
| 2304 * </pre> | |
| 2305 */ | |
| 2306 PIX * | |
| 2307 pixPrepare1bpp(PIX *pixs, | |
| 2308 BOX *box, | |
| 2309 l_float32 cropfract, | |
| 2310 l_int32 outres) | |
| 2311 { | |
| 2312 l_int32 w, h, res; | |
| 2313 l_float32 factor; | |
| 2314 BOX *box1; | |
| 2315 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 2316 | |
| 2317 if (!pixs) | |
| 2318 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 2319 | |
| 2320 /* Crop the image. If no box is given, use %cropfract to remove | |
| 2321 * pixels near the image boundary; this helps avoid false | |
| 2322 * negatives from noise that is often found there. */ | |
| 2323 if (box) { | |
| 2324 pix1 = pixClipRectangle(pixs, box, NULL); | |
| 2325 } else { | |
| 2326 pixGetDimensions(pixs, &w, &h, NULL); | |
| 2327 box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h), | |
| 2328 (l_int32)((1.0 - 2 * cropfract) * w), | |
| 2329 (l_int32)((1.0 - 2 * cropfract) * h)); | |
| 2330 pix1 = pixClipRectangle(pixs, box1, NULL); | |
| 2331 boxDestroy(&box1); | |
| 2332 } | |
| 2333 | |
| 2334 /* Convert to 1 bpp with adaptive background cleaning */ | |
| 2335 if (pixGetDepth(pixs) > 1) { | |
| 2336 pix2 = pixConvertTo8(pix1, 0); | |
| 2337 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160); | |
| 2338 pixDestroy(&pix1); | |
| 2339 pixDestroy(&pix2); | |
| 2340 if (!pix3) { | |
| 2341 L_INFO("pix cleaning failed\n", __func__); | |
| 2342 return NULL; | |
| 2343 } | |
| 2344 pix4 = pixThresholdToBinary(pix3, 200); | |
| 2345 pixDestroy(&pix3); | |
| 2346 } else { | |
| 2347 pix4 = pixClone(pix1); | |
| 2348 pixDestroy(&pix1); | |
| 2349 } | |
| 2350 | |
| 2351 /* Scale the image to the requested output resolution; | |
| 2352 do not scale if %outres <= 0 */ | |
| 2353 if (outres <= 0) | |
| 2354 return pix4; | |
| 2355 if ((res = pixGetXRes(pixs)) == 0) { | |
| 2356 L_WARNING("Resolution is not set: using 300 ppi\n", __func__); | |
| 2357 res = 300; | |
| 2358 } | |
| 2359 if (res != outres) { | |
| 2360 factor = (l_float32)outres / (l_float32)res; | |
| 2361 pix5 = pixScale(pix4, factor, factor); | |
| 2362 } else { | |
| 2363 pix5 = pixClone(pix4); | |
| 2364 } | |
| 2365 pixDestroy(&pix4); | |
| 2366 return pix5; | |
| 2367 } | |
| 2368 | |
| 2369 | |
| 2370 /*------------------------------------------------------------------* | |
| 2371 * Estimate the grayscale background value * | |
| 2372 *------------------------------------------------------------------*/ | |
| 2373 /*! | |
| 2374 * \brief pixEstimateBackground() | |
| 2375 * | |
| 2376 * \param[in] pixs 8 bpp, with or without colormap | |
| 2377 * \param[in] darkthresh pixels below this value are never considered | |
| 2378 * part of the background; typ. 70; use 0 to skip | |
| 2379 * \param[in] edgecrop fraction of half-width on each side, and of | |
| 2380 * half-height at top and bottom, that are cropped | |
| 2381 * \param[out] pbg estimated background, or 0 on error | |
| 2382 * \return 0 if OK, 1 on error | |
| 2383 * | |
| 2384 * <pre> | |
| 2385 * Notes: | |
| 2386 * (1) Caller should check that return bg value is > 0. | |
| 2387 * </pre> | |
| 2388 */ | |
| 2389 l_ok | |
| 2390 pixEstimateBackground(PIX *pixs, | |
| 2391 l_int32 darkthresh, | |
| 2392 l_float32 edgecrop, | |
| 2393 l_int32 *pbg) | |
| 2394 { | |
| 2395 l_int32 w, h, sampling; | |
| 2396 l_float32 fbg; | |
| 2397 BOX *box; | |
| 2398 PIX *pix1, *pix2, *pixm; | |
| 2399 | |
| 2400 if (!pbg) | |
| 2401 return ERROR_INT("&bg not defined", __func__, 1); | |
| 2402 *pbg = 0; | |
| 2403 if (!pixs || pixGetDepth(pixs) != 8) | |
| 2404 return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1); | |
| 2405 if (darkthresh > 128) | |
| 2406 L_WARNING("darkthresh unusually large\n", __func__); | |
| 2407 if (edgecrop < 0.0 || edgecrop >= 1.0) | |
| 2408 return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1); | |
| 2409 | |
| 2410 pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE); | |
| 2411 pixGetDimensions(pix1, &w, &h, NULL); | |
| 2412 | |
| 2413 /* Optionally crop inner part of image */ | |
| 2414 if (edgecrop > 0.0) { | |
| 2415 box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h, | |
| 2416 (1.0 - edgecrop) * w, (1.0 - edgecrop) * h); | |
| 2417 pix2 = pixClipRectangle(pix1, box, NULL); | |
| 2418 boxDestroy(&box); | |
| 2419 } else { | |
| 2420 pix2 = pixClone(pix1); | |
| 2421 } | |
| 2422 | |
| 2423 /* We will use no more than 50K samples */ | |
| 2424 sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5)); | |
| 2425 | |
| 2426 /* Optionally make a mask over all pixels lighter than %darkthresh */ | |
| 2427 pixm = NULL; | |
| 2428 if (darkthresh > 0) { | |
| 2429 pixm = pixThresholdToBinary(pix2, darkthresh); | |
| 2430 pixInvert(pixm, pixm); | |
| 2431 } | |
| 2432 | |
| 2433 pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL); | |
| 2434 *pbg = (l_int32)(fbg + 0.5); | |
| 2435 pixDestroy(&pix1); | |
| 2436 pixDestroy(&pix2); | |
| 2437 pixDestroy(&pixm); | |
| 2438 return 0; | |
| 2439 } | |
| 2440 | |
| 2441 | |
| 2442 /*---------------------------------------------------------------------* | |
| 2443 * Largest white or black rectangles in an image * | |
| 2444 *---------------------------------------------------------------------*/ | |
| 2445 /*! | |
| 2446 * \brief pixFindLargeRectangles() | |
| 2447 * | |
| 2448 * \param[in] pixs 1 bpp | |
| 2449 * \param[in] polarity 0 within background, 1 within foreground | |
| 2450 * \param[in] nrect number of rectangles to be found | |
| 2451 * \param[out] pboxa largest rectangles, sorted by decreasing area | |
| 2452 * \param[in,out] ppixdb optional return output with rectangles drawn on it | |
| 2453 * \return 0 if OK, 1 on error | |
| 2454 * | |
| 2455 * <pre> | |
| 2456 * Notes: | |
| 2457 * (1) This does a greedy search to find the largest rectangles, | |
| 2458 * either black or white and without overlaps, in %pix. | |
| 2459 * (2) See pixFindLargestRectangle(), which is called multiple | |
| 2460 * times, for details. On each call, the largest rectangle | |
| 2461 * found is painted, so that none of its pixels can be | |
| 2462 * used later, before calling it again. | |
| 2463 * (3) This function is surprisingly fast. Although | |
| 2464 * pixFindLargestRectangle() runs at about 50 MPix/sec, when it | |
| 2465 * is run multiple times by pixFindLargeRectangles(), it processes | |
| 2466 * at 150 - 250 MPix/sec, and the time is approximately linear | |
| 2467 * in %nrect. For example, for a 1 MPix image, searching for | |
| 2468 * the largest 50 boxes takes about 0.2 seconds. | |
| 2469 * </pre> | |
| 2470 */ | |
| 2471 l_ok | |
| 2472 pixFindLargeRectangles(PIX *pixs, | |
| 2473 l_int32 polarity, | |
| 2474 l_int32 nrect, | |
| 2475 BOXA **pboxa, | |
| 2476 PIX **ppixdb) | |
| 2477 { | |
| 2478 l_int32 i, op, bx, by, bw, bh; | |
| 2479 BOX *box; | |
| 2480 BOXA *boxa; | |
| 2481 PIX *pix; | |
| 2482 | |
| 2483 if (ppixdb) *ppixdb = NULL; | |
| 2484 if (!pboxa) | |
| 2485 return ERROR_INT("&boxa not defined", __func__, 1); | |
| 2486 *pboxa = NULL; | |
| 2487 if (!pixs || pixGetDepth(pixs) != 1) | |
| 2488 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 2489 if (polarity != 0 && polarity != 1) | |
| 2490 return ERROR_INT("invalid polarity", __func__, 1); | |
| 2491 if (nrect > 1000) { | |
| 2492 L_WARNING("large num rectangles = %d requested; using 1000\n", | |
| 2493 __func__, nrect); | |
| 2494 nrect = 1000; | |
| 2495 } | |
| 2496 | |
| 2497 pix = pixCopy(NULL, pixs); | |
| 2498 boxa = boxaCreate(nrect); | |
| 2499 *pboxa = boxa; | |
| 2500 | |
| 2501 /* Sequentially find largest rectangle and fill with opposite color */ | |
| 2502 for (i = 0; i < nrect; i++) { | |
| 2503 if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) { | |
| 2504 boxDestroy(&box); | |
| 2505 L_ERROR("failure in pixFindLargestRectangle\n", __func__); | |
| 2506 break; | |
| 2507 } | |
| 2508 boxaAddBox(boxa, box, L_INSERT); | |
| 2509 op = (polarity == 0) ? PIX_SET : PIX_CLR; | |
| 2510 boxGetGeometry(box, &bx, &by, &bw, &bh); | |
| 2511 pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0); | |
| 2512 } | |
| 2513 | |
| 2514 if (ppixdb) | |
| 2515 *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3); | |
| 2516 | |
| 2517 pixDestroy(&pix); | |
| 2518 return 0; | |
| 2519 } | |
| 2520 | |
| 2521 | |
| 2522 /*! | |
| 2523 * \brief pixFindLargestRectangle() | |
| 2524 * | |
| 2525 * \param[in] pixs 1 bpp | |
| 2526 * \param[in] polarity 0 within background, 1 within foreground | |
| 2527 * \param[out] pbox largest area rectangle | |
| 2528 * \param[in,out] ppixdb optional return output with rectangle drawn on it | |
| 2529 * \return 0 if OK, 1 on error | |
| 2530 * | |
| 2531 * <pre> | |
| 2532 * Notes: | |
| 2533 * (1) This is a simple and elegant solution to a problem in | |
| 2534 * computational geometry that at first appears to be quite | |
| 2535 * difficult: what is the largest rectangle that can be | |
| 2536 * placed in the image, covering only pixels of one polarity | |
| 2537 * (bg or fg)? The solution is O(n), where n is the number | |
| 2538 * of pixels in the image, and it requires nothing more than | |
| 2539 * using a simple recursion relation in a single sweep of the image. | |
| 2540 * (2) In a sweep from UL to LR with left-to-right being the fast | |
| 2541 * direction, calculate the largest white rectangle at (x, y), | |
| 2542 * using previously calculated values at pixels #1 and #2: | |
| 2543 * #1: (x, y - 1) | |
| 2544 * #2: (x - 1, y) | |
| 2545 * We also need the most recent "black" pixels that were seen | |
| 2546 * in the current row and column. | |
| 2547 * Consider the largest area. There are only two possibilities: | |
| 2548 * (a) Min(w(1), horizdist) * (h(1) + 1) | |
| 2549 * (b) Min(h(2), vertdist) * (w(2) + 1) | |
| 2550 * where | |
| 2551 * horizdist: the distance from the rightmost "black" pixel seen | |
| 2552 * in the current row across to the current pixel | |
| 2553 * vertdist: the distance from the lowest "black" pixel seen | |
| 2554 * in the current column down to the current pixel | |
| 2555 * and we choose the Max of (a) and (b). | |
| 2556 * (3) To convince yourself that these recursion relations are correct, | |
| 2557 * it helps to draw the maximum rectangles at #1 and #2. | |
| 2558 * Then for #1, you try to extend the rectangle down one line, | |
| 2559 * so that the height is h(1) + 1. Do you get the full | |
| 2560 * width of #1, w(1)? It depends on where the black pixels are | |
| 2561 * in the current row. You know the final width is bounded by w(1) | |
| 2562 * and w(2) + 1, but the actual value depends on the distribution | |
| 2563 * of black pixels in the current row that are at a distance | |
| 2564 * from the current pixel that is between these limits. | |
| 2565 * We call that value "horizdist", and the area is then given | |
| 2566 * by the expression (a) above. Using similar reasoning for #2, | |
| 2567 * where you attempt to extend the rectangle to the right | |
| 2568 * by 1 pixel, you arrive at (b). The largest rectangle is | |
| 2569 * then found by taking the Max. | |
| 2570 * </pre> | |
| 2571 */ | |
| 2572 l_ok | |
| 2573 pixFindLargestRectangle(PIX *pixs, | |
| 2574 l_int32 polarity, | |
| 2575 BOX **pbox, | |
| 2576 PIX **ppixdb) | |
| 2577 { | |
| 2578 l_int32 i, j, w, h, d, wpls, val; | |
| 2579 l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2; | |
| 2580 l_int32 xmax, ymax; /* LR corner of the largest rectangle */ | |
| 2581 l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg; | |
| 2582 l_int32 *lowestfg; | |
| 2583 l_uint32 *datas, *lines; | |
| 2584 l_uint32 **linew, **lineh; | |
| 2585 BOX *box; | |
| 2586 PIX *pixw, *pixh; /* keeps the width and height for the largest */ | |
| 2587 /* rectangles whose LR corner is located there. */ | |
| 2588 | |
| 2589 if (ppixdb) *ppixdb = NULL; | |
| 2590 if (!pbox) | |
| 2591 return ERROR_INT("&box not defined", __func__, 1); | |
| 2592 *pbox = NULL; | |
| 2593 if (!pixs) | |
| 2594 return ERROR_INT("pixs not defined", __func__, 1); | |
| 2595 pixGetDimensions(pixs, &w, &h, &d); | |
| 2596 if (d != 1) | |
| 2597 return ERROR_INT("pixs not 1 bpp", __func__, 1); | |
| 2598 if (polarity != 0 && polarity != 1) | |
| 2599 return ERROR_INT("invalid polarity", __func__, 1); | |
| 2600 | |
| 2601 /* Initialize lowest "fg" seen so far for each column */ | |
| 2602 lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32)); | |
| 2603 for (i = 0; i < w; i++) | |
| 2604 lowestfg[i] = -1; | |
| 2605 | |
| 2606 /* The combination (val ^ polarity) is the color for which we | |
| 2607 * are searching for the maximum rectangle. For polarity == 0, | |
| 2608 * we search in the bg (white). */ | |
| 2609 pixw = pixCreate(w, h, 32); /* stores width */ | |
| 2610 pixh = pixCreate(w, h, 32); /* stores height */ | |
| 2611 linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL); | |
| 2612 lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL); | |
| 2613 datas = pixGetData(pixs); | |
| 2614 wpls = pixGetWpl(pixs); | |
| 2615 maxarea = xmax = ymax = wmax = hmax = 0; | |
| 2616 for (i = 0; i < h; i++) { | |
| 2617 lines = datas + i * wpls; | |
| 2618 prevfg = -1; | |
| 2619 for (j = 0; j < w; j++) { | |
| 2620 val = GET_DATA_BIT(lines, j); | |
| 2621 if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */ | |
| 2622 if (i == 0 && j == 0) { | |
| 2623 wp = hp = 1; | |
| 2624 } else if (i == 0) { | |
| 2625 wp = linew[i][j - 1] + 1; | |
| 2626 hp = 1; | |
| 2627 } else if (j == 0) { | |
| 2628 wp = 1; | |
| 2629 hp = lineh[i - 1][j] + 1; | |
| 2630 } else { | |
| 2631 /* Expand #1 prev rectangle down */ | |
| 2632 w1 = linew[i - 1][j]; | |
| 2633 h1 = lineh[i - 1][j]; | |
| 2634 horizdist = j - prevfg; | |
| 2635 wmin = L_MIN(w1, horizdist); /* width of new rectangle */ | |
| 2636 area1 = wmin * (h1 + 1); | |
| 2637 | |
| 2638 /* Expand #2 prev rectangle to right */ | |
| 2639 w2 = linew[i][j - 1]; | |
| 2640 h2 = lineh[i][j - 1]; | |
| 2641 vertdist = i - lowestfg[j]; | |
| 2642 hmin = L_MIN(h2, vertdist); /* height of new rectangle */ | |
| 2643 area2 = hmin * (w2 + 1); | |
| 2644 | |
| 2645 if (area1 > area2) { | |
| 2646 wp = wmin; | |
| 2647 hp = h1 + 1; | |
| 2648 } else { | |
| 2649 wp = w2 + 1; | |
| 2650 hp = hmin; | |
| 2651 } | |
| 2652 } | |
| 2653 } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */ | |
| 2654 prevfg = j; | |
| 2655 lowestfg[j] = i; | |
| 2656 wp = hp = 0; | |
| 2657 } | |
| 2658 linew[i][j] = wp; | |
| 2659 lineh[i][j] = hp; | |
| 2660 if (wp * hp > maxarea) { | |
| 2661 maxarea = wp * hp; | |
| 2662 xmax = j; | |
| 2663 ymax = i; | |
| 2664 wmax = wp; | |
| 2665 hmax = hp; | |
| 2666 } | |
| 2667 } | |
| 2668 } | |
| 2669 | |
| 2670 /* Translate from LR corner to Box coords (UL corner, w, h) */ | |
| 2671 box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax); | |
| 2672 *pbox = box; | |
| 2673 | |
| 2674 if (ppixdb) { | |
| 2675 *ppixdb = pixConvertTo8(pixs, TRUE); | |
| 2676 pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0); | |
| 2677 } | |
| 2678 | |
| 2679 LEPT_FREE(linew); | |
| 2680 LEPT_FREE(lineh); | |
| 2681 LEPT_FREE(lowestfg); | |
| 2682 pixDestroy(&pixw); | |
| 2683 pixDestroy(&pixh); | |
| 2684 return 0; | |
| 2685 } | |
| 2686 | |
| 2687 | |
| 2688 /*---------------------------------------------------------------------* | |
| 2689 * Generate rectangle inside connected component * | |
| 2690 *---------------------------------------------------------------------*/ | |
| 2691 /*! | |
| 2692 * \brief pixFindRectangleInCC() | |
| 2693 * | |
| 2694 * \param[in] pixs 1 bpp, with sufficient closings to make the fg be | |
| 2695 * a single c.c. that is a convex hull | |
| 2696 * \param[in] boxs [optional] if NULL, %pixs should be a minimum | |
| 2697 * container of a single c.c. | |
| 2698 * \param[in] fract first and all consecutive lines found must be at | |
| 2699 * least this fraction of the fast scan dimension | |
| 2700 * \param[in] dir L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of | |
| 2701 * fast scan | |
| 2702 * \param[in] select L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, | |
| 2703 * L_LARGEST_AREA, L_SMALEST_AREA | |
| 2704 * \param[in] debug if 1, generates output pdf showing intermediate | |
| 2705 * computation and final result | |
| 2706 * \return box of included rectangle, or NULL on error | |
| 2707 * | |
| 2708 * <pre> | |
| 2709 * Notes: | |
| 2710 * (1) Computation is similar to pixFindLargestRectangle(), but allows | |
| 2711 * a different set of results to choose from. | |
| 2712 * (2) Select the fast scan direction. Then, scanning in the slow | |
| 2713 * direction, find the longest run of ON pixels in the fast | |
| 2714 * scan direction and look for the first run that is longer | |
| 2715 * than %fract of the dimension. Continue until a shorter run | |
| 2716 * is found. This generates a box of ON pixels fitting into the c.c. | |
| 2717 * (3) Do this from both slow scan directions and use %select to get | |
| 2718 * a resulting box from these two. | |
| 2719 * (4) The extracted rectangle is not necessarily the largest that | |
| 2720 * can fit in the c.c. To get that, use pixFindLargestRectangle(). | |
| 2721 */ | |
| 2722 BOX * | |
| 2723 pixFindRectangleInCC(PIX *pixs, | |
| 2724 BOX *boxs, | |
| 2725 l_float32 fract, | |
| 2726 l_int32 dir, | |
| 2727 l_int32 select, | |
| 2728 l_int32 debug) | |
| 2729 { | |
| 2730 l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res; | |
| 2731 l_int32 xfirst, xlast, xstart, yfirst, ylast, length; | |
| 2732 BOX *box1, *box2, *box3, *box4, *box5; | |
| 2733 PIX *pix1, *pix2, *pixdb1, *pixdb2; | |
| 2734 PIXA *pixadb; | |
| 2735 | |
| 2736 if (!pixs || pixGetDepth(pixs) != 1) | |
| 2737 return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 2738 if (fract <= 0.0 || fract > 1.0) | |
| 2739 return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL); | |
| 2740 if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL) | |
| 2741 return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL); | |
| 2742 if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION && | |
| 2743 select != L_LARGEST_AREA && select != L_SMALLEST_AREA) | |
| 2744 return (BOX *)ERROR_PTR("invalid select", __func__, NULL); | |
| 2745 | |
| 2746 /* Extract the c.c. if necessary */ | |
| 2747 x = y = 0; | |
| 2748 if (boxs) { | |
| 2749 pix1 = pixClipRectangle(pixs, boxs, NULL); | |
| 2750 boxGetGeometry(boxs, &x, &y, NULL, NULL); | |
| 2751 } else { | |
| 2752 pix1 = pixClone(pixs); | |
| 2753 } | |
| 2754 | |
| 2755 /* All fast scans are horizontal; rotate 90 deg cw if necessary */ | |
| 2756 if (dir == L_SCAN_VERTICAL) | |
| 2757 pix2 = pixRotate90(pix1, 1); | |
| 2758 else /* L_SCAN_HORIZONTAL */ | |
| 2759 pix2 = pixClone(pix1); | |
| 2760 pixGetDimensions(pix2, &w, &h, NULL); | |
| 2761 | |
| 2762 pixadb = (debug) ? pixaCreate(0) : NULL; | |
| 2763 pixdb1 = NULL; | |
| 2764 if (pixadb) { | |
| 2765 lept_mkdir("lept/rect"); | |
| 2766 pixaAddPix(pixadb, pix1, L_CLONE); | |
| 2767 pixdb1 = pixConvertTo32(pix2); | |
| 2768 } | |
| 2769 pixDestroy(&pix1); | |
| 2770 | |
| 2771 /* Scanning down, find the first scanline with a long enough run. | |
| 2772 * That run goes from (xfirst, yfirst) to (xlast, yfirst). */ | |
| 2773 found = FALSE; | |
| 2774 for (i = 0; i < h; i++) { | |
| 2775 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); | |
| 2776 if (length >= (l_int32)(fract * w + 0.5)) { | |
| 2777 yfirst = i; | |
| 2778 xfirst = xstart; | |
| 2779 xlast = xfirst + length - 1; | |
| 2780 found = TRUE; | |
| 2781 break; | |
| 2782 } | |
| 2783 } | |
| 2784 if (!found) { | |
| 2785 L_WARNING("no run of sufficient size was found\n", __func__); | |
| 2786 pixDestroy(&pix2); | |
| 2787 pixDestroy(&pixdb1); | |
| 2788 pixaDestroy(&pixadb); | |
| 2789 return NULL; | |
| 2790 } | |
| 2791 | |
| 2792 /* Continue down until the condition fails */ | |
| 2793 w1 = xlast - xfirst + 1; | |
| 2794 h1 = h - yfirst; /* init */ | |
| 2795 ylast = h - 1; /* init */ | |
| 2796 for (i = yfirst + 1; i < h; i++) { | |
| 2797 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); | |
| 2798 if (xstart > xfirst || (xstart + length - 1 < xlast) || | |
| 2799 i == h - 1) { | |
| 2800 ylast = i - 1; | |
| 2801 h1 = ylast - yfirst + 1; | |
| 2802 break; | |
| 2803 } | |
| 2804 } | |
| 2805 box1 = boxCreate(xfirst, yfirst, w1, h1); | |
| 2806 | |
| 2807 /* Scanning up, find the first scanline with a long enough run. | |
| 2808 * That run goes from (xfirst, ylast) to (xlast, ylast). */ | |
| 2809 for (i = h - 1; i >= 0; i--) { | |
| 2810 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); | |
| 2811 if (length >= (l_int32)(fract * w + 0.5)) { | |
| 2812 ylast = i; | |
| 2813 xfirst = xstart; | |
| 2814 xlast = xfirst + length - 1; | |
| 2815 break; | |
| 2816 } | |
| 2817 } | |
| 2818 | |
| 2819 /* Continue up until the condition fails */ | |
| 2820 w2 = xlast - xfirst + 1; | |
| 2821 h2 = ylast + 1; /* initialize */ | |
| 2822 for (i = ylast - 1; i >= 0; i--) { | |
| 2823 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); | |
| 2824 if (xstart > xfirst || (xstart + length - 1 < xlast) || | |
| 2825 i == 0) { | |
| 2826 yfirst = i + 1; | |
| 2827 h2 = ylast - yfirst + 1; | |
| 2828 break; | |
| 2829 } | |
| 2830 } | |
| 2831 box2 = boxCreate(xfirst, yfirst, w2, h2); | |
| 2832 pixDestroy(&pix2); | |
| 2833 | |
| 2834 if (pixadb) { | |
| 2835 pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0); | |
| 2836 pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0); | |
| 2837 pixaAddPix(pixadb, pixdb1, L_INSERT); | |
| 2838 } | |
| 2839 | |
| 2840 /* Select the final result from the two boxes */ | |
| 2841 if (select == L_GEOMETRIC_UNION) | |
| 2842 box3 = boxBoundingRegion(box1, box2); | |
| 2843 else if (select == L_GEOMETRIC_INTERSECTION) | |
| 2844 box3 = boxOverlapRegion(box1, box2); | |
| 2845 else if (select == L_LARGEST_AREA) | |
| 2846 box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2); | |
| 2847 else /* select == L_SMALLEST_AREA) */ | |
| 2848 box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2); | |
| 2849 boxDestroy(&box1); | |
| 2850 boxDestroy(&box2); | |
| 2851 | |
| 2852 /* Rotate the box 90 degrees ccw if necessary */ | |
| 2853 box4 = NULL; | |
| 2854 if (box3) { | |
| 2855 if (dir == L_SCAN_VERTICAL) | |
| 2856 box4 = boxRotateOrth(box3, w, h, 3); | |
| 2857 else | |
| 2858 box4 = boxCopy(box3); | |
| 2859 } | |
| 2860 | |
| 2861 /* Transform back to global coordinates if %boxs exists */ | |
| 2862 box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL; | |
| 2863 boxDestroy(&box3); | |
| 2864 boxDestroy(&box4); | |
| 2865 | |
| 2866 /* Debug output */ | |
| 2867 if (pixadb) { | |
| 2868 pixdb1 = pixConvertTo8(pixs, 0); | |
| 2869 pixAddConstantGray(pixdb1, 190); | |
| 2870 pixdb2 = pixConvertTo32(pixdb1); | |
| 2871 if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255); | |
| 2872 pixaAddPix(pixadb, pixdb2, L_INSERT); | |
| 2873 res = pixGetXRes(pixs); | |
| 2874 L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__); | |
| 2875 pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL, | |
| 2876 "/tmp/lept/rect/fitrect.pdf"); | |
| 2877 pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2); | |
| 2878 pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG); | |
| 2879 pixDestroy(&pix1); | |
| 2880 pixDestroy(&pixdb1); | |
| 2881 pixaDestroy(&pixadb); | |
| 2882 } | |
| 2883 | |
| 2884 return box5; | |
| 2885 } | |
| 2886 | |
| 2887 /*------------------------------------------------------------------* | |
| 2888 * Automatic photoinvert for OCR * | |
| 2889 *------------------------------------------------------------------*/ | |
| 2890 /*! | |
| 2891 * \brief pixAutoPhotoinvert() | |
| 2892 * | |
| 2893 * \param[in] pixs any depth, colormap ok | |
| 2894 * \param[in] thresh binarization threshold; use 0 for default | |
| 2895 * \param[out] ppixm [optional] image regions to be inverted | |
| 2896 * \param[out] pixadb [optional] debug; input NULL to skip | |
| 2897 * \return pixd 1 bpp image to be sent to OCR, or NULL on error | |
| 2898 * | |
| 2899 * <pre> | |
| 2900 * Notes: | |
| 2901 * (1) A 1 bpp image is returned, where pixels in image regions are | |
| 2902 * photo-inverted. | |
| 2903 * (2) If there is light text with a dark background, this will | |
| 2904 * identify the region and photoinvert the pixels there if | |
| 2905 * there are at least 60% fg pixels in the region. | |
| 2906 * (3) For debug output, input a (typically empty) %pixadb. | |
| 2907 * </pre> | |
| 2908 */ | |
| 2909 PIX * | |
| 2910 pixAutoPhotoinvert(PIX *pixs, | |
| 2911 l_int32 thresh, | |
| 2912 PIX **ppixm, | |
| 2913 PIXA *pixadb) | |
| 2914 { | |
| 2915 l_int32 i, n, empty, x, y, w, h; | |
| 2916 l_float32 fgfract; | |
| 2917 BOX *box1; | |
| 2918 BOXA *boxa1; | |
| 2919 PIX *pix1, *pix2, *pix3, *pix4, *pix5; | |
| 2920 | |
| 2921 if (ppixm) *ppixm = NULL; | |
| 2922 if (!pixs) | |
| 2923 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 2924 if (thresh == 0) thresh = 128; | |
| 2925 | |
| 2926 if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL) | |
| 2927 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL); | |
| 2928 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY); | |
| 2929 | |
| 2930 /* Identify regions for photo-inversion: | |
| 2931 * (1) Start with the halftone mask. | |
| 2932 * (2) Eliminate ordinary text and halftones in the mask. | |
| 2933 * (3) Some regions of inverted text may have been removed in | |
| 2934 * steps (1) and (2). Conditionally fill holes in the mask, | |
| 2935 * but do not fill out to the bounding rect. */ | |
| 2936 pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb); | |
| 2937 pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */ | |
| 2938 pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0); | |
| 2939 if (pixadb) { | |
| 2940 pixaAddPix(pixadb, pix2, L_CLONE); | |
| 2941 pixaAddPix(pixadb, pix3, L_CLONE); | |
| 2942 pixaAddPix(pixadb, pix4, L_COPY); | |
| 2943 } | |
| 2944 pixDestroy(&pix2); | |
| 2945 pixDestroy(&pix3); | |
| 2946 pixZero(pix4, &empty); | |
| 2947 if (empty) { | |
| 2948 pixDestroy(&pix4); | |
| 2949 return pix1; | |
| 2950 } | |
| 2951 | |
| 2952 /* Examine each component and validate the inversion. | |
| 2953 * Require at least 60% of pixels under each component to be FG. */ | |
| 2954 boxa1 = pixConnCompBB(pix4, 8); | |
| 2955 n = boxaGetCount(boxa1); | |
| 2956 for (i = 0; i < n; i++) { | |
| 2957 box1 = boxaGetBox(boxa1, i, L_COPY); | |
| 2958 pix5 = pixClipRectangle(pix1, box1, NULL); | |
| 2959 pixForegroundFraction(pix5, &fgfract); | |
| 2960 if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract); | |
| 2961 boxGetGeometry(box1, &x, &y, &w, &h); | |
| 2962 if (fgfract < 0.6) /* erase from the mask */ | |
| 2963 pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0); | |
| 2964 pixDestroy(&pix5); | |
| 2965 boxDestroy(&box1); | |
| 2966 } | |
| 2967 boxaDestroy(&boxa1); | |
| 2968 pixZero(pix4, &empty); | |
| 2969 if (empty) { | |
| 2970 pixDestroy(&pix4); | |
| 2971 return pix1; | |
| 2972 } | |
| 2973 | |
| 2974 /* Combine pixels of the photo-inverted pix with the binarized input */ | |
| 2975 pix5 = pixInvert(NULL, pix1); | |
| 2976 pixCombineMasked(pix1, pix5, pix4); | |
| 2977 | |
| 2978 if (pixadb) { | |
| 2979 pixaAddPix(pixadb, pix5, L_CLONE); | |
| 2980 pixaAddPix(pixadb, pix1, L_COPY); | |
| 2981 } | |
| 2982 pixDestroy(&pix5); | |
| 2983 if (ppixm) | |
| 2984 *ppixm = pix4; | |
| 2985 else | |
| 2986 pixDestroy(&pix4); | |
| 2987 return pix1; | |
| 2988 } |
