Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/binarize.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file binarize.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * =================================================================== | |
| 32 * Image binarization algorithms are found in: | |
| 33 * grayquant.c: standard, simple, general grayscale quantization | |
| 34 * adaptmap.c: local adaptive; mostly gray-to-gray in preparation | |
| 35 * for binarization | |
| 36 * binarize.c: special binarization methods, locally adaptive and | |
| 37 * global. | |
| 38 * =================================================================== | |
| 39 * | |
| 40 * Adaptive Otsu-based thresholding | |
| 41 * l_int32 pixOtsuAdaptiveThreshold() 8 bpp | |
| 42 * | |
| 43 * Otsu thresholding on adaptive background normalization | |
| 44 * PIX *pixOtsuThreshOnBackgroundNorm() 8 bpp | |
| 45 * | |
| 46 * Masking and Otsu estimate on adaptive background normalization | |
| 47 * PIX *pixMaskedThreshOnBackgroundNorm() 8 bpp | |
| 48 * | |
| 49 * Sauvola local thresholding | |
| 50 * l_int32 pixSauvolaBinarizeTiled() | |
| 51 * l_int32 pixSauvolaBinarize() | |
| 52 * static PIX *pixSauvolaGetThreshold() | |
| 53 * static PIX *pixApplyLocalThreshold(); | |
| 54 * | |
| 55 * Sauvola binarization on contrast normalization | |
| 56 * PIX *pixSauvolaOnContrastNorm() 8 bpp | |
| 57 * | |
| 58 * Contrast normalization followed by bg normalization and thresholding | |
| 59 * PIX *pixThreshOnDoubleNorm() | |
| 60 * | |
| 61 * Global thresholding using connected components | |
| 62 * PIX *pixThresholdByConnComp() | |
| 63 * | |
| 64 * Global thresholding by histogram | |
| 65 * PIX *pixThresholdByHisto() | |
| 66 * | |
| 67 * Notes: | |
| 68 * (1) pixOtsuAdaptiveThreshold() computes a global threshold over each | |
| 69 * tile and performs the threshold operation, resulting in a | |
| 70 * binary image for each tile. These are stitched into the | |
| 71 * final result. | |
| 72 * (2) pixOtsuThreshOnBackgroundNorm() and | |
| 73 * pixMaskedThreshOnBackgroundNorm() are binarization functions | |
| 74 * that use background normalization with other techniques. | |
| 75 * (3) Sauvola binarization computes a local threshold based on | |
| 76 * the local average and square average. It takes two constants: | |
| 77 * the window size for the measurement at each pixel and a | |
| 78 * parameter that determines the amount of normalized local | |
| 79 * standard deviation to subtract from the local average value. | |
| 80 * (4) pixThresholdByConnComp() uses the numbers of 4 and 8 connected | |
| 81 * components at different thresholding to determine if a | |
| 82 * global threshold can be used (for text or line-art) and the | |
| 83 * value it should have. | |
| 84 * </pre> | |
| 85 */ | |
| 86 | |
| 87 #ifdef HAVE_CONFIG_H | |
| 88 #include <config_auto.h> | |
| 89 #endif /* HAVE_CONFIG_H */ | |
| 90 | |
| 91 #include <math.h> | |
| 92 #include "allheaders.h" | |
| 93 | |
| 94 static PIX *pixSauvolaGetThreshold(PIX *pixm, PIX *pixms, l_float32 factor, | |
| 95 PIX **ppixsd); | |
| 96 static PIX *pixApplyLocalThreshold(PIX *pixs, PIX *pixth); | |
| 97 | |
| 98 /*------------------------------------------------------------------* | |
| 99 * Adaptive Otsu-based thresholding * | |
| 100 *------------------------------------------------------------------*/ | |
| 101 /*! | |
| 102 * \brief pixOtsuAdaptiveThreshold() | |
| 103 * | |
| 104 * \param[in] pixs 8 bpp | |
| 105 * \param[in] sx, sy desired tile dimensions; actual size may vary | |
| 106 * \param[in] smoothx, smoothy half-width of convolution kernel applied to | |
| 107 * threshold array: use 0 for no smoothing | |
| 108 * \param[in] scorefract fraction of the max Otsu score; typ. 0.1; | |
| 109 * use 0.0 for standard Otsu | |
| 110 * \param[out] ppixth [optional] array of threshold values | |
| 111 * found for each tile | |
| 112 * \param[out] ppixd [optional] thresholded input pixs, | |
| 113 * based on the threshold array | |
| 114 * \return 0 if OK, 1 on error | |
| 115 * | |
| 116 * <pre> | |
| 117 * Notes: | |
| 118 * (1) The Otsu method finds a single global threshold for an image. | |
| 119 * This function allows a locally adapted threshold to be | |
| 120 * found for each tile into which the image is broken up. | |
| 121 * (2) The array of threshold values, one for each tile, constitutes | |
| 122 * a highly downscaled image. This array is optionally | |
| 123 * smoothed using a convolution. The full width and height of the | |
| 124 * convolution kernel are (2 * %smoothx + 1) and (2 * %smoothy + 1). | |
| 125 * (3) The minimum tile dimension allowed is 16. If such small | |
| 126 * tiles are used, it is recommended to use smoothing, because | |
| 127 * without smoothing, each small tile determines the splitting | |
| 128 * threshold independently. A tile that is entirely in the | |
| 129 * image bg will then hallucinate fg, resulting in a very noisy | |
| 130 * binarization. The smoothing should be large enough that no | |
| 131 * tile is only influenced by one type (fg or bg) of pixels, | |
| 132 * because it will force a split of its pixels. | |
| 133 * (4) To get a single global threshold for the entire image, use | |
| 134 * input values of %sx and %sy that are larger than the image. | |
| 135 * For this situation, the smoothing parameters are ignored. | |
| 136 * (5) The threshold values partition the image pixels into two classes: | |
| 137 * one whose values are less than the threshold and another | |
| 138 * whose values are greater than or equal to the threshold. | |
| 139 * This is the same use of 'threshold' as in pixThresholdToBinary(). | |
| 140 * (6) The scorefract is the fraction of the maximum Otsu score, which | |
| 141 * is used to determine the range over which the histogram minimum | |
| 142 * is searched. See numaSplitDistribution() for details on the | |
| 143 * underlying method of choosing a threshold. | |
| 144 * (7) This uses enables a modified version of the Otsu criterion for | |
| 145 * splitting the distribution of pixels in each tile into a | |
| 146 * fg and bg part. The modification consists of searching for | |
| 147 * a minimum in the histogram over a range of pixel values where | |
| 148 * the Otsu score is within a defined fraction, %scorefract, | |
| 149 * of the max score. To get the original Otsu algorithm, set | |
| 150 * %scorefract == 0. | |
| 151 * (8) N.B. This method is NOT recommended for images with weak text | |
| 152 * and significant background noise, such as bleedthrough, because | |
| 153 * of the problem noted in (3) above for tiling. Use Sauvola. | |
| 154 * </pre> | |
| 155 */ | |
| 156 l_ok | |
| 157 pixOtsuAdaptiveThreshold(PIX *pixs, | |
| 158 l_int32 sx, | |
| 159 l_int32 sy, | |
| 160 l_int32 smoothx, | |
| 161 l_int32 smoothy, | |
| 162 l_float32 scorefract, | |
| 163 PIX **ppixth, | |
| 164 PIX **ppixd) | |
| 165 { | |
| 166 l_int32 w, h, nx, ny, i, j, thresh; | |
| 167 l_uint32 val; | |
| 168 PIX *pixt, *pixb, *pixthresh, *pixth, *pixd; | |
| 169 PIXTILING *pt; | |
| 170 | |
| 171 if (!ppixth && !ppixd) | |
| 172 return ERROR_INT("neither &pixth nor &pixd defined", __func__, 1); | |
| 173 if (ppixth) *ppixth = NULL; | |
| 174 if (ppixd) *ppixd = NULL; | |
| 175 if (!pixs || pixGetDepth(pixs) != 8) | |
| 176 return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1); | |
| 177 if (sx < 16 || sy < 16) | |
| 178 return ERROR_INT("sx and sy must be >= 16", __func__, 1); | |
| 179 | |
| 180 /* Compute the threshold array for the tiles */ | |
| 181 pixGetDimensions(pixs, &w, &h, NULL); | |
| 182 nx = L_MAX(1, w / sx); | |
| 183 ny = L_MAX(1, h / sy); | |
| 184 smoothx = L_MIN(smoothx, (nx - 1) / 2); | |
| 185 smoothy = L_MIN(smoothy, (ny - 1) / 2); | |
| 186 pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); | |
| 187 pixthresh = pixCreate(nx, ny, 8); | |
| 188 for (i = 0; i < ny; i++) { | |
| 189 for (j = 0; j < nx; j++) { | |
| 190 pixt = pixTilingGetTile(pt, i, j); | |
| 191 pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh, | |
| 192 NULL, NULL, NULL); | |
| 193 pixSetPixel(pixthresh, j, i, thresh); /* see note (4) */ | |
| 194 pixDestroy(&pixt); | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 /* Optionally smooth the threshold array */ | |
| 199 if (smoothx > 0 || smoothy > 0) | |
| 200 pixth = pixBlockconv(pixthresh, smoothx, smoothy); | |
| 201 else | |
| 202 pixth = pixClone(pixthresh); | |
| 203 pixDestroy(&pixthresh); | |
| 204 | |
| 205 /* Optionally apply the threshold array to binarize pixs */ | |
| 206 if (ppixd) { | |
| 207 pixd = pixCreate(w, h, 1); | |
| 208 pixCopyResolution(pixd, pixs); | |
| 209 for (i = 0; i < ny; i++) { | |
| 210 for (j = 0; j < nx; j++) { | |
| 211 pixt = pixTilingGetTile(pt, i, j); | |
| 212 pixGetPixel(pixth, j, i, &val); | |
| 213 pixb = pixThresholdToBinary(pixt, val); | |
| 214 pixTilingPaintTile(pixd, i, j, pixb, pt); | |
| 215 pixDestroy(&pixt); | |
| 216 pixDestroy(&pixb); | |
| 217 } | |
| 218 } | |
| 219 *ppixd = pixd; | |
| 220 } | |
| 221 | |
| 222 if (ppixth) | |
| 223 *ppixth = pixth; | |
| 224 else | |
| 225 pixDestroy(&pixth); | |
| 226 | |
| 227 pixTilingDestroy(&pt); | |
| 228 return 0; | |
| 229 } | |
| 230 | |
| 231 | |
| 232 /*------------------------------------------------------------------* | |
| 233 * Otsu thresholding on adaptive background normalization * | |
| 234 *------------------------------------------------------------------*/ | |
| 235 /*! | |
| 236 * \brief pixOtsuThreshOnBackgroundNorm() | |
| 237 * | |
| 238 * \param[in] pixs 8 bpp grayscale; not colormapped | |
| 239 * \param[in] pixim [optional] 1 bpp 'image' mask; can be null | |
| 240 * \param[in] sx, sy tile size in pixels | |
| 241 * \param[in] thresh threshold for determining foreground | |
| 242 * \param[in] mincount min threshold on counts in a tile | |
| 243 * \param[in] bgval target bg val; typ. > 128 | |
| 244 * \param[in] smoothx half-width of block convolution kernel width | |
| 245 * \param[in] smoothy half-width of block convolution kernel height | |
| 246 * \param[in] scorefract fraction of the max Otsu score; typ. 0.1 | |
| 247 * \param[out] pthresh [optional] threshold value that was | |
| 248 * used on the normalized image | |
| 249 * \return pixd 1 bpp thresholded image, or NULL on error | |
| 250 * | |
| 251 * <pre> | |
| 252 * Notes: | |
| 253 * (1) This does background normalization followed by Otsu | |
| 254 * thresholding. Otsu binarization attempts to split the | |
| 255 * image into two roughly equal sets of pixels, and it does | |
| 256 * a very poor job when there are large amounts of dark | |
| 257 * background. By doing a background normalization first, | |
| 258 * to get the background near 255, we remove this problem. | |
| 259 * Then we use a modified Otsu to estimate the best global | |
| 260 * threshold on the normalized image. | |
| 261 * (2) See pixBackgroundNorm() for meaning and typical values | |
| 262 * of input parameters. For a start, you can try: | |
| 263 * sx, sy = 10, 15 | |
| 264 * thresh = 100 | |
| 265 * mincount = 50 | |
| 266 * bgval = 255 | |
| 267 * smoothx, smoothy = 2 | |
| 268 * </pre> | |
| 269 */ | |
| 270 PIX * | |
| 271 pixOtsuThreshOnBackgroundNorm(PIX *pixs, | |
| 272 PIX *pixim, | |
| 273 l_int32 sx, | |
| 274 l_int32 sy, | |
| 275 l_int32 thresh, | |
| 276 l_int32 mincount, | |
| 277 l_int32 bgval, | |
| 278 l_int32 smoothx, | |
| 279 l_int32 smoothy, | |
| 280 l_float32 scorefract, | |
| 281 l_int32 *pthresh) | |
| 282 { | |
| 283 l_int32 w, h; | |
| 284 l_uint32 val; | |
| 285 PIX *pixn, *pixt, *pixd; | |
| 286 | |
| 287 if (pthresh) *pthresh = 0; | |
| 288 if (!pixs || pixGetDepth(pixs) != 8) | |
| 289 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", __func__, NULL); | |
| 290 if (pixGetColormap(pixs)) | |
| 291 return (PIX *)ERROR_PTR("pixs is colormapped", __func__, NULL); | |
| 292 if (sx < 4 || sy < 4) | |
| 293 return (PIX *)ERROR_PTR("sx and sy must be >= 4", __func__, NULL); | |
| 294 if (mincount > sx * sy) { | |
| 295 L_WARNING("mincount too large for tile size\n", __func__); | |
| 296 mincount = (sx * sy) / 3; | |
| 297 } | |
| 298 | |
| 299 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, | |
| 300 mincount, bgval, smoothx, smoothy); | |
| 301 if (!pixn) | |
| 302 return (PIX *)ERROR_PTR("pixn not made", __func__, NULL); | |
| 303 | |
| 304 /* Just use 1 tile for a global threshold, which is stored | |
| 305 * as a single pixel in pixt. */ | |
| 306 pixGetDimensions(pixn, &w, &h, NULL); | |
| 307 pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd); | |
| 308 pixDestroy(&pixn); | |
| 309 | |
| 310 if (pixt && pthresh) { | |
| 311 pixGetPixel(pixt, 0, 0, &val); | |
| 312 *pthresh = val; | |
| 313 } | |
| 314 pixDestroy(&pixt); | |
| 315 | |
| 316 if (!pixd) | |
| 317 return (PIX *)ERROR_PTR("pixd not made", __func__, NULL); | |
| 318 else | |
| 319 return pixd; | |
| 320 } | |
| 321 | |
| 322 | |
| 323 | |
| 324 /*----------------------------------------------------------------------* | |
| 325 * Masking and Otsu estimate on adaptive background normalization * | |
| 326 *----------------------------------------------------------------------*/ | |
| 327 /*! | |
| 328 * \brief pixMaskedThreshOnBackgroundNorm() | |
| 329 * | |
| 330 * \param[in] pixs 8 bpp grayscale; not colormapped | |
| 331 * \param[in] pixim [optional] 1 bpp 'image' mask; can be null | |
| 332 * \param[in] sx, sy tile size in pixels | |
| 333 * \param[in] thresh threshold for determining foreground | |
| 334 * \param[in] mincount min threshold on counts in a tile | |
| 335 * \param[in] smoothx half-width of block convolution kernel width | |
| 336 * \param[in] smoothy half-width of block convolution kernel height | |
| 337 * \param[in] scorefract fraction of the max Otsu score; typ. ~ 0.1 | |
| 338 * \param[out] pthresh [optional] threshold value that was | |
| 339 * used on the normalized image | |
| 340 * \return pixd 1 bpp thresholded image, or NULL on error | |
| 341 * | |
| 342 * <pre> | |
| 343 * Notes: | |
| 344 * (1) This begins with a standard background normalization. | |
| 345 * Additionally, there is a flexible background norm, that | |
| 346 * will adapt to a rapidly varying background, and this | |
| 347 * puts white pixels in the background near regions with | |
| 348 * significant foreground. The white pixels are turned into | |
| 349 * a 1 bpp selection mask by binarization followed by dilation. | |
| 350 * Otsu thresholding is performed on the input image to get an | |
| 351 * estimate of the threshold in the non-mask regions. | |
| 352 * The background normalized image is thresholded with two | |
| 353 * different values, and the result is combined using | |
| 354 * the selection mask. | |
| 355 * (2) Note that the numbers 255 (for bgval target) and 190 (for | |
| 356 * thresholding on pixn) are tied together, and explicitly | |
| 357 * defined in this function. | |
| 358 * (3) See pixBackgroundNorm() for meaning and typical values | |
| 359 * of input parameters. For a start, you can try: | |
| 360 * sx, sy = 10, 15 | |
| 361 * thresh = 100 | |
| 362 * mincount = 50 | |
| 363 * smoothx, smoothy = 2 | |
| 364 * </pre> | |
| 365 */ | |
| 366 PIX * | |
| 367 pixMaskedThreshOnBackgroundNorm(PIX *pixs, | |
| 368 PIX *pixim, | |
| 369 l_int32 sx, | |
| 370 l_int32 sy, | |
| 371 l_int32 thresh, | |
| 372 l_int32 mincount, | |
| 373 l_int32 smoothx, | |
| 374 l_int32 smoothy, | |
| 375 l_float32 scorefract, | |
| 376 l_int32 *pthresh) | |
| 377 { | |
| 378 l_int32 w, h, highthresh; | |
| 379 l_uint32 val; | |
| 380 PIX *pixn, *pixm, *pixd, *pix1, *pix2, *pix3, *pix4; | |
| 381 | |
| 382 if (pthresh) *pthresh = 0; | |
| 383 if (!pixs || pixGetDepth(pixs) != 8) | |
| 384 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", __func__, NULL); | |
| 385 if (pixGetColormap(pixs)) | |
| 386 return (PIX *)ERROR_PTR("pixs is colormapped", __func__, NULL); | |
| 387 if (sx < 4 || sy < 4) | |
| 388 return (PIX *)ERROR_PTR("sx and sy must be >= 4", __func__, NULL); | |
| 389 if (mincount > sx * sy) { | |
| 390 L_WARNING("mincount too large for tile size\n", __func__); | |
| 391 mincount = (sx * sy) / 3; | |
| 392 } | |
| 393 | |
| 394 /* Standard background normalization */ | |
| 395 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, | |
| 396 mincount, 255, smoothx, smoothy); | |
| 397 if (!pixn) | |
| 398 return (PIX *)ERROR_PTR("pixn not made", __func__, NULL); | |
| 399 | |
| 400 /* Special background normalization for adaptation to quickly | |
| 401 * varying background. Threshold on the very light parts, | |
| 402 * which tend to be near significant edges, and dilate to | |
| 403 * form a mask over regions that are typically text. The | |
| 404 * dilation size is chosen to cover the text completely, | |
| 405 * except for very thick fonts. */ | |
| 406 pix1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20); | |
| 407 pix2 = pixThresholdToBinary(pix1, 240); | |
| 408 pixInvert(pix2, pix2); | |
| 409 pixm = pixMorphSequence(pix2, "d21.21", 0); | |
| 410 pixDestroy(&pix1); | |
| 411 pixDestroy(&pix2); | |
| 412 | |
| 413 /* Use Otsu to get a global threshold estimate for the image, | |
| 414 * which is stored as a single pixel in pix3. */ | |
| 415 pixGetDimensions(pixs, &w, &h, NULL); | |
| 416 pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pix3, NULL); | |
| 417 pixGetPixel(pix3, 0, 0, &val); | |
| 418 if (pthresh) *pthresh = val; | |
| 419 pixDestroy(&pix3); | |
| 420 | |
| 421 /* Threshold the background normalized images differentially, | |
| 422 * using a high value correlated with the background normalization | |
| 423 * for the part of the image under the mask (i.e., near the | |
| 424 * darker, thicker foreground), and a value that depends on the Otsu | |
| 425 * threshold for the rest of the image. This gives a solid | |
| 426 * (high) thresholding for the foreground parts of the image, | |
| 427 * while allowing the background and light foreground to be | |
| 428 * reasonably well cleaned using a threshold adapted to the | |
| 429 * input image. */ | |
| 430 highthresh = L_MIN(256, val + 30); | |
| 431 pixd = pixThresholdToBinary(pixn, highthresh); /* for bg and light fg */ | |
| 432 pix4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */ | |
| 433 pixCombineMasked(pixd, pix4, pixm); | |
| 434 pixDestroy(&pix4); | |
| 435 pixDestroy(&pixm); | |
| 436 pixDestroy(&pixn); | |
| 437 | |
| 438 if (!pixd) | |
| 439 return (PIX *)ERROR_PTR("pixd not made", __func__, NULL); | |
| 440 else | |
| 441 return pixd; | |
| 442 } | |
| 443 | |
| 444 | |
| 445 /*----------------------------------------------------------------------* | |
| 446 * Sauvola binarization * | |
| 447 *----------------------------------------------------------------------*/ | |
| 448 /*! | |
| 449 * \brief pixSauvolaBinarizeTiled() | |
| 450 * | |
| 451 * \param[in] pixs 8 bpp grayscale, not colormapped | |
| 452 * \param[in] whsize window half-width for measuring local statistics | |
| 453 * \param[in] factor factor for reducing threshold due to variance; >= 0 | |
| 454 * \param[in] nx, ny subdivision into tiles; >= 1 | |
| 455 * \param[out] ppixth [optional] Sauvola threshold values | |
| 456 * \param[out] ppixd [optional] thresholded image | |
| 457 * \return 0 if OK, 1 on error | |
| 458 * | |
| 459 * <pre> | |
| 460 * Notes: | |
| 461 * (1) The window width and height are 2 * %whsize + 1. The minimum | |
| 462 * value for %whsize is 2; typically it is >= 7. | |
| 463 * (2) For nx == ny == 1, this defaults to pixSauvolaBinarize(). | |
| 464 * (3) Why a tiled version? | |
| 465 * (a) A uint32 is used for the mean value accumulator, so | |
| 466 * overflow can occur for an image with more than 16M pixels. | |
| 467 * (b) A dpix is used to accumulate mean square values, and it | |
| 468 * can only accommodate images with less than 2^28 pixels. | |
| 469 * Using tiles reduces the size of all the arrays. | |
| 470 * (c) Each tile can be processed independently, in parallel, | |
| 471 * on a multicore processor. | |
| 472 * (4) The Sauvola threshold is determined from the formula: | |
| 473 * t = m * (1 - k * (1 - s / 128)) | |
| 474 * See pixSauvolaBinarize() for details. | |
| 475 * </pre> | |
| 476 */ | |
| 477 l_ok | |
| 478 pixSauvolaBinarizeTiled(PIX *pixs, | |
| 479 l_int32 whsize, | |
| 480 l_float32 factor, | |
| 481 l_int32 nx, | |
| 482 l_int32 ny, | |
| 483 PIX **ppixth, | |
| 484 PIX **ppixd) | |
| 485 { | |
| 486 l_int32 i, j, w, h, xrat, yrat; | |
| 487 PIX *pixth = NULL, *pixd = NULL, *tileth = NULL, *tiled = NULL, *pixt; | |
| 488 PIX **ptileth, **ptiled; | |
| 489 PIXTILING *pt; | |
| 490 | |
| 491 if (!ppixth && !ppixd) | |
| 492 return ERROR_INT("no outputs", __func__, 1); | |
| 493 if (ppixth) *ppixth = NULL; | |
| 494 if (ppixd) *ppixd = NULL; | |
| 495 if (!pixs || pixGetDepth(pixs) != 8) | |
| 496 return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1); | |
| 497 if (pixGetColormap(pixs)) | |
| 498 return ERROR_INT("pixs is cmapped", __func__, 1); | |
| 499 pixGetDimensions(pixs, &w, &h, NULL); | |
| 500 if (whsize < 2) | |
| 501 return ERROR_INT("whsize must be >= 2", __func__, 1); | |
| 502 if (w < 2 * whsize + 3 || h < 2 * whsize + 3) | |
| 503 return ERROR_INT("whsize too large for image", __func__, 1); | |
| 504 if (factor < 0.0) | |
| 505 return ERROR_INT("factor must be >= 0", __func__, 1); | |
| 506 | |
| 507 if (nx <= 1 && ny <= 1) | |
| 508 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, | |
| 509 ppixth, ppixd); | |
| 510 | |
| 511 /* Test to see if the tiles are too small. The required | |
| 512 * condition is that the tile dimensions must be at least | |
| 513 * (whsize + 2) x (whsize + 2). */ | |
| 514 xrat = w / nx; | |
| 515 yrat = h / ny; | |
| 516 if (xrat < whsize + 2) { | |
| 517 nx = w / (whsize + 2); | |
| 518 L_WARNING("tile width too small; nx reduced to %d\n", __func__, nx); | |
| 519 } | |
| 520 if (yrat < whsize + 2) { | |
| 521 ny = h / (whsize + 2); | |
| 522 L_WARNING("tile height too small; ny reduced to %d\n", __func__, ny); | |
| 523 } | |
| 524 if (nx <= 1 && ny <= 1) | |
| 525 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, | |
| 526 ppixth, ppixd); | |
| 527 | |
| 528 /* We can use pixtiling for painting both outputs, if requested */ | |
| 529 if (ppixth) { | |
| 530 pixth = pixCreate(w, h, 8); | |
| 531 *ppixth = pixth; | |
| 532 } | |
| 533 if (ppixd) { | |
| 534 pixd = pixCreate(w, h, 1); | |
| 535 *ppixd = pixd; | |
| 536 } | |
| 537 pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1); | |
| 538 pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */ | |
| 539 | |
| 540 for (i = 0; i < ny; i++) { | |
| 541 for (j = 0; j < nx; j++) { | |
| 542 pixt = pixTilingGetTile(pt, i, j); | |
| 543 ptileth = (ppixth) ? &tileth : NULL; | |
| 544 ptiled = (ppixd) ? &tiled : NULL; | |
| 545 pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL, | |
| 546 ptileth, ptiled); | |
| 547 if (ppixth) { /* do not strip */ | |
| 548 pixTilingPaintTile(pixth, i, j, tileth, pt); | |
| 549 pixDestroy(&tileth); | |
| 550 } | |
| 551 if (ppixd) { | |
| 552 pixTilingPaintTile(pixd, i, j, tiled, pt); | |
| 553 pixDestroy(&tiled); | |
| 554 } | |
| 555 pixDestroy(&pixt); | |
| 556 } | |
| 557 } | |
| 558 | |
| 559 pixTilingDestroy(&pt); | |
| 560 return 0; | |
| 561 } | |
| 562 | |
| 563 | |
| 564 /*! | |
| 565 * \brief pixSauvolaBinarize() | |
| 566 * | |
| 567 * \param[in] pixs 8 bpp grayscale; not colormapped | |
| 568 * \param[in] whsize window half-width for measuring local statistics | |
| 569 * \param[in] factor factor for reducing threshold due to variance; >= 0 | |
| 570 * \param[in] addborder 1 to add border of width (%whsize + 1) on all sides | |
| 571 * \param[out] ppixm [optional] local mean values | |
| 572 * \param[out] ppixsd [optional] local standard deviation values | |
| 573 * \param[out] ppixth [optional] threshold values | |
| 574 * \param[out] ppixd [optional] thresholded image | |
| 575 * \return 0 if OK, 1 on error | |
| 576 * | |
| 577 * <pre> | |
| 578 * Notes: | |
| 579 * (1) The window width and height are 2 * %whsize + 1. The minimum | |
| 580 * value for %whsize is 2; typically it is >= 7.. | |
| 581 * (2) The local statistics, measured over the window, are the | |
| 582 * average and standard deviation. | |
| 583 * (3) The measurements of the mean and standard deviation are | |
| 584 * performed inside a border of (%whsize + 1) pixels. If pixs does | |
| 585 * not have these added border pixels, use %addborder = 1 to add | |
| 586 * it here; otherwise use %addborder = 0. | |
| 587 * (4) The Sauvola threshold is determined from the formula: | |
| 588 * t = m * (1 - k * (1 - s / 128)) | |
| 589 * where: | |
| 590 * t = local threshold | |
| 591 * m = local mean | |
| 592 * k = %factor (>= 0) [ typ. 0.35 ] | |
| 593 * s = local standard deviation, which is maximized at | |
| 594 * 127.5 when half the samples are 0 and half are 255. | |
| 595 * (5) The basic idea of Niblack and Sauvola binarization is that | |
| 596 * the local threshold should be less than the median value, | |
| 597 * and the larger the variance, the closer to the median | |
| 598 * it should be chosen. Typical values for k are between | |
| 599 * 0.2 and 0.5. | |
| 600 * </pre> | |
| 601 */ | |
| 602 l_ok | |
| 603 pixSauvolaBinarize(PIX *pixs, | |
| 604 l_int32 whsize, | |
| 605 l_float32 factor, | |
| 606 l_int32 addborder, | |
| 607 PIX **ppixm, | |
| 608 PIX **ppixsd, | |
| 609 PIX **ppixth, | |
| 610 PIX **ppixd) | |
| 611 { | |
| 612 l_int32 w, h; | |
| 613 PIX *pixg, *pixsc, *pixm = NULL, *pixms = NULL, *pixth = NULL, *pixd = NULL; | |
| 614 | |
| 615 if (ppixm) *ppixm = NULL; | |
| 616 if (ppixsd) *ppixsd = NULL; | |
| 617 if (ppixth) *ppixth = NULL; | |
| 618 if (ppixd) *ppixd = NULL; | |
| 619 if (!ppixm && !ppixsd && !ppixth && !ppixd) | |
| 620 return ERROR_INT("no outputs", __func__, 1); | |
| 621 if (!pixs || pixGetDepth(pixs) != 8) | |
| 622 return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1); | |
| 623 if (pixGetColormap(pixs)) | |
| 624 return ERROR_INT("pixs is cmapped", __func__, 1); | |
| 625 pixGetDimensions(pixs, &w, &h, NULL); | |
| 626 if (whsize < 2) | |
| 627 return ERROR_INT("whsize must be >= 2", __func__, 1); | |
| 628 if (w < 2 * whsize + 3 || h < 2 * whsize + 3) | |
| 629 return ERROR_INT("whsize too large for image", __func__, 1); | |
| 630 if (factor < 0.0) | |
| 631 return ERROR_INT("factor must be >= 0", __func__, 1); | |
| 632 | |
| 633 if (addborder) { | |
| 634 pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1, | |
| 635 whsize + 1, whsize + 1); | |
| 636 pixsc = pixClone(pixs); | |
| 637 } else { | |
| 638 pixg = pixClone(pixs); | |
| 639 pixsc = pixRemoveBorder(pixs, whsize + 1); | |
| 640 } | |
| 641 if (!pixg || !pixsc) | |
| 642 return ERROR_INT("pixg and pixsc not made", __func__, 1); | |
| 643 | |
| 644 /* All these functions strip off the border pixels. */ | |
| 645 if (ppixm || ppixth || ppixd) | |
| 646 pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1); | |
| 647 if (ppixsd || ppixth || ppixd) | |
| 648 pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1); | |
| 649 if (ppixth || ppixd) | |
| 650 pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd); | |
| 651 if (ppixd) { | |
| 652 pixd = pixApplyLocalThreshold(pixsc, pixth); | |
| 653 pixCopyResolution(pixd, pixs); | |
| 654 } | |
| 655 | |
| 656 if (ppixm) | |
| 657 *ppixm = pixm; | |
| 658 else | |
| 659 pixDestroy(&pixm); | |
| 660 pixDestroy(&pixms); | |
| 661 if (ppixth) | |
| 662 *ppixth = pixth; | |
| 663 else | |
| 664 pixDestroy(&pixth); | |
| 665 if (ppixd) | |
| 666 *ppixd = pixd; | |
| 667 pixDestroy(&pixg); | |
| 668 pixDestroy(&pixsc); | |
| 669 return 0; | |
| 670 } | |
| 671 | |
| 672 | |
| 673 /*! | |
| 674 * \brief pixSauvolaGetThreshold() | |
| 675 * | |
| 676 * \param[in] pixm 8 bpp grayscale; not colormapped | |
| 677 * \param[in] pixms 32 bpp | |
| 678 * \param[in] factor factor for reducing threshold due to variance; >= 0 | |
| 679 * \param[out] ppixsd [optional] local standard deviation | |
| 680 * \return pixd 8 bpp, sauvola threshold values, or NULL on error | |
| 681 * | |
| 682 * <pre> | |
| 683 * Notes: | |
| 684 * (1) The Sauvola threshold is determined from the formula: | |
| 685 * t = m * (1 - k * (1 - s / 128)) | |
| 686 * where: | |
| 687 * t = local threshold | |
| 688 * m = local mean | |
| 689 * k = %factor (>= 0) [ typ. 0.35 ] | |
| 690 * s = local standard deviation, which is maximized at | |
| 691 * 127.5 when half the samples are 0 and half are 255. | |
| 692 * (2) See pixSauvolaBinarize() for other details. | |
| 693 * (3) Important definitions and relations for computing averages: | |
| 694 * v == pixel value | |
| 695 * E(p) == expected value of p == average of p over some pixel set | |
| 696 * S(v) == square of v == v * v | |
| 697 * mv == E(v) == expected pixel value == mean value | |
| 698 * ms == E(S(v)) == expected square of pixel values | |
| 699 * == mean square value | |
| 700 * var == variance == expected square of deviation from mean | |
| 701 * == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv)) | |
| 702 * = E(S(v)) - S(mv) | |
| 703 * = ms - mv * mv | |
| 704 * s == standard deviation = sqrt(var) | |
| 705 * So for evaluating the standard deviation in the Sauvola | |
| 706 * threshold, we take | |
| 707 * s = sqrt(ms - mv * mv) | |
| 708 * </pre> | |
| 709 */ | |
| 710 static PIX * | |
| 711 pixSauvolaGetThreshold(PIX *pixm, | |
| 712 PIX *pixms, | |
| 713 l_float32 factor, | |
| 714 PIX **ppixsd) | |
| 715 { | |
| 716 l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab; | |
| 717 l_int32 mv, ms, var, thresh; | |
| 718 l_uint32 *datam, *datams, *datasd = NULL, *datad; | |
| 719 l_uint32 *linem, *linems, *linesd = NULL, *lined; | |
| 720 l_float32 sd; | |
| 721 l_float32 *tab = NULL; /* of 2^16 square roots */ | |
| 722 PIX *pixsd = NULL, *pixd = NULL; | |
| 723 | |
| 724 if (ppixsd) *ppixsd = NULL; | |
| 725 if (!pixm || pixGetDepth(pixm) != 8) | |
| 726 return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", __func__, NULL); | |
| 727 if (pixGetColormap(pixm)) | |
| 728 return (PIX *)ERROR_PTR("pixm is colormapped", __func__, NULL); | |
| 729 if (!pixms || pixGetDepth(pixms) != 32) | |
| 730 return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp", | |
| 731 __func__, NULL); | |
| 732 if (factor < 0.0) | |
| 733 return (PIX *)ERROR_PTR("factor must be >= 0", __func__, NULL); | |
| 734 | |
| 735 /* Only make a table of 2^16 square roots if there | |
| 736 * are enough pixels to justify it. */ | |
| 737 pixGetDimensions(pixm, &w, &h, NULL); | |
| 738 usetab = (w * h > 100000) ? 1 : 0; | |
| 739 if (usetab) { | |
| 740 tabsize = 1 << 16; | |
| 741 tab = (l_float32 *)LEPT_CALLOC(tabsize, sizeof(l_float32)); | |
| 742 for (i = 0; i < tabsize; i++) | |
| 743 tab[i] = sqrtf((l_float32)i); | |
| 744 } | |
| 745 | |
| 746 pixd = pixCreate(w, h, 8); | |
| 747 if (ppixsd) { | |
| 748 pixsd = pixCreate(w, h, 8); | |
| 749 *ppixsd = pixsd; | |
| 750 } | |
| 751 datam = pixGetData(pixm); | |
| 752 datams = pixGetData(pixms); | |
| 753 if (ppixsd) datasd = pixGetData(pixsd); | |
| 754 datad = pixGetData(pixd); | |
| 755 wplm = pixGetWpl(pixm); | |
| 756 wplms = pixGetWpl(pixms); | |
| 757 if (ppixsd) wplsd = pixGetWpl(pixsd); | |
| 758 wpld = pixGetWpl(pixd); | |
| 759 for (i = 0; i < h; i++) { | |
| 760 linem = datam + i * wplm; | |
| 761 linems = datams + i * wplms; | |
| 762 if (ppixsd) linesd = datasd + i * wplsd; | |
| 763 lined = datad + i * wpld; | |
| 764 for (j = 0; j < w; j++) { | |
| 765 mv = GET_DATA_BYTE(linem, j); | |
| 766 ms = linems[j]; | |
| 767 var = ms - mv * mv; | |
| 768 if (usetab) | |
| 769 sd = tab[var]; | |
| 770 else | |
| 771 sd = sqrtf((l_float32)var); | |
| 772 if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd); | |
| 773 thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.))); | |
| 774 SET_DATA_BYTE(lined, j, thresh); | |
| 775 } | |
| 776 } | |
| 777 | |
| 778 if (usetab) LEPT_FREE(tab); | |
| 779 return pixd; | |
| 780 } | |
| 781 | |
| 782 | |
| 783 /*! | |
| 784 * \brief pixApplyLocalThreshold() | |
| 785 * | |
| 786 * \param[in] pixs 8 bpp grayscale; not colormapped | |
| 787 * \param[in] pixth 8 bpp array of local thresholds | |
| 788 * \return pixd 1 bpp, thresholded image, or NULL on error | |
| 789 */ | |
| 790 static PIX * | |
| 791 pixApplyLocalThreshold(PIX *pixs, | |
| 792 PIX *pixth) | |
| 793 { | |
| 794 l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt; | |
| 795 l_uint32 *datas, *datat, *datad, *lines, *linet, *lined; | |
| 796 PIX *pixd; | |
| 797 | |
| 798 if (!pixs || pixGetDepth(pixs) != 8) | |
| 799 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", __func__, NULL); | |
| 800 if (pixGetColormap(pixs)) | |
| 801 return (PIX *)ERROR_PTR("pixs is colormapped", __func__, NULL); | |
| 802 if (!pixth || pixGetDepth(pixth) != 8) | |
| 803 return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", __func__, NULL); | |
| 804 | |
| 805 pixGetDimensions(pixs, &w, &h, NULL); | |
| 806 pixd = pixCreate(w, h, 1); | |
| 807 datas = pixGetData(pixs); | |
| 808 datat = pixGetData(pixth); | |
| 809 datad = pixGetData(pixd); | |
| 810 wpls = pixGetWpl(pixs); | |
| 811 wplt = pixGetWpl(pixth); | |
| 812 wpld = pixGetWpl(pixd); | |
| 813 for (i = 0; i < h; i++) { | |
| 814 lines = datas + i * wpls; | |
| 815 linet = datat + i * wplt; | |
| 816 lined = datad + i * wpld; | |
| 817 for (j = 0; j < w; j++) { | |
| 818 vals = GET_DATA_BYTE(lines, j); | |
| 819 valt = GET_DATA_BYTE(linet, j); | |
| 820 if (vals < valt) | |
| 821 SET_DATA_BIT(lined, j); | |
| 822 } | |
| 823 } | |
| 824 | |
| 825 return pixd; | |
| 826 } | |
| 827 | |
| 828 | |
| 829 /*----------------------------------------------------------------------* | |
| 830 * Contrast normalization followed by Sauvola binarization * | |
| 831 *----------------------------------------------------------------------*/ | |
| 832 /*! | |
| 833 * \brief pixSauvolaOnContrastNorm() | |
| 834 * | |
| 835 * \param[in] pixs 8 or 32 bpp | |
| 836 * \param[in] mindiff minimum diff to accept as valid in contrast | |
| 837 * normalization. Use ~130 for noisy images. | |
| 838 * \param[out] ppixn [optional] intermediate output from contrast | |
| 839 * normalization | |
| 840 * \param[out] ppixth [optional] threshold array for binarization | |
| 841 * \return pixd 1 bpp thresholded image, or NULL on error | |
| 842 * | |
| 843 * <pre> | |
| 844 * Notes: | |
| 845 * (1) This composite operation is good for adaptively removing | |
| 846 * dark background. | |
| 847 * </pre> | |
| 848 */ | |
| 849 PIX * | |
| 850 pixSauvolaOnContrastNorm(PIX *pixs, | |
| 851 l_int32 mindiff, | |
| 852 PIX **ppixn, | |
| 853 PIX **ppixth) | |
| 854 { | |
| 855 l_int32 w, h, d, nx, ny; | |
| 856 PIX *pixg, *pix1, *pixd; | |
| 857 | |
| 858 if (ppixn) *ppixn = NULL; | |
| 859 if (ppixth) *ppixth = NULL; | |
| 860 if (!pixs || (d = pixGetDepth(pixs)) < 8) | |
| 861 return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", __func__, NULL); | |
| 862 if (d == 32) | |
| 863 pixg = pixConvertRGBToGray(pixs, 0.3f, 0.4f, 0.3f); | |
| 864 else | |
| 865 pixg = pixConvertTo8(pixs, 0); | |
| 866 | |
| 867 pix1 = pixContrastNorm(NULL, pixg, 50, 50, mindiff, 2, 2); | |
| 868 | |
| 869 /* Use tiles of size approximately 250 x 250 */ | |
| 870 pixGetDimensions(pix1, &w, &h, NULL); | |
| 871 nx = L_MAX(1, (w + 125) / 250); | |
| 872 ny = L_MAX(1, (h + 125) / 250); | |
| 873 pixSauvolaBinarizeTiled(pix1, 25, 0.40f, nx, ny, ppixth, &pixd); | |
| 874 pixDestroy(&pixg); | |
| 875 if (ppixn) | |
| 876 *ppixn = pix1; | |
| 877 else | |
| 878 pixDestroy(&pix1); | |
| 879 return pixd; | |
| 880 } | |
| 881 | |
| 882 | |
| 883 /*----------------------------------------------------------------------* | |
| 884 * Contrast normalization followed by background normalization * | |
| 885 * and thresholding * | |
| 886 *----------------------------------------------------------------------*/ | |
| 887 /*! | |
| 888 * \brief pixThreshOnDoubleNorm() | |
| 889 * | |
| 890 * \param[in] pixs 8 or 32 bpp | |
| 891 * \param[in] mindiff minimum diff to accept as valid in contrast | |
| 892 * normalization. Use ~130 for noisy images. | |
| 893 * \return pixd 1 bpp thresholded image, or NULL on error | |
| 894 * | |
| 895 * <pre> | |
| 896 * Notes: | |
| 897 * (1) This composite operation is good for adaptively removing | |
| 898 * dark background. | |
| 899 * (2) The threshold for the binarization uses an estimate based | |
| 900 * on Otsu adaptive thresholding. | |
| 901 * </pre> | |
| 902 */ | |
| 903 PIX * | |
| 904 pixThreshOnDoubleNorm(PIX *pixs, | |
| 905 l_int32 mindiff) | |
| 906 { | |
| 907 l_int32 d, ival; | |
| 908 l_uint32 val; | |
| 909 PIX *pixg, *pix1, *pixd; | |
| 910 | |
| 911 if (!pixs || (d = pixGetDepth(pixs)) < 8) | |
| 912 return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", __func__, NULL); | |
| 913 if (d == 32) | |
| 914 pixg = pixConvertRGBToGray(pixs, 0.3f, 0.4f, 0.3f); | |
| 915 else | |
| 916 pixg = pixConvertTo8(pixs, 0); | |
| 917 | |
| 918 /* Use the entire image for the estimate; pix1 is 1x1 */ | |
| 919 pixOtsuAdaptiveThreshold(pixg, 5000, 5000, 0, 0, 0.1f, &pix1, NULL); | |
| 920 pixGetPixel(pix1, 0, 0, &val); | |
| 921 ival = (l_int32)val; | |
| 922 ival = L_MIN(ival, 110); | |
| 923 pixDestroy(&pix1); | |
| 924 | |
| 925 /* Double normalization */ | |
| 926 pixContrastNorm(pixg, pixg, 50, 50, mindiff, 2, 2); | |
| 927 pix1 = pixBackgroundNormSimple(pixg, NULL, NULL); | |
| 928 pixDestroy(&pixg); | |
| 929 | |
| 930 /* lept_stderr("ival = %d\n", ival); */ | |
| 931 pixd = pixThresholdToBinary(pix1, ival); | |
| 932 pixDestroy(&pix1); | |
| 933 return pixd; | |
| 934 } | |
| 935 | |
| 936 | |
| 937 /*----------------------------------------------------------------------* | |
| 938 * Global thresholding using connected components * | |
| 939 *----------------------------------------------------------------------*/ | |
| 940 /*! | |
| 941 * \brief pixThresholdByConnComp() | |
| 942 * | |
| 943 * \param[in] pixs depth > 1, colormap OK | |
| 944 * \param[in] pixm [optional] 1 bpp mask giving region to ignore | |
| 945 * by setting pixels to white; use NULL if no mask | |
| 946 * \param[in] start, end, incr binarization threshold levels to test | |
| 947 * \param[in] thresh48 threshold on normalized difference between the | |
| 948 * numbers of 4 and 8 connected components | |
| 949 * \param[in] threshdiff threshold on normalized difference between the | |
| 950 * number of 4 cc at successive iterations | |
| 951 * \param[out] pglobthresh [optional] best global threshold; 0 | |
| 952 * if no threshold is found | |
| 953 * \param[out] ppixd [optional] image thresholded to binary, or | |
| 954 * null if no threshold is found | |
| 955 * \param[in] debugflag 1 for plotted results | |
| 956 * \return 0 if OK, 1 on error or if no threshold is found | |
| 957 * | |
| 958 * <pre> | |
| 959 * Notes: | |
| 960 * (1) This finds a global threshold based on connected components. | |
| 961 * Although slow, it is reasonable to use it in a situation where | |
| 962 * (a) the background in the image is relatively uniform, and | |
| 963 * (b) the result will be fed to an OCR program that accepts 1 bpp | |
| 964 * images and works best with easily segmented characters. | |
| 965 * The reason for (b) is that this selects a threshold with a | |
| 966 * minimum number of both broken characters and merged characters. | |
| 967 * (2) If the pix has color, it is converted to gray using the | |
| 968 * max component. | |
| 969 * (3) Input 0 to use default values for any of these inputs: | |
| 970 * %start, %end, %incr, %thresh48, %threshdiff. | |
| 971 * (4) This approach can be understood as follows. When the | |
| 972 * binarization threshold is varied, the numbers of c.c. identify | |
| 973 * four regimes: | |
| 974 * (a) For low thresholds, text is broken into small pieces, and | |
| 975 * the number of c.c. is large, with the 4 c.c. significantly | |
| 976 * exceeding the 8 c.c. | |
| 977 * (b) As the threshold rises toward the optimum value, the text | |
| 978 * characters coalesce and there is very little difference | |
| 979 * between the numbers of 4 and 8 c.c, which both go | |
| 980 * through a minimum. | |
| 981 * (c) Above this, the image background gets noisy because some | |
| 982 * pixels are(thresholded to foreground, and the numbers | |
| 983 * of c.c. quickly increase, with the 4 c.c. significantly | |
| 984 * larger than the 8 c.c. | |
| 985 * (d) At even higher thresholds, the image background noise | |
| 986 * coalesces as it becomes mostly foreground, and the | |
| 987 * number of c.c. drops quickly. | |
| 988 * (5) If there is no global threshold that distinguishes foreground | |
| 989 * text from background (e.g., weak text over a background that | |
| 990 * has significant variation and/or bleedthrough), this returns 1, | |
| 991 * which the caller should check. | |
| 992 * </pre> | |
| 993 */ | |
| 994 l_ok | |
| 995 pixThresholdByConnComp(PIX *pixs, | |
| 996 PIX *pixm, | |
| 997 l_int32 start, | |
| 998 l_int32 end, | |
| 999 l_int32 incr, | |
| 1000 l_float32 thresh48, | |
| 1001 l_float32 threshdiff, | |
| 1002 l_int32 *pglobthresh, | |
| 1003 PIX **ppixd, | |
| 1004 l_int32 debugflag) | |
| 1005 { | |
| 1006 l_int32 i, thresh, n, n4, n8, mincounts, found, globthresh; | |
| 1007 l_float32 count4, count8, firstcount4, prevcount4, diff48, diff4; | |
| 1008 GPLOT *gplot; | |
| 1009 NUMA *na4, *na8; | |
| 1010 PIX *pix1, *pix2, *pix3; | |
| 1011 | |
| 1012 if (pglobthresh) *pglobthresh = 0; | |
| 1013 if (ppixd) *ppixd = NULL; | |
| 1014 if (!pixs || pixGetDepth(pixs) == 1) | |
| 1015 return ERROR_INT("pixs undefined or 1 bpp", __func__, 1); | |
| 1016 if (pixm && pixGetDepth(pixm) != 1) | |
| 1017 return ERROR_INT("pixm must be 1 bpp", __func__, 1); | |
| 1018 | |
| 1019 /* Assign default values if requested */ | |
| 1020 if (start <= 0) start = 80; | |
| 1021 if (end <= 0) end = 200; | |
| 1022 if (incr <= 0) incr = 10; | |
| 1023 if (thresh48 <= 0.0) thresh48 = 0.01f; | |
| 1024 if (threshdiff <= 0.0) threshdiff = 0.01f; | |
| 1025 if (start > end) | |
| 1026 return ERROR_INT("invalid start,end", __func__, 1); | |
| 1027 | |
| 1028 /* Make 8 bpp, using the max component if color. */ | |
| 1029 if (pixGetColormap(pixs)) | |
| 1030 pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); | |
| 1031 else | |
| 1032 pix1 = pixClone(pixs); | |
| 1033 if (pixGetDepth(pix1) == 32) | |
| 1034 pix2 = pixConvertRGBToGrayMinMax(pix1, L_CHOOSE_MAX); | |
| 1035 else | |
| 1036 pix2 = pixConvertTo8(pix1, 0); | |
| 1037 pixDestroy(&pix1); | |
| 1038 | |
| 1039 /* Mask out any non-text regions. Do this in-place, because pix2 | |
| 1040 * can never be the same pix as pixs. */ | |
| 1041 if (pixm) | |
| 1042 pixSetMasked(pix2, pixm, 255); | |
| 1043 | |
| 1044 /* Make sure there are enough components to get a valid signal */ | |
| 1045 pix3 = pixConvertTo1(pix2, start); | |
| 1046 pixCountConnComp(pix3, 4, &n4); | |
| 1047 pixDestroy(&pix3); | |
| 1048 mincounts = 500; | |
| 1049 if (n4 < mincounts) { | |
| 1050 L_INFO("Insufficient component count: %d\n", __func__, n4); | |
| 1051 pixDestroy(&pix2); | |
| 1052 return 1; | |
| 1053 } | |
| 1054 | |
| 1055 /* Compute the c.c. data */ | |
| 1056 na4 = numaCreate(0); | |
| 1057 na8 = numaCreate(0); | |
| 1058 numaSetParameters(na4, start, incr); | |
| 1059 numaSetParameters(na8, start, incr); | |
| 1060 for (thresh = start, i = 0; thresh <= end; thresh += incr, i++) { | |
| 1061 pix3 = pixConvertTo1(pix2, thresh); | |
| 1062 pixCountConnComp(pix3, 4, &n4); | |
| 1063 pixCountConnComp(pix3, 8, &n8); | |
| 1064 numaAddNumber(na4, n4); | |
| 1065 numaAddNumber(na8, n8); | |
| 1066 pixDestroy(&pix3); | |
| 1067 } | |
| 1068 if (debugflag) { | |
| 1069 lept_mkdir("lept/binarize"); | |
| 1070 gplot = gplotCreate("/tmp/lept/binarize", GPLOT_PNG, | |
| 1071 "number of cc vs. threshold", | |
| 1072 "threshold", "number of cc"); | |
| 1073 gplotAddPlot(gplot, NULL, na4, GPLOT_LINES, "plot 4cc"); | |
| 1074 gplotAddPlot(gplot, NULL, na8, GPLOT_LINES, "plot 8cc"); | |
| 1075 gplotMakeOutput(gplot); | |
| 1076 gplotDestroy(&gplot); | |
| 1077 } | |
| 1078 | |
| 1079 n = numaGetCount(na4); | |
| 1080 found = FALSE; | |
| 1081 for (i = 0; i < n; i++) { | |
| 1082 if (i == 0) { | |
| 1083 numaGetFValue(na4, i, &firstcount4); | |
| 1084 prevcount4 = firstcount4; | |
| 1085 } else { | |
| 1086 numaGetFValue(na4, i, &count4); | |
| 1087 numaGetFValue(na8, i, &count8); | |
| 1088 diff48 = (count4 - count8) / firstcount4; | |
| 1089 diff4 = L_ABS(prevcount4 - count4) / firstcount4; | |
| 1090 if (debugflag) { | |
| 1091 lept_stderr("diff48 = %7.3f, diff4 = %7.3f\n", | |
| 1092 diff48, diff4); | |
| 1093 } | |
| 1094 if (diff48 < thresh48 && diff4 < threshdiff) { | |
| 1095 found = TRUE; | |
| 1096 break; | |
| 1097 } | |
| 1098 prevcount4 = count4; | |
| 1099 } | |
| 1100 } | |
| 1101 numaDestroy(&na4); | |
| 1102 numaDestroy(&na8); | |
| 1103 | |
| 1104 if (found) { | |
| 1105 globthresh = start + i * incr; | |
| 1106 if (pglobthresh) *pglobthresh = globthresh; | |
| 1107 if (ppixd) { | |
| 1108 *ppixd = pixConvertTo1(pix2, globthresh); | |
| 1109 pixCopyResolution(*ppixd, pixs); | |
| 1110 } | |
| 1111 if (debugflag) lept_stderr("global threshold = %d\n", globthresh); | |
| 1112 pixDestroy(&pix2); | |
| 1113 return 0; | |
| 1114 } | |
| 1115 | |
| 1116 if (debugflag) lept_stderr("no global threshold found\n"); | |
| 1117 pixDestroy(&pix2); | |
| 1118 return 1; | |
| 1119 } | |
| 1120 | |
| 1121 /*----------------------------------------------------------------------* | |
| 1122 * Global thresholding by histogram * | |
| 1123 *----------------------------------------------------------------------*/ | |
| 1124 /*! | |
| 1125 * \brief pixThresholdByHisto() | |
| 1126 * | |
| 1127 * \param[in] pixs gray 8 bpp, no colormap | |
| 1128 * \param[in] factor subsampling factor >= 1 | |
| 1129 * \param[in] halfw half of window width for smoothing; | |
| 1130 * use 0 for default | |
| 1131 * \param[in] skip look-ahead distance to avoid false minima; | |
| 1132 * use 0 for default | |
| 1133 * \param[out] pthresh best global threshold; 0 if no threshold is found | |
| 1134 * \param[out] ppixd [optional] thresholded 1 bpp pix | |
| 1135 * \param[out] pnahisto [optional] rescaled histogram of gray values | |
| 1136 * \param[out] ppixhisto [optional] plot of rescaled histogram | |
| 1137 * \return 0 if OK, 1 on error or if no threshold is found | |
| 1138 * | |
| 1139 * <pre> | |
| 1140 * Notes: | |
| 1141 * (1) This finds a global threshold. It is best for an image that | |
| 1142 * has a fairly well-defined fg and bg. | |
| 1143 * (2) If it finds a good threshold and %ppixd is defined, the binarized | |
| 1144 * image is returned in &pixd; otherwise it return null. | |
| 1145 * (3) See numaFindLocForThreshold() for use of %skip. | |
| 1146 * (4) Suggest using default values (20) for %half and %skip. | |
| 1147 * (5) Returns 0 in %pthresh if it can't find a good threshold. | |
| 1148 * </pre> | |
| 1149 */ | |
| 1150 l_ok | |
| 1151 pixThresholdByHisto(PIX *pixs, | |
| 1152 l_int32 factor, | |
| 1153 l_int32 halfw, | |
| 1154 l_int32 skip, | |
| 1155 l_int32 *pthresh, | |
| 1156 PIX **ppixd, | |
| 1157 NUMA **pnahisto, | |
| 1158 PIX **ppixhisto) | |
| 1159 { | |
| 1160 l_float32 maxval, fract; | |
| 1161 NUMA *na1, *na2, *na3; | |
| 1162 | |
| 1163 if (ppixd) *ppixd = NULL; | |
| 1164 if (pnahisto) *pnahisto = NULL; | |
| 1165 if (ppixhisto) *ppixhisto = NULL; | |
| 1166 if (!pthresh) | |
| 1167 return ERROR_INT("&thresh not defined", __func__, 1); | |
| 1168 *pthresh = 0; | |
| 1169 if (!pixs || pixGetDepth(pixs) != 8) | |
| 1170 return ERROR_INT("pixs undefined or not 8 bpp", __func__, 1); | |
| 1171 if (pixGetColormap(pixs)) | |
| 1172 return ERROR_INT("pixs has colormap", __func__, 1); | |
| 1173 if (factor < 1) | |
| 1174 return ERROR_INT("sampling must be >= 1", __func__, 1); | |
| 1175 if (halfw <= 0) halfw = 20; | |
| 1176 if (skip <= 0) skip = 20; | |
| 1177 | |
| 1178 /* Make a histogram of pixel values where the largest peak | |
| 1179 * is normalized to a value of 1.0. */ | |
| 1180 na1 = pixGetGrayHistogram(pixs, factor); | |
| 1181 na2 = numaWindowedMean(na1, halfw); /* smoothing */ | |
| 1182 numaGetMax(na2, &maxval, NULL); | |
| 1183 na3 = numaTransform(na2, 0.0, 1.0 / maxval); /* rescale to max of 1.0 */ | |
| 1184 numaDestroy(&na1); | |
| 1185 numaDestroy(&na2); | |
| 1186 | |
| 1187 if (numaFindLocForThreshold(na3, skip, pthresh, &fract) == 1) { | |
| 1188 numaDestroy(&na3); | |
| 1189 return ERROR_INT("failure to find threshold", __func__, 1); | |
| 1190 } | |
| 1191 L_INFO("fractional area under first peak: %5.3f\n", __func__, fract); | |
| 1192 | |
| 1193 if (ppixhisto) { | |
| 1194 lept_mkdir("lept/histo"); | |
| 1195 gplotSimple1(na3, GPLOT_PNG, "/tmp/lept/histo/histo", NULL); | |
| 1196 *ppixhisto = pixRead("/tmp/lept/histo/histo.png"); | |
| 1197 } | |
| 1198 if (pnahisto) | |
| 1199 *pnahisto = na3; | |
| 1200 else | |
| 1201 numaDestroy(&na3); | |
| 1202 if (*pthresh > 0 && ppixd) | |
| 1203 *ppixd = pixThresholdToBinary(pixs, *pthresh); | |
| 1204 return 0; | |
| 1205 } | |
| 1206 |
