Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/strokes.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 | |
| 28 /*! | |
| 29 * \file strokes.c | |
| 30 * <pre> | |
| 31 * | |
| 32 * Operations on 1 bpp images to: | |
| 33 * (1) measure stroke parameters, such as length and average width | |
| 34 * (2) change the average stroke width to a given value by eroding | |
| 35 * or dilating the image. | |
| 36 * | |
| 37 * These operations are intended to operate on a single text | |
| 38 * character, to regularize the stroke width. It is expected | |
| 39 * that character matching by correlation, as used in the recog | |
| 40 * application, can often be improved by pre-processing both | |
| 41 * template and character images to a fixed stroke width. | |
| 42 * | |
| 43 * Stroke parameter measurement | |
| 44 * l_int32 pixFindStrokeLength() | |
| 45 * l_int32 pixFindStrokeWidth() | |
| 46 * NUMA *pixaFindStrokeWidth() | |
| 47 * | |
| 48 * Stroke width regulation | |
| 49 * PIXA *pixaModifyStrokeWidth() | |
| 50 * PIX *pixModifyStrokeWidth() | |
| 51 * PIXA *pixaSetStrokeWidth() | |
| 52 * PIX *pixSetStrokeWidth() | |
| 53 * </pre> | |
| 54 */ | |
| 55 | |
| 56 #ifdef HAVE_CONFIG_H | |
| 57 #include <config_auto.h> | |
| 58 #endif /* HAVE_CONFIG_H */ | |
| 59 | |
| 60 #include "allheaders.h" | |
| 61 | |
| 62 /*-----------------------------------------------------------------* | |
| 63 * Stroke parameter measurement * | |
| 64 *-----------------------------------------------------------------*/ | |
| 65 /*! | |
| 66 * \brief pixFindStrokeLength() | |
| 67 * | |
| 68 * \param[in] pixs 1 bpp | |
| 69 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL | |
| 70 * \param[out] plength estimated length of the strokes | |
| 71 * \return 0 if OK, 1 on error | |
| 72 * | |
| 73 * <pre> | |
| 74 * Notes: | |
| 75 * (1) Returns half the number of fg boundary pixels. | |
| 76 * </pre> | |
| 77 */ | |
| 78 l_ok | |
| 79 pixFindStrokeLength(PIX *pixs, | |
| 80 l_int32 *tab8, | |
| 81 l_int32 *plength) | |
| 82 { | |
| 83 l_int32 n; | |
| 84 l_int32 *tab; | |
| 85 PIX *pix1; | |
| 86 | |
| 87 if (!plength) | |
| 88 return ERROR_INT("&length not defined", __func__, 1); | |
| 89 *plength = 0; | |
| 90 if (!pixs) | |
| 91 return ERROR_INT("pixs not defined", __func__, 1); | |
| 92 | |
| 93 pix1 = pixExtractBoundary(pixs, 1); | |
| 94 tab = (tab8) ? tab8 : makePixelSumTab8(); | |
| 95 pixCountPixels(pix1, &n, tab); | |
| 96 *plength = n / 2; | |
| 97 if (!tab8) LEPT_FREE(tab); | |
| 98 pixDestroy(&pix1); | |
| 99 return 0; | |
| 100 } | |
| 101 | |
| 102 | |
| 103 /*! | |
| 104 * \brief pixFindStrokeWidth() | |
| 105 * | |
| 106 * \param[in] pixs 1 bpp | |
| 107 * \param[in] thresh fractional count threshold relative to distance 1 | |
| 108 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL | |
| 109 * \param[out] pwidth estimated width of the strokes | |
| 110 * \param[out] pnahisto [optional] histo of pixel distances from bg | |
| 111 * \return 0 if OK, 1 on error | |
| 112 * | |
| 113 * <pre> | |
| 114 * Notes: | |
| 115 * (1) This uses two methods to estimate the stroke width: | |
| 116 * (a) half the fg boundary length | |
| 117 * (b) a value derived from the histogram of the fg distance transform | |
| 118 * (2) Distance is measured in 8-connected | |
| 119 * (3) %thresh is the minimum fraction N(dist=d)/N(dist=1) of pixels | |
| 120 * required to determine if the pixels at distance d are above | |
| 121 * the noise. It is typically about 0.15. | |
| 122 * </pre> | |
| 123 */ | |
| 124 l_ok | |
| 125 pixFindStrokeWidth(PIX *pixs, | |
| 126 l_float32 thresh, | |
| 127 l_int32 *tab8, | |
| 128 l_float32 *pwidth, | |
| 129 NUMA **pnahisto) | |
| 130 { | |
| 131 l_int32 i, n, count, length, first, last; | |
| 132 l_int32 *tab; | |
| 133 l_float32 width1, width2, ratio, extra; | |
| 134 l_float32 *fa; | |
| 135 NUMA *na1, *na2; | |
| 136 PIX *pix1; | |
| 137 | |
| 138 if (!pwidth) | |
| 139 return ERROR_INT("&width not defined", __func__, 1); | |
| 140 *pwidth = 0; | |
| 141 if (!pixs) | |
| 142 return ERROR_INT("pixs not defined", __func__, 1); | |
| 143 | |
| 144 tab = (tab8) ? tab8 : makePixelSumTab8(); | |
| 145 | |
| 146 /* ------- Method 1: via boundary length ------- */ | |
| 147 /* The computed stroke length is a bit larger than that actual | |
| 148 * length, because of the addition of the 'caps' at the | |
| 149 * stroke ends. Therefore the computed width is a bit | |
| 150 * smaller than the average width. */ | |
| 151 pixFindStrokeLength(pixs, tab8, &length); | |
| 152 pixCountPixels(pixs, &count, tab8); | |
| 153 width1 = (l_float32)count / (l_float32)length; | |
| 154 | |
| 155 /* ------- Method 2: via distance transform ------- */ | |
| 156 /* First get the histogram of distances */ | |
| 157 pix1 = pixDistanceFunction(pixs, 8, 8, L_BOUNDARY_BG); | |
| 158 na1 = pixGetGrayHistogram(pix1, 1); | |
| 159 pixDestroy(&pix1); | |
| 160 numaGetNonzeroRange(na1, 0.1f, &first, &last); | |
| 161 na2 = numaClipToInterval(na1, 0, last); | |
| 162 numaWriteStderr(na2); | |
| 163 | |
| 164 /* Find the bucket with the largest distance whose contents | |
| 165 * exceed the threshold. */ | |
| 166 fa = numaGetFArray(na2, L_NOCOPY); | |
| 167 n = numaGetCount(na2); | |
| 168 for (i = n - 1; i > 0; i--) { | |
| 169 ratio = fa[i] / fa[1]; | |
| 170 if (ratio > thresh) break; | |
| 171 } | |
| 172 /* Let the last skipped bucket contribute to the stop bucket. | |
| 173 * This is the 'extra' term below. The result may be a slight | |
| 174 * over-correction, so the computed width may be a bit larger | |
| 175 * than the average width. */ | |
| 176 extra = (i < n - 1) ? fa[i + 1] / fa[1] : 0; | |
| 177 width2 = 2.0f * (i - 1.0f + ratio + extra); | |
| 178 lept_stderr("width1 = %5.2f, width2 = %5.2f\n", width1, width2); | |
| 179 | |
| 180 /* Average the two results */ | |
| 181 *pwidth = (width1 + width2) / 2.0f; | |
| 182 | |
| 183 if (!tab8) LEPT_FREE(tab); | |
| 184 numaDestroy(&na1); | |
| 185 if (pnahisto) | |
| 186 *pnahisto = na2; | |
| 187 else | |
| 188 numaDestroy(&na2); | |
| 189 return 0; | |
| 190 } | |
| 191 | |
| 192 | |
| 193 /*! | |
| 194 * \brief pixaFindStrokeWidth() | |
| 195 * | |
| 196 * \param[in] pixa of 1 bpp images | |
| 197 * \param[in] thresh fractional count threshold relative to distance 1 | |
| 198 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL | |
| 199 * \param[in] debug 1 for debug output; 0 to skip | |
| 200 * \return na array of stroke widths for each pix in %pixa; NULL on error | |
| 201 * | |
| 202 * <pre> | |
| 203 * Notes: | |
| 204 * (1) See pixFindStrokeWidth() for details. | |
| 205 * </pre> | |
| 206 */ | |
| 207 NUMA * | |
| 208 pixaFindStrokeWidth(PIXA *pixa, | |
| 209 l_float32 thresh, | |
| 210 l_int32 *tab8, | |
| 211 l_int32 debug) | |
| 212 { | |
| 213 l_int32 i, n, same, maxd; | |
| 214 l_int32 *tab; | |
| 215 l_float32 width; | |
| 216 NUMA *na; | |
| 217 PIX *pix; | |
| 218 | |
| 219 if (!pixa) | |
| 220 return (NUMA *)ERROR_PTR("pixa not defined", __func__, NULL); | |
| 221 pixaVerifyDepth(pixa, &same, &maxd); | |
| 222 if (maxd > 1) | |
| 223 return (NUMA *)ERROR_PTR("pix not all 1 bpp", __func__, NULL); | |
| 224 | |
| 225 tab = (tab8) ? tab8 : makePixelSumTab8(); | |
| 226 | |
| 227 n = pixaGetCount(pixa); | |
| 228 na = numaCreate(n); | |
| 229 for (i = 0; i < n; i++) { | |
| 230 pix = pixaGetPix(pixa, i, L_CLONE); | |
| 231 pixFindStrokeWidth(pix, thresh, tab8, &width, NULL); | |
| 232 numaAddNumber(na, width); | |
| 233 pixDestroy(&pix); | |
| 234 } | |
| 235 | |
| 236 if (!tab8) LEPT_FREE(tab); | |
| 237 return na; | |
| 238 } | |
| 239 | |
| 240 | |
| 241 /*-----------------------------------------------------------------* | |
| 242 * Change stroke width * | |
| 243 *-----------------------------------------------------------------*/ | |
| 244 /*! | |
| 245 * \brief pixaModifyStrokeWidth() | |
| 246 * | |
| 247 * \param[in] pixas of 1 bpp pix | |
| 248 * \param[out] targetw desired width for strokes in each pix | |
| 249 * \return pixa with modified stroke widths, or NULL on error | |
| 250 */ | |
| 251 PIXA * | |
| 252 pixaModifyStrokeWidth(PIXA *pixas, | |
| 253 l_float32 targetw) | |
| 254 { | |
| 255 l_int32 i, n, same, maxd; | |
| 256 l_float32 width; | |
| 257 NUMA *na; | |
| 258 PIX *pix1, *pix2; | |
| 259 PIXA *pixad; | |
| 260 | |
| 261 if (!pixas) | |
| 262 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 263 if (targetw < 1) | |
| 264 return (PIXA *)ERROR_PTR("target width < 1", __func__, NULL); | |
| 265 pixaVerifyDepth(pixas, &same, &maxd); | |
| 266 if (maxd > 1) | |
| 267 return (PIXA *)ERROR_PTR("pix not all 1 bpp", __func__, NULL); | |
| 268 | |
| 269 na = pixaFindStrokeWidth(pixas, 0.1f, NULL, 0); | |
| 270 n = pixaGetCount(pixas); | |
| 271 pixad = pixaCreate(n); | |
| 272 for (i = 0; i < n; i++) { | |
| 273 pix1 = pixaGetPix(pixas, i, L_CLONE); | |
| 274 numaGetFValue(na, i, &width); | |
| 275 pix2 = pixModifyStrokeWidth(pix1, width, targetw); | |
| 276 pixaAddPix(pixad, pix2, L_INSERT); | |
| 277 pixDestroy(&pix1); | |
| 278 } | |
| 279 | |
| 280 numaDestroy(&na); | |
| 281 return pixad; | |
| 282 } | |
| 283 | |
| 284 | |
| 285 /*! | |
| 286 * \brief pixModifyStrokeWidth() | |
| 287 * | |
| 288 * \param[in] pixs of 1 bpp pix | |
| 289 * \param[in] width measured average stroke width | |
| 290 * \param[in] targetw desired stroke width | |
| 291 * \return pix with modified stroke width, or NULL on error | |
| 292 */ | |
| 293 PIX * | |
| 294 pixModifyStrokeWidth(PIX *pixs, | |
| 295 l_float32 width, | |
| 296 l_float32 targetw) | |
| 297 { | |
| 298 char buf[32]; | |
| 299 l_int32 diff, size; | |
| 300 | |
| 301 if (!pixs || (pixGetDepth(pixs) != 1)) | |
| 302 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 303 if (targetw < 1) | |
| 304 return (PIX *)ERROR_PTR("target width < 1", __func__, NULL); | |
| 305 | |
| 306 diff = lept_roundftoi(targetw - width); | |
| 307 if (diff == 0) return pixCopy(NULL, pixs); | |
| 308 | |
| 309 size = L_ABS(diff) + 1; | |
| 310 if (diff < 0) /* erode */ | |
| 311 snprintf(buf, sizeof(buf), "e%d.%d", size, size); | |
| 312 else /* diff > 0; dilate */ | |
| 313 snprintf(buf, sizeof(buf), "d%d.%d", size, size); | |
| 314 return pixMorphSequence(pixs, buf, 0); | |
| 315 } | |
| 316 | |
| 317 | |
| 318 /*! | |
| 319 * \brief pixaSetStrokeWidth() | |
| 320 * | |
| 321 * \param[in] pixas of 1 bpp pix | |
| 322 * \param[in] width set stroke width to this value, in [1 ... 100]. | |
| 323 * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip | |
| 324 * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1 | |
| 325 * \return pixa with all stroke widths being %width, or NULL on error | |
| 326 * | |
| 327 * <pre> | |
| 328 * Notes: | |
| 329 * (1) If %thinfirst == 1, thin to a skeleton using the specified | |
| 330 * %connectivity. Use %thinfirst == 0 if all pix in pixas | |
| 331 * have already been thinned as far as possible. | |
| 332 * (2) The image is dilated to the required %width. This dilation | |
| 333 * is not connectivity preserving, so this is typically | |
| 334 * used in a situation where merging of c.c. in the individual | |
| 335 * pix is not a problem; e.g., where each pix is a single c.c. | |
| 336 * </pre> | |
| 337 */ | |
| 338 PIXA * | |
| 339 pixaSetStrokeWidth(PIXA *pixas, | |
| 340 l_int32 width, | |
| 341 l_int32 thinfirst, | |
| 342 l_int32 connectivity) | |
| 343 { | |
| 344 l_int32 i, n, maxd, same; | |
| 345 PIX *pix1, *pix2; | |
| 346 PIXA *pixad; | |
| 347 | |
| 348 if (!pixas) | |
| 349 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL); | |
| 350 if (width < 1 || width > 100) | |
| 351 return (PIXA *)ERROR_PTR("width not in [1 ... 100]", __func__, NULL); | |
| 352 if (connectivity != 4 && connectivity != 8) | |
| 353 return (PIXA *)ERROR_PTR("connectivity not 4 or 8", __func__, NULL); | |
| 354 pixaVerifyDepth(pixas, &same, &maxd); | |
| 355 if (maxd > 1) | |
| 356 return (PIXA *)ERROR_PTR("pix are not all 1 bpp", __func__, NULL); | |
| 357 | |
| 358 n = pixaGetCount(pixas); | |
| 359 pixad = pixaCreate(n); | |
| 360 for (i = 0; i < n; i++) { | |
| 361 pix1 = pixaGetPix(pixas, i, L_CLONE); | |
| 362 pix2 = pixSetStrokeWidth(pix1, width, thinfirst, connectivity); | |
| 363 pixaAddPix(pixad, pix2, L_INSERT); | |
| 364 pixDestroy(&pix1); | |
| 365 } | |
| 366 | |
| 367 return pixad; | |
| 368 } | |
| 369 | |
| 370 | |
| 371 /*! | |
| 372 * \brief pixSetStrokeWidth() | |
| 373 * | |
| 374 * \param[in] pixs 1 bpp | |
| 375 * \param[in] width set stroke width to this value, in [1 ... 100]. | |
| 376 * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip | |
| 377 * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1 | |
| 378 * \return pixd with stroke width set to %width, or NULL on error | |
| 379 * | |
| 380 * <pre> | |
| 381 * Notes: | |
| 382 * (1) See notes in pixaSetStrokeWidth(). | |
| 383 * (2) A white border of sufficient width to avoid boundary | |
| 384 * artifacts in the thickening step is added before thinning. | |
| 385 * (3) %connectivity == 8 usually gives a slightly smoother result. | |
| 386 * </pre> | |
| 387 */ | |
| 388 PIX * | |
| 389 pixSetStrokeWidth(PIX *pixs, | |
| 390 l_int32 width, | |
| 391 l_int32 thinfirst, | |
| 392 l_int32 connectivity) | |
| 393 { | |
| 394 char buf[16]; | |
| 395 l_int32 border; | |
| 396 PIX *pix1, *pix2, *pixd; | |
| 397 | |
| 398 if (!pixs || (pixGetDepth(pixs) != 1)) | |
| 399 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 400 if (width < 1 || width > 100) | |
| 401 return (PIX *)ERROR_PTR("width not in [1 ... 100]", __func__, NULL); | |
| 402 if (connectivity != 4 && connectivity != 8) | |
| 403 return (PIX *)ERROR_PTR("connectivity not 4 or 8", __func__, NULL); | |
| 404 | |
| 405 if (!thinfirst && width == 1) /* nothing to do */ | |
| 406 return pixCopy(NULL, pixs); | |
| 407 | |
| 408 /* Add a white border */ | |
| 409 border = width / 2; | |
| 410 pix1 = pixAddBorder(pixs, border, 0); | |
| 411 | |
| 412 /* Thin to a skeleton */ | |
| 413 if (thinfirst) | |
| 414 pix2 = pixThinConnected(pix1, L_THIN_FG, connectivity, 0); | |
| 415 else | |
| 416 pix2 = pixClone(pix1); | |
| 417 pixDestroy(&pix1); | |
| 418 | |
| 419 /* Dilate */ | |
| 420 snprintf(buf, sizeof(buf), "D%d.%d", width, width); | |
| 421 pixd = pixMorphSequence(pix2, buf, 0); | |
| 422 pixCopyText(pixd, pixs); | |
| 423 pixDestroy(&pix2); | |
| 424 return pixd; | |
| 425 } |
