comparison mupdf-source/thirdparty/leptonica/src/strokes.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27
28 /*!
29 * \file strokes.c
30 * <pre>
31 *
32 * Operations on 1 bpp images to:
33 * (1) measure stroke parameters, such as length and average width
34 * (2) change the average stroke width to a given value by eroding
35 * or dilating the image.
36 *
37 * These operations are intended to operate on a single text
38 * character, to regularize the stroke width. It is expected
39 * that character matching by correlation, as used in the recog
40 * application, can often be improved by pre-processing both
41 * template and character images to a fixed stroke width.
42 *
43 * Stroke parameter measurement
44 * l_int32 pixFindStrokeLength()
45 * l_int32 pixFindStrokeWidth()
46 * NUMA *pixaFindStrokeWidth()
47 *
48 * Stroke width regulation
49 * PIXA *pixaModifyStrokeWidth()
50 * PIX *pixModifyStrokeWidth()
51 * PIXA *pixaSetStrokeWidth()
52 * PIX *pixSetStrokeWidth()
53 * </pre>
54 */
55
56 #ifdef HAVE_CONFIG_H
57 #include <config_auto.h>
58 #endif /* HAVE_CONFIG_H */
59
60 #include "allheaders.h"
61
62 /*-----------------------------------------------------------------*
63 * Stroke parameter measurement *
64 *-----------------------------------------------------------------*/
65 /*!
66 * \brief pixFindStrokeLength()
67 *
68 * \param[in] pixs 1 bpp
69 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL
70 * \param[out] plength estimated length of the strokes
71 * \return 0 if OK, 1 on error
72 *
73 * <pre>
74 * Notes:
75 * (1) Returns half the number of fg boundary pixels.
76 * </pre>
77 */
78 l_ok
79 pixFindStrokeLength(PIX *pixs,
80 l_int32 *tab8,
81 l_int32 *plength)
82 {
83 l_int32 n;
84 l_int32 *tab;
85 PIX *pix1;
86
87 if (!plength)
88 return ERROR_INT("&length not defined", __func__, 1);
89 *plength = 0;
90 if (!pixs)
91 return ERROR_INT("pixs not defined", __func__, 1);
92
93 pix1 = pixExtractBoundary(pixs, 1);
94 tab = (tab8) ? tab8 : makePixelSumTab8();
95 pixCountPixels(pix1, &n, tab);
96 *plength = n / 2;
97 if (!tab8) LEPT_FREE(tab);
98 pixDestroy(&pix1);
99 return 0;
100 }
101
102
103 /*!
104 * \brief pixFindStrokeWidth()
105 *
106 * \param[in] pixs 1 bpp
107 * \param[in] thresh fractional count threshold relative to distance 1
108 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL
109 * \param[out] pwidth estimated width of the strokes
110 * \param[out] pnahisto [optional] histo of pixel distances from bg
111 * \return 0 if OK, 1 on error
112 *
113 * <pre>
114 * Notes:
115 * (1) This uses two methods to estimate the stroke width:
116 * (a) half the fg boundary length
117 * (b) a value derived from the histogram of the fg distance transform
118 * (2) Distance is measured in 8-connected
119 * (3) %thresh is the minimum fraction N(dist=d)/N(dist=1) of pixels
120 * required to determine if the pixels at distance d are above
121 * the noise. It is typically about 0.15.
122 * </pre>
123 */
124 l_ok
125 pixFindStrokeWidth(PIX *pixs,
126 l_float32 thresh,
127 l_int32 *tab8,
128 l_float32 *pwidth,
129 NUMA **pnahisto)
130 {
131 l_int32 i, n, count, length, first, last;
132 l_int32 *tab;
133 l_float32 width1, width2, ratio, extra;
134 l_float32 *fa;
135 NUMA *na1, *na2;
136 PIX *pix1;
137
138 if (!pwidth)
139 return ERROR_INT("&width not defined", __func__, 1);
140 *pwidth = 0;
141 if (!pixs)
142 return ERROR_INT("pixs not defined", __func__, 1);
143
144 tab = (tab8) ? tab8 : makePixelSumTab8();
145
146 /* ------- Method 1: via boundary length ------- */
147 /* The computed stroke length is a bit larger than that actual
148 * length, because of the addition of the 'caps' at the
149 * stroke ends. Therefore the computed width is a bit
150 * smaller than the average width. */
151 pixFindStrokeLength(pixs, tab8, &length);
152 pixCountPixels(pixs, &count, tab8);
153 width1 = (l_float32)count / (l_float32)length;
154
155 /* ------- Method 2: via distance transform ------- */
156 /* First get the histogram of distances */
157 pix1 = pixDistanceFunction(pixs, 8, 8, L_BOUNDARY_BG);
158 na1 = pixGetGrayHistogram(pix1, 1);
159 pixDestroy(&pix1);
160 numaGetNonzeroRange(na1, 0.1f, &first, &last);
161 na2 = numaClipToInterval(na1, 0, last);
162 numaWriteStderr(na2);
163
164 /* Find the bucket with the largest distance whose contents
165 * exceed the threshold. */
166 fa = numaGetFArray(na2, L_NOCOPY);
167 n = numaGetCount(na2);
168 for (i = n - 1; i > 0; i--) {
169 ratio = fa[i] / fa[1];
170 if (ratio > thresh) break;
171 }
172 /* Let the last skipped bucket contribute to the stop bucket.
173 * This is the 'extra' term below. The result may be a slight
174 * over-correction, so the computed width may be a bit larger
175 * than the average width. */
176 extra = (i < n - 1) ? fa[i + 1] / fa[1] : 0;
177 width2 = 2.0f * (i - 1.0f + ratio + extra);
178 lept_stderr("width1 = %5.2f, width2 = %5.2f\n", width1, width2);
179
180 /* Average the two results */
181 *pwidth = (width1 + width2) / 2.0f;
182
183 if (!tab8) LEPT_FREE(tab);
184 numaDestroy(&na1);
185 if (pnahisto)
186 *pnahisto = na2;
187 else
188 numaDestroy(&na2);
189 return 0;
190 }
191
192
193 /*!
194 * \brief pixaFindStrokeWidth()
195 *
196 * \param[in] pixa of 1 bpp images
197 * \param[in] thresh fractional count threshold relative to distance 1
198 * \param[in] tab8 [optional] table for counting fg pixels; can be NULL
199 * \param[in] debug 1 for debug output; 0 to skip
200 * \return na array of stroke widths for each pix in %pixa; NULL on error
201 *
202 * <pre>
203 * Notes:
204 * (1) See pixFindStrokeWidth() for details.
205 * </pre>
206 */
207 NUMA *
208 pixaFindStrokeWidth(PIXA *pixa,
209 l_float32 thresh,
210 l_int32 *tab8,
211 l_int32 debug)
212 {
213 l_int32 i, n, same, maxd;
214 l_int32 *tab;
215 l_float32 width;
216 NUMA *na;
217 PIX *pix;
218
219 if (!pixa)
220 return (NUMA *)ERROR_PTR("pixa not defined", __func__, NULL);
221 pixaVerifyDepth(pixa, &same, &maxd);
222 if (maxd > 1)
223 return (NUMA *)ERROR_PTR("pix not all 1 bpp", __func__, NULL);
224
225 tab = (tab8) ? tab8 : makePixelSumTab8();
226
227 n = pixaGetCount(pixa);
228 na = numaCreate(n);
229 for (i = 0; i < n; i++) {
230 pix = pixaGetPix(pixa, i, L_CLONE);
231 pixFindStrokeWidth(pix, thresh, tab8, &width, NULL);
232 numaAddNumber(na, width);
233 pixDestroy(&pix);
234 }
235
236 if (!tab8) LEPT_FREE(tab);
237 return na;
238 }
239
240
241 /*-----------------------------------------------------------------*
242 * Change stroke width *
243 *-----------------------------------------------------------------*/
244 /*!
245 * \brief pixaModifyStrokeWidth()
246 *
247 * \param[in] pixas of 1 bpp pix
248 * \param[out] targetw desired width for strokes in each pix
249 * \return pixa with modified stroke widths, or NULL on error
250 */
251 PIXA *
252 pixaModifyStrokeWidth(PIXA *pixas,
253 l_float32 targetw)
254 {
255 l_int32 i, n, same, maxd;
256 l_float32 width;
257 NUMA *na;
258 PIX *pix1, *pix2;
259 PIXA *pixad;
260
261 if (!pixas)
262 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
263 if (targetw < 1)
264 return (PIXA *)ERROR_PTR("target width < 1", __func__, NULL);
265 pixaVerifyDepth(pixas, &same, &maxd);
266 if (maxd > 1)
267 return (PIXA *)ERROR_PTR("pix not all 1 bpp", __func__, NULL);
268
269 na = pixaFindStrokeWidth(pixas, 0.1f, NULL, 0);
270 n = pixaGetCount(pixas);
271 pixad = pixaCreate(n);
272 for (i = 0; i < n; i++) {
273 pix1 = pixaGetPix(pixas, i, L_CLONE);
274 numaGetFValue(na, i, &width);
275 pix2 = pixModifyStrokeWidth(pix1, width, targetw);
276 pixaAddPix(pixad, pix2, L_INSERT);
277 pixDestroy(&pix1);
278 }
279
280 numaDestroy(&na);
281 return pixad;
282 }
283
284
285 /*!
286 * \brief pixModifyStrokeWidth()
287 *
288 * \param[in] pixs of 1 bpp pix
289 * \param[in] width measured average stroke width
290 * \param[in] targetw desired stroke width
291 * \return pix with modified stroke width, or NULL on error
292 */
293 PIX *
294 pixModifyStrokeWidth(PIX *pixs,
295 l_float32 width,
296 l_float32 targetw)
297 {
298 char buf[32];
299 l_int32 diff, size;
300
301 if (!pixs || (pixGetDepth(pixs) != 1))
302 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
303 if (targetw < 1)
304 return (PIX *)ERROR_PTR("target width < 1", __func__, NULL);
305
306 diff = lept_roundftoi(targetw - width);
307 if (diff == 0) return pixCopy(NULL, pixs);
308
309 size = L_ABS(diff) + 1;
310 if (diff < 0) /* erode */
311 snprintf(buf, sizeof(buf), "e%d.%d", size, size);
312 else /* diff > 0; dilate */
313 snprintf(buf, sizeof(buf), "d%d.%d", size, size);
314 return pixMorphSequence(pixs, buf, 0);
315 }
316
317
318 /*!
319 * \brief pixaSetStrokeWidth()
320 *
321 * \param[in] pixas of 1 bpp pix
322 * \param[in] width set stroke width to this value, in [1 ... 100].
323 * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip
324 * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1
325 * \return pixa with all stroke widths being %width, or NULL on error
326 *
327 * <pre>
328 * Notes:
329 * (1) If %thinfirst == 1, thin to a skeleton using the specified
330 * %connectivity. Use %thinfirst == 0 if all pix in pixas
331 * have already been thinned as far as possible.
332 * (2) The image is dilated to the required %width. This dilation
333 * is not connectivity preserving, so this is typically
334 * used in a situation where merging of c.c. in the individual
335 * pix is not a problem; e.g., where each pix is a single c.c.
336 * </pre>
337 */
338 PIXA *
339 pixaSetStrokeWidth(PIXA *pixas,
340 l_int32 width,
341 l_int32 thinfirst,
342 l_int32 connectivity)
343 {
344 l_int32 i, n, maxd, same;
345 PIX *pix1, *pix2;
346 PIXA *pixad;
347
348 if (!pixas)
349 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
350 if (width < 1 || width > 100)
351 return (PIXA *)ERROR_PTR("width not in [1 ... 100]", __func__, NULL);
352 if (connectivity != 4 && connectivity != 8)
353 return (PIXA *)ERROR_PTR("connectivity not 4 or 8", __func__, NULL);
354 pixaVerifyDepth(pixas, &same, &maxd);
355 if (maxd > 1)
356 return (PIXA *)ERROR_PTR("pix are not all 1 bpp", __func__, NULL);
357
358 n = pixaGetCount(pixas);
359 pixad = pixaCreate(n);
360 for (i = 0; i < n; i++) {
361 pix1 = pixaGetPix(pixas, i, L_CLONE);
362 pix2 = pixSetStrokeWidth(pix1, width, thinfirst, connectivity);
363 pixaAddPix(pixad, pix2, L_INSERT);
364 pixDestroy(&pix1);
365 }
366
367 return pixad;
368 }
369
370
371 /*!
372 * \brief pixSetStrokeWidth()
373 *
374 * \param[in] pixs 1 bpp
375 * \param[in] width set stroke width to this value, in [1 ... 100].
376 * \param[in] thinfirst 1 to thin all pix to a skeleton first; 0 to skip
377 * \param[in] connectivity 4 or 8, to be used if %thinfirst == 1
378 * \return pixd with stroke width set to %width, or NULL on error
379 *
380 * <pre>
381 * Notes:
382 * (1) See notes in pixaSetStrokeWidth().
383 * (2) A white border of sufficient width to avoid boundary
384 * artifacts in the thickening step is added before thinning.
385 * (3) %connectivity == 8 usually gives a slightly smoother result.
386 * </pre>
387 */
388 PIX *
389 pixSetStrokeWidth(PIX *pixs,
390 l_int32 width,
391 l_int32 thinfirst,
392 l_int32 connectivity)
393 {
394 char buf[16];
395 l_int32 border;
396 PIX *pix1, *pix2, *pixd;
397
398 if (!pixs || (pixGetDepth(pixs) != 1))
399 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
400 if (width < 1 || width > 100)
401 return (PIX *)ERROR_PTR("width not in [1 ... 100]", __func__, NULL);
402 if (connectivity != 4 && connectivity != 8)
403 return (PIX *)ERROR_PTR("connectivity not 4 or 8", __func__, NULL);
404
405 if (!thinfirst && width == 1) /* nothing to do */
406 return pixCopy(NULL, pixs);
407
408 /* Add a white border */
409 border = width / 2;
410 pix1 = pixAddBorder(pixs, border, 0);
411
412 /* Thin to a skeleton */
413 if (thinfirst)
414 pix2 = pixThinConnected(pix1, L_THIN_FG, connectivity, 0);
415 else
416 pix2 = pixClone(pix1);
417 pixDestroy(&pix1);
418
419 /* Dilate */
420 snprintf(buf, sizeof(buf), "D%d.%d", width, width);
421 pixd = pixMorphSequence(pix2, buf, 0);
422 pixCopyText(pixd, pixs);
423 pixDestroy(&pix2);
424 return pixd;
425 }