comparison mupdf-source/thirdparty/leptonica/src/rank.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file rank.c
29 * <pre>
30 *
31 * Rank filter (gray and rgb)
32 * PIX *pixRankFilter()
33 * PIX *pixRankFilterRGB()
34 * PIX *pixRankFilterGray()
35 *
36 * Median filter
37 * PIX *pixMedianFilter()
38 *
39 * Rank filter (accelerated with downscaling)
40 * PIX *pixRankFilterWithScaling()
41 *
42 * What is a brick rank filter?
43 *
44 * A brick rank order filter evaluates, for every pixel in the image,
45 * a rectangular set of n = wf x hf pixels in its neighborhood (where the
46 * pixel in question is at the "center" of the rectangle and is
47 * included in the evaluation). It determines the value of the
48 * neighboring pixel that is the r-th smallest in the set,
49 * where r is some integer between 1 and n. The input rank parameter
50 * is a fraction between 0.0 and 1.0, where 0.0 represents the
51 * smallest value (r = 1) and 1.0 represents the largest value (r = n).
52 * A median filter is a rank filter where rank = 0.5.
53 *
54 * It is important to note that grayscale erosion is equivalent
55 * to rank = 0.0, and grayscale dilation is equivalent to rank = 1.0.
56 * These are much easier to calculate than the general rank value,
57 * thanks to the van Herk/Gil-Werman algorithm:
58 * http://www.leptonica.com/grayscale-morphology.html
59 * so you should use pixErodeGray() and pixDilateGray() for
60 * rank 0.0 and 1.0, rsp. See notes below in the function header.
61 *
62 * How is a rank filter implemented efficiently on an image?
63 *
64 * Sorting will not work.
65 *
66 * * The best sort algorithms are O(n*logn), where n is the number
67 * of values to be sorted (the area of the filter). For large
68 * filters this is an impractically large number.
69 *
70 * * Selection of the rank value is O(n). (To understand why it's not
71 * O(n*logn), see Numerical Recipes in C, 2nd edition, 1992, p. 355ff).
72 * This also still far too much computation for large filters.
73 *
74 * * Suppose we get clever. We really only need to do an incremental
75 * selection or sorting, because, for example, moving the filter
76 * down by one pixel causes one filter width of pixels to be added
77 * and another to be removed. Can we do this incrementally in
78 * an efficient way? Unfortunately, no. The sorted values will be
79 * in an array. Even if the filter width is 1, we can expect to
80 * have to move O(n) pixels, because insertion and deletion can happen
81 * anywhere in the array. By comparison, heapsort is excellent for
82 * incremental sorting, where the cost for insertion or deletion
83 * is O(logn), because the array itself doesn't need to
84 * be sorted into strictly increasing order. However, heapsort
85 * only gives the max (or min) value, not the general rank value.
86 *
87 * This leaves histograms.
88 *
89 * * Represented as an array. The problem with an array of 256
90 * bins is that, in general, a significant fraction of the
91 * entire histogram must be summed to find the rank value bin.
92 * Suppose the filter size is 5x5. You spend most of your time
93 * adding zeroes. Ouch!
94 *
95 * * Represented as a linked list. This would overcome the
96 * summing-over-empty-bin problem, but you lose random access
97 * for insertions and deletions. No way.
98 *
99 * * Two histogram solution. Maintain two histograms with
100 * bin sizes of 1 and 16. Proceed from coarse to fine.
101 * First locate the coarse bin for the given rank, of which
102 * there are only 16. Then, in the 256 entry (fine) histogram,
103 * you need look at a maximum of 16 bins. For each output
104 * pixel, the average number of bins summed over, both in the
105 * coarse and fine histograms, is thus 16.
106 *
107 * If someone has a better method, please let me know!
108 *
109 * The rank filtering operation is relatively expensive, compared to most
110 * of the other imaging operations. The speed is only weakly dependent
111 * on the size of the rank filter. On standard hardware, it runs at
112 * about 10 Mpix/sec for a 50 x 50 filter, and 25 Mpix/sec for
113 * a 5 x 5 filter. For applications where the rank filter can be
114 * performed on a downscaled image, significant speedup can be
115 * achieved because the time goes as the square of the scaling factor.
116 * We provide an interface that handles the details, and only
117 * requires the amount of downscaling to be input.
118 * </pre>
119 */
120
121 #ifdef HAVE_CONFIG_H
122 #include <config_auto.h>
123 #endif /* HAVE_CONFIG_H */
124
125 #include "allheaders.h"
126
127 /*----------------------------------------------------------------------*
128 * Rank order filter *
129 *----------------------------------------------------------------------*/
130 /*!
131 * \brief pixRankFilter()
132 *
133 * \param[in] pixs 8 or 32 bpp; no colormap
134 * \param[in] wf, hf width and height of filter; each is >= 1
135 * \param[in] rank in [0.0 ... 1.0]
136 * \return pixd of rank values, or NULL on error
137 *
138 * <pre>
139 * Notes:
140 * (1) This defines, for each pixel in pixs, a neighborhood of
141 * pixels given by a rectangle "centered" on the pixel.
142 * This set of wf*hf pixels has a distribution of values.
143 * For each component, if the values are sorted in increasing
144 * order, we choose the component such that rank*(wf*hf-1)
145 * pixels have a lower or equal value and
146 * (1-rank)*(wf*hf-1) pixels have an equal or greater value.
147 * (2) See notes in pixRankFilterGray() for further details.
148 * </pre>
149 */
150 PIX *
151 pixRankFilter(PIX *pixs,
152 l_int32 wf,
153 l_int32 hf,
154 l_float32 rank)
155 {
156 l_int32 d;
157
158 if (!pixs)
159 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
160 if (pixGetColormap(pixs) != NULL)
161 return (PIX *)ERROR_PTR("pixs has colormap", __func__, NULL);
162 d = pixGetDepth(pixs);
163 if (d != 8 && d != 32)
164 return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
165 if (wf < 1 || hf < 1)
166 return (PIX *)ERROR_PTR("wf < 1 || hf < 1", __func__, NULL);
167 if (rank < 0.0 || rank > 1.0)
168 return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", __func__, NULL);
169 if (wf == 1 && hf == 1) /* no-op */
170 return pixCopy(NULL, pixs);
171
172 if (d == 8)
173 return pixRankFilterGray(pixs, wf, hf, rank);
174 else /* d == 32 */
175 return pixRankFilterRGB(pixs, wf, hf, rank);
176 }
177
178
179 /*!
180 * \brief pixRankFilterRGB()
181 *
182 * \param[in] pixs 32 bpp
183 * \param[in] wf, hf width and height of filter; each is >= 1
184 * \param[in] rank in [0.0 ... 1.0]
185 * \return pixd of rank values, or NULL on error
186 *
187 * <pre>
188 * Notes:
189 * (1) This defines, for each pixel in pixs, a neighborhood of
190 * pixels given by a rectangle "centered" on the pixel.
191 * This set of wf*hf pixels has a distribution of values.
192 * For each component, if the values are sorted in increasing
193 * order, we choose the component such that rank*(wf*hf-1)
194 * pixels have a lower or equal value and
195 * (1-rank)*(wf*hf-1) pixels have an equal or greater value.
196 * (2) Apply gray rank filtering to each component independently.
197 * (3) See notes in pixRankFilterGray() for further details.
198 * </pre>
199 */
200 PIX *
201 pixRankFilterRGB(PIX *pixs,
202 l_int32 wf,
203 l_int32 hf,
204 l_float32 rank)
205 {
206 PIX *pixr, *pixg, *pixb, *pixrf, *pixgf, *pixbf, *pixd;
207
208 if (!pixs)
209 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
210 if (pixGetDepth(pixs) != 32)
211 return (PIX *)ERROR_PTR("pixs not 32 bpp", __func__, NULL);
212 if (wf < 1 || hf < 1)
213 return (PIX *)ERROR_PTR("wf < 1 || hf < 1", __func__, NULL);
214 if (rank < 0.0 || rank > 1.0)
215 return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", __func__, NULL);
216 if (wf == 1 && hf == 1) /* no-op */
217 return pixCopy(NULL, pixs);
218
219 pixr = pixGetRGBComponent(pixs, COLOR_RED);
220 pixg = pixGetRGBComponent(pixs, COLOR_GREEN);
221 pixb = pixGetRGBComponent(pixs, COLOR_BLUE);
222
223 pixrf = pixRankFilterGray(pixr, wf, hf, rank);
224 pixgf = pixRankFilterGray(pixg, wf, hf, rank);
225 pixbf = pixRankFilterGray(pixb, wf, hf, rank);
226
227 pixd = pixCreateRGBImage(pixrf, pixgf, pixbf);
228 pixDestroy(&pixr);
229 pixDestroy(&pixg);
230 pixDestroy(&pixb);
231 pixDestroy(&pixrf);
232 pixDestroy(&pixgf);
233 pixDestroy(&pixbf);
234 return pixd;
235 }
236
237
238 /*!
239 * \brief pixRankFilterGray()
240 *
241 * \param[in] pixs 8 bpp; no colormap
242 * \param[in] wf, hf width and height of filter; each is >= 1
243 * \param[in] rank in [0.0 ... 1.0]
244 * \return pixd of rank values, or NULL on error
245 *
246 * <pre>
247 * Notes:
248 * (1) This defines, for each pixel in pixs, a neighborhood of
249 * pixels given by a rectangle "centered" on the pixel.
250 * This set of wf*hf pixels has a distribution of values,
251 * and if they are sorted in increasing order, we choose
252 * the pixel such that rank*(wf*hf-1) pixels have a lower
253 * or equal value and (1-rank)*(wf*hf-1) pixels have an equal
254 * or greater value.
255 * (2) By this definition, the rank = 0.0 pixel has the lowest
256 * value, and the rank = 1.0 pixel has the highest value.
257 * (3) We add mirrored boundary pixels to avoid boundary effects,
258 * and put the filter center at (0, 0).
259 * (4) This dispatches to grayscale erosion or dilation if the
260 * filter dimensions are odd and the rank is 0.0 or 1.0, rsp.
261 * (5) Returns a copy if both wf and hf are 1.
262 * (6) Uses row-major or column-major incremental updates to the
263 * histograms depending on whether hf > wf or hv <= wf, rsp.
264 * </pre>
265 */
266 PIX *
267 pixRankFilterGray(PIX *pixs,
268 l_int32 wf,
269 l_int32 hf,
270 l_float32 rank)
271 {
272 l_int32 w, h, d, i, j, k, m, n, rankloc, wplt, wpld, val, sum;
273 l_int32 *histo, *histo16;
274 l_uint32 *datat, *linet, *datad, *lined;
275 PIX *pixt, *pixd;
276
277 if (!pixs)
278 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
279 if (pixGetColormap(pixs) != NULL)
280 return (PIX *)ERROR_PTR("pixs has colormap", __func__, NULL);
281 pixGetDimensions(pixs, &w, &h, &d);
282 if (d != 8)
283 return (PIX *)ERROR_PTR("pixs not 8 bpp", __func__, NULL);
284 if (wf < 1 || hf < 1)
285 return (PIX *)ERROR_PTR("wf < 1 || hf < 1", __func__, NULL);
286 if (rank < 0.0 || rank > 1.0)
287 return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", __func__, NULL);
288 if (wf == 1 && hf == 1) /* no-op */
289 return pixCopy(NULL, pixs);
290
291 /* For rank = 0.0, this is a grayscale erosion, and for rank = 1.0,
292 * a dilation. Grayscale morphology operations are implemented
293 * for filters of odd dimension, so we dispatch to grayscale
294 * morphology if both wf and hf are odd. Otherwise, we
295 * slightly adjust the rank (to get the correct behavior) and
296 * use the slower rank filter here. */
297 if (wf % 2 && hf % 2) {
298 if (rank == 0.0)
299 return pixErodeGray(pixs, wf, hf);
300 else if (rank == 1.0)
301 return pixDilateGray(pixs, wf, hf);
302 }
303 if (rank == 0.0) rank = 0.0001f;
304 if (rank == 1.0) rank = 0.9999f;
305
306 /* Add wf/2 to each side, and hf/2 to top and bottom of the
307 * image, mirroring for accuracy and to avoid special-casing
308 * the boundary. */
309 if ((pixt = pixAddMirroredBorder(pixs, wf / 2, wf / 2, hf / 2, hf / 2))
310 == NULL)
311 return (PIX *)ERROR_PTR("pixt not made", __func__, NULL);
312
313 /* Set up the two histogram arrays. */
314 histo = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32));
315 histo16 = (l_int32 *)LEPT_CALLOC(16, sizeof(l_int32));
316 rankloc = (l_int32)(rank * wf * hf);
317
318 /* Place the filter center at (0, 0). This is just a
319 * convenient location, because it allows us to perform
320 * the rank filter over x:(0 ... w - 1) and y:(0 ... h - 1). */
321 pixd = pixCreateTemplate(pixs);
322 datat = pixGetData(pixt);
323 wplt = pixGetWpl(pixt);
324 datad = pixGetData(pixd);
325 wpld = pixGetWpl(pixd);
326
327 /* If hf > wf, it's more efficient to use row-major scanning.
328 * Otherwise, traverse the image in use column-major order. */
329 if (hf > wf) {
330 for (j = 0; j < w; j++) { /* row-major */
331 /* Start each column with clean histogram arrays. */
332 for (n = 0; n < 256; n++)
333 histo[n] = 0;
334 for (n = 0; n < 16; n++)
335 histo16[n] = 0;
336
337 for (i = 0; i < h; i++) { /* fast scan on columns */
338 /* Update the histos for the new location */
339 lined = datad + i * wpld;
340 if (i == 0) { /* do full histo */
341 for (k = 0; k < hf; k++) {
342 linet = datat + (i + k) * wplt;
343 for (m = 0; m < wf; m++) {
344 val = GET_DATA_BYTE(linet, j + m);
345 histo[val]++;
346 histo16[val >> 4]++;
347 }
348 }
349 } else { /* incremental update */
350 linet = datat + (i - 1) * wplt;
351 for (m = 0; m < wf; m++) { /* remove top line */
352 val = GET_DATA_BYTE(linet, j + m);
353 histo[val]--;
354 histo16[val >> 4]--;
355 }
356 linet = datat + (i + hf - 1) * wplt;
357 for (m = 0; m < wf; m++) { /* add bottom line */
358 val = GET_DATA_BYTE(linet, j + m);
359 histo[val]++;
360 histo16[val >> 4]++;
361 }
362 }
363
364 /* Find the rank value */
365 sum = 0;
366 for (n = 0; n < 16; n++) { /* search over coarse histo */
367 sum += histo16[n];
368 if (sum > rankloc) {
369 sum -= histo16[n];
370 break;
371 }
372 }
373 if (n == 16) { /* avoid accessing out of bounds */
374 L_WARNING("n = 16; reducing\n", __func__);
375 n = 15;
376 sum -= histo16[n];
377 }
378 k = 16 * n; /* starting value in fine histo */
379 for (m = 0; m < 16; m++) {
380 sum += histo[k];
381 if (sum > rankloc) {
382 SET_DATA_BYTE(lined, j, k);
383 break;
384 }
385 k++;
386 }
387 }
388 }
389 } else { /* wf >= hf */
390 for (i = 0; i < h; i++) { /* column-major */
391 /* Start each row with clean histogram arrays. */
392 for (n = 0; n < 256; n++)
393 histo[n] = 0;
394 for (n = 0; n < 16; n++)
395 histo16[n] = 0;
396 lined = datad + i * wpld;
397 for (j = 0; j < w; j++) { /* fast scan on rows */
398 /* Update the histos for the new location */
399 if (j == 0) { /* do full histo */
400 for (k = 0; k < hf; k++) {
401 linet = datat + (i + k) * wplt;
402 for (m = 0; m < wf; m++) {
403 val = GET_DATA_BYTE(linet, j + m);
404 histo[val]++;
405 histo16[val >> 4]++;
406 }
407 }
408 } else { /* incremental update at left and right sides */
409 for (k = 0; k < hf; k++) {
410 linet = datat + (i + k) * wplt;
411 val = GET_DATA_BYTE(linet, j - 1);
412 histo[val]--;
413 histo16[val >> 4]--;
414 val = GET_DATA_BYTE(linet, j + wf - 1);
415 histo[val]++;
416 histo16[val >> 4]++;
417 }
418 }
419
420 /* Find the rank value */
421 sum = 0;
422 for (n = 0; n < 16; n++) { /* search over coarse histo */
423 sum += histo16[n];
424 if (sum > rankloc) {
425 sum -= histo16[n];
426 break;
427 }
428 }
429 if (n == 16) { /* avoid accessing out of bounds */
430 L_WARNING("n = 16; reducing\n", __func__);
431 n = 15;
432 sum -= histo16[n];
433 }
434 k = 16 * n; /* starting value in fine histo */
435 for (m = 0; m < 16; m++) {
436 sum += histo[k];
437 if (sum > rankloc) {
438 SET_DATA_BYTE(lined, j, k);
439 break;
440 }
441 k++;
442 }
443 }
444 }
445 }
446
447 pixDestroy(&pixt);
448 LEPT_FREE(histo);
449 LEPT_FREE(histo16);
450 return pixd;
451 }
452
453
454 /*----------------------------------------------------------------------*
455 * Median filter *
456 *----------------------------------------------------------------------*/
457 /*!
458 * \brief pixMedianFilter()
459 *
460 * \param[in] pixs 8 or 32 bpp; no colormap
461 * \param[in] wf, hf width and height of filter; each is >= 1
462 * \return pixd of median values, or NULL on error
463 */
464 PIX *
465 pixMedianFilter(PIX *pixs,
466 l_int32 wf,
467 l_int32 hf)
468 {
469 if (!pixs)
470 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
471 return pixRankFilter(pixs, wf, hf, 0.5);
472 }
473
474
475 /*----------------------------------------------------------------------*
476 * Rank filter (accelerated with downscaling) *
477 *----------------------------------------------------------------------*/
478 /*!
479 * \brief pixRankFilterWithScaling()
480 *
481 * \param[in] pixs 8 or 32 bpp; no colormap
482 * \param[in] wf, hf width and height of filter; each is >= 1
483 * \param[in] rank in [0.0 ... 1.0]
484 * \param[in] scalefactor scale factor; must be >= 0.2 and <= 0.7
485 * \return pixd of rank values, or NULL on error
486 *
487 * <pre>
488 * Notes:
489 * (1) This is a convenience function that downscales, does
490 * the rank filtering, and upscales. Because the down-
491 * and up-scaling functions are very fast compared to
492 * rank filtering, the time it takes is reduced from that
493 * for the simple rank filtering operation by approximately
494 * the square of the scaling factor.
495 * </pre>
496 */
497 PIX *
498 pixRankFilterWithScaling(PIX *pixs,
499 l_int32 wf,
500 l_int32 hf,
501 l_float32 rank,
502 l_float32 scalefactor)
503 {
504 l_int32 w, h, d, wfs, hfs;
505 PIX *pix1, *pix2, *pixd;
506
507 if (!pixs)
508 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
509 if (pixGetColormap(pixs) != NULL)
510 return (PIX *)ERROR_PTR("pixs has colormap", __func__, NULL);
511 d = pixGetDepth(pixs);
512 if (d != 8 && d != 32)
513 return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
514 if (wf < 1 || hf < 1)
515 return (PIX *)ERROR_PTR("wf < 1 || hf < 1", __func__, NULL);
516 if (rank < 0.0 || rank > 1.0)
517 return (PIX *)ERROR_PTR("rank must be in [0.0, 1.0]", __func__, NULL);
518 if (wf == 1 && hf == 1) /* no-op */
519 return pixCopy(NULL, pixs);
520 if (scalefactor < 0.2 || scalefactor > 0.7) {
521 L_ERROR("invalid scale factor; no scaling used\n", __func__);
522 return pixRankFilter(pixs, wf, hf, rank);
523 }
524
525 pix1 = pixScaleAreaMap(pixs, scalefactor, scalefactor);
526 wfs = L_MAX(1, (l_int32)(scalefactor * wf + 0.5));
527 hfs = L_MAX(1, (l_int32)(scalefactor * hf + 0.5));
528 pix2 = pixRankFilter(pix1, wfs, hfs, rank);
529 pixGetDimensions(pixs, &w, &h, NULL);
530 pixd = pixScaleToSize(pix2, w, h);
531 pixDestroy(&pix1);
532 pixDestroy(&pix2);
533 return pixd;
534 }