comparison mupdf-source/thirdparty/leptonica/src/partition.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file partition.c
29 * <pre>
30 *
31 * Whitespace block extraction
32 * BOXA *boxaGetWhiteblocks()
33 *
34 * Helpers
35 * static PARTEL *partelCreate()
36 * static void partelDestroy()
37 * static l_int32 partelSetSize()
38 * static BOXA *boxaGenerateSubboxes()
39 * static BOX *boxaSelectPivotBox()
40 * static l_int32 boxaCheckIfOverlapIsSmall()
41 * BOXA *boxaPruneSortedOnOverlap()
42 * </pre>
43 */
44
45 #ifdef HAVE_CONFIG_H
46 #include <config_auto.h>
47 #endif /* HAVE_CONFIG_H */
48
49 #include "allheaders.h"
50
51 /*! Partition element */
52 struct PartitionElement {
53 l_float32 size; /* sorting key */
54 BOX *box; /* region of the element */
55 BOXA *boxa; /* set of intersecting boxes */
56 };
57 typedef struct PartitionElement PARTEL;
58
59 static PARTEL * partelCreate(BOX *box);
60 static void partelDestroy(PARTEL **ppartel);
61 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
62 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
63 l_float32 fract);
64 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
65 l_float32 fract);
66 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
67 l_float32 maxoverlap);
68
69 static const l_int32 DefaultMaxPops = 20000;
70
71
72 #ifndef NO_CONSOLE_IO
73 #define OUTPUT_HEAP_STATS 0
74 #endif /* ~NO_CONSOLE_IO */
75
76 /*------------------------------------------------------------------*
77 * Whitespace block extraction *
78 *------------------------------------------------------------------*/
79 /*!
80 * \brief boxaGetWhiteblocks()
81 *
82 * \param[in] boxas typ. a set of bounding boxes of fg components
83 * \param[in] box initial region; typically including all boxes
84 * in boxas; if null, it computes the region to
85 * include all boxes in boxas
86 * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
87 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
88 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA
89 * \param[in] maxboxes max number of output whitespace boxes; e.g., 100
90 * \param[in] maxoverlap maximum fractional overlap of a box by any
91 * of the larger boxes; e.g., 0.2
92 * \param[in] maxperim maximum half-perimeter, in pixels, for which
93 * pivot is selected by proximity to box centroid;
94 * e.g., 200
95 * \param[in] fract fraction of box diagonal that is an acceptable
96 * distance from the box centroid to select
97 * the pivot; e.g., 0.2
98 * \param[in] maxpops max number of pops from the heap; use 0 as default
99 * \return boxa of sorted whitespace boxes, or NULL on error
100 *
101 * <pre>
102 * Notes:
103 * (1) This uses the elegant Breuel algorithm, found in "Two
104 * Geometric Algorithms for Layout Analysis", 2002,
105 * url: "citeseer.ist.psu.edu/breuel02two.html".
106 * It starts with the bounding boxes (b.b.) of the connected
107 * components (c.c.) in a region, along with the rectangle
108 * representing that region. It repeatedly divides the
109 * rectangle into four maximal rectangles that exclude a
110 * pivot rectangle, sorting them in a priority queue
111 * according to one of the six sort flags. It returns a boxa
112 * of the "largest" set that have no intersection with boxes
113 * from the input boxas.
114 * (2) If box == NULL, the initial region is the minimal region
115 * that includes the origin and every box in boxas.
116 * (3) maxboxes is the maximum number of whitespace boxes that will
117 * be returned. The actual number will depend on the image
118 * and the values chosen for maxoverlap and maxpops. In many
119 * cases, the actual number will be 'maxboxes'.
120 * (4) maxoverlap allows pruning of whitespace boxes depending on
121 * the overlap. To avoid all pruning, use maxoverlap = 1.0.
122 * To select only boxes that have no overlap with each other
123 * (maximal pruning), choose maxoverlap = 0.0.
124 * Otherwise, no box can have more than the 'maxoverlap' fraction
125 * of its area overlapped by any larger (in the sense of the
126 * sortflag) box.
127 * (5) Choose maxperim (actually, maximum half-perimeter) to
128 * represent a c.c. that is small enough so that you don't care
129 * about the white space that could be inside of it. For all such
130 * c.c., the pivot for 'quadfurcation' of a rectangle is selected
131 * as having a reasonable proximity to the rectangle centroid.
132 * (6) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0
133 * to choose the small box nearest the centroid as the pivot.
134 * If you choose fract > 0.0, it is suggested that you call
135 * boxaPermuteRandom() first, to permute the boxes (see usage below).
136 * This should reduce the search time for each of the pivot boxes.
137 * (7) Choose maxpops to be the maximum number of rectangles that
138 * are popped from the heap. This is an indirect way to limit the
139 * execution time. Use 0 for default (a fairly large number).
140 * At any time, you can expect the heap to contain about
141 * 2.5 times as many boxes as have been popped off.
142 * (8) The output result is a sorted set of overlapping
143 * boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'.
144 * (9) The main defect of the method is that it abstracts out the
145 * actual components, retaining only the b.b. for analysis.
146 * Consider a component with a large b.b. If this is chosen
147 * as a pivot, all white space inside is immediately taken
148 * out of consideration. Furthermore, even if it is never chosen
149 * as a pivot, as the partitioning continues, at no time will
150 * any of the whitespace inside this component be part of a
151 * rectangle with zero overlapping boxes. Thus, the interiors
152 * of all boxes are necessarily excluded from the union of
153 * the returned whitespace boxes.
154 * (10) It should be noted that the algorithm puts a large number
155 * of partels on the queue. Setting a limit of X partels to
156 * remove from the queue, one typically finds that there will be
157 * several times that number (say, 2X - 3X) left on the queue.
158 * For an efficient algorithm to find the largest white or
159 * or black rectangles, without permitting them to overlap,
160 * see pixFindLargeRectangles().
161 * (11) USAGE: One way to accommodate to this weakness is to remove such
162 * large b.b. before starting the computation. For example,
163 * if 'box' is an input image region containing 'boxa' b.b. of c.c.:
164 *
165 * // Faster pivot choosing
166 * boxaPermuteRandom(boxa, boxa);
167 *
168 * // Remove anything either large width or height
169 * boxat = boxaSelectBySize(boxa, maxwidth, maxheight,
170 * L_SELECT_IF_BOTH, L_SELECT_IF_LT,
171 * NULL);
172 *
173 * boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes,
174 * maxoverlap, maxperim, fract,
175 * maxpops);
176 *
177 * The result will be rectangular regions of "white space" that
178 * extend into (and often through) the excluded components.
179 * (11) As a simple example, suppose you wish to find the columns on a page.
180 * First exclude large c.c. that may block the columns, and then call:
181 *
182 * boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT,
183 * 20, 0.15, 200, 0.2, 2000);
184 *
185 * to get the 20 tallest boxes with no more than 0.15 overlap
186 * between a box and any of the taller ones, and avoiding the
187 * use of any c.c. with a b.b. half perimeter greater than 200
188 * as a pivot.
189 * </pre>
190 */
191 BOXA *
192 boxaGetWhiteblocks(BOXA *boxas,
193 BOX *box,
194 l_int32 sortflag,
195 l_int32 maxboxes,
196 l_float32 maxoverlap,
197 l_int32 maxperim,
198 l_float32 fract,
199 l_int32 maxpops)
200 {
201 l_int32 i, w, h, n, nsub, npush, npop;
202 BOX *boxsub;
203 BOXA *boxa, *boxa4, *boxasub, *boxad;
204 PARTEL *partel;
205 L_HEAP *lh;
206
207 if (!boxas)
208 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
209 if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
210 sortflag != L_SORT_BY_MIN_DIMENSION &&
211 sortflag != L_SORT_BY_MAX_DIMENSION &&
212 sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
213 return (BOXA *)ERROR_PTR("invalid sort flag", __func__, NULL);
214 if (maxboxes < 1) {
215 maxboxes = 1;
216 L_WARNING("setting maxboxes = 1\n", __func__);
217 }
218 if (maxoverlap < 0.0 || maxoverlap > 1.0)
219 return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL);
220 if (maxpops == 0)
221 maxpops = DefaultMaxPops;
222
223 if (!box) {
224 boxaGetExtent(boxas, &w, &h, NULL);
225 box = boxCreate(0, 0, w, h);
226 }
227
228 /* Prime the heap */
229 lh = lheapCreate(20, L_SORT_DECREASING);
230 partel = partelCreate(box);
231 partel->boxa = boxaCopy(boxas, L_CLONE);
232 partelSetSize(partel, sortflag);
233 lheapAdd(lh, partel);
234
235 npush = 1;
236 npop = 0;
237 boxad = boxaCreate(0);
238 while (1) {
239 if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */
240 break;
241
242 npop++; /* How many boxes have we retrieved from the queue? */
243 if (npop > maxpops) {
244 partelDestroy(&partel);
245 break;
246 }
247
248 /* Extract the contents */
249 boxa = boxaCopy(partel->boxa, L_CLONE);
250 box = boxClone(partel->box);
251 partelDestroy(&partel);
252
253 /* Can we output this one? */
254 n = boxaGetCount(boxa);
255 if (n == 0) {
256 if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
257 boxaAddBox(boxad, box, L_INSERT);
258 else
259 boxDestroy(&box);
260 boxaDestroy(&boxa);
261 if (boxaGetCount(boxad) >= maxboxes) /* we're done */
262 break;
263 continue;
264 }
265
266
267 /* Generate up to 4 subboxes and put them on the heap */
268 boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
269 boxDestroy(&box);
270 nsub = boxaGetCount(boxa4);
271 for (i = 0; i < nsub; i++) {
272 boxsub = boxaGetBox(boxa4, i, L_CLONE);
273 boxasub = boxaIntersectsBox(boxa, boxsub);
274 partel = partelCreate(boxsub);
275 partel->boxa = boxasub;
276 partelSetSize(partel, sortflag);
277 lheapAdd(lh, partel);
278 boxDestroy(&boxsub);
279 }
280 npush += nsub; /* How many boxes have we put on the queue? */
281
282 /* boxaWriteStderr(boxa4); */
283
284 boxaDestroy(&boxa4);
285 boxaDestroy(&boxa);
286 }
287
288 #if OUTPUT_HEAP_STATS
289 lept_stderr("Heap statistics:\n");
290 lept_stderr(" Number of boxes pushed: %d\n", npush);
291 lept_stderr(" Number of boxes popped: %d\n", npop);
292 lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh));
293 #endif /* OUTPUT_HEAP_STATS */
294
295 /* Clean up the heap */
296 while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
297 partelDestroy(&partel);
298 lheapDestroy(&lh, FALSE);
299
300 return boxad;
301 }
302
303
304 /*------------------------------------------------------------------*
305 * Helpers *
306 *------------------------------------------------------------------*/
307 /*!
308 * \brief partelCreate()
309 *
310 * \param[in] box region; inserts a copy
311 * \return partel, or NULL on error
312 */
313 static PARTEL *
314 partelCreate(BOX *box)
315 {
316 PARTEL *partel;
317
318 partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL));
319 partel->box = boxCopy(box);
320 return partel;
321 }
322
323
324 /*!
325 * \brief partelDestroy()
326 *
327 * \param[in,out] ppartel contents will be set to null before returning
328 * \return void
329 */
330 static void
331 partelDestroy(PARTEL **ppartel)
332 {
333 PARTEL *partel;
334
335 if (ppartel == NULL) {
336 L_WARNING("ptr address is null!\n", __func__);
337 return;
338 }
339
340 if ((partel = *ppartel) == NULL)
341 return;
342
343 boxDestroy(&partel->box);
344 boxaDestroy(&partel->boxa);
345 LEPT_FREE(partel);
346 *ppartel = NULL;
347 return;
348 }
349
350
351 /*!
352 * \brief partelSetSize()
353 *
354 * \param[in] partel
355 * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
356 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
357 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA
358 * \return 0 if OK, 1 on error
359 */
360 static l_int32
361 partelSetSize(PARTEL *partel,
362 l_int32 sortflag)
363 {
364 l_int32 w, h;
365
366 if (!partel)
367 return ERROR_INT("partel not defined", __func__, 1);
368
369 boxGetGeometry(partel->box, NULL, NULL, &w, &h);
370 if (sortflag == L_SORT_BY_WIDTH)
371 partel->size = (l_float32)w;
372 else if (sortflag == L_SORT_BY_HEIGHT)
373 partel->size = (l_float32)h;
374 else if (sortflag == L_SORT_BY_MIN_DIMENSION)
375 partel->size = (l_float32)L_MIN(w, h);
376 else if (sortflag == L_SORT_BY_MAX_DIMENSION)
377 partel->size = (l_float32)L_MAX(w, h);
378 else if (sortflag == L_SORT_BY_PERIMETER)
379 partel->size = (l_float32)(w + h);
380 else if (sortflag == L_SORT_BY_AREA)
381 partel->size = (l_float32)(w * h);
382 else
383 return ERROR_INT("invalid sortflag", __func__, 1);
384 return 0;
385 }
386
387
388 /*!
389 * \brief boxaGenerateSubboxes()
390 *
391 * \param[in] box region to be split into up to four overlapping
392 * subregions
393 * \param[in] boxa boxes of rectangles intersecting the box
394 * \param[in] maxperim maximum half-perimeter for which pivot
395 * is selected by proximity to box centroid
396 * \param[in] fract fraction of box diagonal that is an acceptable
397 * distance from the box centroid to select the pivot
398 * \return boxa of four or less overlapping subrectangles of
399 * the box, or NULL on error
400 */
401 static BOXA *
402 boxaGenerateSubboxes(BOX *box,
403 BOXA *boxa,
404 l_int32 maxperim,
405 l_float32 fract)
406 {
407 l_int32 x, y, w, h, xp, yp, wp, hp;
408 BOX *boxp; /* pivot box */
409 BOX *boxsub;
410 BOXA *boxa4;
411
412 if (!box)
413 return (BOXA *)ERROR_PTR("box not defined", __func__, NULL);
414 if (!boxa)
415 return (BOXA *)ERROR_PTR("boxa not defined", __func__, NULL);
416
417 boxa4 = boxaCreate(4);
418 boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
419 boxGetGeometry(box, &x, &y, &w, &h);
420 boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
421 boxDestroy(&boxp);
422 if (xp > x) { /* left sub-box */
423 boxsub = boxCreate(x, y, xp - x, h);
424 boxaAddBox(boxa4, boxsub, L_INSERT);
425 }
426 if (yp > y) { /* top sub-box */
427 boxsub = boxCreate(x, y, w, yp - y);
428 boxaAddBox(boxa4, boxsub, L_INSERT);
429 }
430 if (xp + wp < x + w) { /* right sub-box */
431 boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
432 boxaAddBox(boxa4, boxsub, L_INSERT);
433 }
434 if (yp + hp < y + h) { /* bottom sub-box */
435 boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
436 boxaAddBox(boxa4, boxsub, L_INSERT);
437 }
438
439 return boxa4;
440 }
441
442
443 /*!
444 * \brief boxaSelectPivotBox()
445 *
446 * \param[in] box containing box; to be split by the pivot box
447 * \param[in] boxa boxes of rectangles, from which 1 is to be chosen
448 * \param[in] maxperim maximum half-perimeter for which pivot
449 * is selected by proximity to box centroid
450 * \param[in] fract fraction of box diagonal that is an acceptable
451 * distance from the box centroid to select the pivot
452 * \return box pivot box for subdivision into 4 rectangles,
453 * or NULL on error
454 *
455 * <pre>
456 * Notes:
457 * (1) This is a tricky piece that wasn't discussed in the
458 * Breuel's 2002 paper.
459 * (2) Selects a box from boxa whose centroid is reasonably close to
460 * the centroid of the containing box (xc, yc) and whose
461 * half-perimeter does not exceed the maxperim value.
462 * (3) If there are no boxes in the boxa that are small enough,
463 * then it selects the smallest of the larger boxes,
464 * without reference to its location in the containing box.
465 * (4) If a small box has a centroid at a distance from the
466 * centroid of the containing box that is not more than
467 * the fraction 'fract' of the diagonal of the containing
468 * box, that box is chosen as the pivot, terminating the
469 * search for the nearest small box.
470 * (5) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0
471 * to choose the small box nearest the centroid.
472 * (6) Choose maxperim to represent a connected component that is
473 * small enough so that you don't care about the white space
474 * that could be inside of it.
475 * </pre>
476 */
477 static BOX *
478 boxaSelectPivotBox(BOX *box,
479 BOXA *boxa,
480 l_int32 maxperim,
481 l_float32 fract)
482 {
483 l_int32 i, n, bw, bh, w, h;
484 l_int32 smallfound, minindex, perim, minsize;
485 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy;
486 BOX *boxt;
487
488 if (!box)
489 return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
490 if (!boxa)
491 return (BOX *)ERROR_PTR("boxa not defined", __func__, NULL);
492 n = boxaGetCount(boxa);
493 if (n == 0)
494 return (BOX *)ERROR_PTR("no boxes in boxa", __func__, NULL);
495 if (fract < 0.0 || fract > 1.0) {
496 L_WARNING("fract out of bounds; using 0.0\n", __func__);
497 fract = 0.0;
498 }
499
500 boxGetGeometry(box, NULL, NULL, &w, &h);
501 boxGetCenter(box, &x, &y);
502 threshdist = fract * (w * w + h * h);
503 mindist = 1000000000.;
504 minindex = 0;
505 smallfound = FALSE;
506 for (i = 0; i < n; i++) {
507 boxt = boxaGetBox(boxa, i, L_CLONE);
508 boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
509 boxGetCenter(boxt, &cx, &cy);
510 boxDestroy(&boxt);
511 if (bw + bh > maxperim)
512 continue;
513 smallfound = TRUE;
514 delx = cx - x;
515 dely = cy - y;
516 dist = delx * delx + dely * dely;
517 if (dist <= threshdist)
518 return boxaGetBox(boxa, i, L_COPY);
519 if (dist < mindist) {
520 minindex = i;
521 mindist = dist;
522 }
523 }
524
525 /* If there are small boxes but none are within 'fract' of the
526 * centroid, return the nearest one. */
527 if (smallfound == TRUE)
528 return boxaGetBox(boxa, minindex, L_COPY);
529
530 /* No small boxes; return the smallest of the large boxes */
531 minsize = 1000000000;
532 minindex = 0;
533 for (i = 0; i < n; i++) {
534 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
535 perim = bw + bh;
536 if (perim < minsize) {
537 minsize = perim;
538 minindex = i;
539 }
540 }
541 return boxaGetBox(boxa, minindex, L_COPY);
542 }
543
544
545 /*!
546 * \brief boxCheckIfOverlapIsBig()
547 *
548 * \param[in] box to be tested
549 * \param[in] boxa of boxes already stored
550 * \param[in] maxoverlap maximum fractional overlap of the input box
551 * by any of the boxes in boxa
552 * \return 0 if box has small overlap with every box in boxa;
553 * 1 otherwise or on error
554 */
555 static l_int32
556 boxCheckIfOverlapIsBig(BOX *box,
557 BOXA *boxa,
558 l_float32 maxoverlap)
559 {
560 l_int32 i, n, bigoverlap;
561 l_float32 fract;
562 BOX *boxt;
563
564 if (!box)
565 return ERROR_INT("box not defined", __func__, 1);
566 if (!boxa)
567 return ERROR_INT("boxa not defined", __func__, 1);
568 if (maxoverlap < 0.0 || maxoverlap > 1.0)
569 return ERROR_INT("invalid maxoverlap", __func__, 1);
570
571 n = boxaGetCount(boxa);
572 if (n == 0 || maxoverlap == 1.0)
573 return 0;
574
575 bigoverlap = 0;
576 for (i = 0; i < n; i++) {
577 boxt = boxaGetBox(boxa, i, L_CLONE);
578 boxOverlapFraction(boxt, box, &fract);
579 boxDestroy(&boxt);
580 if (fract > maxoverlap) {
581 bigoverlap = 1;
582 break;
583 }
584 }
585
586 return bigoverlap;
587 }
588
589
590 /*!
591 * \brief boxaPruneSortedOnOverlap()
592 *
593 * \param[in] boxas sorted by size in decreasing order
594 * \param[in] maxoverlap maximum fractional overlap of a box by any
595 * of the larger boxes
596 * \return boxad pruned, or NULL on error
597 *
598 * <pre>
599 * Notes:
600 * (1) This selectively removes smaller boxes when they are overlapped
601 * by any larger box by more than the input 'maxoverlap' fraction.
602 * (2) To avoid all pruning, use maxoverlap = 1.0. To select only
603 * boxes that have no overlap with each other (maximal pruning),
604 * set maxoverlap = 0.0.
605 * (3) If there are no boxes in boxas, returns an empty boxa.
606 * </pre>
607 */
608 BOXA *
609 boxaPruneSortedOnOverlap(BOXA *boxas,
610 l_float32 maxoverlap)
611 {
612 l_int32 i, j, n, remove;
613 l_float32 fract;
614 BOX *box1, *box2;
615 BOXA *boxad;
616
617 if (!boxas)
618 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
619 if (maxoverlap < 0.0 || maxoverlap > 1.0)
620 return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL);
621
622 n = boxaGetCount(boxas);
623 if (n == 0 || maxoverlap == 1.0)
624 return boxaCopy(boxas, L_COPY);
625
626 boxad = boxaCreate(0);
627 box2 = boxaGetBox(boxas, 0, L_COPY);
628 boxaAddBox(boxad, box2, L_INSERT);
629 for (j = 1; j < n; j++) { /* prune on j */
630 box2 = boxaGetBox(boxas, j, L_COPY);
631 remove = FALSE;
632 for (i = 0; i < j; i++) { /* test on i */
633 box1 = boxaGetBox(boxas, i, L_CLONE);
634 boxOverlapFraction(box1, box2, &fract);
635 boxDestroy(&box1);
636 if (fract > maxoverlap) {
637 remove = TRUE;
638 break;
639 }
640 }
641 if (remove == TRUE)
642 boxDestroy(&box2);
643 else
644 boxaAddBox(boxad, box2, L_INSERT);
645 }
646
647 return boxad;
648 }