Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/partition.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file partition.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Whitespace block extraction | |
| 32 * BOXA *boxaGetWhiteblocks() | |
| 33 * | |
| 34 * Helpers | |
| 35 * static PARTEL *partelCreate() | |
| 36 * static void partelDestroy() | |
| 37 * static l_int32 partelSetSize() | |
| 38 * static BOXA *boxaGenerateSubboxes() | |
| 39 * static BOX *boxaSelectPivotBox() | |
| 40 * static l_int32 boxaCheckIfOverlapIsSmall() | |
| 41 * BOXA *boxaPruneSortedOnOverlap() | |
| 42 * </pre> | |
| 43 */ | |
| 44 | |
| 45 #ifdef HAVE_CONFIG_H | |
| 46 #include <config_auto.h> | |
| 47 #endif /* HAVE_CONFIG_H */ | |
| 48 | |
| 49 #include "allheaders.h" | |
| 50 | |
| 51 /*! Partition element */ | |
| 52 struct PartitionElement { | |
| 53 l_float32 size; /* sorting key */ | |
| 54 BOX *box; /* region of the element */ | |
| 55 BOXA *boxa; /* set of intersecting boxes */ | |
| 56 }; | |
| 57 typedef struct PartitionElement PARTEL; | |
| 58 | |
| 59 static PARTEL * partelCreate(BOX *box); | |
| 60 static void partelDestroy(PARTEL **ppartel); | |
| 61 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag); | |
| 62 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, | |
| 63 l_float32 fract); | |
| 64 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, | |
| 65 l_float32 fract); | |
| 66 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, | |
| 67 l_float32 maxoverlap); | |
| 68 | |
| 69 static const l_int32 DefaultMaxPops = 20000; | |
| 70 | |
| 71 | |
| 72 #ifndef NO_CONSOLE_IO | |
| 73 #define OUTPUT_HEAP_STATS 0 | |
| 74 #endif /* ~NO_CONSOLE_IO */ | |
| 75 | |
| 76 /*------------------------------------------------------------------* | |
| 77 * Whitespace block extraction * | |
| 78 *------------------------------------------------------------------*/ | |
| 79 /*! | |
| 80 * \brief boxaGetWhiteblocks() | |
| 81 * | |
| 82 * \param[in] boxas typ. a set of bounding boxes of fg components | |
| 83 * \param[in] box initial region; typically including all boxes | |
| 84 * in boxas; if null, it computes the region to | |
| 85 * include all boxes in boxas | |
| 86 * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, | |
| 87 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, | |
| 88 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA | |
| 89 * \param[in] maxboxes max number of output whitespace boxes; e.g., 100 | |
| 90 * \param[in] maxoverlap maximum fractional overlap of a box by any | |
| 91 * of the larger boxes; e.g., 0.2 | |
| 92 * \param[in] maxperim maximum half-perimeter, in pixels, for which | |
| 93 * pivot is selected by proximity to box centroid; | |
| 94 * e.g., 200 | |
| 95 * \param[in] fract fraction of box diagonal that is an acceptable | |
| 96 * distance from the box centroid to select | |
| 97 * the pivot; e.g., 0.2 | |
| 98 * \param[in] maxpops max number of pops from the heap; use 0 as default | |
| 99 * \return boxa of sorted whitespace boxes, or NULL on error | |
| 100 * | |
| 101 * <pre> | |
| 102 * Notes: | |
| 103 * (1) This uses the elegant Breuel algorithm, found in "Two | |
| 104 * Geometric Algorithms for Layout Analysis", 2002, | |
| 105 * url: "citeseer.ist.psu.edu/breuel02two.html". | |
| 106 * It starts with the bounding boxes (b.b.) of the connected | |
| 107 * components (c.c.) in a region, along with the rectangle | |
| 108 * representing that region. It repeatedly divides the | |
| 109 * rectangle into four maximal rectangles that exclude a | |
| 110 * pivot rectangle, sorting them in a priority queue | |
| 111 * according to one of the six sort flags. It returns a boxa | |
| 112 * of the "largest" set that have no intersection with boxes | |
| 113 * from the input boxas. | |
| 114 * (2) If box == NULL, the initial region is the minimal region | |
| 115 * that includes the origin and every box in boxas. | |
| 116 * (3) maxboxes is the maximum number of whitespace boxes that will | |
| 117 * be returned. The actual number will depend on the image | |
| 118 * and the values chosen for maxoverlap and maxpops. In many | |
| 119 * cases, the actual number will be 'maxboxes'. | |
| 120 * (4) maxoverlap allows pruning of whitespace boxes depending on | |
| 121 * the overlap. To avoid all pruning, use maxoverlap = 1.0. | |
| 122 * To select only boxes that have no overlap with each other | |
| 123 * (maximal pruning), choose maxoverlap = 0.0. | |
| 124 * Otherwise, no box can have more than the 'maxoverlap' fraction | |
| 125 * of its area overlapped by any larger (in the sense of the | |
| 126 * sortflag) box. | |
| 127 * (5) Choose maxperim (actually, maximum half-perimeter) to | |
| 128 * represent a c.c. that is small enough so that you don't care | |
| 129 * about the white space that could be inside of it. For all such | |
| 130 * c.c., the pivot for 'quadfurcation' of a rectangle is selected | |
| 131 * as having a reasonable proximity to the rectangle centroid. | |
| 132 * (6) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0 | |
| 133 * to choose the small box nearest the centroid as the pivot. | |
| 134 * If you choose fract > 0.0, it is suggested that you call | |
| 135 * boxaPermuteRandom() first, to permute the boxes (see usage below). | |
| 136 * This should reduce the search time for each of the pivot boxes. | |
| 137 * (7) Choose maxpops to be the maximum number of rectangles that | |
| 138 * are popped from the heap. This is an indirect way to limit the | |
| 139 * execution time. Use 0 for default (a fairly large number). | |
| 140 * At any time, you can expect the heap to contain about | |
| 141 * 2.5 times as many boxes as have been popped off. | |
| 142 * (8) The output result is a sorted set of overlapping | |
| 143 * boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'. | |
| 144 * (9) The main defect of the method is that it abstracts out the | |
| 145 * actual components, retaining only the b.b. for analysis. | |
| 146 * Consider a component with a large b.b. If this is chosen | |
| 147 * as a pivot, all white space inside is immediately taken | |
| 148 * out of consideration. Furthermore, even if it is never chosen | |
| 149 * as a pivot, as the partitioning continues, at no time will | |
| 150 * any of the whitespace inside this component be part of a | |
| 151 * rectangle with zero overlapping boxes. Thus, the interiors | |
| 152 * of all boxes are necessarily excluded from the union of | |
| 153 * the returned whitespace boxes. | |
| 154 * (10) It should be noted that the algorithm puts a large number | |
| 155 * of partels on the queue. Setting a limit of X partels to | |
| 156 * remove from the queue, one typically finds that there will be | |
| 157 * several times that number (say, 2X - 3X) left on the queue. | |
| 158 * For an efficient algorithm to find the largest white or | |
| 159 * or black rectangles, without permitting them to overlap, | |
| 160 * see pixFindLargeRectangles(). | |
| 161 * (11) USAGE: One way to accommodate to this weakness is to remove such | |
| 162 * large b.b. before starting the computation. For example, | |
| 163 * if 'box' is an input image region containing 'boxa' b.b. of c.c.: | |
| 164 * | |
| 165 * // Faster pivot choosing | |
| 166 * boxaPermuteRandom(boxa, boxa); | |
| 167 * | |
| 168 * // Remove anything either large width or height | |
| 169 * boxat = boxaSelectBySize(boxa, maxwidth, maxheight, | |
| 170 * L_SELECT_IF_BOTH, L_SELECT_IF_LT, | |
| 171 * NULL); | |
| 172 * | |
| 173 * boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes, | |
| 174 * maxoverlap, maxperim, fract, | |
| 175 * maxpops); | |
| 176 * | |
| 177 * The result will be rectangular regions of "white space" that | |
| 178 * extend into (and often through) the excluded components. | |
| 179 * (11) As a simple example, suppose you wish to find the columns on a page. | |
| 180 * First exclude large c.c. that may block the columns, and then call: | |
| 181 * | |
| 182 * boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT, | |
| 183 * 20, 0.15, 200, 0.2, 2000); | |
| 184 * | |
| 185 * to get the 20 tallest boxes with no more than 0.15 overlap | |
| 186 * between a box and any of the taller ones, and avoiding the | |
| 187 * use of any c.c. with a b.b. half perimeter greater than 200 | |
| 188 * as a pivot. | |
| 189 * </pre> | |
| 190 */ | |
| 191 BOXA * | |
| 192 boxaGetWhiteblocks(BOXA *boxas, | |
| 193 BOX *box, | |
| 194 l_int32 sortflag, | |
| 195 l_int32 maxboxes, | |
| 196 l_float32 maxoverlap, | |
| 197 l_int32 maxperim, | |
| 198 l_float32 fract, | |
| 199 l_int32 maxpops) | |
| 200 { | |
| 201 l_int32 i, w, h, n, nsub, npush, npop; | |
| 202 BOX *boxsub; | |
| 203 BOXA *boxa, *boxa4, *boxasub, *boxad; | |
| 204 PARTEL *partel; | |
| 205 L_HEAP *lh; | |
| 206 | |
| 207 if (!boxas) | |
| 208 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 209 if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT && | |
| 210 sortflag != L_SORT_BY_MIN_DIMENSION && | |
| 211 sortflag != L_SORT_BY_MAX_DIMENSION && | |
| 212 sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA) | |
| 213 return (BOXA *)ERROR_PTR("invalid sort flag", __func__, NULL); | |
| 214 if (maxboxes < 1) { | |
| 215 maxboxes = 1; | |
| 216 L_WARNING("setting maxboxes = 1\n", __func__); | |
| 217 } | |
| 218 if (maxoverlap < 0.0 || maxoverlap > 1.0) | |
| 219 return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL); | |
| 220 if (maxpops == 0) | |
| 221 maxpops = DefaultMaxPops; | |
| 222 | |
| 223 if (!box) { | |
| 224 boxaGetExtent(boxas, &w, &h, NULL); | |
| 225 box = boxCreate(0, 0, w, h); | |
| 226 } | |
| 227 | |
| 228 /* Prime the heap */ | |
| 229 lh = lheapCreate(20, L_SORT_DECREASING); | |
| 230 partel = partelCreate(box); | |
| 231 partel->boxa = boxaCopy(boxas, L_CLONE); | |
| 232 partelSetSize(partel, sortflag); | |
| 233 lheapAdd(lh, partel); | |
| 234 | |
| 235 npush = 1; | |
| 236 npop = 0; | |
| 237 boxad = boxaCreate(0); | |
| 238 while (1) { | |
| 239 if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */ | |
| 240 break; | |
| 241 | |
| 242 npop++; /* How many boxes have we retrieved from the queue? */ | |
| 243 if (npop > maxpops) { | |
| 244 partelDestroy(&partel); | |
| 245 break; | |
| 246 } | |
| 247 | |
| 248 /* Extract the contents */ | |
| 249 boxa = boxaCopy(partel->boxa, L_CLONE); | |
| 250 box = boxClone(partel->box); | |
| 251 partelDestroy(&partel); | |
| 252 | |
| 253 /* Can we output this one? */ | |
| 254 n = boxaGetCount(boxa); | |
| 255 if (n == 0) { | |
| 256 if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0) | |
| 257 boxaAddBox(boxad, box, L_INSERT); | |
| 258 else | |
| 259 boxDestroy(&box); | |
| 260 boxaDestroy(&boxa); | |
| 261 if (boxaGetCount(boxad) >= maxboxes) /* we're done */ | |
| 262 break; | |
| 263 continue; | |
| 264 } | |
| 265 | |
| 266 | |
| 267 /* Generate up to 4 subboxes and put them on the heap */ | |
| 268 boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract); | |
| 269 boxDestroy(&box); | |
| 270 nsub = boxaGetCount(boxa4); | |
| 271 for (i = 0; i < nsub; i++) { | |
| 272 boxsub = boxaGetBox(boxa4, i, L_CLONE); | |
| 273 boxasub = boxaIntersectsBox(boxa, boxsub); | |
| 274 partel = partelCreate(boxsub); | |
| 275 partel->boxa = boxasub; | |
| 276 partelSetSize(partel, sortflag); | |
| 277 lheapAdd(lh, partel); | |
| 278 boxDestroy(&boxsub); | |
| 279 } | |
| 280 npush += nsub; /* How many boxes have we put on the queue? */ | |
| 281 | |
| 282 /* boxaWriteStderr(boxa4); */ | |
| 283 | |
| 284 boxaDestroy(&boxa4); | |
| 285 boxaDestroy(&boxa); | |
| 286 } | |
| 287 | |
| 288 #if OUTPUT_HEAP_STATS | |
| 289 lept_stderr("Heap statistics:\n"); | |
| 290 lept_stderr(" Number of boxes pushed: %d\n", npush); | |
| 291 lept_stderr(" Number of boxes popped: %d\n", npop); | |
| 292 lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh)); | |
| 293 #endif /* OUTPUT_HEAP_STATS */ | |
| 294 | |
| 295 /* Clean up the heap */ | |
| 296 while ((partel = (PARTEL *)lheapRemove(lh)) != NULL) | |
| 297 partelDestroy(&partel); | |
| 298 lheapDestroy(&lh, FALSE); | |
| 299 | |
| 300 return boxad; | |
| 301 } | |
| 302 | |
| 303 | |
| 304 /*------------------------------------------------------------------* | |
| 305 * Helpers * | |
| 306 *------------------------------------------------------------------*/ | |
| 307 /*! | |
| 308 * \brief partelCreate() | |
| 309 * | |
| 310 * \param[in] box region; inserts a copy | |
| 311 * \return partel, or NULL on error | |
| 312 */ | |
| 313 static PARTEL * | |
| 314 partelCreate(BOX *box) | |
| 315 { | |
| 316 PARTEL *partel; | |
| 317 | |
| 318 partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL)); | |
| 319 partel->box = boxCopy(box); | |
| 320 return partel; | |
| 321 } | |
| 322 | |
| 323 | |
| 324 /*! | |
| 325 * \brief partelDestroy() | |
| 326 * | |
| 327 * \param[in,out] ppartel contents will be set to null before returning | |
| 328 * \return void | |
| 329 */ | |
| 330 static void | |
| 331 partelDestroy(PARTEL **ppartel) | |
| 332 { | |
| 333 PARTEL *partel; | |
| 334 | |
| 335 if (ppartel == NULL) { | |
| 336 L_WARNING("ptr address is null!\n", __func__); | |
| 337 return; | |
| 338 } | |
| 339 | |
| 340 if ((partel = *ppartel) == NULL) | |
| 341 return; | |
| 342 | |
| 343 boxDestroy(&partel->box); | |
| 344 boxaDestroy(&partel->boxa); | |
| 345 LEPT_FREE(partel); | |
| 346 *ppartel = NULL; | |
| 347 return; | |
| 348 } | |
| 349 | |
| 350 | |
| 351 /*! | |
| 352 * \brief partelSetSize() | |
| 353 * | |
| 354 * \param[in] partel | |
| 355 * \param[in] sortflag L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, | |
| 356 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, | |
| 357 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA | |
| 358 * \return 0 if OK, 1 on error | |
| 359 */ | |
| 360 static l_int32 | |
| 361 partelSetSize(PARTEL *partel, | |
| 362 l_int32 sortflag) | |
| 363 { | |
| 364 l_int32 w, h; | |
| 365 | |
| 366 if (!partel) | |
| 367 return ERROR_INT("partel not defined", __func__, 1); | |
| 368 | |
| 369 boxGetGeometry(partel->box, NULL, NULL, &w, &h); | |
| 370 if (sortflag == L_SORT_BY_WIDTH) | |
| 371 partel->size = (l_float32)w; | |
| 372 else if (sortflag == L_SORT_BY_HEIGHT) | |
| 373 partel->size = (l_float32)h; | |
| 374 else if (sortflag == L_SORT_BY_MIN_DIMENSION) | |
| 375 partel->size = (l_float32)L_MIN(w, h); | |
| 376 else if (sortflag == L_SORT_BY_MAX_DIMENSION) | |
| 377 partel->size = (l_float32)L_MAX(w, h); | |
| 378 else if (sortflag == L_SORT_BY_PERIMETER) | |
| 379 partel->size = (l_float32)(w + h); | |
| 380 else if (sortflag == L_SORT_BY_AREA) | |
| 381 partel->size = (l_float32)(w * h); | |
| 382 else | |
| 383 return ERROR_INT("invalid sortflag", __func__, 1); | |
| 384 return 0; | |
| 385 } | |
| 386 | |
| 387 | |
| 388 /*! | |
| 389 * \brief boxaGenerateSubboxes() | |
| 390 * | |
| 391 * \param[in] box region to be split into up to four overlapping | |
| 392 * subregions | |
| 393 * \param[in] boxa boxes of rectangles intersecting the box | |
| 394 * \param[in] maxperim maximum half-perimeter for which pivot | |
| 395 * is selected by proximity to box centroid | |
| 396 * \param[in] fract fraction of box diagonal that is an acceptable | |
| 397 * distance from the box centroid to select the pivot | |
| 398 * \return boxa of four or less overlapping subrectangles of | |
| 399 * the box, or NULL on error | |
| 400 */ | |
| 401 static BOXA * | |
| 402 boxaGenerateSubboxes(BOX *box, | |
| 403 BOXA *boxa, | |
| 404 l_int32 maxperim, | |
| 405 l_float32 fract) | |
| 406 { | |
| 407 l_int32 x, y, w, h, xp, yp, wp, hp; | |
| 408 BOX *boxp; /* pivot box */ | |
| 409 BOX *boxsub; | |
| 410 BOXA *boxa4; | |
| 411 | |
| 412 if (!box) | |
| 413 return (BOXA *)ERROR_PTR("box not defined", __func__, NULL); | |
| 414 if (!boxa) | |
| 415 return (BOXA *)ERROR_PTR("boxa not defined", __func__, NULL); | |
| 416 | |
| 417 boxa4 = boxaCreate(4); | |
| 418 boxp = boxaSelectPivotBox(box, boxa, maxperim, fract); | |
| 419 boxGetGeometry(box, &x, &y, &w, &h); | |
| 420 boxGetGeometry(boxp, &xp, &yp, &wp, &hp); | |
| 421 boxDestroy(&boxp); | |
| 422 if (xp > x) { /* left sub-box */ | |
| 423 boxsub = boxCreate(x, y, xp - x, h); | |
| 424 boxaAddBox(boxa4, boxsub, L_INSERT); | |
| 425 } | |
| 426 if (yp > y) { /* top sub-box */ | |
| 427 boxsub = boxCreate(x, y, w, yp - y); | |
| 428 boxaAddBox(boxa4, boxsub, L_INSERT); | |
| 429 } | |
| 430 if (xp + wp < x + w) { /* right sub-box */ | |
| 431 boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h); | |
| 432 boxaAddBox(boxa4, boxsub, L_INSERT); | |
| 433 } | |
| 434 if (yp + hp < y + h) { /* bottom sub-box */ | |
| 435 boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp); | |
| 436 boxaAddBox(boxa4, boxsub, L_INSERT); | |
| 437 } | |
| 438 | |
| 439 return boxa4; | |
| 440 } | |
| 441 | |
| 442 | |
| 443 /*! | |
| 444 * \brief boxaSelectPivotBox() | |
| 445 * | |
| 446 * \param[in] box containing box; to be split by the pivot box | |
| 447 * \param[in] boxa boxes of rectangles, from which 1 is to be chosen | |
| 448 * \param[in] maxperim maximum half-perimeter for which pivot | |
| 449 * is selected by proximity to box centroid | |
| 450 * \param[in] fract fraction of box diagonal that is an acceptable | |
| 451 * distance from the box centroid to select the pivot | |
| 452 * \return box pivot box for subdivision into 4 rectangles, | |
| 453 * or NULL on error | |
| 454 * | |
| 455 * <pre> | |
| 456 * Notes: | |
| 457 * (1) This is a tricky piece that wasn't discussed in the | |
| 458 * Breuel's 2002 paper. | |
| 459 * (2) Selects a box from boxa whose centroid is reasonably close to | |
| 460 * the centroid of the containing box (xc, yc) and whose | |
| 461 * half-perimeter does not exceed the maxperim value. | |
| 462 * (3) If there are no boxes in the boxa that are small enough, | |
| 463 * then it selects the smallest of the larger boxes, | |
| 464 * without reference to its location in the containing box. | |
| 465 * (4) If a small box has a centroid at a distance from the | |
| 466 * centroid of the containing box that is not more than | |
| 467 * the fraction 'fract' of the diagonal of the containing | |
| 468 * box, that box is chosen as the pivot, terminating the | |
| 469 * search for the nearest small box. | |
| 470 * (5) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0 | |
| 471 * to choose the small box nearest the centroid. | |
| 472 * (6) Choose maxperim to represent a connected component that is | |
| 473 * small enough so that you don't care about the white space | |
| 474 * that could be inside of it. | |
| 475 * </pre> | |
| 476 */ | |
| 477 static BOX * | |
| 478 boxaSelectPivotBox(BOX *box, | |
| 479 BOXA *boxa, | |
| 480 l_int32 maxperim, | |
| 481 l_float32 fract) | |
| 482 { | |
| 483 l_int32 i, n, bw, bh, w, h; | |
| 484 l_int32 smallfound, minindex, perim, minsize; | |
| 485 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy; | |
| 486 BOX *boxt; | |
| 487 | |
| 488 if (!box) | |
| 489 return (BOX *)ERROR_PTR("box not defined", __func__, NULL); | |
| 490 if (!boxa) | |
| 491 return (BOX *)ERROR_PTR("boxa not defined", __func__, NULL); | |
| 492 n = boxaGetCount(boxa); | |
| 493 if (n == 0) | |
| 494 return (BOX *)ERROR_PTR("no boxes in boxa", __func__, NULL); | |
| 495 if (fract < 0.0 || fract > 1.0) { | |
| 496 L_WARNING("fract out of bounds; using 0.0\n", __func__); | |
| 497 fract = 0.0; | |
| 498 } | |
| 499 | |
| 500 boxGetGeometry(box, NULL, NULL, &w, &h); | |
| 501 boxGetCenter(box, &x, &y); | |
| 502 threshdist = fract * (w * w + h * h); | |
| 503 mindist = 1000000000.; | |
| 504 minindex = 0; | |
| 505 smallfound = FALSE; | |
| 506 for (i = 0; i < n; i++) { | |
| 507 boxt = boxaGetBox(boxa, i, L_CLONE); | |
| 508 boxGetGeometry(boxt, NULL, NULL, &bw, &bh); | |
| 509 boxGetCenter(boxt, &cx, &cy); | |
| 510 boxDestroy(&boxt); | |
| 511 if (bw + bh > maxperim) | |
| 512 continue; | |
| 513 smallfound = TRUE; | |
| 514 delx = cx - x; | |
| 515 dely = cy - y; | |
| 516 dist = delx * delx + dely * dely; | |
| 517 if (dist <= threshdist) | |
| 518 return boxaGetBox(boxa, i, L_COPY); | |
| 519 if (dist < mindist) { | |
| 520 minindex = i; | |
| 521 mindist = dist; | |
| 522 } | |
| 523 } | |
| 524 | |
| 525 /* If there are small boxes but none are within 'fract' of the | |
| 526 * centroid, return the nearest one. */ | |
| 527 if (smallfound == TRUE) | |
| 528 return boxaGetBox(boxa, minindex, L_COPY); | |
| 529 | |
| 530 /* No small boxes; return the smallest of the large boxes */ | |
| 531 minsize = 1000000000; | |
| 532 minindex = 0; | |
| 533 for (i = 0; i < n; i++) { | |
| 534 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); | |
| 535 perim = bw + bh; | |
| 536 if (perim < minsize) { | |
| 537 minsize = perim; | |
| 538 minindex = i; | |
| 539 } | |
| 540 } | |
| 541 return boxaGetBox(boxa, minindex, L_COPY); | |
| 542 } | |
| 543 | |
| 544 | |
| 545 /*! | |
| 546 * \brief boxCheckIfOverlapIsBig() | |
| 547 * | |
| 548 * \param[in] box to be tested | |
| 549 * \param[in] boxa of boxes already stored | |
| 550 * \param[in] maxoverlap maximum fractional overlap of the input box | |
| 551 * by any of the boxes in boxa | |
| 552 * \return 0 if box has small overlap with every box in boxa; | |
| 553 * 1 otherwise or on error | |
| 554 */ | |
| 555 static l_int32 | |
| 556 boxCheckIfOverlapIsBig(BOX *box, | |
| 557 BOXA *boxa, | |
| 558 l_float32 maxoverlap) | |
| 559 { | |
| 560 l_int32 i, n, bigoverlap; | |
| 561 l_float32 fract; | |
| 562 BOX *boxt; | |
| 563 | |
| 564 if (!box) | |
| 565 return ERROR_INT("box not defined", __func__, 1); | |
| 566 if (!boxa) | |
| 567 return ERROR_INT("boxa not defined", __func__, 1); | |
| 568 if (maxoverlap < 0.0 || maxoverlap > 1.0) | |
| 569 return ERROR_INT("invalid maxoverlap", __func__, 1); | |
| 570 | |
| 571 n = boxaGetCount(boxa); | |
| 572 if (n == 0 || maxoverlap == 1.0) | |
| 573 return 0; | |
| 574 | |
| 575 bigoverlap = 0; | |
| 576 for (i = 0; i < n; i++) { | |
| 577 boxt = boxaGetBox(boxa, i, L_CLONE); | |
| 578 boxOverlapFraction(boxt, box, &fract); | |
| 579 boxDestroy(&boxt); | |
| 580 if (fract > maxoverlap) { | |
| 581 bigoverlap = 1; | |
| 582 break; | |
| 583 } | |
| 584 } | |
| 585 | |
| 586 return bigoverlap; | |
| 587 } | |
| 588 | |
| 589 | |
| 590 /*! | |
| 591 * \brief boxaPruneSortedOnOverlap() | |
| 592 * | |
| 593 * \param[in] boxas sorted by size in decreasing order | |
| 594 * \param[in] maxoverlap maximum fractional overlap of a box by any | |
| 595 * of the larger boxes | |
| 596 * \return boxad pruned, or NULL on error | |
| 597 * | |
| 598 * <pre> | |
| 599 * Notes: | |
| 600 * (1) This selectively removes smaller boxes when they are overlapped | |
| 601 * by any larger box by more than the input 'maxoverlap' fraction. | |
| 602 * (2) To avoid all pruning, use maxoverlap = 1.0. To select only | |
| 603 * boxes that have no overlap with each other (maximal pruning), | |
| 604 * set maxoverlap = 0.0. | |
| 605 * (3) If there are no boxes in boxas, returns an empty boxa. | |
| 606 * </pre> | |
| 607 */ | |
| 608 BOXA * | |
| 609 boxaPruneSortedOnOverlap(BOXA *boxas, | |
| 610 l_float32 maxoverlap) | |
| 611 { | |
| 612 l_int32 i, j, n, remove; | |
| 613 l_float32 fract; | |
| 614 BOX *box1, *box2; | |
| 615 BOXA *boxad; | |
| 616 | |
| 617 if (!boxas) | |
| 618 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 619 if (maxoverlap < 0.0 || maxoverlap > 1.0) | |
| 620 return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL); | |
| 621 | |
| 622 n = boxaGetCount(boxas); | |
| 623 if (n == 0 || maxoverlap == 1.0) | |
| 624 return boxaCopy(boxas, L_COPY); | |
| 625 | |
| 626 boxad = boxaCreate(0); | |
| 627 box2 = boxaGetBox(boxas, 0, L_COPY); | |
| 628 boxaAddBox(boxad, box2, L_INSERT); | |
| 629 for (j = 1; j < n; j++) { /* prune on j */ | |
| 630 box2 = boxaGetBox(boxas, j, L_COPY); | |
| 631 remove = FALSE; | |
| 632 for (i = 0; i < j; i++) { /* test on i */ | |
| 633 box1 = boxaGetBox(boxas, i, L_CLONE); | |
| 634 boxOverlapFraction(box1, box2, &fract); | |
| 635 boxDestroy(&box1); | |
| 636 if (fract > maxoverlap) { | |
| 637 remove = TRUE; | |
| 638 break; | |
| 639 } | |
| 640 } | |
| 641 if (remove == TRUE) | |
| 642 boxDestroy(&box2); | |
| 643 else | |
| 644 boxaAddBox(boxad, box2, L_INSERT); | |
| 645 } | |
| 646 | |
| 647 return boxad; | |
| 648 } |
