Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/boxfunc2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file boxfunc2.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Boxa/Box transform (shift, scale) and orthogonal rotation | |
| 32 * BOXA *boxaTransform() | |
| 33 * BOX *boxTransform() | |
| 34 * BOXA *boxaTransformOrdered() | |
| 35 * BOX *boxTransformOrdered() | |
| 36 * BOXA *boxaRotateOrth() | |
| 37 * BOX *boxRotateOrth() | |
| 38 * BOXA *boxaShiftWithPta() | |
| 39 * | |
| 40 * Boxa sort | |
| 41 * BOXA *boxaSort() | |
| 42 * BOXA *boxaBinSort() | |
| 43 * BOXA *boxaSortByIndex() | |
| 44 * BOXAA *boxaSort2d() | |
| 45 * BOXAA *boxaSort2dByIndex() | |
| 46 * | |
| 47 * Boxa statistics | |
| 48 * l_int32 boxaGetRankVals() | |
| 49 * l_int32 boxaGetMedianVals() | |
| 50 * l_int32 boxaGetAverageSize() | |
| 51 * | |
| 52 * Boxa array extraction | |
| 53 * l_int32 boxaExtractAsNuma() | |
| 54 * l_int32 boxaExtractAsPta() | |
| 55 * PTA *boxaExtractCorners() | |
| 56 * | |
| 57 * Other Boxaa functions | |
| 58 * l_int32 boxaaGetExtent() | |
| 59 * BOXA *boxaaFlattenToBoxa() | |
| 60 * BOXA *boxaaFlattenAligned() | |
| 61 * BOXAA *boxaEncapsulateAligned() | |
| 62 * BOXAA *boxaaTranspose() | |
| 63 * l_int32 boxaaAlignBox() | |
| 64 * </pre> | |
| 65 */ | |
| 66 | |
| 67 #ifdef HAVE_CONFIG_H | |
| 68 #include <config_auto.h> | |
| 69 #endif /* HAVE_CONFIG_H */ | |
| 70 | |
| 71 #include <math.h> | |
| 72 #include "allheaders.h" | |
| 73 #include "pix_internal.h" | |
| 74 | |
| 75 /* For more than this number of c.c. in a binarized image of | |
| 76 * semi-perimeter (w + h) about 5000 or less, the O(n) binsort | |
| 77 * is faster than the O(nlogn) shellsort. */ | |
| 78 static const l_int32 MinCompsForBinSort = 200; | |
| 79 | |
| 80 /*---------------------------------------------------------------------* | |
| 81 * Boxa/Box transform (shift, scale) and orthogonal rotation * | |
| 82 *---------------------------------------------------------------------*/ | |
| 83 /*! | |
| 84 * \brief boxaTransform() | |
| 85 * | |
| 86 * \param[in] boxas | |
| 87 * \param[in] shiftx | |
| 88 * \param[in] shifty | |
| 89 * \param[in] scalex | |
| 90 * \param[in] scaley | |
| 91 * \return boxad, or NULL on error | |
| 92 * | |
| 93 * <pre> | |
| 94 * Notes: | |
| 95 * (1) This is a very simple function that first shifts, then scales. | |
| 96 * (2) The UL corner coordinates of all boxes in the output %boxad | |
| 97 * (3) For the boxes in the output %boxad, the UL corner coordinates | |
| 98 * must be non-negative, and the width and height of valid | |
| 99 * boxes must be at least 1. | |
| 100 * </pre> | |
| 101 */ | |
| 102 BOXA * | |
| 103 boxaTransform(BOXA *boxas, | |
| 104 l_int32 shiftx, | |
| 105 l_int32 shifty, | |
| 106 l_float32 scalex, | |
| 107 l_float32 scaley) | |
| 108 { | |
| 109 l_int32 i, n; | |
| 110 BOX *boxs, *boxd; | |
| 111 BOXA *boxad; | |
| 112 | |
| 113 if (!boxas) | |
| 114 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 115 n = boxaGetCount(boxas); | |
| 116 if ((boxad = boxaCreate(n)) == NULL) | |
| 117 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL); | |
| 118 for (i = 0; i < n; i++) { | |
| 119 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { | |
| 120 boxaDestroy(&boxad); | |
| 121 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL); | |
| 122 } | |
| 123 boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley); | |
| 124 boxDestroy(&boxs); | |
| 125 boxaAddBox(boxad, boxd, L_INSERT); | |
| 126 } | |
| 127 | |
| 128 return boxad; | |
| 129 } | |
| 130 | |
| 131 | |
| 132 /*! | |
| 133 * \brief boxTransform() | |
| 134 * | |
| 135 * \param[in] box | |
| 136 * \param[in] shiftx | |
| 137 * \param[in] shifty | |
| 138 * \param[in] scalex | |
| 139 * \param[in] scaley | |
| 140 * \return boxd, or NULL on error | |
| 141 * | |
| 142 * <pre> | |
| 143 * Notes: | |
| 144 * (1) This is a very simple function that first shifts, then scales. | |
| 145 * (2) If the box is invalid, a new invalid box is returned. | |
| 146 * (3) The UL corner coordinates must be non-negative, and the | |
| 147 * width and height of valid boxes must be at least 1. | |
| 148 * </pre> | |
| 149 */ | |
| 150 BOX * | |
| 151 boxTransform(BOX *box, | |
| 152 l_int32 shiftx, | |
| 153 l_int32 shifty, | |
| 154 l_float32 scalex, | |
| 155 l_float32 scaley) | |
| 156 { | |
| 157 if (!box) | |
| 158 return (BOX *)ERROR_PTR("box not defined", __func__, NULL); | |
| 159 if (box->w <= 0 || box->h <= 0) | |
| 160 return boxCreate(0, 0, 0, 0); | |
| 161 else | |
| 162 return boxCreate((l_int32)(L_MAX(0, scalex * (box->x + shiftx) + 0.5)), | |
| 163 (l_int32)(L_MAX(0, scaley * (box->y + shifty) + 0.5)), | |
| 164 (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)), | |
| 165 (l_int32)(L_MAX(1.0, scaley * box->h + 0.5))); | |
| 166 } | |
| 167 | |
| 168 | |
| 169 /*! | |
| 170 * \brief boxaTransformOrdered() | |
| 171 * | |
| 172 * \param[in] boxas | |
| 173 * \param[in] shiftx | |
| 174 * \param[in] shifty | |
| 175 * \param[in] scalex | |
| 176 * \param[in] scaley | |
| 177 * \param[in] xcen, ycen center of rotation | |
| 178 * \param[in] angle in radians; clockwise is positive | |
| 179 * \param[in] order one of 6 combinations: L_TR_SC_RO, ... | |
| 180 * \return boxd, or NULL on error | |
| 181 * | |
| 182 * <pre> | |
| 183 * shift, scaling and rotation, and the order of the | |
| 184 * transforms is specified. | |
| 185 * (2) Although these operations appear to be on an infinite | |
| 186 * 2D plane, in practice the region of interest is clipped | |
| 187 * to a finite image. The center of rotation is usually taken | |
| 188 * with respect to the image (either the UL corner or the | |
| 189 * center). A translation can have two very different effects: | |
| 190 * (a) Moves the boxes across the fixed image region. | |
| 191 * (b) Moves the image origin, causing a change in the image | |
| 192 * region and an opposite effective translation of the boxes. | |
| 193 * This function should only be used for (a), where the image | |
| 194 * region is fixed on translation. If the image region is | |
| 195 * changed by the translation, use instead the functions | |
| 196 * in affinecompose.c, where the image region and rotation | |
| 197 * center can be computed from the actual clipping due to | |
| 198 * translation of the image origin. | |
| 199 * (3) See boxTransformOrdered() for usage and implementation details. | |
| 200 * </pre> | |
| 201 */ | |
| 202 BOXA * | |
| 203 boxaTransformOrdered(BOXA *boxas, | |
| 204 l_int32 shiftx, | |
| 205 l_int32 shifty, | |
| 206 l_float32 scalex, | |
| 207 l_float32 scaley, | |
| 208 l_int32 xcen, | |
| 209 l_int32 ycen, | |
| 210 l_float32 angle, | |
| 211 l_int32 order) | |
| 212 { | |
| 213 l_int32 i, n; | |
| 214 BOX *boxs, *boxd; | |
| 215 BOXA *boxad; | |
| 216 | |
| 217 if (!boxas) | |
| 218 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 219 n = boxaGetCount(boxas); | |
| 220 if ((boxad = boxaCreate(n)) == NULL) | |
| 221 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL); | |
| 222 for (i = 0; i < n; i++) { | |
| 223 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { | |
| 224 boxaDestroy(&boxad); | |
| 225 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL); | |
| 226 } | |
| 227 boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley, | |
| 228 xcen, ycen, angle, order); | |
| 229 boxDestroy(&boxs); | |
| 230 boxaAddBox(boxad, boxd, L_INSERT); | |
| 231 } | |
| 232 | |
| 233 return boxad; | |
| 234 } | |
| 235 | |
| 236 | |
| 237 /*! | |
| 238 * \brief boxTransformOrdered() | |
| 239 * | |
| 240 * \param[in] boxs | |
| 241 * \param[in] shiftx | |
| 242 * \param[in] shifty | |
| 243 * \param[in] scalex | |
| 244 * \param[in] scaley | |
| 245 * \param[in] xcen, ycen center of rotation | |
| 246 * \param[in] angle in radians; clockwise is positive | |
| 247 * \param[in] order one of 6 combinations: L_TR_SC_RO, ... | |
| 248 * \return boxd, or NULL on error | |
| 249 * | |
| 250 * <pre> | |
| 251 * Notes: | |
| 252 * (1) This allows a sequence of linear transforms, composed of | |
| 253 * shift, scaling and rotation, where the order of the | |
| 254 * transforms is specified. | |
| 255 * (2) The rotation is taken about a point specified by (xcen, ycen). | |
| 256 * Let the components of the vector from the center of rotation | |
| 257 * to the box center be (xdif, ydif): | |
| 258 * xdif = (bx + 0.5 * bw) - xcen | |
| 259 * ydif = (by + 0.5 * bh) - ycen | |
| 260 * Then the box center after rotation has new components: | |
| 261 * bxcen = xcen + xdif * cosa + ydif * sina | |
| 262 * bycen = ycen + ydif * cosa - xdif * sina | |
| 263 * where cosa and sina are the cos and sin of the angle, | |
| 264 * and the enclosing box for the rotated box has size: | |
| 265 * rw = |bw * cosa| + |bh * sina| | |
| 266 * rh = |bh * cosa| + |bw * sina| | |
| 267 * where bw and bh are the unrotated width and height. | |
| 268 * Then the box UL corner (rx, ry) is | |
| 269 * rx = bxcen - 0.5 * rw | |
| 270 * ry = bycen - 0.5 * rh | |
| 271 * (3) The center of rotation specified by args %xcen and %ycen | |
| 272 * is the point BEFORE any translation or scaling. If the | |
| 273 * rotation is not the first operation, this function finds | |
| 274 * the actual center at the time of rotation. It does this | |
| 275 * by making the following assumptions: | |
| 276 * (1) Any scaling is with respect to the UL corner, so | |
| 277 * that the center location scales accordingly. | |
| 278 * (2) A translation does not affect the center of | |
| 279 * the image; it just moves the boxes. | |
| 280 * We always use assumption (1). However, assumption (2) | |
| 281 * will be incorrect if the apparent translation is due | |
| 282 * to a clipping operation that, in effect, moves the | |
| 283 * origin of the image. In that case, you should NOT use | |
| 284 * these simple functions. Instead, use the functions | |
| 285 * in affinecompose.c, where the rotation center can be | |
| 286 * computed from the actual clipping due to translation | |
| 287 * of the image origin. | |
| 288 * </pre> | |
| 289 */ | |
| 290 BOX * | |
| 291 boxTransformOrdered(BOX *boxs, | |
| 292 l_int32 shiftx, | |
| 293 l_int32 shifty, | |
| 294 l_float32 scalex, | |
| 295 l_float32 scaley, | |
| 296 l_int32 xcen, | |
| 297 l_int32 ycen, | |
| 298 l_float32 angle, | |
| 299 l_int32 order) | |
| 300 { | |
| 301 l_int32 bx, by, bw, bh, tx, ty, tw, th; | |
| 302 l_int32 xcent, ycent; /* transformed center of rotation due to scaling */ | |
| 303 l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh; | |
| 304 BOX *boxd; | |
| 305 | |
| 306 if (!boxs) | |
| 307 return (BOX *)ERROR_PTR("boxs not defined", __func__, NULL); | |
| 308 if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC && | |
| 309 order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO) | |
| 310 return (BOX *)ERROR_PTR("order invalid", __func__, NULL); | |
| 311 | |
| 312 boxGetGeometry(boxs, &bx, &by, &bw, &bh); | |
| 313 if (bw <= 0 || bh <= 0) /* invalid */ | |
| 314 return boxCreate(0, 0, 0, 0); | |
| 315 if (angle != 0.0) { | |
| 316 sina = sin(angle); | |
| 317 cosa = cos(angle); | |
| 318 } | |
| 319 | |
| 320 if (order == L_TR_SC_RO) { | |
| 321 tx = (l_int32)(scalex * (bx + shiftx) + 0.5); | |
| 322 ty = (l_int32)(scaley * (by + shifty) + 0.5); | |
| 323 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); | |
| 324 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); | |
| 325 xcent = (l_int32)(scalex * xcen + 0.5); | |
| 326 ycent = (l_int32)(scaley * ycen + 0.5); | |
| 327 if (angle == 0.0) { | |
| 328 boxd = boxCreate(tx, ty, tw, th); | |
| 329 } else { | |
| 330 xdif = tx + 0.5 * tw - xcent; | |
| 331 ydif = ty + 0.5 * th - ycent; | |
| 332 rw = L_ABS(tw * cosa) + L_ABS(th * sina); | |
| 333 rh = L_ABS(th * cosa) + L_ABS(tw * sina); | |
| 334 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 335 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 336 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, | |
| 337 (l_int32)rh); | |
| 338 } | |
| 339 } else if (order == L_SC_TR_RO) { | |
| 340 tx = (l_int32)(scalex * bx + shiftx + 0.5); | |
| 341 ty = (l_int32)(scaley * by + shifty + 0.5); | |
| 342 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); | |
| 343 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); | |
| 344 xcent = (l_int32)(scalex * xcen + 0.5); | |
| 345 ycent = (l_int32)(scaley * ycen + 0.5); | |
| 346 if (angle == 0.0) { | |
| 347 boxd = boxCreate(tx, ty, tw, th); | |
| 348 } else { | |
| 349 xdif = tx + 0.5 * tw - xcent; | |
| 350 ydif = ty + 0.5 * th - ycent; | |
| 351 rw = L_ABS(tw * cosa) + L_ABS(th * sina); | |
| 352 rh = L_ABS(th * cosa) + L_ABS(tw * sina); | |
| 353 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 354 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 355 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw, | |
| 356 (l_int32)rh); | |
| 357 } | |
| 358 } else if (order == L_RO_TR_SC) { | |
| 359 if (angle == 0.0) { | |
| 360 rx = bx; | |
| 361 ry = by; | |
| 362 rw = bw; | |
| 363 rh = bh; | |
| 364 } else { | |
| 365 xdif = bx + 0.5 * bw - xcen; | |
| 366 ydif = by + 0.5 * bh - ycen; | |
| 367 rw = L_ABS(bw * cosa) + L_ABS(bh * sina); | |
| 368 rh = L_ABS(bh * cosa) + L_ABS(bw * sina); | |
| 369 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 370 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 371 } | |
| 372 tx = (l_int32)(scalex * (rx + shiftx) + 0.5); | |
| 373 ty = (l_int32)(scaley * (ry + shifty) + 0.5); | |
| 374 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); | |
| 375 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); | |
| 376 boxd = boxCreate(tx, ty, tw, th); | |
| 377 } else if (order == L_RO_SC_TR) { | |
| 378 if (angle == 0.0) { | |
| 379 rx = bx; | |
| 380 ry = by; | |
| 381 rw = bw; | |
| 382 rh = bh; | |
| 383 } else { | |
| 384 xdif = bx + 0.5 * bw - xcen; | |
| 385 ydif = by + 0.5 * bh - ycen; | |
| 386 rw = L_ABS(bw * cosa) + L_ABS(bh * sina); | |
| 387 rh = L_ABS(bh * cosa) + L_ABS(bw * sina); | |
| 388 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 389 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 390 } | |
| 391 tx = (l_int32)(scalex * rx + shiftx + 0.5); | |
| 392 ty = (l_int32)(scaley * ry + shifty + 0.5); | |
| 393 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); | |
| 394 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); | |
| 395 boxd = boxCreate(tx, ty, tw, th); | |
| 396 } else if (order == L_TR_RO_SC) { | |
| 397 tx = bx + shiftx; | |
| 398 ty = by + shifty; | |
| 399 if (angle == 0.0) { | |
| 400 rx = tx; | |
| 401 ry = ty; | |
| 402 rw = bw; | |
| 403 rh = bh; | |
| 404 } else { | |
| 405 xdif = tx + 0.5 * bw - xcen; | |
| 406 ydif = ty + 0.5 * bh - ycen; | |
| 407 rw = L_ABS(bw * cosa) + L_ABS(bh * sina); | |
| 408 rh = L_ABS(bh * cosa) + L_ABS(bw * sina); | |
| 409 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 410 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 411 } | |
| 412 tx = (l_int32)(scalex * rx + 0.5); | |
| 413 ty = (l_int32)(scaley * ry + 0.5); | |
| 414 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5)); | |
| 415 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5)); | |
| 416 boxd = boxCreate(tx, ty, tw, th); | |
| 417 } else { /* order == L_SC_RO_TR) */ | |
| 418 tx = (l_int32)(scalex * bx + 0.5); | |
| 419 ty = (l_int32)(scaley * by + 0.5); | |
| 420 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5)); | |
| 421 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5)); | |
| 422 xcent = (l_int32)(scalex * xcen + 0.5); | |
| 423 ycent = (l_int32)(scaley * ycen + 0.5); | |
| 424 if (angle == 0.0) { | |
| 425 rx = tx; | |
| 426 ry = ty; | |
| 427 rw = tw; | |
| 428 rh = th; | |
| 429 } else { | |
| 430 xdif = tx + 0.5 * tw - xcent; | |
| 431 ydif = ty + 0.5 * th - ycent; | |
| 432 rw = L_ABS(tw * cosa) + L_ABS(th * sina); | |
| 433 rh = L_ABS(th * cosa) + L_ABS(tw * sina); | |
| 434 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw; | |
| 435 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh; | |
| 436 } | |
| 437 tx = (l_int32)(rx + shiftx + 0.5); | |
| 438 ty = (l_int32)(ry + shifty + 0.5); | |
| 439 tw = (l_int32)(rw + 0.5); | |
| 440 th = (l_int32)(rh + 0.5); | |
| 441 boxd = boxCreate(tx, ty, tw, th); | |
| 442 } | |
| 443 | |
| 444 return boxd; | |
| 445 } | |
| 446 | |
| 447 | |
| 448 /*! | |
| 449 * \brief boxaRotateOrth() | |
| 450 * | |
| 451 * \param[in] boxas | |
| 452 * \param[in] w, h of image in which the boxa is embedded | |
| 453 * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; | |
| 454 * all rotations are clockwise | |
| 455 * \return boxad, or NULL on error | |
| 456 * | |
| 457 * <pre> | |
| 458 * Notes: | |
| 459 * (1) See boxRotateOrth() for details. | |
| 460 * </pre> | |
| 461 */ | |
| 462 BOXA * | |
| 463 boxaRotateOrth(BOXA *boxas, | |
| 464 l_int32 w, | |
| 465 l_int32 h, | |
| 466 l_int32 rotation) | |
| 467 { | |
| 468 l_int32 i, n; | |
| 469 BOX *boxs, *boxd; | |
| 470 BOXA *boxad; | |
| 471 | |
| 472 if (!boxas) | |
| 473 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 474 if (rotation < 0 || rotation > 3) | |
| 475 return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL); | |
| 476 if (rotation == 0) | |
| 477 return boxaCopy(boxas, L_COPY); | |
| 478 | |
| 479 n = boxaGetCount(boxas); | |
| 480 if ((boxad = boxaCreate(n)) == NULL) | |
| 481 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL); | |
| 482 for (i = 0; i < n; i++) { | |
| 483 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) { | |
| 484 boxaDestroy(&boxad); | |
| 485 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL); | |
| 486 } | |
| 487 boxd = boxRotateOrth(boxs, w, h, rotation); | |
| 488 boxDestroy(&boxs); | |
| 489 boxaAddBox(boxad, boxd, L_INSERT); | |
| 490 } | |
| 491 | |
| 492 return boxad; | |
| 493 } | |
| 494 | |
| 495 | |
| 496 /*! | |
| 497 * \brief boxRotateOrth() | |
| 498 * | |
| 499 * \param[in] box | |
| 500 * \param[in] w, h of image in which the box is embedded | |
| 501 * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg; | |
| 502 * all rotations are clockwise | |
| 503 * \return boxd, or NULL on error | |
| 504 * | |
| 505 * <pre> | |
| 506 * Notes: | |
| 507 * (1) Rotate the image with the embedded box by the specified amount. | |
| 508 * (2) After rotation, the rotated box is always measured with | |
| 509 * respect to the UL corner of the image. | |
| 510 * </pre> | |
| 511 */ | |
| 512 BOX * | |
| 513 boxRotateOrth(BOX *box, | |
| 514 l_int32 w, | |
| 515 l_int32 h, | |
| 516 l_int32 rotation) | |
| 517 { | |
| 518 l_int32 bx, by, bw, bh, xdist, ydist; | |
| 519 | |
| 520 if (!box) | |
| 521 return (BOX *)ERROR_PTR("box not defined", __func__, NULL); | |
| 522 if (rotation < 0 || rotation > 3) | |
| 523 return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL); | |
| 524 if (rotation == 0) | |
| 525 return boxCopy(box); | |
| 526 | |
| 527 boxGetGeometry(box, &bx, &by, &bw, &bh); | |
| 528 if (bw <= 0 || bh <= 0) /* invalid */ | |
| 529 return boxCreate(0, 0, 0, 0); | |
| 530 ydist = h - by - bh; /* below box */ | |
| 531 xdist = w - bx - bw; /* to right of box */ | |
| 532 if (rotation == 1) /* 90 deg cw */ | |
| 533 return boxCreate(ydist, bx, bh, bw); | |
| 534 else if (rotation == 2) /* 180 deg cw */ | |
| 535 return boxCreate(xdist, ydist, bw, bh); | |
| 536 else /* rotation == 3, 270 deg cw */ | |
| 537 return boxCreate(by, xdist, bh, bw); | |
| 538 } | |
| 539 | |
| 540 | |
| 541 /*! | |
| 542 * \brief boxaShiftWithPta() | |
| 543 * | |
| 544 * \param[in] boxas | |
| 545 * \param[in] pta aligned with the boxes; determines shift amount | |
| 546 * \param[in] dir +1 to shift by the values in pta; -1 to shift | |
| 547 * by the negative of the values in the pta. | |
| 548 * \return boxad, or NULL on error | |
| 549 * | |
| 550 * <pre> | |
| 551 * Notes: | |
| 552 * (1) In use, %pta may come from the UL corners of of a boxa, each | |
| 553 * of whose boxes contains the corresponding box of %boxas | |
| 554 * within it. The output %boxad is then a boxa in the (global) | |
| 555 * coordinates of the containing boxa. So the input %pta | |
| 556 * could come from boxaExtractCorners(). | |
| 557 * (2) The operations with %dir == 1 and %dir == -1 are inverses if | |
| 558 * called in order (1, -1). Starting with an input boxa and | |
| 559 * calling twice with these values of %dir results in a boxa | |
| 560 * identical to the input. However, because box parameters can | |
| 561 * never be negative, calling in the order (-1, 1) may result | |
| 562 * in clipping at the left side and the top. | |
| 563 * </pre> | |
| 564 */ | |
| 565 BOXA * | |
| 566 boxaShiftWithPta(BOXA *boxas, | |
| 567 PTA *pta, | |
| 568 l_int32 dir) | |
| 569 { | |
| 570 l_int32 i, n, x, y, full; | |
| 571 BOX *box1, *box2; | |
| 572 BOXA *boxad; | |
| 573 | |
| 574 if (!boxas) | |
| 575 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 576 boxaIsFull(boxas, &full); | |
| 577 if (!full) | |
| 578 return (BOXA *)ERROR_PTR("boxas not full", __func__, NULL); | |
| 579 if (!pta) | |
| 580 return (BOXA *)ERROR_PTR("pta not defined", __func__, NULL); | |
| 581 if (dir != 1 && dir != -1) | |
| 582 return (BOXA *)ERROR_PTR("invalid dir", __func__, NULL); | |
| 583 n = boxaGetCount(boxas); | |
| 584 if (n != ptaGetCount(pta)) | |
| 585 return (BOXA *)ERROR_PTR("boxas and pta not same size", __func__, NULL); | |
| 586 | |
| 587 if ((boxad = boxaCreate(n)) == NULL) | |
| 588 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL); | |
| 589 for (i = 0; i < n; i++) { | |
| 590 box1 = boxaGetBox(boxas, i, L_COPY); | |
| 591 ptaGetIPt(pta, i, &x, &y); | |
| 592 box2 = boxTransform(box1, dir * x, dir * y, 1.0, 1.0); | |
| 593 boxaAddBox(boxad, box2, L_INSERT); | |
| 594 boxDestroy(&box1); | |
| 595 } | |
| 596 return boxad; | |
| 597 } | |
| 598 | |
| 599 | |
| 600 /*---------------------------------------------------------------------* | |
| 601 * Boxa sort * | |
| 602 *---------------------------------------------------------------------*/ | |
| 603 /*! | |
| 604 * \brief boxaSort() | |
| 605 * | |
| 606 * \param[in] boxas | |
| 607 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, | |
| 608 * L_SORT_BY_RIGHT, L_SORT_BY_BOT, | |
| 609 * L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, | |
| 610 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, | |
| 611 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA, | |
| 612 * L_SORT_BY_ASPECT_RATIO | |
| 613 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING | |
| 614 * \param[out] pnaindex [optional] index of sorted order into | |
| 615 * original array | |
| 616 * \return boxad sorted version of boxas, or NULL on error | |
| 617 * | |
| 618 * <pre> | |
| 619 * Notes: | |
| 620 * (1) An empty boxa returns a copy, with a warning. | |
| 621 * </pre> | |
| 622 */ | |
| 623 BOXA * | |
| 624 boxaSort(BOXA *boxas, | |
| 625 l_int32 sorttype, | |
| 626 l_int32 sortorder, | |
| 627 NUMA **pnaindex) | |
| 628 { | |
| 629 l_int32 i, n, x, y, w, h, size; | |
| 630 BOXA *boxad; | |
| 631 NUMA *na, *naindex; | |
| 632 | |
| 633 if (pnaindex) *pnaindex = NULL; | |
| 634 if (!boxas) | |
| 635 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 636 if ((n = boxaGetCount(boxas)) == 0) { | |
| 637 L_WARNING("boxas is empty\n", __func__); | |
| 638 return boxaCopy(boxas, L_COPY); | |
| 639 } | |
| 640 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && | |
| 641 sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT && | |
| 642 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && | |
| 643 sorttype != L_SORT_BY_MIN_DIMENSION && | |
| 644 sorttype != L_SORT_BY_MAX_DIMENSION && | |
| 645 sorttype != L_SORT_BY_PERIMETER && | |
| 646 sorttype != L_SORT_BY_AREA && | |
| 647 sorttype != L_SORT_BY_ASPECT_RATIO) | |
| 648 return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL); | |
| 649 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) | |
| 650 return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL); | |
| 651 | |
| 652 /* Use O(n) binsort if possible */ | |
| 653 if (n > MinCompsForBinSort && | |
| 654 ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) || | |
| 655 (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || | |
| 656 (sorttype == L_SORT_BY_PERIMETER))) | |
| 657 return boxaBinSort(boxas, sorttype, sortorder, pnaindex); | |
| 658 | |
| 659 /* Build up numa of specific data */ | |
| 660 if ((na = numaCreate(n)) == NULL) | |
| 661 return (BOXA *)ERROR_PTR("na not made", __func__, NULL); | |
| 662 for (i = 0; i < n; i++) { | |
| 663 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); | |
| 664 switch (sorttype) | |
| 665 { | |
| 666 case L_SORT_BY_X: | |
| 667 numaAddNumber(na, x); | |
| 668 break; | |
| 669 case L_SORT_BY_Y: | |
| 670 numaAddNumber(na, y); | |
| 671 break; | |
| 672 case L_SORT_BY_RIGHT: | |
| 673 numaAddNumber(na, x + w - 1); | |
| 674 break; | |
| 675 case L_SORT_BY_BOT: | |
| 676 numaAddNumber(na, y + h - 1); | |
| 677 break; | |
| 678 case L_SORT_BY_WIDTH: | |
| 679 numaAddNumber(na, w); | |
| 680 break; | |
| 681 case L_SORT_BY_HEIGHT: | |
| 682 numaAddNumber(na, h); | |
| 683 break; | |
| 684 case L_SORT_BY_MIN_DIMENSION: | |
| 685 size = L_MIN(w, h); | |
| 686 numaAddNumber(na, size); | |
| 687 break; | |
| 688 case L_SORT_BY_MAX_DIMENSION: | |
| 689 size = L_MAX(w, h); | |
| 690 numaAddNumber(na, size); | |
| 691 break; | |
| 692 case L_SORT_BY_PERIMETER: | |
| 693 size = w + h; | |
| 694 numaAddNumber(na, size); | |
| 695 break; | |
| 696 case L_SORT_BY_AREA: | |
| 697 size = w * h; | |
| 698 numaAddNumber(na, size); | |
| 699 break; | |
| 700 case L_SORT_BY_ASPECT_RATIO: | |
| 701 numaAddNumber(na, (l_float32)w / (l_float32)h); | |
| 702 break; | |
| 703 default: | |
| 704 L_WARNING("invalid sort type\n", __func__); | |
| 705 } | |
| 706 } | |
| 707 | |
| 708 /* Get the sort index for data array */ | |
| 709 naindex = numaGetSortIndex(na, sortorder); | |
| 710 numaDestroy(&na); | |
| 711 if (!naindex) | |
| 712 return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL); | |
| 713 | |
| 714 /* Build up sorted boxa using sort index */ | |
| 715 boxad = boxaSortByIndex(boxas, naindex); | |
| 716 | |
| 717 if (pnaindex) | |
| 718 *pnaindex = naindex; | |
| 719 else | |
| 720 numaDestroy(&naindex); | |
| 721 return boxad; | |
| 722 } | |
| 723 | |
| 724 | |
| 725 /*! | |
| 726 * \brief boxaBinSort() | |
| 727 * | |
| 728 * \param[in] boxas | |
| 729 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, | |
| 730 * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER | |
| 731 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING | |
| 732 * \param[out] pnaindex [optional] index of sorted order into | |
| 733 * original array | |
| 734 * \return boxad sorted version of boxas, or NULL on error | |
| 735 * | |
| 736 * <pre> | |
| 737 * Notes: | |
| 738 * (1) For a large number of boxes (say, greater than 1000), this | |
| 739 * O(n) binsort is much faster than the O(nlogn) shellsort. | |
| 740 * For 5000 components, this is over 20x faster than boxaSort(). | |
| 741 * (2) Consequently, boxaSort() calls this function if it will | |
| 742 * likely go much faster. | |
| 743 * </pre> | |
| 744 */ | |
| 745 BOXA * | |
| 746 boxaBinSort(BOXA *boxas, | |
| 747 l_int32 sorttype, | |
| 748 l_int32 sortorder, | |
| 749 NUMA **pnaindex) | |
| 750 { | |
| 751 l_int32 i, n, x, y, w, h; | |
| 752 BOXA *boxad; | |
| 753 NUMA *na, *naindex; | |
| 754 | |
| 755 if (pnaindex) *pnaindex = NULL; | |
| 756 if (!boxas) | |
| 757 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 758 if ((n = boxaGetCount(boxas)) == 0) { | |
| 759 L_WARNING("boxas is empty\n", __func__); | |
| 760 return boxaCopy(boxas, L_COPY); | |
| 761 } | |
| 762 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && | |
| 763 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && | |
| 764 sorttype != L_SORT_BY_PERIMETER) | |
| 765 return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL); | |
| 766 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) | |
| 767 return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL); | |
| 768 | |
| 769 /* Generate Numa of appropriate box dimensions */ | |
| 770 if ((na = numaCreate(n)) == NULL) | |
| 771 return (BOXA *)ERROR_PTR("na not made", __func__, NULL); | |
| 772 for (i = 0; i < n; i++) { | |
| 773 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h); | |
| 774 switch (sorttype) | |
| 775 { | |
| 776 case L_SORT_BY_X: | |
| 777 numaAddNumber(na, x); | |
| 778 break; | |
| 779 case L_SORT_BY_Y: | |
| 780 numaAddNumber(na, y); | |
| 781 break; | |
| 782 case L_SORT_BY_WIDTH: | |
| 783 numaAddNumber(na, w); | |
| 784 break; | |
| 785 case L_SORT_BY_HEIGHT: | |
| 786 numaAddNumber(na, h); | |
| 787 break; | |
| 788 case L_SORT_BY_PERIMETER: | |
| 789 numaAddNumber(na, w + h); | |
| 790 break; | |
| 791 default: | |
| 792 L_WARNING("invalid sort type\n", __func__); | |
| 793 } | |
| 794 } | |
| 795 | |
| 796 /* Get the sort index for data array */ | |
| 797 naindex = numaGetBinSortIndex(na, sortorder); | |
| 798 numaDestroy(&na); | |
| 799 if (!naindex) | |
| 800 return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL); | |
| 801 | |
| 802 /* Build up sorted boxa using the sort index */ | |
| 803 boxad = boxaSortByIndex(boxas, naindex); | |
| 804 | |
| 805 if (pnaindex) | |
| 806 *pnaindex = naindex; | |
| 807 else | |
| 808 numaDestroy(&naindex); | |
| 809 return boxad; | |
| 810 } | |
| 811 | |
| 812 | |
| 813 /*! | |
| 814 * \brief boxaSortByIndex() | |
| 815 * | |
| 816 * \param[in] boxas | |
| 817 * \param[in] naindex na that maps from the new boxa to the input boxa | |
| 818 * \return boxad sorted, or NULL on error | |
| 819 */ | |
| 820 BOXA * | |
| 821 boxaSortByIndex(BOXA *boxas, | |
| 822 NUMA *naindex) | |
| 823 { | |
| 824 l_int32 i, n, index; | |
| 825 BOX *box; | |
| 826 BOXA *boxad; | |
| 827 | |
| 828 if (!boxas) | |
| 829 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 830 if ((n = boxaGetCount(boxas)) == 0) { | |
| 831 L_WARNING("boxas is empty\n", __func__); | |
| 832 return boxaCopy(boxas, L_COPY); | |
| 833 } | |
| 834 if (!naindex) | |
| 835 return (BOXA *)ERROR_PTR("naindex not defined", __func__, NULL); | |
| 836 | |
| 837 boxad = boxaCreate(n); | |
| 838 for (i = 0; i < n; i++) { | |
| 839 numaGetIValue(naindex, i, &index); | |
| 840 box = boxaGetBox(boxas, index, L_COPY); | |
| 841 boxaAddBox(boxad, box, L_INSERT); | |
| 842 } | |
| 843 | |
| 844 return boxad; | |
| 845 } | |
| 846 | |
| 847 | |
| 848 /*! | |
| 849 * \brief boxaSort2d() | |
| 850 * | |
| 851 * \param[in] boxas | |
| 852 * \param[out] pnaad [optional] numaa with sorted indices | |
| 853 * whose values are the indices of the input array | |
| 854 * \param[in] delta1 min separation that permits aggregation of a box | |
| 855 * onto a boxa of horizontally-aligned boxes; pass 1 | |
| 856 * \param[in] delta2 min separation that permits aggregation of a box | |
| 857 * onto a boxa of horizontally-aligned boxes; pass 2 | |
| 858 * \param[in] minh1 components less than this height either join an | |
| 859 * existing boxa or are set aside for pass 2 | |
| 860 * \return baa 2d sorted version of boxa, or NULL on error | |
| 861 * | |
| 862 * <pre> | |
| 863 * Notes: | |
| 864 * (1) The final result is a sort where the 'fast scan' direction is | |
| 865 * left to right, and the 'slow scan' direction is from top | |
| 866 * to bottom. Each boxa in the baa represents a sorted set | |
| 867 * of boxes from left to right. | |
| 868 * (2) Three passes are used to aggregate the boxas, which can correspond | |
| 869 * to characters or words in a line of text. In pass 1, only | |
| 870 * taller components, which correspond to xheight or larger, | |
| 871 * are permitted to start a new boxa. In pass 2, the remaining | |
| 872 * vertically-challenged components are allowed to join an | |
| 873 * existing boxa or start a new one. In pass 3, boxa whose extent | |
| 874 * is overlapping are joined. After that, the boxes in each | |
| 875 * boxa are sorted horizontally, and finally the boxa are | |
| 876 * sorted vertically. | |
| 877 * (3) If %delta1 > 0, the first pass allows aggregation when | |
| 878 * boxes in the same boxa do not overlap vertically. In fact, | |
| 879 * %delta1 is the max distance by which they can miss and still | |
| 880 * be aggregated. If %delta1 < 0, the box must have vertical | |
| 881 * overlap of at least abs(%delta1) with the boxa before it | |
| 882 * can be merged. Similar for delta2 on the second pass. | |
| 883 * (4) On the first pass, any component of height less than minh1 | |
| 884 * cannot start a new boxa; it's put aside for later insertion. | |
| 885 * (5) On the second pass, any small component that doesn't align | |
| 886 * with an existing boxa can start a new one. | |
| 887 * (6) This can be used to identify lines of text from | |
| 888 * character or word bounding boxes. | |
| 889 * (7) Typical values for the input parameters on 300 ppi text are: | |
| 890 * delta1 ~ 0 | |
| 891 * delta2 ~ 0 | |
| 892 * minh1 ~ 5 | |
| 893 * </pre> | |
| 894 */ | |
| 895 BOXAA * | |
| 896 boxaSort2d(BOXA *boxas, | |
| 897 NUMAA **pnaad, | |
| 898 l_int32 delta1, | |
| 899 l_int32 delta2, | |
| 900 l_int32 minh1) | |
| 901 { | |
| 902 l_int32 i, index, h, nt, ne, n, m, ival; | |
| 903 BOX *box; | |
| 904 BOXA *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs; | |
| 905 BOXAA *baa, *baa1, *baad; | |
| 906 NUMA *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap; | |
| 907 NUMAA *naa, *naa1, *naad; | |
| 908 | |
| 909 if (pnaad) *pnaad = NULL; | |
| 910 if (!boxas) | |
| 911 return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 912 if (boxaGetCount(boxas) == 0) | |
| 913 return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL); | |
| 914 | |
| 915 /* Sort from left to right */ | |
| 916 if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex)) | |
| 917 == NULL) | |
| 918 return (BOXAA *)ERROR_PTR("boxa not made", __func__, NULL); | |
| 919 | |
| 920 /* First pass: assign taller boxes to boxa by row */ | |
| 921 nt = boxaGetCount(boxa); | |
| 922 baa = boxaaCreate(0); | |
| 923 naa = numaaCreate(0); | |
| 924 boxae = boxaCreate(0); /* save small height boxes here */ | |
| 925 nae = numaCreate(0); /* keep track of small height boxes */ | |
| 926 for (i = 0; i < nt; i++) { | |
| 927 box = boxaGetBox(boxa, i, L_CLONE); | |
| 928 boxGetGeometry(box, NULL, NULL, NULL, &h); | |
| 929 if (h < minh1) { /* save for 2nd pass */ | |
| 930 boxaAddBox(boxae, box, L_INSERT); | |
| 931 numaAddNumber(nae, i); | |
| 932 } else { | |
| 933 n = boxaaGetCount(baa); | |
| 934 boxaaAlignBox(baa, box, delta1, &index); | |
| 935 if (index < n) { /* append to an existing boxa */ | |
| 936 boxaaAddBox(baa, index, box, L_INSERT); | |
| 937 } else { /* doesn't align, need new boxa */ | |
| 938 boxan = boxaCreate(0); | |
| 939 boxaAddBox(boxan, box, L_INSERT); | |
| 940 boxaaAddBoxa(baa, boxan, L_INSERT); | |
| 941 nan = numaCreate(0); | |
| 942 numaaAddNuma(naa, nan, L_INSERT); | |
| 943 } | |
| 944 numaGetIValue(naindex, i, &ival); | |
| 945 numaaAddNumber(naa, index, ival); | |
| 946 } | |
| 947 } | |
| 948 boxaDestroy(&boxa); | |
| 949 numaDestroy(&naindex); | |
| 950 | |
| 951 /* Second pass: feed in small height boxes */ | |
| 952 ne = boxaGetCount(boxae); | |
| 953 for (i = 0; i < ne; i++) { | |
| 954 box = boxaGetBox(boxae, i, L_CLONE); | |
| 955 n = boxaaGetCount(baa); | |
| 956 boxaaAlignBox(baa, box, delta2, &index); | |
| 957 if (index < n) { /* append to an existing boxa */ | |
| 958 boxaaAddBox(baa, index, box, L_INSERT); | |
| 959 } else { /* doesn't align, need new boxa */ | |
| 960 boxan = boxaCreate(0); | |
| 961 boxaAddBox(boxan, box, L_INSERT); | |
| 962 boxaaAddBoxa(baa, boxan, L_INSERT); | |
| 963 nan = numaCreate(0); | |
| 964 numaaAddNuma(naa, nan, L_INSERT); | |
| 965 } | |
| 966 numaGetIValue(nae, i, &ival); /* location in original boxas */ | |
| 967 numaaAddNumber(naa, index, ival); | |
| 968 } | |
| 969 | |
| 970 /* Third pass: merge some boxa whose extent is overlapping. | |
| 971 * Think of these boxa as text lines, where the bounding boxes | |
| 972 * of the text lines can overlap, but likely won't have | |
| 973 * a huge overlap. | |
| 974 * First do a greedy find of pairs of overlapping boxa, where | |
| 975 * the two boxa overlap by at least 50% of the smaller, and | |
| 976 * the smaller is not more than half the area of the larger. | |
| 977 * For such pairs, call the larger one the primary boxa. The | |
| 978 * boxes in the smaller one are appended to those in the primary | |
| 979 * in pass 3a, and the primaries are extracted in pass 3b. | |
| 980 * In this way, all boxes in the original baa are saved. */ | |
| 981 n = boxaaGetCount(baa); | |
| 982 boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3); | |
| 983 boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap); | |
| 984 boxaDestroy(&boxa1); | |
| 985 boxaDestroy(&boxa3); | |
| 986 for (i = 0; i < n; i++) { /* Pass 3a: join selected copies of boxa */ | |
| 987 numaGetIValue(namap, i, &ival); | |
| 988 if (ival >= 0) { /* join current to primary boxa[ival] */ | |
| 989 boxa1 = boxaaGetBoxa(baa, i, L_COPY); | |
| 990 boxa2 = boxaaGetBoxa(baa, ival, L_CLONE); | |
| 991 boxaJoin(boxa2, boxa1, 0, -1); | |
| 992 boxaDestroy(&boxa2); | |
| 993 boxaDestroy(&boxa1); | |
| 994 na1 = numaaGetNuma(naa, i, L_COPY); | |
| 995 na2 = numaaGetNuma(naa, ival, L_CLONE); | |
| 996 numaJoin(na2, na1, 0, -1); | |
| 997 numaDestroy(&na1); | |
| 998 numaDestroy(&na2); | |
| 999 } | |
| 1000 } | |
| 1001 baa1 = boxaaCreate(n); | |
| 1002 naa1 = numaaCreate(n); | |
| 1003 for (i = 0; i < n; i++) { /* Pass 3b: save primary boxa */ | |
| 1004 numaGetIValue(namap, i, &ival); | |
| 1005 if (ival == -1) { | |
| 1006 boxa1 = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1007 boxaaAddBoxa(baa1, boxa1, L_INSERT); | |
| 1008 na1 = numaaGetNuma(naa, i, L_CLONE); | |
| 1009 numaaAddNuma(naa1, na1, L_INSERT); | |
| 1010 } | |
| 1011 } | |
| 1012 numaDestroy(&namap); | |
| 1013 boxaaDestroy(&baa); | |
| 1014 baa = baa1; | |
| 1015 numaaDestroy(&naa); | |
| 1016 naa = naa1; | |
| 1017 | |
| 1018 /* Sort the boxes in each boxa horizontally */ | |
| 1019 m = boxaaGetCount(baa); | |
| 1020 for (i = 0; i < m; i++) { | |
| 1021 boxa1 = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1022 boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah); | |
| 1023 boxaaReplaceBoxa(baa, i, boxa2); | |
| 1024 na1 = numaaGetNuma(naa, i, L_CLONE); | |
| 1025 na2 = numaSortByIndex(na1, nah); | |
| 1026 numaaReplaceNuma(naa, i, na2); | |
| 1027 boxaDestroy(&boxa1); | |
| 1028 numaDestroy(&na1); | |
| 1029 numaDestroy(&nah); | |
| 1030 } | |
| 1031 | |
| 1032 /* Sort the boxa vertically within boxaa, using the first box | |
| 1033 * in each boxa. */ | |
| 1034 m = boxaaGetCount(baa); | |
| 1035 boxav = boxaCreate(m); /* holds first box in each boxa in baa */ | |
| 1036 naad = numaaCreate(m); | |
| 1037 if (pnaad) | |
| 1038 *pnaad = naad; | |
| 1039 baad = boxaaCreate(m); | |
| 1040 for (i = 0; i < m; i++) { | |
| 1041 boxa1 = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1042 box = boxaGetBox(boxa1, 0, L_CLONE); | |
| 1043 boxaAddBox(boxav, box, L_INSERT); | |
| 1044 boxaDestroy(&boxa1); | |
| 1045 } | |
| 1046 boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav); | |
| 1047 for (i = 0; i < m; i++) { | |
| 1048 numaGetIValue(nav, i, &index); | |
| 1049 boxa = boxaaGetBoxa(baa, index, L_CLONE); | |
| 1050 boxaaAddBoxa(baad, boxa, L_INSERT); | |
| 1051 nad = numaaGetNuma(naa, index, L_CLONE); | |
| 1052 numaaAddNuma(naad, nad, L_INSERT); | |
| 1053 } | |
| 1054 | |
| 1055 | |
| 1056 /* lept_stderr("box count = %d, numaa count = %d\n", nt, | |
| 1057 numaaGetNumberCount(naad)); */ | |
| 1058 | |
| 1059 boxaaDestroy(&baa); | |
| 1060 boxaDestroy(&boxav); | |
| 1061 boxaDestroy(&boxavs); | |
| 1062 boxaDestroy(&boxae); | |
| 1063 numaDestroy(&nav); | |
| 1064 numaDestroy(&nae); | |
| 1065 numaaDestroy(&naa); | |
| 1066 if (!pnaad) | |
| 1067 numaaDestroy(&naad); | |
| 1068 | |
| 1069 return baad; | |
| 1070 } | |
| 1071 | |
| 1072 | |
| 1073 /*! | |
| 1074 * \brief boxaSort2dByIndex() | |
| 1075 * | |
| 1076 * \param[in] boxas | |
| 1077 * \param[in] naa numaa that maps from the new baa to the input boxa | |
| 1078 * \return baa sorted boxaa, or NULL on error | |
| 1079 */ | |
| 1080 BOXAA * | |
| 1081 boxaSort2dByIndex(BOXA *boxas, | |
| 1082 NUMAA *naa) | |
| 1083 { | |
| 1084 l_int32 ntot, boxtot, i, j, n, nn, index; | |
| 1085 BOX *box; | |
| 1086 BOXA *boxa; | |
| 1087 BOXAA *baa; | |
| 1088 NUMA *na; | |
| 1089 | |
| 1090 if (!boxas) | |
| 1091 return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 1092 if ((boxtot = boxaGetCount(boxas)) == 0) | |
| 1093 return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL); | |
| 1094 if (!naa) | |
| 1095 return (BOXAA *)ERROR_PTR("naindex not defined", __func__, NULL); | |
| 1096 | |
| 1097 /* Check counts */ | |
| 1098 ntot = numaaGetNumberCount(naa); | |
| 1099 if (ntot != boxtot) | |
| 1100 return (BOXAA *)ERROR_PTR("element count mismatch", __func__, NULL); | |
| 1101 | |
| 1102 n = numaaGetCount(naa); | |
| 1103 baa = boxaaCreate(n); | |
| 1104 for (i = 0; i < n; i++) { | |
| 1105 na = numaaGetNuma(naa, i, L_CLONE); | |
| 1106 nn = numaGetCount(na); | |
| 1107 boxa = boxaCreate(nn); | |
| 1108 for (j = 0; j < nn; j++) { | |
| 1109 numaGetIValue(na, i, &index); | |
| 1110 box = boxaGetBox(boxas, index, L_COPY); | |
| 1111 boxaAddBox(boxa, box, L_INSERT); | |
| 1112 } | |
| 1113 boxaaAddBoxa(baa, boxa, L_INSERT); | |
| 1114 numaDestroy(&na); | |
| 1115 } | |
| 1116 | |
| 1117 return baa; | |
| 1118 } | |
| 1119 | |
| 1120 | |
| 1121 /*---------------------------------------------------------------------* | |
| 1122 * Boxa array extraction * | |
| 1123 *---------------------------------------------------------------------*/ | |
| 1124 /*! | |
| 1125 * \brief boxaExtractAsNuma() | |
| 1126 * | |
| 1127 * \param[in] boxa | |
| 1128 * \param[out] pnal [optional] array of left locations | |
| 1129 * \param[out] pnat [optional] array of top locations | |
| 1130 * \param[out] pnar [optional] array of right locations | |
| 1131 * \param[out] pnab [optional] array of bottom locations | |
| 1132 * \param[out] pnaw [optional] array of widths | |
| 1133 * \param[out] pnah [optional] array of heights | |
| 1134 * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them | |
| 1135 * \return 0 if OK, 1 on error | |
| 1136 * | |
| 1137 * <pre> | |
| 1138 * Notes: | |
| 1139 * (1) If you are counting or sorting values, such as determining | |
| 1140 * rank order, you must remove invalid boxes. | |
| 1141 * (2) If you are parametrizing the values, or doing an evaluation | |
| 1142 * where the position in the boxa sequence is important, you | |
| 1143 * must replace the invalid boxes with valid ones before | |
| 1144 * doing the extraction. This is easily done with boxaFillSequence(). | |
| 1145 * </pre> | |
| 1146 */ | |
| 1147 l_ok | |
| 1148 boxaExtractAsNuma(BOXA *boxa, | |
| 1149 NUMA **pnal, | |
| 1150 NUMA **pnat, | |
| 1151 NUMA **pnar, | |
| 1152 NUMA **pnab, | |
| 1153 NUMA **pnaw, | |
| 1154 NUMA **pnah, | |
| 1155 l_int32 keepinvalid) | |
| 1156 { | |
| 1157 l_int32 i, n, left, top, right, bot, w, h; | |
| 1158 | |
| 1159 if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah) | |
| 1160 return ERROR_INT("no output requested", __func__, 1); | |
| 1161 if (pnal) *pnal = NULL; | |
| 1162 if (pnat) *pnat = NULL; | |
| 1163 if (pnar) *pnar = NULL; | |
| 1164 if (pnab) *pnab = NULL; | |
| 1165 if (pnaw) *pnaw = NULL; | |
| 1166 if (pnah) *pnah = NULL; | |
| 1167 if (!boxa) | |
| 1168 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1169 if (!keepinvalid && boxaGetValidCount(boxa) == 0) | |
| 1170 return ERROR_INT("no valid boxes", __func__, 1); | |
| 1171 | |
| 1172 n = boxaGetCount(boxa); | |
| 1173 if (pnal) *pnal = numaCreate(n); | |
| 1174 if (pnat) *pnat = numaCreate(n); | |
| 1175 if (pnar) *pnar = numaCreate(n); | |
| 1176 if (pnab) *pnab = numaCreate(n); | |
| 1177 if (pnaw) *pnaw = numaCreate(n); | |
| 1178 if (pnah) *pnah = numaCreate(n); | |
| 1179 for (i = 0; i < n; i++) { | |
| 1180 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); | |
| 1181 if (!keepinvalid && (w <= 0 || h <= 0)) | |
| 1182 continue; | |
| 1183 right = left + w - 1; | |
| 1184 bot = top + h - 1; | |
| 1185 if (pnal) numaAddNumber(*pnal, left); | |
| 1186 if (pnat) numaAddNumber(*pnat, top); | |
| 1187 if (pnar) numaAddNumber(*pnar, right); | |
| 1188 if (pnab) numaAddNumber(*pnab, bot); | |
| 1189 if (pnaw) numaAddNumber(*pnaw, w); | |
| 1190 if (pnah) numaAddNumber(*pnah, h); | |
| 1191 } | |
| 1192 | |
| 1193 return 0; | |
| 1194 } | |
| 1195 | |
| 1196 | |
| 1197 /*! | |
| 1198 * \brief boxaExtractAsPta() | |
| 1199 * | |
| 1200 * \param[in] boxa | |
| 1201 * \param[out] pptal [optional] array of left locations vs. index | |
| 1202 * \param[out] pptat [optional] array of top locations vs. index | |
| 1203 * \param[out] pptar [optional] array of right locations vs. index | |
| 1204 * \param[out] pptab [optional] array of bottom locations vs. index | |
| 1205 * \param[out] pptaw [optional] array of widths vs. index | |
| 1206 * \param[out] pptah [optional] array of heights vs. index | |
| 1207 * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them | |
| 1208 * \return 0 if OK, 1 on error | |
| 1209 * | |
| 1210 * <pre> | |
| 1211 * Notes: | |
| 1212 * (1) For most applications, such as counting, sorting, fitting | |
| 1213 * to some parametrized form, plotting or filtering in general, | |
| 1214 * you should remove the invalid boxes. Each pta saves the | |
| 1215 * box index in the x array, so replacing invalid boxes by | |
| 1216 * filling with boxaFillSequence(), which is required for | |
| 1217 * boxaExtractAsNuma(), is not necessary. | |
| 1218 * (2) If invalid boxes are retained, each one will result in | |
| 1219 * entries (typically 0) in all selected output pta. | |
| 1220 * (3) Other boxa --> pta functions are: | |
| 1221 * * boxaExtractCorners(): extracts any of the four corners as a pta. | |
| 1222 * * boxaConvertToPta(): extracts sufficient number of corners | |
| 1223 * to allow reconstruction of the original boxa from the pta. | |
| 1224 * </pre> | |
| 1225 */ | |
| 1226 l_ok | |
| 1227 boxaExtractAsPta(BOXA *boxa, | |
| 1228 PTA **pptal, | |
| 1229 PTA **pptat, | |
| 1230 PTA **pptar, | |
| 1231 PTA **pptab, | |
| 1232 PTA **pptaw, | |
| 1233 PTA **pptah, | |
| 1234 l_int32 keepinvalid) | |
| 1235 { | |
| 1236 l_int32 i, n, left, top, right, bot, w, h; | |
| 1237 | |
| 1238 if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah) | |
| 1239 return ERROR_INT("no output requested", __func__, 1); | |
| 1240 if (pptal) *pptal = NULL; | |
| 1241 if (pptat) *pptat = NULL; | |
| 1242 if (pptar) *pptar = NULL; | |
| 1243 if (pptab) *pptab = NULL; | |
| 1244 if (pptaw) *pptaw = NULL; | |
| 1245 if (pptah) *pptah = NULL; | |
| 1246 if (!boxa) | |
| 1247 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1248 if (!keepinvalid && boxaGetValidCount(boxa) == 0) | |
| 1249 return ERROR_INT("no valid boxes", __func__, 1); | |
| 1250 | |
| 1251 n = boxaGetCount(boxa); | |
| 1252 if (pptal) *pptal = ptaCreate(n); | |
| 1253 if (pptat) *pptat = ptaCreate(n); | |
| 1254 if (pptar) *pptar = ptaCreate(n); | |
| 1255 if (pptab) *pptab = ptaCreate(n); | |
| 1256 if (pptaw) *pptaw = ptaCreate(n); | |
| 1257 if (pptah) *pptah = ptaCreate(n); | |
| 1258 for (i = 0; i < n; i++) { | |
| 1259 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); | |
| 1260 if (!keepinvalid && (w <= 0 || h <= 0)) | |
| 1261 continue; | |
| 1262 right = left + w - 1; | |
| 1263 bot = top + h - 1; | |
| 1264 if (pptal) ptaAddPt(*pptal, i, left); | |
| 1265 if (pptat) ptaAddPt(*pptat, i, top); | |
| 1266 if (pptar) ptaAddPt(*pptar, i, right); | |
| 1267 if (pptab) ptaAddPt(*pptab, i, bot); | |
| 1268 if (pptaw) ptaAddPt(*pptaw, i, w); | |
| 1269 if (pptah) ptaAddPt(*pptah, i, h); | |
| 1270 } | |
| 1271 | |
| 1272 return 0; | |
| 1273 } | |
| 1274 | |
| 1275 | |
| 1276 /*! | |
| 1277 * \brief boxaExtractCorners() | |
| 1278 * | |
| 1279 * \param[in] boxa | |
| 1280 * \param[in] loc L_UPPER_LEFT, L_UPPER_RIGHT, L_LOWER_LEFT, | |
| 1281 * L_LOWER_RIGHT, L_BOX_CENTER | |
| 1282 * \return pta of requested coordinates, or NULL on error | |
| 1283 * | |
| 1284 * <pre> | |
| 1285 * Notes: | |
| 1286 * (1) Extracts (0,0) for invalid boxes. | |
| 1287 * (2) Other boxa --> pta functions are: | |
| 1288 * * boxaExtractAsPta(): allows extraction of any dimension | |
| 1289 * and/or side location, with each in a separate pta. | |
| 1290 * * boxaConvertToPta(): extracts sufficient number of corners | |
| 1291 * to allow reconstruction of the original boxa from the pta. | |
| 1292 * </pre> | |
| 1293 */ | |
| 1294 PTA * | |
| 1295 boxaExtractCorners(BOXA *boxa, | |
| 1296 l_int32 loc) | |
| 1297 { | |
| 1298 l_int32 i, n, left, top, right, bot, w, h; | |
| 1299 PTA *pta; | |
| 1300 | |
| 1301 if (!boxa) | |
| 1302 return (PTA *)ERROR_PTR("boxa not defined", __func__, NULL); | |
| 1303 if (loc != L_UPPER_LEFT && loc != L_UPPER_RIGHT && loc != L_LOWER_LEFT && | |
| 1304 loc != L_LOWER_RIGHT && loc != L_BOX_CENTER) | |
| 1305 return (PTA *)ERROR_PTR("invalid location", __func__, NULL); | |
| 1306 | |
| 1307 n = boxaGetCount(boxa); | |
| 1308 if ((pta = ptaCreate(n)) == NULL) | |
| 1309 return (PTA *)ERROR_PTR("pta not made", __func__, NULL); | |
| 1310 | |
| 1311 for (i = 0; i < n; i++) { | |
| 1312 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h); | |
| 1313 right = left + w - 1; | |
| 1314 bot = top + h - 1; | |
| 1315 if (w == 0 || h == 0) { /* invalid */ | |
| 1316 left = 0; | |
| 1317 top = 0; | |
| 1318 right = 0; | |
| 1319 bot = 0; | |
| 1320 } | |
| 1321 if (loc == L_UPPER_LEFT) | |
| 1322 ptaAddPt(pta, left, top); | |
| 1323 else if (loc == L_UPPER_RIGHT) | |
| 1324 ptaAddPt(pta, right, top); | |
| 1325 else if (loc == L_LOWER_LEFT) | |
| 1326 ptaAddPt(pta, left, bot); | |
| 1327 else if (loc == L_LOWER_RIGHT) | |
| 1328 ptaAddPt(pta, right, bot); | |
| 1329 else if (loc == L_BOX_CENTER) | |
| 1330 ptaAddPt(pta, (left + right) / 2, (top + bot) / 2); | |
| 1331 } | |
| 1332 | |
| 1333 return pta; | |
| 1334 } | |
| 1335 | |
| 1336 | |
| 1337 /*---------------------------------------------------------------------* | |
| 1338 * Boxa statistics * | |
| 1339 *---------------------------------------------------------------------*/ | |
| 1340 /*! | |
| 1341 * \brief boxaGetRankVals() | |
| 1342 * | |
| 1343 * \param[in] boxa | |
| 1344 * \param[in] fract use 0.0 for smallest, 1.0 for largest width and height | |
| 1345 * \param[out] px [optional] rank value of x (left side) | |
| 1346 * \param[out] py [optional] rank value of y (top side) | |
| 1347 * \param[out] pr [optional] rank value of right side | |
| 1348 * \param[out] pb [optional] rank value of bottom side | |
| 1349 * \param[out] pw [optional] rank value of width | |
| 1350 * \param[out] ph [optional] rank value of height | |
| 1351 * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes | |
| 1352 * | |
| 1353 * <pre> | |
| 1354 * Notes: | |
| 1355 * (1) This function does not assume that all boxes in the boxa are valid | |
| 1356 * (2) The six box parameters are sorted independently. | |
| 1357 * For rank order, the width and height are sorted in increasing | |
| 1358 * order. But what does it mean to sort x and y in "rank order"? | |
| 1359 * If the boxes are of comparable size and somewhat | |
| 1360 * aligned (e.g., from multiple images), it makes some sense | |
| 1361 * to give a "rank order" for x and y by sorting them in | |
| 1362 * decreasing order. (By the same argument, we choose to sort | |
| 1363 * the r and b sides in increasing order.) In general, the | |
| 1364 * interpretation of a rank order on x and y (or on r and b) | |
| 1365 * is highly application dependent. In summary: | |
| 1366 * ~ x and y are sorted in decreasing order | |
| 1367 * ~ r and b are sorted in increasing order | |
| 1368 * ~ w and h are sorted in increasing order | |
| 1369 * </pre> | |
| 1370 */ | |
| 1371 l_ok | |
| 1372 boxaGetRankVals(BOXA *boxa, | |
| 1373 l_float32 fract, | |
| 1374 l_int32 *px, | |
| 1375 l_int32 *py, | |
| 1376 l_int32 *pr, | |
| 1377 l_int32 *pb, | |
| 1378 l_int32 *pw, | |
| 1379 l_int32 *ph) | |
| 1380 { | |
| 1381 l_float32 xval, yval, rval, bval, wval, hval; | |
| 1382 NUMA *nax, *nay, *nar, *nab, *naw, *nah; | |
| 1383 | |
| 1384 if (px) *px = 0; | |
| 1385 if (py) *py = 0; | |
| 1386 if (pr) *pr = 0; | |
| 1387 if (pb) *pb = 0; | |
| 1388 if (pw) *pw = 0; | |
| 1389 if (ph) *ph = 0; | |
| 1390 if (!boxa) | |
| 1391 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1392 if (fract < 0.0 || fract > 1.0) | |
| 1393 return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1); | |
| 1394 if (boxaGetValidCount(boxa) == 0) | |
| 1395 return ERROR_INT("no valid boxes in boxa", __func__, 1); | |
| 1396 | |
| 1397 /* Use only the valid boxes */ | |
| 1398 boxaExtractAsNuma(boxa, &nax, &nay, &nar, &nab, &naw, &nah, 0); | |
| 1399 | |
| 1400 if (px) { | |
| 1401 numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval); | |
| 1402 *px = (l_int32)xval; | |
| 1403 } | |
| 1404 if (py) { | |
| 1405 numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval); | |
| 1406 *py = (l_int32)yval; | |
| 1407 } | |
| 1408 if (pr) { | |
| 1409 numaGetRankValue(nar, fract, NULL, 1, &rval); | |
| 1410 *pr = (l_int32)rval; | |
| 1411 } | |
| 1412 if (pb) { | |
| 1413 numaGetRankValue(nab, fract, NULL, 1, &bval); | |
| 1414 *pb = (l_int32)bval; | |
| 1415 } | |
| 1416 if (pw) { | |
| 1417 numaGetRankValue(naw, fract, NULL, 1, &wval); | |
| 1418 *pw = (l_int32)wval; | |
| 1419 } | |
| 1420 if (ph) { | |
| 1421 numaGetRankValue(nah, fract, NULL, 1, &hval); | |
| 1422 *ph = (l_int32)hval; | |
| 1423 } | |
| 1424 numaDestroy(&nax); | |
| 1425 numaDestroy(&nay); | |
| 1426 numaDestroy(&nar); | |
| 1427 numaDestroy(&nab); | |
| 1428 numaDestroy(&naw); | |
| 1429 numaDestroy(&nah); | |
| 1430 return 0; | |
| 1431 } | |
| 1432 | |
| 1433 | |
| 1434 /*! | |
| 1435 * \brief boxaGetMedianVals() | |
| 1436 * | |
| 1437 * \param[in] boxa | |
| 1438 * \param[out] px [optional] median value of x (left side) | |
| 1439 * \param[out] py [optional] median value of y (top side) | |
| 1440 * \param[out] pr [optional] median value of right side | |
| 1441 * \param[out] pb [optional] median value of bottom side | |
| 1442 * \param[out] pw [optional] median value of width | |
| 1443 * \param[out] ph [optional] median value of height | |
| 1444 * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes | |
| 1445 * | |
| 1446 * <pre> | |
| 1447 * Notes: | |
| 1448 * (1) See boxaGetRankVals() | |
| 1449 * </pre> | |
| 1450 */ | |
| 1451 l_ok | |
| 1452 boxaGetMedianVals(BOXA *boxa, | |
| 1453 l_int32 *px, | |
| 1454 l_int32 *py, | |
| 1455 l_int32 *pr, | |
| 1456 l_int32 *pb, | |
| 1457 l_int32 *pw, | |
| 1458 l_int32 *ph) | |
| 1459 { | |
| 1460 if (!boxa) | |
| 1461 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1462 if (boxaGetValidCount(boxa) == 0) | |
| 1463 return ERROR_INT("no valid boxes in boxa", __func__, 1); | |
| 1464 | |
| 1465 return boxaGetRankVals(boxa, 0.5, px, py, pr, pb, pw, ph); | |
| 1466 } | |
| 1467 | |
| 1468 | |
| 1469 /*! | |
| 1470 * \brief boxaGetAverageSize() | |
| 1471 * | |
| 1472 * \param[in] boxa | |
| 1473 * \param[out] pw [optional] average width | |
| 1474 * \param[out] ph [optional] average height | |
| 1475 * \return 0 if OK, 1 on error or if the boxa is empty | |
| 1476 */ | |
| 1477 l_ok | |
| 1478 boxaGetAverageSize(BOXA *boxa, | |
| 1479 l_float32 *pw, | |
| 1480 l_float32 *ph) | |
| 1481 { | |
| 1482 l_int32 i, n, bw, bh; | |
| 1483 l_float32 sumw, sumh; | |
| 1484 | |
| 1485 if (pw) *pw = 0.0; | |
| 1486 if (ph) *ph = 0.0; | |
| 1487 if (!boxa) | |
| 1488 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1489 if ((n = boxaGetCount(boxa)) == 0) | |
| 1490 return ERROR_INT("boxa is empty", __func__, 1); | |
| 1491 | |
| 1492 sumw = sumh = 0.0; | |
| 1493 for (i = 0; i < n; i++) { | |
| 1494 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); | |
| 1495 sumw += bw; | |
| 1496 sumh += bh; | |
| 1497 } | |
| 1498 | |
| 1499 if (pw) *pw = sumw / n; | |
| 1500 if (ph) *ph = sumh / n; | |
| 1501 return 0; | |
| 1502 } | |
| 1503 | |
| 1504 | |
| 1505 /*---------------------------------------------------------------------* | |
| 1506 * Other Boxaa functions * | |
| 1507 *---------------------------------------------------------------------*/ | |
| 1508 /*! | |
| 1509 * \brief boxaaGetExtent() | |
| 1510 * | |
| 1511 * \param[in] baa | |
| 1512 * \param[out] pw [optional] width | |
| 1513 * \param[out] ph [optional] height | |
| 1514 * \param[out] pbox [optional] minimum box containing all boxa | |
| 1515 * in boxaa | |
| 1516 * \param[out] pboxa [optional] boxa containing all boxes in each | |
| 1517 * boxa in the boxaa | |
| 1518 * \return 0 if OK, 1 on error | |
| 1519 * | |
| 1520 * <pre> | |
| 1521 * Notes: | |
| 1522 * (1) The returned w and h are the minimum size image | |
| 1523 * that would contain all boxes untranslated. | |
| 1524 * (2) Each box in the returned boxa is the minimum box required to | |
| 1525 * hold all the boxes in the respective boxa of baa. | |
| 1526 * (3) If there are no valid boxes in a boxa, the box corresponding | |
| 1527 * to its extent has all fields set to 0 (an invalid box). | |
| 1528 * </pre> | |
| 1529 */ | |
| 1530 l_ok | |
| 1531 boxaaGetExtent(BOXAA *baa, | |
| 1532 l_int32 *pw, | |
| 1533 l_int32 *ph, | |
| 1534 BOX **pbox, | |
| 1535 BOXA **pboxa) | |
| 1536 { | |
| 1537 l_int32 i, n, x, y, w, h, xmax, ymax, xmin, ymin, found; | |
| 1538 BOX *box1; | |
| 1539 BOXA *boxa, *boxa1; | |
| 1540 | |
| 1541 if (!pw && !ph && !pbox && !pboxa) | |
| 1542 return ERROR_INT("no ptrs defined", __func__, 1); | |
| 1543 if (pw) *pw = 0; | |
| 1544 if (ph) *ph = 0; | |
| 1545 if (pbox) *pbox = NULL; | |
| 1546 if (pboxa) *pboxa = NULL; | |
| 1547 if (!baa) | |
| 1548 return ERROR_INT("baa not defined", __func__, 1); | |
| 1549 | |
| 1550 n = boxaaGetCount(baa); | |
| 1551 if (n == 0) | |
| 1552 return ERROR_INT("no boxa in baa", __func__, 1); | |
| 1553 | |
| 1554 boxa = boxaCreate(n); | |
| 1555 xmax = ymax = 0; | |
| 1556 xmin = ymin = 100000000; | |
| 1557 found = FALSE; | |
| 1558 for (i = 0; i < n; i++) { | |
| 1559 boxa1 = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1560 boxaGetExtent(boxa1, NULL, NULL, &box1); | |
| 1561 boxaDestroy(&boxa1); | |
| 1562 boxGetGeometry(box1, &x, &y, &w, &h); | |
| 1563 if (w > 0 && h > 0) { /* a valid extent box */ | |
| 1564 found = TRUE; /* found at least one valid extent box */ | |
| 1565 xmin = L_MIN(xmin, x); | |
| 1566 ymin = L_MIN(ymin, y); | |
| 1567 xmax = L_MAX(xmax, x + w); | |
| 1568 ymax = L_MAX(ymax, y + h); | |
| 1569 } | |
| 1570 boxaAddBox(boxa, box1, L_INSERT); | |
| 1571 } | |
| 1572 if (found == FALSE) /* no valid extent boxes */ | |
| 1573 xmin = ymin = 0; | |
| 1574 | |
| 1575 if (pw) *pw = xmax; | |
| 1576 if (ph) *ph = ymax; | |
| 1577 if (pbox) | |
| 1578 *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin); | |
| 1579 if (pboxa) | |
| 1580 *pboxa = boxa; | |
| 1581 else | |
| 1582 boxaDestroy(&boxa); | |
| 1583 return 0; | |
| 1584 } | |
| 1585 | |
| 1586 | |
| 1587 /*! | |
| 1588 * \brief boxaaFlattenToBoxa() | |
| 1589 * | |
| 1590 * \param[in] baa | |
| 1591 * \param[out] pnaindex [optional] the boxa index in the baa | |
| 1592 * \param[in] copyflag L_COPY or L_CLONE | |
| 1593 * \return boxa, or NULL on error | |
| 1594 * | |
| 1595 * <pre> | |
| 1596 * Notes: | |
| 1597 * (1) This 'flattens' the baa to a boxa, taking the boxes in | |
| 1598 * order in the first boxa, then the second, etc. | |
| 1599 * (2) If a boxa is empty, we generate an invalid, placeholder box | |
| 1600 * of zero size. This is useful when converting from a baa | |
| 1601 * where each boxa has either 0 or 1 boxes, and it is necessary | |
| 1602 * to maintain a 1:1 correspondence between the initial | |
| 1603 * boxa array and the resulting box array. | |
| 1604 * (3) If &naindex is defined, we generate a Numa that gives, for | |
| 1605 * each box in the baa, the index of the boxa to which it belongs. | |
| 1606 * </pre> | |
| 1607 */ | |
| 1608 BOXA * | |
| 1609 boxaaFlattenToBoxa(BOXAA *baa, | |
| 1610 NUMA **pnaindex, | |
| 1611 l_int32 copyflag) | |
| 1612 { | |
| 1613 l_int32 i, j, m, n; | |
| 1614 BOXA *boxa, *boxat; | |
| 1615 BOX *box; | |
| 1616 NUMA *naindex = NULL; | |
| 1617 | |
| 1618 if (pnaindex) *pnaindex = NULL; | |
| 1619 if (!baa) | |
| 1620 return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL); | |
| 1621 if (copyflag != L_COPY && copyflag != L_CLONE) | |
| 1622 return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL); | |
| 1623 if (pnaindex) { | |
| 1624 naindex = numaCreate(0); | |
| 1625 *pnaindex = naindex; | |
| 1626 } | |
| 1627 | |
| 1628 n = boxaaGetCount(baa); | |
| 1629 boxa = boxaCreate(n); | |
| 1630 for (i = 0; i < n; i++) { | |
| 1631 boxat = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1632 m = boxaGetCount(boxat); | |
| 1633 if (m == 0) { /* placeholder box */ | |
| 1634 box = boxCreate(0, 0, 0, 0); | |
| 1635 boxaAddBox(boxa, box, L_INSERT); | |
| 1636 if (pnaindex) | |
| 1637 numaAddNumber(naindex, i); /* save 'row' number */ | |
| 1638 } else { | |
| 1639 for (j = 0; j < m; j++) { | |
| 1640 box = boxaGetBox(boxat, j, copyflag); | |
| 1641 boxaAddBox(boxa, box, L_INSERT); | |
| 1642 if (pnaindex) | |
| 1643 numaAddNumber(naindex, i); /* save 'row' number */ | |
| 1644 } | |
| 1645 } | |
| 1646 boxaDestroy(&boxat); | |
| 1647 } | |
| 1648 | |
| 1649 return boxa; | |
| 1650 } | |
| 1651 | |
| 1652 | |
| 1653 /*! | |
| 1654 * \brief boxaaFlattenAligned() | |
| 1655 * | |
| 1656 * \param[in] baa | |
| 1657 * \param[in] num number extracted from each | |
| 1658 * \param[in] fillerbox [optional] that fills if necessary | |
| 1659 * \param[in] copyflag L_COPY or L_CLONE | |
| 1660 * \return boxa, or NULL on error | |
| 1661 * | |
| 1662 * <pre> | |
| 1663 * Notes: | |
| 1664 * (1) This 'flattens' the baa to a boxa, taking the first %num | |
| 1665 * boxes from each boxa. | |
| 1666 * (2) In each boxa, if there are less than %num boxes, we preserve | |
| 1667 * the alignment between the input baa and the output boxa | |
| 1668 * by inserting one or more fillerbox(es) or, if %fillerbox == NULL, | |
| 1669 * one or more invalid placeholder boxes. | |
| 1670 * </pre> | |
| 1671 */ | |
| 1672 BOXA * | |
| 1673 boxaaFlattenAligned(BOXAA *baa, | |
| 1674 l_int32 num, | |
| 1675 BOX *fillerbox, | |
| 1676 l_int32 copyflag) | |
| 1677 { | |
| 1678 l_int32 i, j, m, n, mval, nshort; | |
| 1679 BOXA *boxat, *boxad; | |
| 1680 BOX *box; | |
| 1681 | |
| 1682 if (!baa) | |
| 1683 return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL); | |
| 1684 if (copyflag != L_COPY && copyflag != L_CLONE) | |
| 1685 return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL); | |
| 1686 | |
| 1687 n = boxaaGetCount(baa); | |
| 1688 boxad = boxaCreate(n); | |
| 1689 for (i = 0; i < n; i++) { | |
| 1690 boxat = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1691 m = boxaGetCount(boxat); | |
| 1692 mval = L_MIN(m, num); | |
| 1693 nshort = num - mval; | |
| 1694 for (j = 0; j < mval; j++) { /* take the first %num if possible */ | |
| 1695 box = boxaGetBox(boxat, j, copyflag); | |
| 1696 boxaAddBox(boxad, box, L_INSERT); | |
| 1697 } | |
| 1698 for (j = 0; j < nshort; j++) { /* add fillers if necessary */ | |
| 1699 if (fillerbox) { | |
| 1700 boxaAddBox(boxad, fillerbox, L_COPY); | |
| 1701 } else { | |
| 1702 box = boxCreate(0, 0, 0, 0); /* invalid placeholder box */ | |
| 1703 boxaAddBox(boxad, box, L_INSERT); | |
| 1704 } | |
| 1705 } | |
| 1706 boxaDestroy(&boxat); | |
| 1707 } | |
| 1708 | |
| 1709 return boxad; | |
| 1710 } | |
| 1711 | |
| 1712 | |
| 1713 /*! | |
| 1714 * \brief boxaEncapsulateAligned() | |
| 1715 * | |
| 1716 * \param[in] boxa | |
| 1717 * \param[in] num number put into each boxa in the baa | |
| 1718 * \param[in] copyflag L_COPY or L_CLONE | |
| 1719 * \return baa, or NULL on error | |
| 1720 * | |
| 1721 * <pre> | |
| 1722 * Notes: | |
| 1723 * (1) This puts %num boxes from the input %boxa into each of a | |
| 1724 * set of boxa within an output baa. | |
| 1725 * (2) This assumes that the boxes in %boxa are in sets of %num each. | |
| 1726 * </pre> | |
| 1727 */ | |
| 1728 BOXAA * | |
| 1729 boxaEncapsulateAligned(BOXA *boxa, | |
| 1730 l_int32 num, | |
| 1731 l_int32 copyflag) | |
| 1732 { | |
| 1733 l_int32 i, j, n, nbaa, index; | |
| 1734 BOX *box; | |
| 1735 BOXA *boxat; | |
| 1736 BOXAA *baa; | |
| 1737 | |
| 1738 if (!boxa) | |
| 1739 return (BOXAA *)ERROR_PTR("boxa not defined", __func__, NULL); | |
| 1740 if (copyflag != L_COPY && copyflag != L_CLONE) | |
| 1741 return (BOXAA *)ERROR_PTR("invalid copyflag", __func__, NULL); | |
| 1742 | |
| 1743 n = boxaGetCount(boxa); | |
| 1744 nbaa = n / num; | |
| 1745 if (num * nbaa != n) | |
| 1746 L_ERROR("inconsistent alignment: num doesn't divide n\n", __func__); | |
| 1747 baa = boxaaCreate(nbaa); | |
| 1748 for (i = 0, index = 0; i < nbaa; i++) { | |
| 1749 boxat = boxaCreate(num); | |
| 1750 for (j = 0; j < num; j++, index++) { | |
| 1751 box = boxaGetBox(boxa, index, copyflag); | |
| 1752 boxaAddBox(boxat, box, L_INSERT); | |
| 1753 } | |
| 1754 boxaaAddBoxa(baa, boxat, L_INSERT); | |
| 1755 } | |
| 1756 | |
| 1757 return baa; | |
| 1758 } | |
| 1759 | |
| 1760 | |
| 1761 /*! | |
| 1762 * \brief boxaaTranspose() | |
| 1763 * | |
| 1764 * \param[in] baas | |
| 1765 * \return baad, or NULL on error | |
| 1766 * | |
| 1767 * <pre> | |
| 1768 * Notes: | |
| 1769 * (1) If you think of a boxaa as a 2D array of boxes that is accessed | |
| 1770 * row major, then each row is represented by one of the boxa. | |
| 1771 * This function creates a new boxaa related to the input boxaa | |
| 1772 * as a column major traversal of the input boxaa. | |
| 1773 * (2) For example, if %baas has 2 boxa, each with 10 boxes, then | |
| 1774 * %baad will have 10 boxa, each with 2 boxes. | |
| 1775 * (3) Require for this transpose operation that each boxa in | |
| 1776 * %baas has the same number of boxes. This operation is useful | |
| 1777 * when the i-th boxes in each boxa are meaningfully related. | |
| 1778 * </pre> | |
| 1779 */ | |
| 1780 BOXAA * | |
| 1781 boxaaTranspose(BOXAA *baas) | |
| 1782 { | |
| 1783 l_int32 i, j, ny, nb, nbox; | |
| 1784 BOX *box; | |
| 1785 BOXA *boxa; | |
| 1786 BOXAA *baad; | |
| 1787 | |
| 1788 if (!baas) | |
| 1789 return (BOXAA *)ERROR_PTR("baas not defined", __func__, NULL); | |
| 1790 if ((ny = boxaaGetCount(baas)) == 0) | |
| 1791 return (BOXAA *)ERROR_PTR("baas empty", __func__, NULL); | |
| 1792 | |
| 1793 /* Make sure that each boxa in baas has the same number of boxes */ | |
| 1794 for (i = 0; i < ny; i++) { | |
| 1795 if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL) | |
| 1796 return (BOXAA *)ERROR_PTR("baas is missing a boxa", __func__, NULL); | |
| 1797 nb = boxaGetCount(boxa); | |
| 1798 boxaDestroy(&boxa); | |
| 1799 if (i == 0) | |
| 1800 nbox = nb; | |
| 1801 else if (nb != nbox) | |
| 1802 return (BOXAA *)ERROR_PTR("boxa are not all the same size", | |
| 1803 __func__, NULL); | |
| 1804 } | |
| 1805 | |
| 1806 /* baad[i][j] = baas[j][i] */ | |
| 1807 baad = boxaaCreate(nbox); | |
| 1808 for (i = 0; i < nbox; i++) { | |
| 1809 boxa = boxaCreate(ny); | |
| 1810 for (j = 0; j < ny; j++) { | |
| 1811 box = boxaaGetBox(baas, j, i, L_COPY); | |
| 1812 boxaAddBox(boxa, box, L_INSERT); | |
| 1813 } | |
| 1814 boxaaAddBoxa(baad, boxa, L_INSERT); | |
| 1815 } | |
| 1816 return baad; | |
| 1817 } | |
| 1818 | |
| 1819 | |
| 1820 /*! | |
| 1821 * \brief boxaaAlignBox() | |
| 1822 * | |
| 1823 * \param[in] baa | |
| 1824 * \param[in] box to be aligned with bext boxa in the baa, if possible | |
| 1825 * \param[in] delta amount by which consecutive components can miss | |
| 1826 * in overlap and still be included in the array | |
| 1827 * \param[out] pindex index of boxa with best overlap, or if none match, | |
| 1828 * this is the index of the next boxa to be generated | |
| 1829 * \return 0 if OK, 1 on error | |
| 1830 * | |
| 1831 * <pre> | |
| 1832 * Notes: | |
| 1833 * (1) This is not greedy. It finds the boxa whose vertical | |
| 1834 * extent has the closest overlap with the input box. | |
| 1835 * </pre> | |
| 1836 */ | |
| 1837 l_ok | |
| 1838 boxaaAlignBox(BOXAA *baa, | |
| 1839 BOX *box, | |
| 1840 l_int32 delta, | |
| 1841 l_int32 *pindex) | |
| 1842 { | |
| 1843 l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex; | |
| 1844 BOX *boxt; | |
| 1845 BOXA *boxa; | |
| 1846 | |
| 1847 if (pindex) *pindex = 0; | |
| 1848 if (!baa) | |
| 1849 return ERROR_INT("baa not defined", __func__, 1); | |
| 1850 if (!box) | |
| 1851 return ERROR_INT("box not defined", __func__, 1); | |
| 1852 if (!pindex) | |
| 1853 return ERROR_INT("&index not defined", __func__, 1); | |
| 1854 | |
| 1855 n = boxaaGetCount(baa); | |
| 1856 boxGetGeometry(box, NULL, &y, NULL, &h); | |
| 1857 maxovlp = -10000000; | |
| 1858 for (i = 0; i < n; i++) { | |
| 1859 boxa = boxaaGetBoxa(baa, i, L_CLONE); | |
| 1860 if ((m = boxaGetCount(boxa)) == 0) { | |
| 1861 boxaDestroy(&boxa); | |
| 1862 L_WARNING("no boxes in boxa\n", __func__); | |
| 1863 continue; | |
| 1864 } | |
| 1865 boxaGetExtent(boxa, NULL, NULL, &boxt); | |
| 1866 boxGetGeometry(boxt, NULL, &yt, NULL, &ht); | |
| 1867 boxDestroy(&boxt); | |
| 1868 boxaDestroy(&boxa); | |
| 1869 | |
| 1870 /* Overlap < 0 means the components do not overlap vertically */ | |
| 1871 if (yt >= y) | |
| 1872 ovlp = y + h - 1 - yt; | |
| 1873 else | |
| 1874 ovlp = yt + ht - 1 - y; | |
| 1875 if (ovlp > maxovlp) { | |
| 1876 maxovlp = ovlp; | |
| 1877 maxindex = i; | |
| 1878 } | |
| 1879 } | |
| 1880 | |
| 1881 if (maxovlp + delta >= 0) | |
| 1882 *pindex = maxindex; | |
| 1883 else | |
| 1884 *pindex = n; | |
| 1885 return 0; | |
| 1886 } |
