Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/boxfunc5.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file boxfunc5.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Boxa sequence fitting | |
| 32 * BOXA *boxaSmoothSequenceMedian() | |
| 33 * BOXA *boxaWindowedMedian() | |
| 34 * BOXA *boxaModifyWithBoxa() | |
| 35 * BOXA *boxaReconcilePairWidth() | |
| 36 * l_int32 boxaSizeConsistency() | |
| 37 * BOXA *boxaReconcileAllByMedian() | |
| 38 * BOXA *boxaReconcileSidesByMedian() | |
| 39 * static void adjustSidePlotName() -- debug | |
| 40 * BOXA *boxaReconcileSizeByMedian() | |
| 41 * l_int32 boxaPlotSides() [for debugging] | |
| 42 * l_int32 boxaPlotSizes() [for debugging] | |
| 43 * BOXA *boxaFillSequence() | |
| 44 * static l_int32 boxaFillAll() | |
| 45 * l_int32 boxaSizeVariation() | |
| 46 * l_int32 boxaMedianDimensions() | |
| 47 * </pre> | |
| 48 */ | |
| 49 | |
| 50 #ifdef HAVE_CONFIG_H | |
| 51 #include <config_auto.h> | |
| 52 #endif /* HAVE_CONFIG_H */ | |
| 53 | |
| 54 #include <math.h> | |
| 55 #include "allheaders.h" | |
| 56 | |
| 57 static l_int32 boxaFillAll(BOXA *boxa); | |
| 58 static void adjustSidePlotName(char *buf, size_t size, const char *preface, | |
| 59 l_int32 select); | |
| 60 | |
| 61 /*---------------------------------------------------------------------* | |
| 62 * Boxa sequence fitting * | |
| 63 *---------------------------------------------------------------------*/ | |
| 64 /*! | |
| 65 * \brief boxaSmoothSequenceMedian() | |
| 66 * | |
| 67 * \param[in] boxas source boxa | |
| 68 * \param[in] halfwin half-width of sliding window; used to find median | |
| 69 * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE, | |
| 70 * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, | |
| 71 * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX | |
| 72 * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF, | |
| 73 * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, | |
| 74 * L_USE_CAPPED_MAX | |
| 75 * \param[in] extrapixels pixels added on all sides (or subtracted | |
| 76 * if %extrapixels < 0) when using | |
| 77 * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF | |
| 78 * \param[in] debug 1 for debug output | |
| 79 * \return boxad fitted boxa, or NULL on error | |
| 80 * | |
| 81 * <pre> | |
| 82 * Notes: | |
| 83 * (1) The target width of the sliding window is 2 * %halfwin + 1. | |
| 84 * If necessary, this will be reduced by boxaWindowedMedian(). | |
| 85 * (2) This returns a modified version of %boxas by constructing | |
| 86 * for each input box a box that has been smoothed with windowed | |
| 87 * median filtering. The filtering is done to each of the | |
| 88 * box sides independently, and it is computed separately for | |
| 89 * sequences of even and odd boxes. The output %boxad is | |
| 90 * constructed from the input boxa and the filtered boxa, | |
| 91 * depending on %subflag. See boxaModifyWithBoxa() for | |
| 92 * details on the use of %subflag, %maxdiff and %extrapixels. | |
| 93 * (3) This is useful for removing noise separately in the even | |
| 94 * and odd sets, where the box edge locations can have | |
| 95 * discontinuities but otherwise vary roughly linearly within | |
| 96 * intervals of size %halfwin or larger. | |
| 97 * (4) If you don't need to handle even and odd sets separately, | |
| 98 * just do this: | |
| 99 * boxam = boxaWindowedMedian(boxas, halfwin, debug); | |
| 100 * boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff, | |
| 101 * extrapixels); | |
| 102 * boxaDestroy(&boxam); | |
| 103 * </pre> | |
| 104 */ | |
| 105 BOXA * | |
| 106 boxaSmoothSequenceMedian(BOXA *boxas, | |
| 107 l_int32 halfwin, | |
| 108 l_int32 subflag, | |
| 109 l_int32 maxdiff, | |
| 110 l_int32 extrapixels, | |
| 111 l_int32 debug) | |
| 112 { | |
| 113 l_int32 n; | |
| 114 BOXA *boxae, *boxao, *boxamede, *boxamedo, *boxame, *boxamo, *boxad; | |
| 115 PIX *pix1; | |
| 116 | |
| 117 if (!boxas) | |
| 118 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 119 if (halfwin <= 0) { | |
| 120 L_WARNING("halfwin must be > 0; returning copy\n", __func__); | |
| 121 return boxaCopy(boxas, L_COPY); | |
| 122 } | |
| 123 if (maxdiff < 0) { | |
| 124 L_WARNING("maxdiff must be >= 0; returning copy\n", __func__); | |
| 125 return boxaCopy(boxas, L_COPY); | |
| 126 } | |
| 127 if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE && | |
| 128 subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF && | |
| 129 subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) { | |
| 130 L_WARNING("invalid subflag; returning copy\n", __func__); | |
| 131 return boxaCopy(boxas, L_COPY); | |
| 132 } | |
| 133 if ((n = boxaGetCount(boxas)) < 6) { | |
| 134 L_WARNING("need at least 6 boxes; returning copy\n", __func__); | |
| 135 return boxaCopy(boxas, L_COPY); | |
| 136 } | |
| 137 | |
| 138 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); | |
| 139 if (debug) { | |
| 140 lept_mkdir("lept/smooth"); | |
| 141 boxaWriteDebug("/tmp/lept/smooth/boxae.ba", boxae); | |
| 142 boxaWriteDebug("/tmp/lept/smooth/boxao.ba", boxao); | |
| 143 } | |
| 144 | |
| 145 boxamede = boxaWindowedMedian(boxae, halfwin, debug); | |
| 146 boxamedo = boxaWindowedMedian(boxao, halfwin, debug); | |
| 147 if (debug) { | |
| 148 boxaWriteDebug("/tmp/lept/smooth/boxamede.ba", boxamede); | |
| 149 boxaWriteDebug("/tmp/lept/smooth/boxamedo.ba", boxamedo); | |
| 150 } | |
| 151 | |
| 152 boxame = boxaModifyWithBoxa(boxae, boxamede, subflag, maxdiff, extrapixels); | |
| 153 boxamo = boxaModifyWithBoxa(boxao, boxamedo, subflag, maxdiff, extrapixels); | |
| 154 if (debug) { | |
| 155 boxaWriteDebug("/tmp/lept/smooth/boxame.ba", boxame); | |
| 156 boxaWriteDebug("/tmp/lept/smooth/boxamo.ba", boxamo); | |
| 157 } | |
| 158 | |
| 159 boxad = boxaMergeEvenOdd(boxame, boxamo, 0); | |
| 160 if (debug) { | |
| 161 boxaPlotSides(boxas, NULL, NULL, NULL, NULL, NULL, &pix1); | |
| 162 pixWrite("/tmp/lept/smooth/plotsides1.png", pix1, IFF_PNG); | |
| 163 pixDestroy(&pix1); | |
| 164 boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1); | |
| 165 pixWrite("/tmp/lept/smooth/plotsides2.png", pix1, IFF_PNG); | |
| 166 pixDestroy(&pix1); | |
| 167 boxaPlotSizes(boxas, NULL, NULL, NULL, &pix1); | |
| 168 pixWrite("/tmp/lept/smooth/plotsizes1.png", pix1, IFF_PNG); | |
| 169 pixDestroy(&pix1); | |
| 170 boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1); | |
| 171 pixWrite("/tmp/lept/smooth/plotsizes2.png", pix1, IFF_PNG); | |
| 172 pixDestroy(&pix1); | |
| 173 } | |
| 174 | |
| 175 boxaDestroy(&boxae); | |
| 176 boxaDestroy(&boxao); | |
| 177 boxaDestroy(&boxamede); | |
| 178 boxaDestroy(&boxamedo); | |
| 179 boxaDestroy(&boxame); | |
| 180 boxaDestroy(&boxamo); | |
| 181 return boxad; | |
| 182 } | |
| 183 | |
| 184 | |
| 185 /*! | |
| 186 * \brief boxaWindowedMedian() | |
| 187 * | |
| 188 * \param[in] boxas source boxa | |
| 189 * \param[in] halfwin half width of window over which the median is found | |
| 190 * \param[in] debug 1 for debug output | |
| 191 * \return boxad smoothed boxa, or NULL on error | |
| 192 * | |
| 193 * <pre> | |
| 194 * Notes: | |
| 195 * (1) This finds a set of boxes (boxad) where each edge of each box is | |
| 196 * a windowed median smoothed value to the edges of the | |
| 197 * input set of boxes (boxas). | |
| 198 * (2) Invalid input boxes are filled from nearby ones. | |
| 199 * (3) The returned boxad can then be used in boxaModifyWithBoxa() | |
| 200 * to selectively change the boxes in the source boxa. | |
| 201 * </pre> | |
| 202 */ | |
| 203 BOXA * | |
| 204 boxaWindowedMedian(BOXA *boxas, | |
| 205 l_int32 halfwin, | |
| 206 l_int32 debug) | |
| 207 { | |
| 208 l_int32 n, i, left, top, right, bot; | |
| 209 BOX *box; | |
| 210 BOXA *boxaf, *boxad; | |
| 211 NUMA *nal, *nat, *nar, *nab, *naml, *namt, *namr, *namb; | |
| 212 PIX *pix1; | |
| 213 | |
| 214 if (!boxas) | |
| 215 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 216 if ((n = boxaGetCount(boxas)) < 3) { | |
| 217 L_WARNING("less than 3 boxes; returning a copy\n", __func__); | |
| 218 return boxaCopy(boxas, L_COPY); | |
| 219 } | |
| 220 if (halfwin <= 0) { | |
| 221 L_WARNING("halfwin must be > 0; returning copy\n", __func__); | |
| 222 return boxaCopy(boxas, L_COPY); | |
| 223 } | |
| 224 | |
| 225 /* Fill invalid boxes in the input sequence */ | |
| 226 if ((boxaf = boxaFillSequence(boxas, L_USE_ALL_BOXES, debug)) == NULL) | |
| 227 return (BOXA *)ERROR_PTR("filled boxa not made", __func__, NULL); | |
| 228 | |
| 229 /* Get the windowed median output from each of the sides */ | |
| 230 boxaExtractAsNuma(boxaf, &nal, &nat, &nar, &nab, NULL, NULL, 0); | |
| 231 naml = numaWindowedMedian(nal, halfwin); | |
| 232 namt = numaWindowedMedian(nat, halfwin); | |
| 233 namr = numaWindowedMedian(nar, halfwin); | |
| 234 namb = numaWindowedMedian(nab, halfwin); | |
| 235 | |
| 236 n = boxaGetCount(boxaf); | |
| 237 boxad = boxaCreate(n); | |
| 238 for (i = 0; i < n; i++) { | |
| 239 numaGetIValue(naml, i, &left); | |
| 240 numaGetIValue(namt, i, &top); | |
| 241 numaGetIValue(namr, i, &right); | |
| 242 numaGetIValue(namb, i, &bot); | |
| 243 box = boxCreate(left, top, right - left + 1, bot - top + 1); | |
| 244 boxaAddBox(boxad, box, L_INSERT); | |
| 245 } | |
| 246 | |
| 247 if (debug) { | |
| 248 lept_mkdir("lept/windowed"); | |
| 249 boxaPlotSides(boxaf, NULL, NULL, NULL, NULL, NULL, &pix1); | |
| 250 pixWrite("/tmp/lept/windowed/plotsides1.png", pix1, IFF_PNG); | |
| 251 pixDestroy(&pix1); | |
| 252 boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1); | |
| 253 pixWrite("/tmp/lept/windowed/plotsides2.png", pix1, IFF_PNG); | |
| 254 pixDestroy(&pix1); | |
| 255 boxaPlotSizes(boxaf, NULL, NULL, NULL, &pix1); | |
| 256 pixWrite("/tmp/lept/windowed/plotsizes1.png", pix1, IFF_PNG); | |
| 257 pixDestroy(&pix1); | |
| 258 boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1); | |
| 259 pixWrite("/tmp/lept/windowed/plotsizes2.png", pix1, IFF_PNG); | |
| 260 pixDestroy(&pix1); | |
| 261 } | |
| 262 | |
| 263 boxaDestroy(&boxaf); | |
| 264 numaDestroy(&nal); | |
| 265 numaDestroy(&nat); | |
| 266 numaDestroy(&nar); | |
| 267 numaDestroy(&nab); | |
| 268 numaDestroy(&naml); | |
| 269 numaDestroy(&namt); | |
| 270 numaDestroy(&namr); | |
| 271 numaDestroy(&namb); | |
| 272 return boxad; | |
| 273 } | |
| 274 | |
| 275 | |
| 276 /*! | |
| 277 * \brief boxaModifyWithBoxa() | |
| 278 * | |
| 279 * \param[in] boxas | |
| 280 * \param[in] boxam boxa with boxes used to modify those in boxas | |
| 281 * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE, | |
| 282 * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF, | |
| 283 * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX | |
| 284 * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF, | |
| 285 * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN, | |
| 286 * L_USE_CAPPED_MAX | |
| 287 * \param[in] extrapixels pixels added on all sides (or subtracted | |
| 288 * if %extrapixels < 0) when using | |
| 289 * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF | |
| 290 * \return boxad result after adjusting boxes in boxas, or NULL on error. | |
| 291 * | |
| 292 * <pre> | |
| 293 * Notes: | |
| 294 * (1) This takes two input boxa (boxas, boxam) and constructs boxad, | |
| 295 * where each box in boxad is generated from the corresponding | |
| 296 * boxes in boxas and boxam. The rule for constructing each | |
| 297 * output box depends on %subflag and %maxdiff. Let boxs be | |
| 298 * a box from %boxas and boxm be a box from %boxam. | |
| 299 * * If %subflag == L_USE_MINSIZE: the output box is the intersection | |
| 300 * of the two input boxes. | |
| 301 * * If %subflag == L_USE_MAXSIZE: the output box is the union of the | |
| 302 * two input boxes; i.e., the minimum bounding rectangle for the | |
| 303 * two input boxes. | |
| 304 * * If %subflag == L_SUB_ON_LOC_DIFF: each side of the output box | |
| 305 * is found separately from the corresponding side of boxs and boxm. | |
| 306 * Use the boxm side, expanded by %extrapixels, if greater than | |
| 307 * %maxdiff pixels from the boxs side. | |
| 308 * * If %subflag == L_SUB_ON_SIZE_DIFF: the sides of the output box | |
| 309 * are determined in pairs from the width and height of boxs | |
| 310 * and boxm. If the boxm width differs by more than %maxdiff | |
| 311 * pixels from boxs, use the boxm left and right sides, | |
| 312 * expanded by %extrapixels. Ditto for the height difference. | |
| 313 * For the last two flags, each side of the output box is found | |
| 314 * separately from the corresponding side of boxs and boxm, | |
| 315 * according to these rules, where "smaller"("bigger") mean in a | |
| 316 * direction that decreases(increases) the size of the output box: | |
| 317 * * If %subflag == L_USE_CAPPED_MIN: use the Min of boxm | |
| 318 * with the Max of (boxs, boxm +- %maxdiff), where the sign | |
| 319 * is adjusted to make the box smaller (e.g., use "+" on left side). | |
| 320 * * If %subflag == L_USE_CAPPED_MAX: use the Max of boxm | |
| 321 * with the Min of (boxs, boxm +- %maxdiff), where the sign | |
| 322 * is adjusted to make the box bigger (e.g., use "-" on left side). | |
| 323 * Use of the last 2 flags is further explained in (3) and (4). | |
| 324 * (2) boxas and boxam must be the same size. If boxam == NULL, | |
| 325 * this returns a copy of boxas with a warning. | |
| 326 * (3) If %subflag == L_SUB_ON_LOC_DIFF, use boxm for each side | |
| 327 * where the corresponding sides differ by more than %maxdiff. | |
| 328 * Two extreme cases: | |
| 329 * (a) set %maxdiff == 0 to use only values from boxam in boxad. | |
| 330 * (b) set %maxdiff == 10000 to ignore all values from boxam; | |
| 331 * then boxad will be the same as boxas. | |
| 332 * (4) If %subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller; | |
| 333 * use boxs if boxs is bigger than boxm by an amount up to %maxdiff; | |
| 334 * and use boxm +- %maxdiff (the 'capped' value) if boxs is | |
| 335 * bigger than boxm by an amount larger than %maxdiff. | |
| 336 * Similarly, with interchange of Min/Max and sign of %maxdiff, | |
| 337 * for %subflag == L_USE_CAPPED_MIN. | |
| 338 * (5) If either of corresponding boxes in boxas and boxam is invalid, | |
| 339 * an invalid box is copied to the result. | |
| 340 * (6) Typical input for boxam may be the output of boxaLinearFit(). | |
| 341 * where outliers have been removed and each side is LS fit to a line. | |
| 342 * (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(), | |
| 343 * this uses two boxes and does not specify target dimensions. | |
| 344 * </pre> | |
| 345 */ | |
| 346 BOXA * | |
| 347 boxaModifyWithBoxa(BOXA *boxas, | |
| 348 BOXA *boxam, | |
| 349 l_int32 subflag, | |
| 350 l_int32 maxdiff, | |
| 351 l_int32 extrapixels) | |
| 352 { | |
| 353 l_int32 n, i, ls, ts, rs, bs, ws, hs, lm, tm, rm, bm, wm, hm, ld, td, rd, bd; | |
| 354 BOX *boxs, *boxm, *boxd, *boxempty; | |
| 355 BOXA *boxad; | |
| 356 | |
| 357 if (!boxas) | |
| 358 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 359 if (!boxam) { | |
| 360 L_WARNING("boxam not defined; returning copy", __func__); | |
| 361 return boxaCopy(boxas, L_COPY); | |
| 362 } | |
| 363 if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE && | |
| 364 subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF && | |
| 365 subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) { | |
| 366 L_WARNING("invalid subflag; returning copy", __func__); | |
| 367 return boxaCopy(boxas, L_COPY); | |
| 368 } | |
| 369 n = boxaGetCount(boxas); | |
| 370 if (n != boxaGetCount(boxam)) { | |
| 371 L_WARNING("boxas and boxam sizes differ; returning copy", __func__); | |
| 372 return boxaCopy(boxas, L_COPY); | |
| 373 } | |
| 374 | |
| 375 boxad = boxaCreate(n); | |
| 376 boxempty = boxCreate(0, 0, 0, 0); /* placeholders */ | |
| 377 for (i = 0; i < n; i++) { | |
| 378 boxs = boxaGetValidBox(boxas, i, L_CLONE); | |
| 379 boxm = boxaGetValidBox(boxam, i, L_CLONE); | |
| 380 if (!boxs || !boxm) { | |
| 381 boxaAddBox(boxad, boxempty, L_COPY); | |
| 382 } else { | |
| 383 boxGetGeometry(boxs, &ls, &ts, &ws, &hs); | |
| 384 boxGetGeometry(boxm, &lm, &tm, &wm, &hm); | |
| 385 rs = ls + ws - 1; | |
| 386 bs = ts + hs - 1; | |
| 387 rm = lm + wm - 1; | |
| 388 bm = tm + hm - 1; | |
| 389 if (subflag == L_USE_MINSIZE) { | |
| 390 ld = L_MAX(ls, lm); | |
| 391 rd = L_MIN(rs, rm); | |
| 392 td = L_MAX(ts, tm); | |
| 393 bd = L_MIN(bs, bm); | |
| 394 } else if (subflag == L_USE_MAXSIZE) { | |
| 395 ld = L_MIN(ls, lm); | |
| 396 rd = L_MAX(rs, rm); | |
| 397 td = L_MIN(ts, tm); | |
| 398 bd = L_MAX(bs, bm); | |
| 399 } else if (subflag == L_SUB_ON_LOC_DIFF) { | |
| 400 ld = (L_ABS(lm - ls) <= maxdiff) ? ls : lm - extrapixels; | |
| 401 td = (L_ABS(tm - ts) <= maxdiff) ? ts : tm - extrapixels; | |
| 402 rd = (L_ABS(rm - rs) <= maxdiff) ? rs : rm + extrapixels; | |
| 403 bd = (L_ABS(bm - bs) <= maxdiff) ? bs : bm + extrapixels; | |
| 404 } else if (subflag == L_SUB_ON_SIZE_DIFF) { | |
| 405 ld = (L_ABS(wm - ws) <= maxdiff) ? ls : lm - extrapixels; | |
| 406 td = (L_ABS(hm - hs) <= maxdiff) ? ts : tm - extrapixels; | |
| 407 rd = (L_ABS(wm - ws) <= maxdiff) ? rs : rm + extrapixels; | |
| 408 bd = (L_ABS(hm - hs) <= maxdiff) ? bs : bm + extrapixels; | |
| 409 } else if (subflag == L_USE_CAPPED_MIN) { | |
| 410 ld = L_MAX(lm, L_MIN(ls, lm + maxdiff)); | |
| 411 td = L_MAX(tm, L_MIN(ts, tm + maxdiff)); | |
| 412 rd = L_MIN(rm, L_MAX(rs, rm - maxdiff)); | |
| 413 bd = L_MIN(bm, L_MAX(bs, bm - maxdiff)); | |
| 414 } else { /* subflag == L_USE_CAPPED_MAX */ | |
| 415 ld = L_MIN(lm, L_MAX(ls, lm - maxdiff)); | |
| 416 td = L_MIN(tm, L_MAX(ts, tm - maxdiff)); | |
| 417 rd = L_MAX(rm, L_MIN(rs, rm + maxdiff)); | |
| 418 bd = L_MAX(bm, L_MIN(bs, bm + maxdiff)); | |
| 419 } | |
| 420 boxd = boxCreate(ld, td, rd - ld + 1, bd - td + 1); | |
| 421 boxaAddBox(boxad, boxd, L_INSERT); | |
| 422 } | |
| 423 boxDestroy(&boxs); | |
| 424 boxDestroy(&boxm); | |
| 425 } | |
| 426 boxDestroy(&boxempty); | |
| 427 | |
| 428 return boxad; | |
| 429 } | |
| 430 | |
| 431 | |
| 432 /*! | |
| 433 * \brief boxaReconcilePairWidth() | |
| 434 * | |
| 435 * \param[in] boxas | |
| 436 * \param[in] delw threshold on adjacent width difference | |
| 437 * \param[in] op L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX | |
| 438 * \param[in] factor > 0.0, typically near 1.0 | |
| 439 * \param[in] na [optional] indicator array allowing change | |
| 440 * \return boxad adjusted, or a copy of boxas on error | |
| 441 * | |
| 442 * <pre> | |
| 443 * Notes: | |
| 444 * (1) This reconciles differences in the width of adjacent boxes, | |
| 445 * by moving one side of one of the boxes in each pair. | |
| 446 * If the widths in the pair differ by more than some | |
| 447 * threshold, move either the left side for even boxes or | |
| 448 * the right side for odd boxes, depending on if we're choosing | |
| 449 * the min or max. If choosing min, the width of the max is | |
| 450 * set to factor * (width of min). If choosing max, the width | |
| 451 * of the min is set to factor * (width of max). | |
| 452 * (2) If %na exists, it is an indicator array corresponding to the | |
| 453 * boxes in %boxas. If %na != NULL, only boxes with an | |
| 454 * indicator value of 1 are allowed to adjust; otherwise, | |
| 455 * all boxes can adjust. | |
| 456 * (3) Typical input might be the output of boxaSmoothSequenceMedian(), | |
| 457 * where even and odd boxa have been independently regulated. | |
| 458 * </pre> | |
| 459 */ | |
| 460 BOXA * | |
| 461 boxaReconcilePairWidth(BOXA *boxas, | |
| 462 l_int32 delw, | |
| 463 l_int32 op, | |
| 464 l_float32 factor, | |
| 465 NUMA *na) | |
| 466 { | |
| 467 l_int32 i, ne, no, nmin, xe, we, xo, wo, inde, indo, x, w; | |
| 468 BOX *boxe, *boxo; | |
| 469 BOXA *boxae, *boxao, *boxad; | |
| 470 | |
| 471 if (!boxas) | |
| 472 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 473 if (factor <= 0.0) { | |
| 474 L_WARNING("invalid factor; setting to 1.0\n", __func__); | |
| 475 factor = 1.0; | |
| 476 } | |
| 477 | |
| 478 /* Taking the boxes in pairs, if the difference in width reaches | |
| 479 * the threshold %delw, adjust the left or right side of one | |
| 480 * of the pair. */ | |
| 481 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); | |
| 482 ne = boxaGetCount(boxae); | |
| 483 no = boxaGetCount(boxao); | |
| 484 nmin = L_MIN(ne, no); | |
| 485 for (i = 0; i < nmin; i++) { | |
| 486 /* Set indicator values */ | |
| 487 if (na) { | |
| 488 numaGetIValue(na, 2 * i, &inde); | |
| 489 numaGetIValue(na, 2 * i + 1, &indo); | |
| 490 } else { | |
| 491 inde = indo = 1; | |
| 492 } | |
| 493 if (inde == 0 && indo == 0) continue; | |
| 494 | |
| 495 boxe = boxaGetBox(boxae, i, L_CLONE); | |
| 496 boxo = boxaGetBox(boxao, i, L_CLONE); | |
| 497 boxGetGeometry(boxe, &xe, NULL, &we, NULL); | |
| 498 boxGetGeometry(boxo, &xo, NULL, &wo, NULL); | |
| 499 if (we == 0 || wo == 0) { /* if either is invalid; skip */ | |
| 500 boxDestroy(&boxe); | |
| 501 boxDestroy(&boxo); | |
| 502 continue; | |
| 503 } else if (L_ABS(we - wo) > delw) { | |
| 504 if (op == L_ADJUST_CHOOSE_MIN) { | |
| 505 if (we > wo && inde == 1) { | |
| 506 /* move left side of even to the right */ | |
| 507 w = factor * wo; | |
| 508 x = xe + (we - w); | |
| 509 boxSetGeometry(boxe, x, -1, w, -1); | |
| 510 } else if (we < wo && indo == 1) { | |
| 511 /* move right side of odd to the left */ | |
| 512 w = factor * we; | |
| 513 boxSetGeometry(boxo, -1, -1, w, -1); | |
| 514 } | |
| 515 } else { /* maximize width */ | |
| 516 if (we < wo && inde == 1) { | |
| 517 /* move left side of even to the left */ | |
| 518 w = factor * wo; | |
| 519 x = L_MAX(0, xe + (we - w)); | |
| 520 w = we + (xe - x); /* covers both cases for the max */ | |
| 521 boxSetGeometry(boxe, x, -1, w, -1); | |
| 522 } else if (we > wo && indo == 1) { | |
| 523 /* move right side of odd to the right */ | |
| 524 w = factor * we; | |
| 525 boxSetGeometry(boxo, -1, -1, w, -1); | |
| 526 } | |
| 527 } | |
| 528 } | |
| 529 boxDestroy(&boxe); | |
| 530 boxDestroy(&boxo); | |
| 531 } | |
| 532 | |
| 533 boxad = boxaMergeEvenOdd(boxae, boxao, 0); | |
| 534 boxaDestroy(&boxae); | |
| 535 boxaDestroy(&boxao); | |
| 536 return boxad; | |
| 537 } | |
| 538 | |
| 539 | |
| 540 /*! | |
| 541 * \brief boxaSizeConsistency() | |
| 542 * | |
| 543 * \param[in] boxas of size >= 10 | |
| 544 * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT | |
| 545 * \param[in] threshp threshold for pairwise fractional variation | |
| 546 * \param[in] threshm threshold for fractional variation from median | |
| 547 * \param[out] pfvarp [optional] average fractional pairwise variation | |
| 548 * \param[out] pfvarm [optional] average fractional median variation | |
| 549 * \param[out] psame decision for uniformity of page size (1, 0, -1) | |
| 550 * | |
| 551 * <pre> | |
| 552 * Notes: | |
| 553 * (1) This evaluates a boxa for particular types of dimensional | |
| 554 * variation. Select either width or height variation. Then | |
| 555 * it returns two numbers: one is based on pairwise (even/odd) | |
| 556 * variation; the other is based on the average variation | |
| 557 * from the boxa median. | |
| 558 * (2) For the pairwise variation, get the fraction of the absolute | |
| 559 * difference in dimension of each pair of boxes, and take | |
| 560 * the average value. The median variation is simply the | |
| 561 * the average of the fractional deviation from the median | |
| 562 * of all the boxes. | |
| 563 * (3) Use 0 for default values of %threshp and %threshm. They are | |
| 564 * threshp: 0.02 | |
| 565 * threshm: 0.015 | |
| 566 * (4) The intended application is that the boxes are a sequence of | |
| 567 * page regions in a book scan, and we calculate two numbers | |
| 568 * that can give an indication if the pages are approximately | |
| 569 * the same size. The pairwise variation should be small if | |
| 570 * the boxes are correctly calculated. If there are a | |
| 571 * significant number of random or systematic outliers, the | |
| 572 * pairwise variation will be large, and no decision will be made | |
| 573 * (i.e., return same == -1). Here are the possible outcomes: | |
| 574 * Pairwise Var Median Var Decision | |
| 575 * ------------ ---------- -------- | |
| 576 * small small same size (1) | |
| 577 * small large different size (0) | |
| 578 * large small/large unknown (-1) | |
| 579 * </pre> | |
| 580 */ | |
| 581 l_ok | |
| 582 boxaSizeConsistency(BOXA *boxas, | |
| 583 l_int32 type, | |
| 584 l_float32 threshp, | |
| 585 l_float32 threshm, | |
| 586 l_float32 *pfvarp, | |
| 587 l_float32 *pfvarm, | |
| 588 l_int32 *psame) | |
| 589 { | |
| 590 l_int32 i, n, bw1, bh1, bw2, bh2, npairs; | |
| 591 l_float32 ave, fdiff, sumdiff, med, fvarp, fvarm; | |
| 592 NUMA *na1; | |
| 593 | |
| 594 if (pfvarp) *pfvarp = 0.0; | |
| 595 if (pfvarm) *pfvarm = 0.0; | |
| 596 if (!psame) | |
| 597 return ERROR_INT("&same not defined", __func__, 1); | |
| 598 *psame = -1; | |
| 599 if (!boxas) | |
| 600 return ERROR_INT("boxas not defined", __func__, 1); | |
| 601 if (boxaGetValidCount(boxas) < 6) | |
| 602 return ERROR_INT("need a least 6 valid boxes", __func__, 1); | |
| 603 if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT) | |
| 604 return ERROR_INT("invalid type", __func__, 1); | |
| 605 if (threshp < 0.0 || threshp >= 0.5) | |
| 606 return ERROR_INT("invalid threshp", __func__, 1); | |
| 607 if (threshm < 0.0 || threshm >= 0.5) | |
| 608 return ERROR_INT("invalid threshm", __func__, 1); | |
| 609 if (threshp == 0.0) threshp = 0.02f; | |
| 610 if (threshm == 0.0) threshm = 0.015f; | |
| 611 | |
| 612 /* Evaluate pairwise variation */ | |
| 613 n = boxaGetCount(boxas); | |
| 614 na1 = numaCreate(0); | |
| 615 for (i = 0, npairs = 0, sumdiff = 0; i < n - 1; i += 2) { | |
| 616 boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1); | |
| 617 boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2); | |
| 618 if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0) | |
| 619 continue; | |
| 620 npairs++; | |
| 621 if (type == L_CHECK_WIDTH) { | |
| 622 ave = (bw1 + bw2) / 2.0; | |
| 623 fdiff = L_ABS(bw1 - bw2) / ave; | |
| 624 numaAddNumber(na1, bw1); | |
| 625 numaAddNumber(na1, bw2); | |
| 626 } else { /* type == L_CHECK_HEIGHT) */ | |
| 627 ave = (bh1 + bh2) / 2.0; | |
| 628 fdiff = L_ABS(bh1 - bh2) / ave; | |
| 629 numaAddNumber(na1, bh1); | |
| 630 numaAddNumber(na1, bh2); | |
| 631 } | |
| 632 sumdiff += fdiff; | |
| 633 } | |
| 634 fvarp = sumdiff / npairs; | |
| 635 if (pfvarp) *pfvarp = fvarp; | |
| 636 | |
| 637 /* Evaluate the average abs fractional deviation from the median */ | |
| 638 numaGetMedian(na1, &med); | |
| 639 if (med == 0.0) { | |
| 640 L_WARNING("median value is 0\n", __func__); | |
| 641 } else { | |
| 642 numaGetMeanDevFromMedian(na1, med, &fvarm); | |
| 643 fvarm /= med; | |
| 644 if (pfvarm) *pfvarm = fvarm; | |
| 645 } | |
| 646 numaDestroy(&na1); | |
| 647 | |
| 648 /* Make decision */ | |
| 649 if (fvarp < threshp && fvarm < threshm) | |
| 650 *psame = 1; | |
| 651 else if (fvarp < threshp && fvarm > threshm) | |
| 652 *psame = 0; | |
| 653 else | |
| 654 *psame = -1; /* unknown */ | |
| 655 return 0; | |
| 656 } | |
| 657 | |
| 658 | |
| 659 /*! | |
| 660 * \brief boxaReconcileAllByMedian() | |
| 661 * | |
| 662 * \param[in] boxas containing at least 6 valid boxes | |
| 663 * \param[in] select1 L_ADJUST_LEFT_AND_RIGHT or L_ADJUST_SKIP | |
| 664 * \param[in] select2 L_ADJUST_TOP_AND_BOT or L_ADJUST_SKIP | |
| 665 * \param[in] thresh threshold number of pixels to make adjustment | |
| 666 * \param[in] extra extra pixels to add beyond median value | |
| 667 * \param[in] pixadb use NULL to skip debug output | |
| 668 * \return boxad possibly adjusted from boxas; a copy of boxas on error | |
| 669 * | |
| 670 * <pre> | |
| 671 * Notes: | |
| 672 * (1) This uses boxaReconcileSidesByMedian() to reconcile | |
| 673 * the left-and-right and/or top-and-bottom sides of the | |
| 674 * even and odd boxes, separately. | |
| 675 * (2) See boxaReconcileSidesByMedian() for use of %thresh and %extra. | |
| 676 * (3) If all box sides are within %thresh of the median value, | |
| 677 * the returned box will be identical to %boxas. | |
| 678 * </pre> | |
| 679 */ | |
| 680 BOXA * | |
| 681 boxaReconcileAllByMedian(BOXA *boxas, | |
| 682 l_int32 select1, | |
| 683 l_int32 select2, | |
| 684 l_int32 thresh, | |
| 685 l_int32 extra, | |
| 686 PIXA *pixadb) | |
| 687 { | |
| 688 l_int32 ncols; | |
| 689 BOXA *boxa1e, *boxa1o, *boxa2e, *boxa2o, *boxa3e, *boxa3o, *boxad; | |
| 690 PIX *pix1; | |
| 691 | |
| 692 if (!boxas) | |
| 693 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 694 if (select1 != L_ADJUST_LEFT_AND_RIGHT && select1 != L_ADJUST_SKIP) { | |
| 695 L_WARNING("invalid select1; returning copy\n", __func__); | |
| 696 return boxaCopy(boxas, L_COPY); | |
| 697 } | |
| 698 if (select2 != L_ADJUST_TOP_AND_BOT && select2 != L_ADJUST_SKIP) { | |
| 699 L_WARNING("invalid select2; returning copy\n", __func__); | |
| 700 return boxaCopy(boxas, L_COPY); | |
| 701 } | |
| 702 if (thresh < 0) { | |
| 703 L_WARNING("thresh must be >= 0; returning copy\n", __func__); | |
| 704 return boxaCopy(boxas, L_COPY); | |
| 705 } | |
| 706 if (boxaGetValidCount(boxas) < 3) { | |
| 707 L_WARNING("need at least 3 valid boxes; returning copy\n", __func__); | |
| 708 return boxaCopy(boxas, L_COPY); | |
| 709 } | |
| 710 | |
| 711 /* Adjust even and odd box sides separately */ | |
| 712 boxaSplitEvenOdd(boxas, 0, &boxa1e, &boxa1o); | |
| 713 ncols = 1; | |
| 714 if (select1 == L_ADJUST_LEFT_AND_RIGHT) { | |
| 715 ncols += 2; | |
| 716 boxa2e = boxaReconcileSidesByMedian(boxa1e, select1, thresh, | |
| 717 extra, pixadb); | |
| 718 } else { | |
| 719 boxa2e = boxaCopy(boxa1e, L_COPY); | |
| 720 } | |
| 721 if (select2 == L_ADJUST_TOP_AND_BOT) { | |
| 722 ncols += 2; | |
| 723 boxa3e = boxaReconcileSidesByMedian(boxa2e, select2, thresh, | |
| 724 extra, pixadb); | |
| 725 } else { | |
| 726 boxa3e = boxaCopy(boxa2e, L_COPY); | |
| 727 } | |
| 728 if (select1 == L_ADJUST_LEFT_AND_RIGHT) | |
| 729 boxa2o = boxaReconcileSidesByMedian(boxa1o, select1, thresh, | |
| 730 extra, pixadb); | |
| 731 else | |
| 732 boxa2o = boxaCopy(boxa1o, L_COPY); | |
| 733 if (select2 == L_ADJUST_TOP_AND_BOT) | |
| 734 boxa3o = boxaReconcileSidesByMedian(boxa2o, select2, thresh, | |
| 735 extra, pixadb); | |
| 736 else | |
| 737 boxa3o = boxaCopy(boxa2o, L_COPY); | |
| 738 boxad = boxaMergeEvenOdd(boxa3e, boxa3o, 0); | |
| 739 | |
| 740 /* This generates 2 sets of 3 or 5 plots in a row, depending | |
| 741 * on whether select1 and select2 are true (not skipping). | |
| 742 * The top row is for even boxes; the bottom row is for odd boxes. */ | |
| 743 if (pixadb) { | |
| 744 lept_mkdir("lept/boxa"); | |
| 745 pix1 = pixaDisplayTiledInColumns(pixadb, ncols, 1.0, 30, 2); | |
| 746 pixWrite("/tmp/lept/boxa/recon_sides.png", pix1, IFF_PNG); | |
| 747 pixDestroy(&pix1); | |
| 748 } | |
| 749 | |
| 750 boxaDestroy(&boxa1e); | |
| 751 boxaDestroy(&boxa1o); | |
| 752 boxaDestroy(&boxa2e); | |
| 753 boxaDestroy(&boxa2o); | |
| 754 boxaDestroy(&boxa3e); | |
| 755 boxaDestroy(&boxa3o); | |
| 756 return boxad; | |
| 757 } | |
| 758 | |
| 759 | |
| 760 /*! | |
| 761 * \brief boxaReconcileSidesByMedian() | |
| 762 * | |
| 763 * \param[in] boxas containing at least 3 valid boxes | |
| 764 * \param[in] select L_ADJUST_LEFT, L_ADJUST_RIGHT, etc. | |
| 765 * \param[in] thresh threshold number of pixels to make adjustment | |
| 766 * \param[in] extra extra pixels to add beyond median value | |
| 767 * \param[in] pixadb use NULL to skip debug output | |
| 768 * \return boxad possibly adjusted from boxas; a copy of boxas on error | |
| 769 * | |
| 770 * <pre> | |
| 771 * Notes: | |
| 772 * (1) This modifies individual box sides if their location differs | |
| 773 * significantly (>= %thresh) from the median value. | |
| 774 * (2) %select specifies which sides are to be checked. | |
| 775 * (3) %thresh specifies the tolerance for different side locations. | |
| 776 * Any box side that differs from the median by this much will | |
| 777 * be set to the median value, plus the %extra amount. | |
| 778 * (4) If %extra is positive, the box dimensions are expanded. | |
| 779 * For example, for the left side, a positive %extra results in | |
| 780 * moving the left side farther to the left (i.e., in a negative | |
| 781 * direction). | |
| 782 * (5) If all box sides are within %thresh - 1 of the median value, | |
| 783 * the returned box will be identical to %boxas. | |
| 784 * (6) N.B. If you expect that even and odd box sides should be | |
| 785 * significantly different, this function must be called separately | |
| 786 * on the even and odd boxes in %boxas. Note also that the | |
| 787 * higher level function boxaReconcileAllByMedian() handles the | |
| 788 * even and odd box sides separately. | |
| 789 * </pre> | |
| 790 */ | |
| 791 BOXA * | |
| 792 boxaReconcileSidesByMedian(BOXA *boxas, | |
| 793 l_int32 select, | |
| 794 l_int32 thresh, | |
| 795 l_int32 extra, | |
| 796 PIXA *pixadb) | |
| 797 { | |
| 798 char buf[128]; | |
| 799 l_int32 i, n, diff; | |
| 800 l_int32 left, right, top, bot, medleft, medright, medtop, medbot; | |
| 801 BOX *box; | |
| 802 BOXA *boxa1, *boxad; | |
| 803 PIX *pix; | |
| 804 | |
| 805 if (!boxas) | |
| 806 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 807 if (select != L_ADJUST_LEFT && select != L_ADJUST_RIGHT && | |
| 808 select != L_ADJUST_TOP && select != L_ADJUST_BOT && | |
| 809 select != L_ADJUST_LEFT_AND_RIGHT && select != L_ADJUST_TOP_AND_BOT) { | |
| 810 L_WARNING("invalid select; returning copy\n", __func__); | |
| 811 return boxaCopy(boxas, L_COPY); | |
| 812 } | |
| 813 if (thresh < 0) { | |
| 814 L_WARNING("thresh must be >= 0; returning copy\n", __func__); | |
| 815 return boxaCopy(boxas, L_COPY); | |
| 816 } | |
| 817 if (boxaGetValidCount(boxas) < 3) { | |
| 818 L_WARNING("need at least 3 valid boxes; returning copy\n", __func__); | |
| 819 return boxaCopy(boxas, L_COPY); | |
| 820 } | |
| 821 | |
| 822 if (select == L_ADJUST_LEFT_AND_RIGHT) { | |
| 823 boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_LEFT, thresh, extra, | |
| 824 pixadb); | |
| 825 boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_RIGHT, thresh, extra, | |
| 826 pixadb); | |
| 827 boxaDestroy(&boxa1); | |
| 828 return boxad; | |
| 829 } | |
| 830 if (select == L_ADJUST_TOP_AND_BOT) { | |
| 831 boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_TOP, thresh, extra, | |
| 832 pixadb); | |
| 833 boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_BOT, thresh, extra, | |
| 834 pixadb); | |
| 835 boxaDestroy(&boxa1); | |
| 836 return boxad; | |
| 837 } | |
| 838 | |
| 839 if (pixadb) { | |
| 840 l_int32 ndb = pixaGetCount(pixadb); | |
| 841 if (ndb == 0 || ndb == 5) { /* first of even and odd box sets */ | |
| 842 adjustSidePlotName(buf, sizeof(buf), "init", select); | |
| 843 boxaPlotSides(boxas, buf, NULL, NULL, NULL, NULL, &pix); | |
| 844 pixaAddPix(pixadb, pix, L_INSERT); | |
| 845 } | |
| 846 } | |
| 847 | |
| 848 n = boxaGetCount(boxas); | |
| 849 boxad = boxaCreate(n); | |
| 850 if (select == L_ADJUST_LEFT) { | |
| 851 boxaGetMedianVals(boxas, &medleft, NULL, NULL, NULL, NULL, NULL); | |
| 852 for (i = 0; i < n; i++) { | |
| 853 box = boxaGetBox(boxas, i, L_COPY); | |
| 854 boxGetSideLocations(box, &left, NULL, NULL, NULL); | |
| 855 diff = medleft - left; | |
| 856 if (L_ABS(diff) >= thresh) | |
| 857 boxAdjustSides(box, box, diff - extra, 0, 0, 0); | |
| 858 boxaAddBox(boxad, box, L_INSERT); | |
| 859 } | |
| 860 } else if (select == L_ADJUST_RIGHT) { | |
| 861 boxaGetMedianVals(boxas, NULL, NULL, &medright, NULL, NULL, NULL); | |
| 862 for (i = 0; i < n; i++) { | |
| 863 box = boxaGetBox(boxas, i, L_COPY); | |
| 864 boxGetSideLocations(box, NULL, &right, NULL, NULL); | |
| 865 diff = medright - right; | |
| 866 if (L_ABS(diff) >= thresh) | |
| 867 boxAdjustSides(box, box, 0, diff + extra, 0, 0); | |
| 868 boxaAddBox(boxad, box, L_INSERT); | |
| 869 } | |
| 870 } else if (select == L_ADJUST_TOP) { | |
| 871 boxaGetMedianVals(boxas, NULL, &medtop, NULL, NULL, NULL, NULL); | |
| 872 for (i = 0; i < n; i++) { | |
| 873 box = boxaGetBox(boxas, i, L_COPY); | |
| 874 boxGetSideLocations(box, NULL, NULL, &top, NULL); | |
| 875 diff = medtop - top; | |
| 876 if (L_ABS(diff) >= thresh) | |
| 877 boxAdjustSides(box, box, 0, 0, diff - extra, 0); | |
| 878 boxaAddBox(boxad, box, L_INSERT); | |
| 879 } | |
| 880 } else { /* select == L_ADJUST_BOT */ | |
| 881 boxaGetMedianVals(boxas, NULL, NULL, NULL, &medbot, NULL, NULL); | |
| 882 for (i = 0; i < n; i++) { | |
| 883 box = boxaGetBox(boxas, i, L_COPY); | |
| 884 boxGetSideLocations(box, NULL, NULL, NULL, &bot); | |
| 885 diff = medbot - bot; | |
| 886 if (L_ABS(diff) >= thresh) | |
| 887 boxAdjustSides(box, box, 0, 0, 0, diff + extra); | |
| 888 boxaAddBox(boxad, box, L_INSERT); | |
| 889 } | |
| 890 } | |
| 891 | |
| 892 if (pixadb) { | |
| 893 adjustSidePlotName(buf, sizeof(buf), "final", select); | |
| 894 boxaPlotSides(boxad, buf, NULL, NULL, NULL, NULL, &pix); | |
| 895 pixaAddPix(pixadb, pix, L_INSERT); | |
| 896 } | |
| 897 return boxad; | |
| 898 } | |
| 899 | |
| 900 | |
| 901 static void | |
| 902 adjustSidePlotName(char *buf, | |
| 903 size_t size, | |
| 904 const char *preface, | |
| 905 l_int32 select) | |
| 906 { | |
| 907 stringCopy(buf, preface, size - 8); | |
| 908 if (select == L_ADJUST_LEFT) | |
| 909 stringCat(buf, size, "-left"); | |
| 910 else if (select == L_ADJUST_RIGHT) | |
| 911 stringCat(buf, size, "-right"); | |
| 912 else if (select == L_ADJUST_TOP) | |
| 913 stringCat(buf, size, "-top"); | |
| 914 else if (select == L_ADJUST_BOT) | |
| 915 stringCat(buf, size, "-bot"); | |
| 916 } | |
| 917 | |
| 918 | |
| 919 /*! | |
| 920 * \brief boxaReconcileSizeByMedian() | |
| 921 * | |
| 922 * \param[in] boxas containing at least 6 valid boxes | |
| 923 * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT, L_CHECK_BOTH | |
| 924 * \param[in] dfract threshold fraction of dimensional variation from | |
| 925 * median; in range (0 ... 1); typ. about 0.05. | |
| 926 * \param[in] sfract threshold fraction of side variation from median; | |
| 927 * in range (0 ... 1); typ. about 0.04. | |
| 928 * \param[in] factor expansion for fixed box beyond median width; | |
| 929 * should be near 1.0. | |
| 930 * \param[out] pnadelw [optional] diff from median width for boxes | |
| 931 * above threshold | |
| 932 * \param[out] pnadelh [optional] diff from median height for boxes | |
| 933 * above threshold | |
| 934 * \param[out] pratiowh [optional] ratio of median width/height of boxas | |
| 935 * \return boxad possibly adjusted from boxas; a copy of boxas on error | |
| 936 * | |
| 937 * <pre> | |
| 938 * Notes: | |
| 939 * (1) The basic idea is to identify significant differences in box | |
| 940 * dimension (either width or height) and modify the outlier boxes. | |
| 941 * (2) %type specifies if we are reconciling the width, height or both. | |
| 942 * (3) %dfract specifies the tolerance for different dimensions. Any | |
| 943 * box with a fractional difference from the median size that | |
| 944 * exceeds %dfract will be altered. | |
| 945 * (4) %sfract specifies the tolerance for different side locations. | |
| 946 * If a box has been marked by (3) for alteration, any side | |
| 947 * location that differs from the median side location by | |
| 948 * more than %sfract of the median dimension (medw or medh) | |
| 949 * will be moved. | |
| 950 * (5) Median width and height are found for all valid boxes (i.e., | |
| 951 * for all boxes with width and height > 0. | |
| 952 * Median side locations are found separately for even and odd boxes, | |
| 953 * using only boxes that are "inliers"; i.e., that have been | |
| 954 * found by (3) to be within tolerance for width or height. | |
| 955 * (6) If all box dimensions are within threshold of the median size, | |
| 956 * just return a copy. Otherwise, box sides of the outliers | |
| 957 * will be adjusted. | |
| 958 * (7) Using %sfract, sides that are sufficiently far from the median | |
| 959 * are first moved to the median value. Then they are moved | |
| 960 * together (in or out) so that the final box dimension | |
| 961 * is %factor times the median dimension. | |
| 962 * (8) The arrays that are the initial deviation from median size | |
| 963 * (width and height) are optionally returned. Also optionally | |
| 964 * returned is the median w/h asperity ratio of the input %boxas. | |
| 965 * </pre> | |
| 966 */ | |
| 967 BOXA * | |
| 968 boxaReconcileSizeByMedian(BOXA *boxas, | |
| 969 l_int32 type, | |
| 970 l_float32 dfract, | |
| 971 l_float32 sfract, | |
| 972 l_float32 factor, | |
| 973 NUMA **pnadelw, | |
| 974 NUMA **pnadelh, | |
| 975 l_float32 *pratiowh) | |
| 976 { | |
| 977 l_int32 i, n, ne, no, outfound, isvalid, ind, del, maxdel; | |
| 978 l_int32 medw, medh, bw, bh, left, right, top, bot; | |
| 979 l_int32 medleft, medlefte, medlefto, medright, medrighte, medrighto; | |
| 980 l_int32 medtop, medtope, medtopo, medbot, medbote, medboto; | |
| 981 l_float32 brat; | |
| 982 BOX *box; | |
| 983 BOXA *boxa1, *boxae, *boxao, *boxad; | |
| 984 NUMA *naind, *nadelw, *nadelh; | |
| 985 | |
| 986 if (pnadelw) *pnadelw = NULL; | |
| 987 if (pnadelh) *pnadelh = NULL; | |
| 988 if (pratiowh) *pratiowh = 0.0; | |
| 989 if (!boxas) | |
| 990 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 991 if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT && | |
| 992 type != L_CHECK_BOTH) { | |
| 993 L_WARNING("invalid type; returning copy\n", __func__); | |
| 994 return boxaCopy(boxas, L_COPY); | |
| 995 } | |
| 996 if (dfract <= 0.0 || dfract >= 0.5) { | |
| 997 L_WARNING("invalid dimensional fract; returning copy\n", __func__); | |
| 998 return boxaCopy(boxas, L_COPY); | |
| 999 } | |
| 1000 if (sfract <= 0.0 || sfract >= 0.5) { | |
| 1001 L_WARNING("invalid side fract; returning copy\n", __func__); | |
| 1002 return boxaCopy(boxas, L_COPY); | |
| 1003 } | |
| 1004 if (factor < 0.8 || factor > 1.25) | |
| 1005 L_WARNING("factor %5.3f is typ. closer to 1.0\n", __func__, factor); | |
| 1006 if (boxaGetValidCount(boxas) < 6) { | |
| 1007 L_WARNING("need at least 6 valid boxes; returning copy\n", __func__); | |
| 1008 return boxaCopy(boxas, L_COPY); | |
| 1009 } | |
| 1010 | |
| 1011 /* If reconciling both width and height, optionally return array of | |
| 1012 * median deviations and even/odd ratio for width measurements */ | |
| 1013 if (type == L_CHECK_BOTH) { | |
| 1014 boxa1 = boxaReconcileSizeByMedian(boxas, L_CHECK_WIDTH, dfract, sfract, | |
| 1015 factor, pnadelw, NULL, pratiowh); | |
| 1016 boxad = boxaReconcileSizeByMedian(boxa1, L_CHECK_HEIGHT, dfract, sfract, | |
| 1017 factor, NULL, pnadelh, NULL); | |
| 1018 boxaDestroy(&boxa1); | |
| 1019 return boxad; | |
| 1020 } | |
| 1021 | |
| 1022 n = boxaGetCount(boxas); | |
| 1023 naind = numaCreate(n); /* outlier indicator array */ | |
| 1024 boxae = boxaCreate(0); /* even inliers */ | |
| 1025 boxao = boxaCreate(0); /* odd inliers */ | |
| 1026 outfound = FALSE; | |
| 1027 if (type == L_CHECK_WIDTH) { | |
| 1028 boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL, | |
| 1029 &nadelw, NULL); | |
| 1030 if (pratiowh) { | |
| 1031 *pratiowh = (l_float32)medw / (l_float32)medh; | |
| 1032 L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh); | |
| 1033 } | |
| 1034 if (pnadelw) | |
| 1035 *pnadelw = nadelw; | |
| 1036 else | |
| 1037 numaDestroy(&nadelw); | |
| 1038 | |
| 1039 /* Check for outliers; assemble inliers */ | |
| 1040 for (i = 0; i < n; i++) { | |
| 1041 if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) { | |
| 1042 numaAddNumber(naind, 0); | |
| 1043 continue; | |
| 1044 } | |
| 1045 boxGetGeometry(box, NULL, NULL, &bw, NULL); | |
| 1046 brat = (l_float32)bw / (l_float32)medw; | |
| 1047 if (brat < 1.0 - dfract || brat > 1.0 + dfract) { | |
| 1048 outfound = TRUE; | |
| 1049 numaAddNumber(naind, 1); | |
| 1050 boxDestroy(&box); | |
| 1051 } else { /* add to inliers */ | |
| 1052 numaAddNumber(naind, 0); | |
| 1053 if (i % 2 == 0) | |
| 1054 boxaAddBox(boxae, box, L_INSERT); | |
| 1055 else | |
| 1056 boxaAddBox(boxao, box, L_INSERT); | |
| 1057 } | |
| 1058 } | |
| 1059 if (!outfound) { /* nothing to do */ | |
| 1060 numaDestroy(&naind); | |
| 1061 boxaDestroy(&boxae); | |
| 1062 boxaDestroy(&boxao); | |
| 1063 L_INFO("no width outlier boxes found\n", __func__); | |
| 1064 return boxaCopy(boxas, L_COPY); | |
| 1065 } | |
| 1066 | |
| 1067 /* Get left/right parameters from inliers. Handle the case | |
| 1068 * where there are no inliers for one of the sets. For example, | |
| 1069 * when all the even boxes have a different dimension from | |
| 1070 * the odd boxes, and the median arbitrarily gets assigned | |
| 1071 * to the even boxes, there are no odd inliers; in that case, | |
| 1072 * use the even inliers sides to decide whether to adjust | |
| 1073 * the left or the right sides of individual outliers. */ | |
| 1074 L_INFO("fixing width of outlier boxes\n", __func__); | |
| 1075 medlefte = medrighte = medlefto = medrighto = 0; | |
| 1076 if ((ne = boxaGetValidCount(boxae)) > 0) | |
| 1077 boxaGetMedianVals(boxae, &medlefte, NULL, &medrighte, NULL, | |
| 1078 NULL, NULL); | |
| 1079 if ((no = boxaGetValidCount(boxao)) > 0) | |
| 1080 boxaGetMedianVals(boxao, &medlefto, NULL, &medrighto, NULL, | |
| 1081 NULL, NULL); | |
| 1082 if (ne == 0) { /* use odd inliers values for both */ | |
| 1083 medlefte = medlefto; | |
| 1084 medrighte = medrighto; | |
| 1085 } else if (no == 0) { /* use even inliers values for both */ | |
| 1086 medlefto = medlefte; | |
| 1087 medrighto = medrighte; | |
| 1088 } | |
| 1089 | |
| 1090 /* Adjust the left and/or right sides of outliers. | |
| 1091 * For each box that is a dimensional outlier, consider each side. | |
| 1092 * Any side that differs fractionally from the median value | |
| 1093 * by more than %sfract times the median width (medw) is set to | |
| 1094 * the median value for that side. Then both sides are moved | |
| 1095 * an equal distance in or out to make w = %factor * medw. */ | |
| 1096 boxad = boxaCreate(n); | |
| 1097 maxdel = (l_int32)(sfract * medw + 0.5); | |
| 1098 for (i = 0; i < n; i++) { | |
| 1099 box = boxaGetBox(boxas, i, L_COPY); | |
| 1100 boxIsValid(box, &isvalid); | |
| 1101 numaGetIValue(naind, i, &ind); | |
| 1102 medleft = (i % 2 == 0) ? medlefte : medlefto; | |
| 1103 medright = (i % 2 == 0) ? medrighte : medrighto; | |
| 1104 if (ind == 1 && isvalid) { /* adjust sides */ | |
| 1105 boxGetSideLocations(box, &left, &right, NULL, NULL); | |
| 1106 if (L_ABS(left - medleft) > maxdel) left = medleft; | |
| 1107 if (L_ABS(right - medright) > maxdel) right = medright; | |
| 1108 del = (l_int32)(factor * medw - (right - left)) / 2; | |
| 1109 boxSetSide(box, L_SET_LEFT, left - del, 0); | |
| 1110 boxSetSide(box, L_SET_RIGHT, right + del, 0); | |
| 1111 } | |
| 1112 boxaAddBox(boxad, box, L_INSERT); | |
| 1113 } | |
| 1114 } else { /* L_CHECK_HEIGHT */ | |
| 1115 boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL, | |
| 1116 NULL, &nadelh); | |
| 1117 if (pratiowh) { | |
| 1118 *pratiowh = (l_float32)medw / (l_float32)medh; | |
| 1119 L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh); | |
| 1120 } | |
| 1121 if (pnadelh) | |
| 1122 *pnadelh = nadelh; | |
| 1123 else | |
| 1124 numaDestroy(&nadelh); | |
| 1125 | |
| 1126 /* Check for outliers; assemble inliers */ | |
| 1127 for (i = 0; i < n; i++) { | |
| 1128 if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) { | |
| 1129 numaAddNumber(naind, 0); | |
| 1130 continue; | |
| 1131 } | |
| 1132 boxGetGeometry(box, NULL, NULL, NULL, &bh); | |
| 1133 brat = (l_float32)bh / (l_float32)medh; | |
| 1134 if (brat < 1.0 - dfract || brat > 1.0 + dfract) { | |
| 1135 outfound = TRUE; | |
| 1136 numaAddNumber(naind, 1); | |
| 1137 boxDestroy(&box); | |
| 1138 } else { /* add to inliers */ | |
| 1139 numaAddNumber(naind, 0); | |
| 1140 if (i % 2 == 0) | |
| 1141 boxaAddBox(boxae, box, L_INSERT); | |
| 1142 else | |
| 1143 boxaAddBox(boxao, box, L_INSERT); | |
| 1144 } | |
| 1145 } | |
| 1146 if (!outfound) { /* nothing to do */ | |
| 1147 numaDestroy(&naind); | |
| 1148 boxaDestroy(&boxae); | |
| 1149 boxaDestroy(&boxao); | |
| 1150 L_INFO("no height outlier boxes found\n", __func__); | |
| 1151 return boxaCopy(boxas, L_COPY); | |
| 1152 } | |
| 1153 | |
| 1154 /* Get top/bot parameters from inliers. Handle the case | |
| 1155 * where there are no inliers for one of the sets. For example, | |
| 1156 * when all the even boxes have a different dimension from | |
| 1157 * the odd boxes, and the median arbitrarily gets assigned | |
| 1158 * to the even boxes, there are no odd inliers; in that case, | |
| 1159 * use the even inlier sides to decide whether to adjust | |
| 1160 * the top or the bottom sides of individual outliers. */ | |
| 1161 L_INFO("fixing height of outlier boxes\n", __func__); | |
| 1162 medlefte = medtope = medbote = medtopo = medboto = 0; | |
| 1163 if ((ne = boxaGetValidCount(boxae)) > 0) | |
| 1164 boxaGetMedianVals(boxae, NULL, &medtope, NULL, &medbote, | |
| 1165 NULL, NULL); | |
| 1166 if ((no = boxaGetValidCount(boxao)) > 0) | |
| 1167 boxaGetMedianVals(boxao, NULL, &medtopo, NULL, &medboto, | |
| 1168 NULL, NULL); | |
| 1169 if (ne == 0) { /* use odd inliers values for both */ | |
| 1170 medtope = medtopo; | |
| 1171 medbote = medboto; | |
| 1172 } else if (no == 0) { /* use even inliers values for both */ | |
| 1173 medtopo = medtope; | |
| 1174 medboto = medbote; | |
| 1175 } | |
| 1176 | |
| 1177 /* Adjust the top and/or bottom sides of outliers. | |
| 1178 * For each box that is a dimensional outlier, consider each side. | |
| 1179 * Any side that differs fractionally from the median value | |
| 1180 * by more than %sfract times the median height (medh) is | |
| 1181 * set to the median value for that that side. Then both | |
| 1182 * sides are moved an equal distance in or out to make | |
| 1183 * h = %factor * medh). */ | |
| 1184 boxad = boxaCreate(n); | |
| 1185 maxdel = (l_int32)(sfract * medh + 0.5); | |
| 1186 for (i = 0; i < n; i++) { | |
| 1187 box = boxaGetBox(boxas, i, L_COPY); | |
| 1188 boxIsValid(box, &isvalid); | |
| 1189 numaGetIValue(naind, i, &ind); | |
| 1190 medtop = (i % 2 == 0) ? medtope : medtopo; | |
| 1191 medbot = (i % 2 == 0) ? medbote : medboto; | |
| 1192 if (ind == 1 && isvalid) { /* adjust sides */ | |
| 1193 boxGetSideLocations(box, NULL, NULL, &top, &bot); | |
| 1194 if (L_ABS(top - medtop) > maxdel) top = medtop; | |
| 1195 if (L_ABS(bot - medbot) > maxdel) bot = medbot; | |
| 1196 del = (l_int32)(factor * medh - (bot - top)) / 2; /* typ > 0 */ | |
| 1197 boxSetSide(box, L_SET_TOP, L_MAX(0, top - del), 0); | |
| 1198 boxSetSide(box, L_SET_BOT, bot + del, 0); | |
| 1199 } | |
| 1200 boxaAddBox(boxad, box, L_INSERT); | |
| 1201 } | |
| 1202 } | |
| 1203 numaDestroy(&naind); | |
| 1204 boxaDestroy(&boxae); | |
| 1205 boxaDestroy(&boxao); | |
| 1206 return boxad; | |
| 1207 } | |
| 1208 | |
| 1209 | |
| 1210 /*! | |
| 1211 * \brief boxaPlotSides() | |
| 1212 * | |
| 1213 * \param[in] boxa source boxa | |
| 1214 * \param[in] plotname [optional], can be NULL | |
| 1215 * \param[out] pnal [optional] na of left sides | |
| 1216 * \param[out] pnat [optional] na of top sides | |
| 1217 * \param[out] pnar [optional] na of right sides | |
| 1218 * \param[out] pnab [optional] na of bottom sides | |
| 1219 * \param[out] ppixd pix of the output plot | |
| 1220 * \return 0 if OK, 1 on error | |
| 1221 * | |
| 1222 * <pre> | |
| 1223 * Notes: | |
| 1224 * (1) This debugging function shows the progression of the four | |
| 1225 * sides in the boxa. There must be at least 2 boxes. | |
| 1226 * (2) If there are invalid boxes (e.g., if only even or odd | |
| 1227 * indices have valid boxes), this will fill them with the | |
| 1228 * nearest valid box before plotting. | |
| 1229 * (3) The plotfiles are put in /tmp/lept/plots/, and are named | |
| 1230 * either with %plotname or, if NULL, a default name. If | |
| 1231 * %plotname is used, make sure it has no whitespace characters. | |
| 1232 * </pre> | |
| 1233 */ | |
| 1234 l_ok | |
| 1235 boxaPlotSides(BOXA *boxa, | |
| 1236 const char *plotname, | |
| 1237 NUMA **pnal, | |
| 1238 NUMA **pnat, | |
| 1239 NUMA **pnar, | |
| 1240 NUMA **pnab, | |
| 1241 PIX **ppixd) | |
| 1242 { | |
| 1243 char buf[128], titlebuf[128]; | |
| 1244 char *dataname; | |
| 1245 static l_int32 plotid = 0; | |
| 1246 l_int32 n, i, w, h, left, top, right, bot; | |
| 1247 l_int32 debugprint = FALSE; /* change to TRUE to spam stderr */ | |
| 1248 l_float32 med, dev; | |
| 1249 BOXA *boxat; | |
| 1250 GPLOT *gplot; | |
| 1251 NUMA *nal, *nat, *nar, *nab; | |
| 1252 | |
| 1253 if (pnal) *pnal = NULL; | |
| 1254 if (pnat) *pnat = NULL; | |
| 1255 if (pnar) *pnar = NULL; | |
| 1256 if (pnab) *pnab = NULL; | |
| 1257 if (ppixd) *ppixd = NULL; | |
| 1258 if (!boxa) | |
| 1259 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1260 if ((n = boxaGetCount(boxa)) < 2) | |
| 1261 return ERROR_INT("less than 2 boxes", __func__, 1); | |
| 1262 if (!ppixd) | |
| 1263 return ERROR_INT("&pixd not defined", __func__, 1); | |
| 1264 | |
| 1265 boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0); | |
| 1266 | |
| 1267 /* Build the numas for each side */ | |
| 1268 nal = numaCreate(n); | |
| 1269 nat = numaCreate(n); | |
| 1270 nar = numaCreate(n); | |
| 1271 nab = numaCreate(n); | |
| 1272 | |
| 1273 for (i = 0; i < n; i++) { | |
| 1274 boxaGetBoxGeometry(boxat, i, &left, &top, &w, &h); | |
| 1275 right = left + w - 1; | |
| 1276 bot = top + h - 1; | |
| 1277 numaAddNumber(nal, left); | |
| 1278 numaAddNumber(nat, top); | |
| 1279 numaAddNumber(nar, right); | |
| 1280 numaAddNumber(nab, bot); | |
| 1281 } | |
| 1282 boxaDestroy(&boxat); | |
| 1283 | |
| 1284 lept_mkdir("lept/plots"); | |
| 1285 if (plotname) { | |
| 1286 snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%s", plotname); | |
| 1287 snprintf(titlebuf, sizeof(titlebuf), "%s: Box sides vs. box index", | |
| 1288 plotname); | |
| 1289 } else { | |
| 1290 snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%d", plotid++); | |
| 1291 snprintf(titlebuf, sizeof(titlebuf), "Box sides vs. box index"); | |
| 1292 } | |
| 1293 gplot = gplotCreate(buf, GPLOT_PNG, titlebuf, | |
| 1294 "box index", "side location"); | |
| 1295 gplotAddPlot(gplot, NULL, nal, GPLOT_LINES, "left side"); | |
| 1296 gplotAddPlot(gplot, NULL, nat, GPLOT_LINES, "top side"); | |
| 1297 gplotAddPlot(gplot, NULL, nar, GPLOT_LINES, "right side"); | |
| 1298 gplotAddPlot(gplot, NULL, nab, GPLOT_LINES, "bottom side"); | |
| 1299 *ppixd = gplotMakeOutputPix(gplot); | |
| 1300 gplotDestroy(&gplot); | |
| 1301 | |
| 1302 if (debugprint) { | |
| 1303 dataname = (plotname) ? stringNew(plotname) : stringNew("no_name"); | |
| 1304 numaGetMedian(nal, &med); | |
| 1305 numaGetMeanDevFromMedian(nal, med, &dev); | |
| 1306 lept_stderr("%s left: med = %7.3f, meandev = %7.3f\n", | |
| 1307 dataname, med, dev); | |
| 1308 numaGetMedian(nat, &med); | |
| 1309 numaGetMeanDevFromMedian(nat, med, &dev); | |
| 1310 lept_stderr("%s top: med = %7.3f, meandev = %7.3f\n", | |
| 1311 dataname, med, dev); | |
| 1312 numaGetMedian(nar, &med); | |
| 1313 numaGetMeanDevFromMedian(nar, med, &dev); | |
| 1314 lept_stderr("%s right: med = %7.3f, meandev = %7.3f\n", | |
| 1315 dataname, med, dev); | |
| 1316 numaGetMedian(nab, &med); | |
| 1317 numaGetMeanDevFromMedian(nab, med, &dev); | |
| 1318 lept_stderr("%s bot: med = %7.3f, meandev = %7.3f\n", | |
| 1319 dataname, med, dev); | |
| 1320 LEPT_FREE(dataname); | |
| 1321 } | |
| 1322 | |
| 1323 if (pnal) | |
| 1324 *pnal = nal; | |
| 1325 else | |
| 1326 numaDestroy(&nal); | |
| 1327 if (pnat) | |
| 1328 *pnat = nat; | |
| 1329 else | |
| 1330 numaDestroy(&nat); | |
| 1331 if (pnar) | |
| 1332 *pnar = nar; | |
| 1333 else | |
| 1334 numaDestroy(&nar); | |
| 1335 if (pnab) | |
| 1336 *pnab = nab; | |
| 1337 else | |
| 1338 numaDestroy(&nab); | |
| 1339 return 0; | |
| 1340 } | |
| 1341 | |
| 1342 | |
| 1343 /*! | |
| 1344 * \brief boxaPlotSizes() | |
| 1345 * | |
| 1346 * \param[in] boxa source boxa | |
| 1347 * \param[in] plotname [optional], can be NULL | |
| 1348 * \param[out] pnaw [optional] na of widths | |
| 1349 * \param[out] pnah [optional] na of heights | |
| 1350 * \param[out] ppixd pix of the output plot | |
| 1351 * \return 0 if OK, 1 on error | |
| 1352 * | |
| 1353 * <pre> | |
| 1354 * Notes: | |
| 1355 * (1) This debugging function shows the progression of box width | |
| 1356 * and height in the boxa. There must be at least 2 boxes. | |
| 1357 * (2) If there are invalid boxes (e.g., if only even or odd | |
| 1358 * indices have valid boxes), this will fill them with the | |
| 1359 * nearest valid box before plotting. | |
| 1360 * (3) The plotfiles are put in /tmp/lept/plots/, and are named | |
| 1361 * either with %plotname or, if NULL, a default name. If | |
| 1362 * %plotname is used, make sure it has no whitespace characters. | |
| 1363 * </pre> | |
| 1364 */ | |
| 1365 l_ok | |
| 1366 boxaPlotSizes(BOXA *boxa, | |
| 1367 const char *plotname, | |
| 1368 NUMA **pnaw, | |
| 1369 NUMA **pnah, | |
| 1370 PIX **ppixd) | |
| 1371 { | |
| 1372 char buf[128], titlebuf[128]; | |
| 1373 static l_int32 plotid = 0; | |
| 1374 l_int32 n, i, w, h; | |
| 1375 BOXA *boxat; | |
| 1376 GPLOT *gplot; | |
| 1377 NUMA *naw, *nah; | |
| 1378 | |
| 1379 if (pnaw) *pnaw = NULL; | |
| 1380 if (pnah) *pnah = NULL; | |
| 1381 if (ppixd) *ppixd = NULL; | |
| 1382 if (!boxa) | |
| 1383 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1384 if ((n = boxaGetCount(boxa)) < 2) | |
| 1385 return ERROR_INT("less than 2 boxes", __func__, 1); | |
| 1386 if (!ppixd) | |
| 1387 return ERROR_INT("&pixd not defined", __func__, 1); | |
| 1388 | |
| 1389 boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0); | |
| 1390 | |
| 1391 /* Build the numas for the width and height */ | |
| 1392 naw = numaCreate(n); | |
| 1393 nah = numaCreate(n); | |
| 1394 for (i = 0; i < n; i++) { | |
| 1395 boxaGetBoxGeometry(boxat, i, NULL, NULL, &w, &h); | |
| 1396 numaAddNumber(naw, w); | |
| 1397 numaAddNumber(nah, h); | |
| 1398 } | |
| 1399 boxaDestroy(&boxat); | |
| 1400 | |
| 1401 lept_mkdir("lept/plots"); | |
| 1402 if (plotname) { | |
| 1403 snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%s", plotname); | |
| 1404 snprintf(titlebuf, sizeof(titlebuf), "%s: Box size vs. box index", | |
| 1405 plotname); | |
| 1406 } else { | |
| 1407 snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%d", plotid++); | |
| 1408 snprintf(titlebuf, sizeof(titlebuf), "Box size vs. box index"); | |
| 1409 } | |
| 1410 gplot = gplotCreate(buf, GPLOT_PNG, titlebuf, | |
| 1411 "box index", "box dimension"); | |
| 1412 gplotAddPlot(gplot, NULL, naw, GPLOT_LINES, "width"); | |
| 1413 gplotAddPlot(gplot, NULL, nah, GPLOT_LINES, "height"); | |
| 1414 *ppixd = gplotMakeOutputPix(gplot); | |
| 1415 gplotDestroy(&gplot); | |
| 1416 | |
| 1417 if (pnaw) | |
| 1418 *pnaw = naw; | |
| 1419 else | |
| 1420 numaDestroy(&naw); | |
| 1421 if (pnah) | |
| 1422 *pnah = nah; | |
| 1423 else | |
| 1424 numaDestroy(&nah); | |
| 1425 return 0; | |
| 1426 } | |
| 1427 | |
| 1428 | |
| 1429 /*! | |
| 1430 * \brief boxaFillSequence() | |
| 1431 * | |
| 1432 * \param[in] boxas with at least 3 boxes | |
| 1433 * \param[in] useflag L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES | |
| 1434 * \param[in] debug 1 for debug output | |
| 1435 * \return boxad filled boxa, or NULL on error | |
| 1436 * | |
| 1437 * <pre> | |
| 1438 * Notes: | |
| 1439 * (1) This simple function replaces invalid boxes with a copy of | |
| 1440 * the nearest valid box, selected from either the entire | |
| 1441 * sequence (L_USE_ALL_BOXES) or from the boxes with the | |
| 1442 * same parity (L_USE_SAME_PARITY_BOXES). It returns a new boxa. | |
| 1443 * (2) This is useful if you expect boxes in the sequence to | |
| 1444 * vary slowly with index. | |
| 1445 * </pre> | |
| 1446 */ | |
| 1447 BOXA * | |
| 1448 boxaFillSequence(BOXA *boxas, | |
| 1449 l_int32 useflag, | |
| 1450 l_int32 debug) | |
| 1451 { | |
| 1452 l_int32 n, nv; | |
| 1453 BOXA *boxae, *boxao, *boxad; | |
| 1454 | |
| 1455 if (!boxas) | |
| 1456 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL); | |
| 1457 if (useflag != L_USE_ALL_BOXES && useflag != L_USE_SAME_PARITY_BOXES) | |
| 1458 return (BOXA *)ERROR_PTR("invalid useflag", __func__, NULL); | |
| 1459 | |
| 1460 n = boxaGetCount(boxas); | |
| 1461 nv = boxaGetValidCount(boxas); | |
| 1462 if (n == nv) | |
| 1463 return boxaCopy(boxas, L_COPY); /* all valid */ | |
| 1464 if (debug) | |
| 1465 L_INFO("%d valid boxes, %d invalid boxes\n", __func__, nv, n - nv); | |
| 1466 if (useflag == L_USE_SAME_PARITY_BOXES && n < 3) { | |
| 1467 L_WARNING("n < 3; some invalid\n", __func__); | |
| 1468 return boxaCopy(boxas, L_COPY); | |
| 1469 } | |
| 1470 | |
| 1471 if (useflag == L_USE_ALL_BOXES) { | |
| 1472 boxad = boxaCopy(boxas, L_COPY); | |
| 1473 boxaFillAll(boxad); | |
| 1474 } else { | |
| 1475 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); | |
| 1476 boxaFillAll(boxae); | |
| 1477 boxaFillAll(boxao); | |
| 1478 boxad = boxaMergeEvenOdd(boxae, boxao, 0); | |
| 1479 boxaDestroy(&boxae); | |
| 1480 boxaDestroy(&boxao); | |
| 1481 } | |
| 1482 | |
| 1483 nv = boxaGetValidCount(boxad); | |
| 1484 if (n != nv) | |
| 1485 L_WARNING("there are still %d invalid boxes\n", __func__, n - nv); | |
| 1486 | |
| 1487 return boxad; | |
| 1488 } | |
| 1489 | |
| 1490 | |
| 1491 /*! | |
| 1492 * \brief boxaFillAll() | |
| 1493 * | |
| 1494 * \param[in] boxa | |
| 1495 * \return 0 if OK, 1 on error | |
| 1496 * | |
| 1497 * <pre> | |
| 1498 * Notes: | |
| 1499 * (1) This static function replaces every invalid box with the | |
| 1500 * nearest valid box. If there are no valid boxes, it | |
| 1501 * issues a warning. | |
| 1502 * </pre> | |
| 1503 */ | |
| 1504 static l_int32 | |
| 1505 boxaFillAll(BOXA *boxa) | |
| 1506 { | |
| 1507 l_int32 n, nv, i, j, spandown, spanup; | |
| 1508 l_int32 *indic; | |
| 1509 BOX *box, *boxt; | |
| 1510 | |
| 1511 if (!boxa) | |
| 1512 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1513 n = boxaGetCount(boxa); | |
| 1514 nv = boxaGetValidCount(boxa); | |
| 1515 if (n == nv) return 0; | |
| 1516 if (nv == 0) { | |
| 1517 L_WARNING("no valid boxes out of %d boxes\n", __func__, n); | |
| 1518 return 0; | |
| 1519 } | |
| 1520 | |
| 1521 /* Make indicator array for valid boxes */ | |
| 1522 if ((indic = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL) | |
| 1523 return ERROR_INT("indic not made", __func__, 1); | |
| 1524 for (i = 0; i < n; i++) { | |
| 1525 box = boxaGetValidBox(boxa, i, L_CLONE); | |
| 1526 if (box) | |
| 1527 indic[i] = 1; | |
| 1528 boxDestroy(&box); | |
| 1529 } | |
| 1530 | |
| 1531 /* Replace invalid boxes with the nearest valid one */ | |
| 1532 for (i = 0; i < n; i++) { | |
| 1533 box = boxaGetValidBox(boxa, i, L_CLONE); | |
| 1534 if (!box) { | |
| 1535 spandown = spanup = 10000000; | |
| 1536 for (j = i - 1; j >= 0; j--) { | |
| 1537 if (indic[j] == 1) { | |
| 1538 spandown = i - j; | |
| 1539 break; | |
| 1540 } | |
| 1541 } | |
| 1542 for (j = i + 1; j < n; j++) { | |
| 1543 if (indic[j] == 1) { | |
| 1544 spanup = j - i; | |
| 1545 break; | |
| 1546 } | |
| 1547 } | |
| 1548 if (spandown < spanup) | |
| 1549 boxt = boxaGetBox(boxa, i - spandown, L_COPY); | |
| 1550 else | |
| 1551 boxt = boxaGetBox(boxa, i + spanup, L_COPY); | |
| 1552 boxaReplaceBox(boxa, i, boxt); | |
| 1553 } | |
| 1554 boxDestroy(&box); | |
| 1555 } | |
| 1556 | |
| 1557 LEPT_FREE(indic); | |
| 1558 return 0; | |
| 1559 } | |
| 1560 | |
| 1561 | |
| 1562 /*! | |
| 1563 * \brief boxaSizeVariation() | |
| 1564 * | |
| 1565 * \param[in] boxa at least 4 boxes | |
| 1566 * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT | |
| 1567 * \param[out] pdel_evenodd [optional] average absolute value of | |
| 1568 * (even - odd) size pairs | |
| 1569 * \param[out] prms_even [optional] rms deviation of even boxes | |
| 1570 * \param[out] prms_odd [optional] rms deviation of odd boxes | |
| 1571 * \param[out] prms_all [optional] rms deviation of all boxes | |
| 1572 * \return 0 if OK, 1 on error | |
| 1573 * | |
| 1574 * <pre> | |
| 1575 * Notes: | |
| 1576 * (1) This gives several measures of the smoothness of either the | |
| 1577 * width or height of a sequence of boxes. | |
| 1578 * See boxaMedianDimensions() for some other measures. | |
| 1579 * (2) Statistics can be found separately for even and odd boxes. | |
| 1580 * Additionally, the average pair-wise difference between | |
| 1581 * adjacent even and odd boxes can be returned. | |
| 1582 * (3) The use case is bounding boxes for scanned page images, | |
| 1583 * where ideally the sizes should have little variance. | |
| 1584 * </pre> | |
| 1585 */ | |
| 1586 l_ok | |
| 1587 boxaSizeVariation(BOXA *boxa, | |
| 1588 l_int32 type, | |
| 1589 l_float32 *pdel_evenodd, | |
| 1590 l_float32 *prms_even, | |
| 1591 l_float32 *prms_odd, | |
| 1592 l_float32 *prms_all) | |
| 1593 { | |
| 1594 l_int32 n, ne, no, nmin, vale, valo, i; | |
| 1595 l_float32 sum; | |
| 1596 BOXA *boxae, *boxao; | |
| 1597 NUMA *nae, *nao, *na_all; | |
| 1598 | |
| 1599 if (pdel_evenodd) *pdel_evenodd = 0.0; | |
| 1600 if (prms_even) *prms_even = 0.0; | |
| 1601 if (prms_odd) *prms_odd = 0.0; | |
| 1602 if (prms_all) *prms_all = 0.0; | |
| 1603 if (!boxa) | |
| 1604 return ERROR_INT("boxa not defined", __func__, 1); | |
| 1605 if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT) | |
| 1606 return ERROR_INT("invalid type", __func__, 1); | |
| 1607 if (!pdel_evenodd && !prms_even && !prms_odd && !prms_all) | |
| 1608 return ERROR_INT("nothing to do", __func__, 1); | |
| 1609 n = boxaGetCount(boxa); | |
| 1610 if (n < 4) | |
| 1611 return ERROR_INT("too few boxes", __func__, 1); | |
| 1612 | |
| 1613 boxaSplitEvenOdd(boxa, 0, &boxae, &boxao); | |
| 1614 ne = boxaGetCount(boxae); | |
| 1615 no = boxaGetCount(boxao); | |
| 1616 nmin = L_MIN(ne, no); | |
| 1617 if (nmin == 0) { | |
| 1618 boxaDestroy(&boxae); | |
| 1619 boxaDestroy(&boxao); | |
| 1620 return ERROR_INT("either no even or no odd boxes", __func__, 1); | |
| 1621 } | |
| 1622 | |
| 1623 if (type == L_SELECT_WIDTH) { | |
| 1624 boxaGetSizes(boxae, &nae, NULL); | |
| 1625 boxaGetSizes(boxao, &nao, NULL); | |
| 1626 boxaGetSizes(boxa, &na_all, NULL); | |
| 1627 } else { /* L_SELECT_HEIGHT) */ | |
| 1628 boxaGetSizes(boxae, NULL, &nae); | |
| 1629 boxaGetSizes(boxao, NULL, &nao); | |
| 1630 boxaGetSizes(boxa, NULL, &na_all); | |
| 1631 } | |
| 1632 | |
| 1633 if (pdel_evenodd) { | |
| 1634 sum = 0.0; | |
| 1635 for (i = 0; i < nmin; i++) { | |
| 1636 numaGetIValue(nae, i, &vale); | |
| 1637 numaGetIValue(nao, i, &valo); | |
| 1638 sum += L_ABS(vale - valo); | |
| 1639 } | |
| 1640 *pdel_evenodd = sum / nmin; | |
| 1641 } | |
| 1642 if (prms_even) | |
| 1643 numaSimpleStats(nae, 0, -1, NULL, NULL, prms_even); | |
| 1644 if (prms_odd) | |
| 1645 numaSimpleStats(nao, 0, -1, NULL, NULL, prms_odd); | |
| 1646 if (prms_all) | |
| 1647 numaSimpleStats(na_all, 0, -1, NULL, NULL, prms_all); | |
| 1648 | |
| 1649 boxaDestroy(&boxae); | |
| 1650 boxaDestroy(&boxao); | |
| 1651 numaDestroy(&nae); | |
| 1652 numaDestroy(&nao); | |
| 1653 numaDestroy(&na_all); | |
| 1654 return 0; | |
| 1655 } | |
| 1656 | |
| 1657 | |
| 1658 /*! | |
| 1659 * \brief boxaMedianDimensions() | |
| 1660 * | |
| 1661 * \param[in] boxas containing at least 3 valid boxes in even and odd | |
| 1662 * \param[out] pmedw [optional] median width of all boxes | |
| 1663 * \param[out] pmedh [optional] median height of all boxes | |
| 1664 * \param[out] pmedwe [optional] median width of even boxes | |
| 1665 * \param[out] pmedwo [optional] median width of odd boxes | |
| 1666 * \param[out] pmedhe [optional] median height of even boxes | |
| 1667 * \param[out] pmedho [optional] median height of odd boxes | |
| 1668 * \param[out] pnadelw [optional] width diff of each box from median | |
| 1669 * \param[out] pnadelh [optional] height diff of each box from median | |
| 1670 * \return 0 if OK, 1 on error | |
| 1671 * | |
| 1672 * <pre> | |
| 1673 * Notes: | |
| 1674 * (1) This provides information that (1) allows identification of | |
| 1675 * boxes that have unusual (outlier) width or height, and (2) can | |
| 1676 * be used to regularize the sizes of the outlier boxes, assuming | |
| 1677 * that the boxes satisfy a fairly regular sequence and should | |
| 1678 * mostly have the same width and height. | |
| 1679 * (2) This finds the median width and height, as well as separate | |
| 1680 * median widths and heights of even and odd boxes. It also | |
| 1681 * generates arrays that give the difference in width and height | |
| 1682 * of each box from the median, which can be used to correct | |
| 1683 * individual boxes. | |
| 1684 * (3) All return values are optional. | |
| 1685 * </pre> | |
| 1686 */ | |
| 1687 l_ok | |
| 1688 boxaMedianDimensions(BOXA *boxas, | |
| 1689 l_int32 *pmedw, | |
| 1690 l_int32 *pmedh, | |
| 1691 l_int32 *pmedwe, | |
| 1692 l_int32 *pmedwo, | |
| 1693 l_int32 *pmedhe, | |
| 1694 l_int32 *pmedho, | |
| 1695 NUMA **pnadelw, | |
| 1696 NUMA **pnadelh) | |
| 1697 { | |
| 1698 l_int32 i, n, bw, bh, medw, medh, medwe, medwo, medhe, medho; | |
| 1699 BOXA *boxae, *boxao; | |
| 1700 NUMA *nadelw, *nadelh; | |
| 1701 | |
| 1702 if (pmedw) *pmedw = 0; | |
| 1703 if (pmedh) *pmedh = 0; | |
| 1704 if (pmedwe) *pmedwe= 0; | |
| 1705 if (pmedwo) *pmedwo= 0; | |
| 1706 if (pmedhe) *pmedhe= 0; | |
| 1707 if (pmedho) *pmedho= 0; | |
| 1708 if (pnadelw) *pnadelw = NULL; | |
| 1709 if (pnadelh) *pnadelh = NULL; | |
| 1710 if (!boxas) | |
| 1711 return ERROR_INT("boxas not defined", __func__, 1); | |
| 1712 if (boxaGetValidCount(boxas) < 6) | |
| 1713 return ERROR_INT("need at least 6 valid boxes", __func__, 1); | |
| 1714 | |
| 1715 /* Require at least 3 valid boxes of both types */ | |
| 1716 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao); | |
| 1717 if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) { | |
| 1718 boxaDestroy(&boxae); | |
| 1719 boxaDestroy(&boxao); | |
| 1720 return ERROR_INT("don't have 3+ valid boxes of each type", __func__, 1); | |
| 1721 } | |
| 1722 | |
| 1723 /* Get the relevant median widths and heights */ | |
| 1724 boxaGetMedianVals(boxas, NULL, NULL, NULL, NULL, &medw, &medh); | |
| 1725 boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, &medwe, &medhe); | |
| 1726 boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, &medwo, &medho); | |
| 1727 if (pmedw) *pmedw = medw; | |
| 1728 if (pmedh) *pmedh = medh; | |
| 1729 if (pmedwe) *pmedwe = medwe; | |
| 1730 if (pmedwo) *pmedwo = medwo; | |
| 1731 if (pmedhe) *pmedhe = medhe; | |
| 1732 if (pmedho) *pmedho = medho; | |
| 1733 | |
| 1734 /* Find the variation from median dimension for each box */ | |
| 1735 n = boxaGetCount(boxas); | |
| 1736 nadelw = numaCreate(n); | |
| 1737 nadelh = numaCreate(n); | |
| 1738 for (i = 0; i < n; i++) { | |
| 1739 boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw, &bh); | |
| 1740 if (bw == 0 || bh == 0) { /* invalid box */ | |
| 1741 numaAddNumber(nadelw, 0); | |
| 1742 numaAddNumber(nadelh, 0); | |
| 1743 } else { | |
| 1744 numaAddNumber(nadelw, bw - medw); | |
| 1745 numaAddNumber(nadelh, bh - medh); | |
| 1746 } | |
| 1747 } | |
| 1748 if (pnadelw) | |
| 1749 *pnadelw = nadelw; | |
| 1750 else | |
| 1751 numaDestroy(&nadelw); | |
| 1752 if (pnadelh) | |
| 1753 *pnadelh = nadelh; | |
| 1754 else | |
| 1755 numaDestroy(&nadelh); | |
| 1756 | |
| 1757 boxaDestroy(&boxae); | |
| 1758 boxaDestroy(&boxao); | |
| 1759 return 0; | |
| 1760 } | |
| 1761 |
