comparison mupdf-source/thirdparty/leptonica/src/boxfunc5.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file boxfunc5.c
29 * <pre>
30 *
31 * Boxa sequence fitting
32 * BOXA *boxaSmoothSequenceMedian()
33 * BOXA *boxaWindowedMedian()
34 * BOXA *boxaModifyWithBoxa()
35 * BOXA *boxaReconcilePairWidth()
36 * l_int32 boxaSizeConsistency()
37 * BOXA *boxaReconcileAllByMedian()
38 * BOXA *boxaReconcileSidesByMedian()
39 * static void adjustSidePlotName() -- debug
40 * BOXA *boxaReconcileSizeByMedian()
41 * l_int32 boxaPlotSides() [for debugging]
42 * l_int32 boxaPlotSizes() [for debugging]
43 * BOXA *boxaFillSequence()
44 * static l_int32 boxaFillAll()
45 * l_int32 boxaSizeVariation()
46 * l_int32 boxaMedianDimensions()
47 * </pre>
48 */
49
50 #ifdef HAVE_CONFIG_H
51 #include <config_auto.h>
52 #endif /* HAVE_CONFIG_H */
53
54 #include <math.h>
55 #include "allheaders.h"
56
57 static l_int32 boxaFillAll(BOXA *boxa);
58 static void adjustSidePlotName(char *buf, size_t size, const char *preface,
59 l_int32 select);
60
61 /*---------------------------------------------------------------------*
62 * Boxa sequence fitting *
63 *---------------------------------------------------------------------*/
64 /*!
65 * \brief boxaSmoothSequenceMedian()
66 *
67 * \param[in] boxas source boxa
68 * \param[in] halfwin half-width of sliding window; used to find median
69 * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE,
70 * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF,
71 * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
72 * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF,
73 * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN,
74 * L_USE_CAPPED_MAX
75 * \param[in] extrapixels pixels added on all sides (or subtracted
76 * if %extrapixels < 0) when using
77 * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
78 * \param[in] debug 1 for debug output
79 * \return boxad fitted boxa, or NULL on error
80 *
81 * <pre>
82 * Notes:
83 * (1) The target width of the sliding window is 2 * %halfwin + 1.
84 * If necessary, this will be reduced by boxaWindowedMedian().
85 * (2) This returns a modified version of %boxas by constructing
86 * for each input box a box that has been smoothed with windowed
87 * median filtering. The filtering is done to each of the
88 * box sides independently, and it is computed separately for
89 * sequences of even and odd boxes. The output %boxad is
90 * constructed from the input boxa and the filtered boxa,
91 * depending on %subflag. See boxaModifyWithBoxa() for
92 * details on the use of %subflag, %maxdiff and %extrapixels.
93 * (3) This is useful for removing noise separately in the even
94 * and odd sets, where the box edge locations can have
95 * discontinuities but otherwise vary roughly linearly within
96 * intervals of size %halfwin or larger.
97 * (4) If you don't need to handle even and odd sets separately,
98 * just do this:
99 * boxam = boxaWindowedMedian(boxas, halfwin, debug);
100 * boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff,
101 * extrapixels);
102 * boxaDestroy(&boxam);
103 * </pre>
104 */
105 BOXA *
106 boxaSmoothSequenceMedian(BOXA *boxas,
107 l_int32 halfwin,
108 l_int32 subflag,
109 l_int32 maxdiff,
110 l_int32 extrapixels,
111 l_int32 debug)
112 {
113 l_int32 n;
114 BOXA *boxae, *boxao, *boxamede, *boxamedo, *boxame, *boxamo, *boxad;
115 PIX *pix1;
116
117 if (!boxas)
118 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
119 if (halfwin <= 0) {
120 L_WARNING("halfwin must be > 0; returning copy\n", __func__);
121 return boxaCopy(boxas, L_COPY);
122 }
123 if (maxdiff < 0) {
124 L_WARNING("maxdiff must be >= 0; returning copy\n", __func__);
125 return boxaCopy(boxas, L_COPY);
126 }
127 if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
128 subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
129 subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
130 L_WARNING("invalid subflag; returning copy\n", __func__);
131 return boxaCopy(boxas, L_COPY);
132 }
133 if ((n = boxaGetCount(boxas)) < 6) {
134 L_WARNING("need at least 6 boxes; returning copy\n", __func__);
135 return boxaCopy(boxas, L_COPY);
136 }
137
138 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
139 if (debug) {
140 lept_mkdir("lept/smooth");
141 boxaWriteDebug("/tmp/lept/smooth/boxae.ba", boxae);
142 boxaWriteDebug("/tmp/lept/smooth/boxao.ba", boxao);
143 }
144
145 boxamede = boxaWindowedMedian(boxae, halfwin, debug);
146 boxamedo = boxaWindowedMedian(boxao, halfwin, debug);
147 if (debug) {
148 boxaWriteDebug("/tmp/lept/smooth/boxamede.ba", boxamede);
149 boxaWriteDebug("/tmp/lept/smooth/boxamedo.ba", boxamedo);
150 }
151
152 boxame = boxaModifyWithBoxa(boxae, boxamede, subflag, maxdiff, extrapixels);
153 boxamo = boxaModifyWithBoxa(boxao, boxamedo, subflag, maxdiff, extrapixels);
154 if (debug) {
155 boxaWriteDebug("/tmp/lept/smooth/boxame.ba", boxame);
156 boxaWriteDebug("/tmp/lept/smooth/boxamo.ba", boxamo);
157 }
158
159 boxad = boxaMergeEvenOdd(boxame, boxamo, 0);
160 if (debug) {
161 boxaPlotSides(boxas, NULL, NULL, NULL, NULL, NULL, &pix1);
162 pixWrite("/tmp/lept/smooth/plotsides1.png", pix1, IFF_PNG);
163 pixDestroy(&pix1);
164 boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
165 pixWrite("/tmp/lept/smooth/plotsides2.png", pix1, IFF_PNG);
166 pixDestroy(&pix1);
167 boxaPlotSizes(boxas, NULL, NULL, NULL, &pix1);
168 pixWrite("/tmp/lept/smooth/plotsizes1.png", pix1, IFF_PNG);
169 pixDestroy(&pix1);
170 boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
171 pixWrite("/tmp/lept/smooth/plotsizes2.png", pix1, IFF_PNG);
172 pixDestroy(&pix1);
173 }
174
175 boxaDestroy(&boxae);
176 boxaDestroy(&boxao);
177 boxaDestroy(&boxamede);
178 boxaDestroy(&boxamedo);
179 boxaDestroy(&boxame);
180 boxaDestroy(&boxamo);
181 return boxad;
182 }
183
184
185 /*!
186 * \brief boxaWindowedMedian()
187 *
188 * \param[in] boxas source boxa
189 * \param[in] halfwin half width of window over which the median is found
190 * \param[in] debug 1 for debug output
191 * \return boxad smoothed boxa, or NULL on error
192 *
193 * <pre>
194 * Notes:
195 * (1) This finds a set of boxes (boxad) where each edge of each box is
196 * a windowed median smoothed value to the edges of the
197 * input set of boxes (boxas).
198 * (2) Invalid input boxes are filled from nearby ones.
199 * (3) The returned boxad can then be used in boxaModifyWithBoxa()
200 * to selectively change the boxes in the source boxa.
201 * </pre>
202 */
203 BOXA *
204 boxaWindowedMedian(BOXA *boxas,
205 l_int32 halfwin,
206 l_int32 debug)
207 {
208 l_int32 n, i, left, top, right, bot;
209 BOX *box;
210 BOXA *boxaf, *boxad;
211 NUMA *nal, *nat, *nar, *nab, *naml, *namt, *namr, *namb;
212 PIX *pix1;
213
214 if (!boxas)
215 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
216 if ((n = boxaGetCount(boxas)) < 3) {
217 L_WARNING("less than 3 boxes; returning a copy\n", __func__);
218 return boxaCopy(boxas, L_COPY);
219 }
220 if (halfwin <= 0) {
221 L_WARNING("halfwin must be > 0; returning copy\n", __func__);
222 return boxaCopy(boxas, L_COPY);
223 }
224
225 /* Fill invalid boxes in the input sequence */
226 if ((boxaf = boxaFillSequence(boxas, L_USE_ALL_BOXES, debug)) == NULL)
227 return (BOXA *)ERROR_PTR("filled boxa not made", __func__, NULL);
228
229 /* Get the windowed median output from each of the sides */
230 boxaExtractAsNuma(boxaf, &nal, &nat, &nar, &nab, NULL, NULL, 0);
231 naml = numaWindowedMedian(nal, halfwin);
232 namt = numaWindowedMedian(nat, halfwin);
233 namr = numaWindowedMedian(nar, halfwin);
234 namb = numaWindowedMedian(nab, halfwin);
235
236 n = boxaGetCount(boxaf);
237 boxad = boxaCreate(n);
238 for (i = 0; i < n; i++) {
239 numaGetIValue(naml, i, &left);
240 numaGetIValue(namt, i, &top);
241 numaGetIValue(namr, i, &right);
242 numaGetIValue(namb, i, &bot);
243 box = boxCreate(left, top, right - left + 1, bot - top + 1);
244 boxaAddBox(boxad, box, L_INSERT);
245 }
246
247 if (debug) {
248 lept_mkdir("lept/windowed");
249 boxaPlotSides(boxaf, NULL, NULL, NULL, NULL, NULL, &pix1);
250 pixWrite("/tmp/lept/windowed/plotsides1.png", pix1, IFF_PNG);
251 pixDestroy(&pix1);
252 boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
253 pixWrite("/tmp/lept/windowed/plotsides2.png", pix1, IFF_PNG);
254 pixDestroy(&pix1);
255 boxaPlotSizes(boxaf, NULL, NULL, NULL, &pix1);
256 pixWrite("/tmp/lept/windowed/plotsizes1.png", pix1, IFF_PNG);
257 pixDestroy(&pix1);
258 boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
259 pixWrite("/tmp/lept/windowed/plotsizes2.png", pix1, IFF_PNG);
260 pixDestroy(&pix1);
261 }
262
263 boxaDestroy(&boxaf);
264 numaDestroy(&nal);
265 numaDestroy(&nat);
266 numaDestroy(&nar);
267 numaDestroy(&nab);
268 numaDestroy(&naml);
269 numaDestroy(&namt);
270 numaDestroy(&namr);
271 numaDestroy(&namb);
272 return boxad;
273 }
274
275
276 /*!
277 * \brief boxaModifyWithBoxa()
278 *
279 * \param[in] boxas
280 * \param[in] boxam boxa with boxes used to modify those in boxas
281 * \param[in] subflag L_USE_MINSIZE, L_USE_MAXSIZE,
282 * L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF,
283 * L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
284 * \param[in] maxdiff parameter used with L_SUB_ON_LOC_DIFF,
285 * L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN,
286 * L_USE_CAPPED_MAX
287 * \param[in] extrapixels pixels added on all sides (or subtracted
288 * if %extrapixels < 0) when using
289 * L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
290 * \return boxad result after adjusting boxes in boxas, or NULL on error.
291 *
292 * <pre>
293 * Notes:
294 * (1) This takes two input boxa (boxas, boxam) and constructs boxad,
295 * where each box in boxad is generated from the corresponding
296 * boxes in boxas and boxam. The rule for constructing each
297 * output box depends on %subflag and %maxdiff. Let boxs be
298 * a box from %boxas and boxm be a box from %boxam.
299 * * If %subflag == L_USE_MINSIZE: the output box is the intersection
300 * of the two input boxes.
301 * * If %subflag == L_USE_MAXSIZE: the output box is the union of the
302 * two input boxes; i.e., the minimum bounding rectangle for the
303 * two input boxes.
304 * * If %subflag == L_SUB_ON_LOC_DIFF: each side of the output box
305 * is found separately from the corresponding side of boxs and boxm.
306 * Use the boxm side, expanded by %extrapixels, if greater than
307 * %maxdiff pixels from the boxs side.
308 * * If %subflag == L_SUB_ON_SIZE_DIFF: the sides of the output box
309 * are determined in pairs from the width and height of boxs
310 * and boxm. If the boxm width differs by more than %maxdiff
311 * pixels from boxs, use the boxm left and right sides,
312 * expanded by %extrapixels. Ditto for the height difference.
313 * For the last two flags, each side of the output box is found
314 * separately from the corresponding side of boxs and boxm,
315 * according to these rules, where "smaller"("bigger") mean in a
316 * direction that decreases(increases) the size of the output box:
317 * * If %subflag == L_USE_CAPPED_MIN: use the Min of boxm
318 * with the Max of (boxs, boxm +- %maxdiff), where the sign
319 * is adjusted to make the box smaller (e.g., use "+" on left side).
320 * * If %subflag == L_USE_CAPPED_MAX: use the Max of boxm
321 * with the Min of (boxs, boxm +- %maxdiff), where the sign
322 * is adjusted to make the box bigger (e.g., use "-" on left side).
323 * Use of the last 2 flags is further explained in (3) and (4).
324 * (2) boxas and boxam must be the same size. If boxam == NULL,
325 * this returns a copy of boxas with a warning.
326 * (3) If %subflag == L_SUB_ON_LOC_DIFF, use boxm for each side
327 * where the corresponding sides differ by more than %maxdiff.
328 * Two extreme cases:
329 * (a) set %maxdiff == 0 to use only values from boxam in boxad.
330 * (b) set %maxdiff == 10000 to ignore all values from boxam;
331 * then boxad will be the same as boxas.
332 * (4) If %subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller;
333 * use boxs if boxs is bigger than boxm by an amount up to %maxdiff;
334 * and use boxm +- %maxdiff (the 'capped' value) if boxs is
335 * bigger than boxm by an amount larger than %maxdiff.
336 * Similarly, with interchange of Min/Max and sign of %maxdiff,
337 * for %subflag == L_USE_CAPPED_MIN.
338 * (5) If either of corresponding boxes in boxas and boxam is invalid,
339 * an invalid box is copied to the result.
340 * (6) Typical input for boxam may be the output of boxaLinearFit().
341 * where outliers have been removed and each side is LS fit to a line.
342 * (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
343 * this uses two boxes and does not specify target dimensions.
344 * </pre>
345 */
346 BOXA *
347 boxaModifyWithBoxa(BOXA *boxas,
348 BOXA *boxam,
349 l_int32 subflag,
350 l_int32 maxdiff,
351 l_int32 extrapixels)
352 {
353 l_int32 n, i, ls, ts, rs, bs, ws, hs, lm, tm, rm, bm, wm, hm, ld, td, rd, bd;
354 BOX *boxs, *boxm, *boxd, *boxempty;
355 BOXA *boxad;
356
357 if (!boxas)
358 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
359 if (!boxam) {
360 L_WARNING("boxam not defined; returning copy", __func__);
361 return boxaCopy(boxas, L_COPY);
362 }
363 if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
364 subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
365 subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
366 L_WARNING("invalid subflag; returning copy", __func__);
367 return boxaCopy(boxas, L_COPY);
368 }
369 n = boxaGetCount(boxas);
370 if (n != boxaGetCount(boxam)) {
371 L_WARNING("boxas and boxam sizes differ; returning copy", __func__);
372 return boxaCopy(boxas, L_COPY);
373 }
374
375 boxad = boxaCreate(n);
376 boxempty = boxCreate(0, 0, 0, 0); /* placeholders */
377 for (i = 0; i < n; i++) {
378 boxs = boxaGetValidBox(boxas, i, L_CLONE);
379 boxm = boxaGetValidBox(boxam, i, L_CLONE);
380 if (!boxs || !boxm) {
381 boxaAddBox(boxad, boxempty, L_COPY);
382 } else {
383 boxGetGeometry(boxs, &ls, &ts, &ws, &hs);
384 boxGetGeometry(boxm, &lm, &tm, &wm, &hm);
385 rs = ls + ws - 1;
386 bs = ts + hs - 1;
387 rm = lm + wm - 1;
388 bm = tm + hm - 1;
389 if (subflag == L_USE_MINSIZE) {
390 ld = L_MAX(ls, lm);
391 rd = L_MIN(rs, rm);
392 td = L_MAX(ts, tm);
393 bd = L_MIN(bs, bm);
394 } else if (subflag == L_USE_MAXSIZE) {
395 ld = L_MIN(ls, lm);
396 rd = L_MAX(rs, rm);
397 td = L_MIN(ts, tm);
398 bd = L_MAX(bs, bm);
399 } else if (subflag == L_SUB_ON_LOC_DIFF) {
400 ld = (L_ABS(lm - ls) <= maxdiff) ? ls : lm - extrapixels;
401 td = (L_ABS(tm - ts) <= maxdiff) ? ts : tm - extrapixels;
402 rd = (L_ABS(rm - rs) <= maxdiff) ? rs : rm + extrapixels;
403 bd = (L_ABS(bm - bs) <= maxdiff) ? bs : bm + extrapixels;
404 } else if (subflag == L_SUB_ON_SIZE_DIFF) {
405 ld = (L_ABS(wm - ws) <= maxdiff) ? ls : lm - extrapixels;
406 td = (L_ABS(hm - hs) <= maxdiff) ? ts : tm - extrapixels;
407 rd = (L_ABS(wm - ws) <= maxdiff) ? rs : rm + extrapixels;
408 bd = (L_ABS(hm - hs) <= maxdiff) ? bs : bm + extrapixels;
409 } else if (subflag == L_USE_CAPPED_MIN) {
410 ld = L_MAX(lm, L_MIN(ls, lm + maxdiff));
411 td = L_MAX(tm, L_MIN(ts, tm + maxdiff));
412 rd = L_MIN(rm, L_MAX(rs, rm - maxdiff));
413 bd = L_MIN(bm, L_MAX(bs, bm - maxdiff));
414 } else { /* subflag == L_USE_CAPPED_MAX */
415 ld = L_MIN(lm, L_MAX(ls, lm - maxdiff));
416 td = L_MIN(tm, L_MAX(ts, tm - maxdiff));
417 rd = L_MAX(rm, L_MIN(rs, rm + maxdiff));
418 bd = L_MAX(bm, L_MIN(bs, bm + maxdiff));
419 }
420 boxd = boxCreate(ld, td, rd - ld + 1, bd - td + 1);
421 boxaAddBox(boxad, boxd, L_INSERT);
422 }
423 boxDestroy(&boxs);
424 boxDestroy(&boxm);
425 }
426 boxDestroy(&boxempty);
427
428 return boxad;
429 }
430
431
432 /*!
433 * \brief boxaReconcilePairWidth()
434 *
435 * \param[in] boxas
436 * \param[in] delw threshold on adjacent width difference
437 * \param[in] op L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX
438 * \param[in] factor > 0.0, typically near 1.0
439 * \param[in] na [optional] indicator array allowing change
440 * \return boxad adjusted, or a copy of boxas on error
441 *
442 * <pre>
443 * Notes:
444 * (1) This reconciles differences in the width of adjacent boxes,
445 * by moving one side of one of the boxes in each pair.
446 * If the widths in the pair differ by more than some
447 * threshold, move either the left side for even boxes or
448 * the right side for odd boxes, depending on if we're choosing
449 * the min or max. If choosing min, the width of the max is
450 * set to factor * (width of min). If choosing max, the width
451 * of the min is set to factor * (width of max).
452 * (2) If %na exists, it is an indicator array corresponding to the
453 * boxes in %boxas. If %na != NULL, only boxes with an
454 * indicator value of 1 are allowed to adjust; otherwise,
455 * all boxes can adjust.
456 * (3) Typical input might be the output of boxaSmoothSequenceMedian(),
457 * where even and odd boxa have been independently regulated.
458 * </pre>
459 */
460 BOXA *
461 boxaReconcilePairWidth(BOXA *boxas,
462 l_int32 delw,
463 l_int32 op,
464 l_float32 factor,
465 NUMA *na)
466 {
467 l_int32 i, ne, no, nmin, xe, we, xo, wo, inde, indo, x, w;
468 BOX *boxe, *boxo;
469 BOXA *boxae, *boxao, *boxad;
470
471 if (!boxas)
472 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
473 if (factor <= 0.0) {
474 L_WARNING("invalid factor; setting to 1.0\n", __func__);
475 factor = 1.0;
476 }
477
478 /* Taking the boxes in pairs, if the difference in width reaches
479 * the threshold %delw, adjust the left or right side of one
480 * of the pair. */
481 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
482 ne = boxaGetCount(boxae);
483 no = boxaGetCount(boxao);
484 nmin = L_MIN(ne, no);
485 for (i = 0; i < nmin; i++) {
486 /* Set indicator values */
487 if (na) {
488 numaGetIValue(na, 2 * i, &inde);
489 numaGetIValue(na, 2 * i + 1, &indo);
490 } else {
491 inde = indo = 1;
492 }
493 if (inde == 0 && indo == 0) continue;
494
495 boxe = boxaGetBox(boxae, i, L_CLONE);
496 boxo = boxaGetBox(boxao, i, L_CLONE);
497 boxGetGeometry(boxe, &xe, NULL, &we, NULL);
498 boxGetGeometry(boxo, &xo, NULL, &wo, NULL);
499 if (we == 0 || wo == 0) { /* if either is invalid; skip */
500 boxDestroy(&boxe);
501 boxDestroy(&boxo);
502 continue;
503 } else if (L_ABS(we - wo) > delw) {
504 if (op == L_ADJUST_CHOOSE_MIN) {
505 if (we > wo && inde == 1) {
506 /* move left side of even to the right */
507 w = factor * wo;
508 x = xe + (we - w);
509 boxSetGeometry(boxe, x, -1, w, -1);
510 } else if (we < wo && indo == 1) {
511 /* move right side of odd to the left */
512 w = factor * we;
513 boxSetGeometry(boxo, -1, -1, w, -1);
514 }
515 } else { /* maximize width */
516 if (we < wo && inde == 1) {
517 /* move left side of even to the left */
518 w = factor * wo;
519 x = L_MAX(0, xe + (we - w));
520 w = we + (xe - x); /* covers both cases for the max */
521 boxSetGeometry(boxe, x, -1, w, -1);
522 } else if (we > wo && indo == 1) {
523 /* move right side of odd to the right */
524 w = factor * we;
525 boxSetGeometry(boxo, -1, -1, w, -1);
526 }
527 }
528 }
529 boxDestroy(&boxe);
530 boxDestroy(&boxo);
531 }
532
533 boxad = boxaMergeEvenOdd(boxae, boxao, 0);
534 boxaDestroy(&boxae);
535 boxaDestroy(&boxao);
536 return boxad;
537 }
538
539
540 /*!
541 * \brief boxaSizeConsistency()
542 *
543 * \param[in] boxas of size >= 10
544 * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT
545 * \param[in] threshp threshold for pairwise fractional variation
546 * \param[in] threshm threshold for fractional variation from median
547 * \param[out] pfvarp [optional] average fractional pairwise variation
548 * \param[out] pfvarm [optional] average fractional median variation
549 * \param[out] psame decision for uniformity of page size (1, 0, -1)
550 *
551 * <pre>
552 * Notes:
553 * (1) This evaluates a boxa for particular types of dimensional
554 * variation. Select either width or height variation. Then
555 * it returns two numbers: one is based on pairwise (even/odd)
556 * variation; the other is based on the average variation
557 * from the boxa median.
558 * (2) For the pairwise variation, get the fraction of the absolute
559 * difference in dimension of each pair of boxes, and take
560 * the average value. The median variation is simply the
561 * the average of the fractional deviation from the median
562 * of all the boxes.
563 * (3) Use 0 for default values of %threshp and %threshm. They are
564 * threshp: 0.02
565 * threshm: 0.015
566 * (4) The intended application is that the boxes are a sequence of
567 * page regions in a book scan, and we calculate two numbers
568 * that can give an indication if the pages are approximately
569 * the same size. The pairwise variation should be small if
570 * the boxes are correctly calculated. If there are a
571 * significant number of random or systematic outliers, the
572 * pairwise variation will be large, and no decision will be made
573 * (i.e., return same == -1). Here are the possible outcomes:
574 * Pairwise Var Median Var Decision
575 * ------------ ---------- --------
576 * small small same size (1)
577 * small large different size (0)
578 * large small/large unknown (-1)
579 * </pre>
580 */
581 l_ok
582 boxaSizeConsistency(BOXA *boxas,
583 l_int32 type,
584 l_float32 threshp,
585 l_float32 threshm,
586 l_float32 *pfvarp,
587 l_float32 *pfvarm,
588 l_int32 *psame)
589 {
590 l_int32 i, n, bw1, bh1, bw2, bh2, npairs;
591 l_float32 ave, fdiff, sumdiff, med, fvarp, fvarm;
592 NUMA *na1;
593
594 if (pfvarp) *pfvarp = 0.0;
595 if (pfvarm) *pfvarm = 0.0;
596 if (!psame)
597 return ERROR_INT("&same not defined", __func__, 1);
598 *psame = -1;
599 if (!boxas)
600 return ERROR_INT("boxas not defined", __func__, 1);
601 if (boxaGetValidCount(boxas) < 6)
602 return ERROR_INT("need a least 6 valid boxes", __func__, 1);
603 if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT)
604 return ERROR_INT("invalid type", __func__, 1);
605 if (threshp < 0.0 || threshp >= 0.5)
606 return ERROR_INT("invalid threshp", __func__, 1);
607 if (threshm < 0.0 || threshm >= 0.5)
608 return ERROR_INT("invalid threshm", __func__, 1);
609 if (threshp == 0.0) threshp = 0.02f;
610 if (threshm == 0.0) threshm = 0.015f;
611
612 /* Evaluate pairwise variation */
613 n = boxaGetCount(boxas);
614 na1 = numaCreate(0);
615 for (i = 0, npairs = 0, sumdiff = 0; i < n - 1; i += 2) {
616 boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1);
617 boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2);
618 if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0)
619 continue;
620 npairs++;
621 if (type == L_CHECK_WIDTH) {
622 ave = (bw1 + bw2) / 2.0;
623 fdiff = L_ABS(bw1 - bw2) / ave;
624 numaAddNumber(na1, bw1);
625 numaAddNumber(na1, bw2);
626 } else { /* type == L_CHECK_HEIGHT) */
627 ave = (bh1 + bh2) / 2.0;
628 fdiff = L_ABS(bh1 - bh2) / ave;
629 numaAddNumber(na1, bh1);
630 numaAddNumber(na1, bh2);
631 }
632 sumdiff += fdiff;
633 }
634 fvarp = sumdiff / npairs;
635 if (pfvarp) *pfvarp = fvarp;
636
637 /* Evaluate the average abs fractional deviation from the median */
638 numaGetMedian(na1, &med);
639 if (med == 0.0) {
640 L_WARNING("median value is 0\n", __func__);
641 } else {
642 numaGetMeanDevFromMedian(na1, med, &fvarm);
643 fvarm /= med;
644 if (pfvarm) *pfvarm = fvarm;
645 }
646 numaDestroy(&na1);
647
648 /* Make decision */
649 if (fvarp < threshp && fvarm < threshm)
650 *psame = 1;
651 else if (fvarp < threshp && fvarm > threshm)
652 *psame = 0;
653 else
654 *psame = -1; /* unknown */
655 return 0;
656 }
657
658
659 /*!
660 * \brief boxaReconcileAllByMedian()
661 *
662 * \param[in] boxas containing at least 6 valid boxes
663 * \param[in] select1 L_ADJUST_LEFT_AND_RIGHT or L_ADJUST_SKIP
664 * \param[in] select2 L_ADJUST_TOP_AND_BOT or L_ADJUST_SKIP
665 * \param[in] thresh threshold number of pixels to make adjustment
666 * \param[in] extra extra pixels to add beyond median value
667 * \param[in] pixadb use NULL to skip debug output
668 * \return boxad possibly adjusted from boxas; a copy of boxas on error
669 *
670 * <pre>
671 * Notes:
672 * (1) This uses boxaReconcileSidesByMedian() to reconcile
673 * the left-and-right and/or top-and-bottom sides of the
674 * even and odd boxes, separately.
675 * (2) See boxaReconcileSidesByMedian() for use of %thresh and %extra.
676 * (3) If all box sides are within %thresh of the median value,
677 * the returned box will be identical to %boxas.
678 * </pre>
679 */
680 BOXA *
681 boxaReconcileAllByMedian(BOXA *boxas,
682 l_int32 select1,
683 l_int32 select2,
684 l_int32 thresh,
685 l_int32 extra,
686 PIXA *pixadb)
687 {
688 l_int32 ncols;
689 BOXA *boxa1e, *boxa1o, *boxa2e, *boxa2o, *boxa3e, *boxa3o, *boxad;
690 PIX *pix1;
691
692 if (!boxas)
693 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
694 if (select1 != L_ADJUST_LEFT_AND_RIGHT && select1 != L_ADJUST_SKIP) {
695 L_WARNING("invalid select1; returning copy\n", __func__);
696 return boxaCopy(boxas, L_COPY);
697 }
698 if (select2 != L_ADJUST_TOP_AND_BOT && select2 != L_ADJUST_SKIP) {
699 L_WARNING("invalid select2; returning copy\n", __func__);
700 return boxaCopy(boxas, L_COPY);
701 }
702 if (thresh < 0) {
703 L_WARNING("thresh must be >= 0; returning copy\n", __func__);
704 return boxaCopy(boxas, L_COPY);
705 }
706 if (boxaGetValidCount(boxas) < 3) {
707 L_WARNING("need at least 3 valid boxes; returning copy\n", __func__);
708 return boxaCopy(boxas, L_COPY);
709 }
710
711 /* Adjust even and odd box sides separately */
712 boxaSplitEvenOdd(boxas, 0, &boxa1e, &boxa1o);
713 ncols = 1;
714 if (select1 == L_ADJUST_LEFT_AND_RIGHT) {
715 ncols += 2;
716 boxa2e = boxaReconcileSidesByMedian(boxa1e, select1, thresh,
717 extra, pixadb);
718 } else {
719 boxa2e = boxaCopy(boxa1e, L_COPY);
720 }
721 if (select2 == L_ADJUST_TOP_AND_BOT) {
722 ncols += 2;
723 boxa3e = boxaReconcileSidesByMedian(boxa2e, select2, thresh,
724 extra, pixadb);
725 } else {
726 boxa3e = boxaCopy(boxa2e, L_COPY);
727 }
728 if (select1 == L_ADJUST_LEFT_AND_RIGHT)
729 boxa2o = boxaReconcileSidesByMedian(boxa1o, select1, thresh,
730 extra, pixadb);
731 else
732 boxa2o = boxaCopy(boxa1o, L_COPY);
733 if (select2 == L_ADJUST_TOP_AND_BOT)
734 boxa3o = boxaReconcileSidesByMedian(boxa2o, select2, thresh,
735 extra, pixadb);
736 else
737 boxa3o = boxaCopy(boxa2o, L_COPY);
738 boxad = boxaMergeEvenOdd(boxa3e, boxa3o, 0);
739
740 /* This generates 2 sets of 3 or 5 plots in a row, depending
741 * on whether select1 and select2 are true (not skipping).
742 * The top row is for even boxes; the bottom row is for odd boxes. */
743 if (pixadb) {
744 lept_mkdir("lept/boxa");
745 pix1 = pixaDisplayTiledInColumns(pixadb, ncols, 1.0, 30, 2);
746 pixWrite("/tmp/lept/boxa/recon_sides.png", pix1, IFF_PNG);
747 pixDestroy(&pix1);
748 }
749
750 boxaDestroy(&boxa1e);
751 boxaDestroy(&boxa1o);
752 boxaDestroy(&boxa2e);
753 boxaDestroy(&boxa2o);
754 boxaDestroy(&boxa3e);
755 boxaDestroy(&boxa3o);
756 return boxad;
757 }
758
759
760 /*!
761 * \brief boxaReconcileSidesByMedian()
762 *
763 * \param[in] boxas containing at least 3 valid boxes
764 * \param[in] select L_ADJUST_LEFT, L_ADJUST_RIGHT, etc.
765 * \param[in] thresh threshold number of pixels to make adjustment
766 * \param[in] extra extra pixels to add beyond median value
767 * \param[in] pixadb use NULL to skip debug output
768 * \return boxad possibly adjusted from boxas; a copy of boxas on error
769 *
770 * <pre>
771 * Notes:
772 * (1) This modifies individual box sides if their location differs
773 * significantly (>= %thresh) from the median value.
774 * (2) %select specifies which sides are to be checked.
775 * (3) %thresh specifies the tolerance for different side locations.
776 * Any box side that differs from the median by this much will
777 * be set to the median value, plus the %extra amount.
778 * (4) If %extra is positive, the box dimensions are expanded.
779 * For example, for the left side, a positive %extra results in
780 * moving the left side farther to the left (i.e., in a negative
781 * direction).
782 * (5) If all box sides are within %thresh - 1 of the median value,
783 * the returned box will be identical to %boxas.
784 * (6) N.B. If you expect that even and odd box sides should be
785 * significantly different, this function must be called separately
786 * on the even and odd boxes in %boxas. Note also that the
787 * higher level function boxaReconcileAllByMedian() handles the
788 * even and odd box sides separately.
789 * </pre>
790 */
791 BOXA *
792 boxaReconcileSidesByMedian(BOXA *boxas,
793 l_int32 select,
794 l_int32 thresh,
795 l_int32 extra,
796 PIXA *pixadb)
797 {
798 char buf[128];
799 l_int32 i, n, diff;
800 l_int32 left, right, top, bot, medleft, medright, medtop, medbot;
801 BOX *box;
802 BOXA *boxa1, *boxad;
803 PIX *pix;
804
805 if (!boxas)
806 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
807 if (select != L_ADJUST_LEFT && select != L_ADJUST_RIGHT &&
808 select != L_ADJUST_TOP && select != L_ADJUST_BOT &&
809 select != L_ADJUST_LEFT_AND_RIGHT && select != L_ADJUST_TOP_AND_BOT) {
810 L_WARNING("invalid select; returning copy\n", __func__);
811 return boxaCopy(boxas, L_COPY);
812 }
813 if (thresh < 0) {
814 L_WARNING("thresh must be >= 0; returning copy\n", __func__);
815 return boxaCopy(boxas, L_COPY);
816 }
817 if (boxaGetValidCount(boxas) < 3) {
818 L_WARNING("need at least 3 valid boxes; returning copy\n", __func__);
819 return boxaCopy(boxas, L_COPY);
820 }
821
822 if (select == L_ADJUST_LEFT_AND_RIGHT) {
823 boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_LEFT, thresh, extra,
824 pixadb);
825 boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_RIGHT, thresh, extra,
826 pixadb);
827 boxaDestroy(&boxa1);
828 return boxad;
829 }
830 if (select == L_ADJUST_TOP_AND_BOT) {
831 boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_TOP, thresh, extra,
832 pixadb);
833 boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_BOT, thresh, extra,
834 pixadb);
835 boxaDestroy(&boxa1);
836 return boxad;
837 }
838
839 if (pixadb) {
840 l_int32 ndb = pixaGetCount(pixadb);
841 if (ndb == 0 || ndb == 5) { /* first of even and odd box sets */
842 adjustSidePlotName(buf, sizeof(buf), "init", select);
843 boxaPlotSides(boxas, buf, NULL, NULL, NULL, NULL, &pix);
844 pixaAddPix(pixadb, pix, L_INSERT);
845 }
846 }
847
848 n = boxaGetCount(boxas);
849 boxad = boxaCreate(n);
850 if (select == L_ADJUST_LEFT) {
851 boxaGetMedianVals(boxas, &medleft, NULL, NULL, NULL, NULL, NULL);
852 for (i = 0; i < n; i++) {
853 box = boxaGetBox(boxas, i, L_COPY);
854 boxGetSideLocations(box, &left, NULL, NULL, NULL);
855 diff = medleft - left;
856 if (L_ABS(diff) >= thresh)
857 boxAdjustSides(box, box, diff - extra, 0, 0, 0);
858 boxaAddBox(boxad, box, L_INSERT);
859 }
860 } else if (select == L_ADJUST_RIGHT) {
861 boxaGetMedianVals(boxas, NULL, NULL, &medright, NULL, NULL, NULL);
862 for (i = 0; i < n; i++) {
863 box = boxaGetBox(boxas, i, L_COPY);
864 boxGetSideLocations(box, NULL, &right, NULL, NULL);
865 diff = medright - right;
866 if (L_ABS(diff) >= thresh)
867 boxAdjustSides(box, box, 0, diff + extra, 0, 0);
868 boxaAddBox(boxad, box, L_INSERT);
869 }
870 } else if (select == L_ADJUST_TOP) {
871 boxaGetMedianVals(boxas, NULL, &medtop, NULL, NULL, NULL, NULL);
872 for (i = 0; i < n; i++) {
873 box = boxaGetBox(boxas, i, L_COPY);
874 boxGetSideLocations(box, NULL, NULL, &top, NULL);
875 diff = medtop - top;
876 if (L_ABS(diff) >= thresh)
877 boxAdjustSides(box, box, 0, 0, diff - extra, 0);
878 boxaAddBox(boxad, box, L_INSERT);
879 }
880 } else { /* select == L_ADJUST_BOT */
881 boxaGetMedianVals(boxas, NULL, NULL, NULL, &medbot, NULL, NULL);
882 for (i = 0; i < n; i++) {
883 box = boxaGetBox(boxas, i, L_COPY);
884 boxGetSideLocations(box, NULL, NULL, NULL, &bot);
885 diff = medbot - bot;
886 if (L_ABS(diff) >= thresh)
887 boxAdjustSides(box, box, 0, 0, 0, diff + extra);
888 boxaAddBox(boxad, box, L_INSERT);
889 }
890 }
891
892 if (pixadb) {
893 adjustSidePlotName(buf, sizeof(buf), "final", select);
894 boxaPlotSides(boxad, buf, NULL, NULL, NULL, NULL, &pix);
895 pixaAddPix(pixadb, pix, L_INSERT);
896 }
897 return boxad;
898 }
899
900
901 static void
902 adjustSidePlotName(char *buf,
903 size_t size,
904 const char *preface,
905 l_int32 select)
906 {
907 stringCopy(buf, preface, size - 8);
908 if (select == L_ADJUST_LEFT)
909 stringCat(buf, size, "-left");
910 else if (select == L_ADJUST_RIGHT)
911 stringCat(buf, size, "-right");
912 else if (select == L_ADJUST_TOP)
913 stringCat(buf, size, "-top");
914 else if (select == L_ADJUST_BOT)
915 stringCat(buf, size, "-bot");
916 }
917
918
919 /*!
920 * \brief boxaReconcileSizeByMedian()
921 *
922 * \param[in] boxas containing at least 6 valid boxes
923 * \param[in] type L_CHECK_WIDTH, L_CHECK_HEIGHT, L_CHECK_BOTH
924 * \param[in] dfract threshold fraction of dimensional variation from
925 * median; in range (0 ... 1); typ. about 0.05.
926 * \param[in] sfract threshold fraction of side variation from median;
927 * in range (0 ... 1); typ. about 0.04.
928 * \param[in] factor expansion for fixed box beyond median width;
929 * should be near 1.0.
930 * \param[out] pnadelw [optional] diff from median width for boxes
931 * above threshold
932 * \param[out] pnadelh [optional] diff from median height for boxes
933 * above threshold
934 * \param[out] pratiowh [optional] ratio of median width/height of boxas
935 * \return boxad possibly adjusted from boxas; a copy of boxas on error
936 *
937 * <pre>
938 * Notes:
939 * (1) The basic idea is to identify significant differences in box
940 * dimension (either width or height) and modify the outlier boxes.
941 * (2) %type specifies if we are reconciling the width, height or both.
942 * (3) %dfract specifies the tolerance for different dimensions. Any
943 * box with a fractional difference from the median size that
944 * exceeds %dfract will be altered.
945 * (4) %sfract specifies the tolerance for different side locations.
946 * If a box has been marked by (3) for alteration, any side
947 * location that differs from the median side location by
948 * more than %sfract of the median dimension (medw or medh)
949 * will be moved.
950 * (5) Median width and height are found for all valid boxes (i.e.,
951 * for all boxes with width and height > 0.
952 * Median side locations are found separately for even and odd boxes,
953 * using only boxes that are "inliers"; i.e., that have been
954 * found by (3) to be within tolerance for width or height.
955 * (6) If all box dimensions are within threshold of the median size,
956 * just return a copy. Otherwise, box sides of the outliers
957 * will be adjusted.
958 * (7) Using %sfract, sides that are sufficiently far from the median
959 * are first moved to the median value. Then they are moved
960 * together (in or out) so that the final box dimension
961 * is %factor times the median dimension.
962 * (8) The arrays that are the initial deviation from median size
963 * (width and height) are optionally returned. Also optionally
964 * returned is the median w/h asperity ratio of the input %boxas.
965 * </pre>
966 */
967 BOXA *
968 boxaReconcileSizeByMedian(BOXA *boxas,
969 l_int32 type,
970 l_float32 dfract,
971 l_float32 sfract,
972 l_float32 factor,
973 NUMA **pnadelw,
974 NUMA **pnadelh,
975 l_float32 *pratiowh)
976 {
977 l_int32 i, n, ne, no, outfound, isvalid, ind, del, maxdel;
978 l_int32 medw, medh, bw, bh, left, right, top, bot;
979 l_int32 medleft, medlefte, medlefto, medright, medrighte, medrighto;
980 l_int32 medtop, medtope, medtopo, medbot, medbote, medboto;
981 l_float32 brat;
982 BOX *box;
983 BOXA *boxa1, *boxae, *boxao, *boxad;
984 NUMA *naind, *nadelw, *nadelh;
985
986 if (pnadelw) *pnadelw = NULL;
987 if (pnadelh) *pnadelh = NULL;
988 if (pratiowh) *pratiowh = 0.0;
989 if (!boxas)
990 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
991 if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT &&
992 type != L_CHECK_BOTH) {
993 L_WARNING("invalid type; returning copy\n", __func__);
994 return boxaCopy(boxas, L_COPY);
995 }
996 if (dfract <= 0.0 || dfract >= 0.5) {
997 L_WARNING("invalid dimensional fract; returning copy\n", __func__);
998 return boxaCopy(boxas, L_COPY);
999 }
1000 if (sfract <= 0.0 || sfract >= 0.5) {
1001 L_WARNING("invalid side fract; returning copy\n", __func__);
1002 return boxaCopy(boxas, L_COPY);
1003 }
1004 if (factor < 0.8 || factor > 1.25)
1005 L_WARNING("factor %5.3f is typ. closer to 1.0\n", __func__, factor);
1006 if (boxaGetValidCount(boxas) < 6) {
1007 L_WARNING("need at least 6 valid boxes; returning copy\n", __func__);
1008 return boxaCopy(boxas, L_COPY);
1009 }
1010
1011 /* If reconciling both width and height, optionally return array of
1012 * median deviations and even/odd ratio for width measurements */
1013 if (type == L_CHECK_BOTH) {
1014 boxa1 = boxaReconcileSizeByMedian(boxas, L_CHECK_WIDTH, dfract, sfract,
1015 factor, pnadelw, NULL, pratiowh);
1016 boxad = boxaReconcileSizeByMedian(boxa1, L_CHECK_HEIGHT, dfract, sfract,
1017 factor, NULL, pnadelh, NULL);
1018 boxaDestroy(&boxa1);
1019 return boxad;
1020 }
1021
1022 n = boxaGetCount(boxas);
1023 naind = numaCreate(n); /* outlier indicator array */
1024 boxae = boxaCreate(0); /* even inliers */
1025 boxao = boxaCreate(0); /* odd inliers */
1026 outfound = FALSE;
1027 if (type == L_CHECK_WIDTH) {
1028 boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1029 &nadelw, NULL);
1030 if (pratiowh) {
1031 *pratiowh = (l_float32)medw / (l_float32)medh;
1032 L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh);
1033 }
1034 if (pnadelw)
1035 *pnadelw = nadelw;
1036 else
1037 numaDestroy(&nadelw);
1038
1039 /* Check for outliers; assemble inliers */
1040 for (i = 0; i < n; i++) {
1041 if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1042 numaAddNumber(naind, 0);
1043 continue;
1044 }
1045 boxGetGeometry(box, NULL, NULL, &bw, NULL);
1046 brat = (l_float32)bw / (l_float32)medw;
1047 if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1048 outfound = TRUE;
1049 numaAddNumber(naind, 1);
1050 boxDestroy(&box);
1051 } else { /* add to inliers */
1052 numaAddNumber(naind, 0);
1053 if (i % 2 == 0)
1054 boxaAddBox(boxae, box, L_INSERT);
1055 else
1056 boxaAddBox(boxao, box, L_INSERT);
1057 }
1058 }
1059 if (!outfound) { /* nothing to do */
1060 numaDestroy(&naind);
1061 boxaDestroy(&boxae);
1062 boxaDestroy(&boxao);
1063 L_INFO("no width outlier boxes found\n", __func__);
1064 return boxaCopy(boxas, L_COPY);
1065 }
1066
1067 /* Get left/right parameters from inliers. Handle the case
1068 * where there are no inliers for one of the sets. For example,
1069 * when all the even boxes have a different dimension from
1070 * the odd boxes, and the median arbitrarily gets assigned
1071 * to the even boxes, there are no odd inliers; in that case,
1072 * use the even inliers sides to decide whether to adjust
1073 * the left or the right sides of individual outliers. */
1074 L_INFO("fixing width of outlier boxes\n", __func__);
1075 medlefte = medrighte = medlefto = medrighto = 0;
1076 if ((ne = boxaGetValidCount(boxae)) > 0)
1077 boxaGetMedianVals(boxae, &medlefte, NULL, &medrighte, NULL,
1078 NULL, NULL);
1079 if ((no = boxaGetValidCount(boxao)) > 0)
1080 boxaGetMedianVals(boxao, &medlefto, NULL, &medrighto, NULL,
1081 NULL, NULL);
1082 if (ne == 0) { /* use odd inliers values for both */
1083 medlefte = medlefto;
1084 medrighte = medrighto;
1085 } else if (no == 0) { /* use even inliers values for both */
1086 medlefto = medlefte;
1087 medrighto = medrighte;
1088 }
1089
1090 /* Adjust the left and/or right sides of outliers.
1091 * For each box that is a dimensional outlier, consider each side.
1092 * Any side that differs fractionally from the median value
1093 * by more than %sfract times the median width (medw) is set to
1094 * the median value for that side. Then both sides are moved
1095 * an equal distance in or out to make w = %factor * medw. */
1096 boxad = boxaCreate(n);
1097 maxdel = (l_int32)(sfract * medw + 0.5);
1098 for (i = 0; i < n; i++) {
1099 box = boxaGetBox(boxas, i, L_COPY);
1100 boxIsValid(box, &isvalid);
1101 numaGetIValue(naind, i, &ind);
1102 medleft = (i % 2 == 0) ? medlefte : medlefto;
1103 medright = (i % 2 == 0) ? medrighte : medrighto;
1104 if (ind == 1 && isvalid) { /* adjust sides */
1105 boxGetSideLocations(box, &left, &right, NULL, NULL);
1106 if (L_ABS(left - medleft) > maxdel) left = medleft;
1107 if (L_ABS(right - medright) > maxdel) right = medright;
1108 del = (l_int32)(factor * medw - (right - left)) / 2;
1109 boxSetSide(box, L_SET_LEFT, left - del, 0);
1110 boxSetSide(box, L_SET_RIGHT, right + del, 0);
1111 }
1112 boxaAddBox(boxad, box, L_INSERT);
1113 }
1114 } else { /* L_CHECK_HEIGHT */
1115 boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1116 NULL, &nadelh);
1117 if (pratiowh) {
1118 *pratiowh = (l_float32)medw / (l_float32)medh;
1119 L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh);
1120 }
1121 if (pnadelh)
1122 *pnadelh = nadelh;
1123 else
1124 numaDestroy(&nadelh);
1125
1126 /* Check for outliers; assemble inliers */
1127 for (i = 0; i < n; i++) {
1128 if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1129 numaAddNumber(naind, 0);
1130 continue;
1131 }
1132 boxGetGeometry(box, NULL, NULL, NULL, &bh);
1133 brat = (l_float32)bh / (l_float32)medh;
1134 if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1135 outfound = TRUE;
1136 numaAddNumber(naind, 1);
1137 boxDestroy(&box);
1138 } else { /* add to inliers */
1139 numaAddNumber(naind, 0);
1140 if (i % 2 == 0)
1141 boxaAddBox(boxae, box, L_INSERT);
1142 else
1143 boxaAddBox(boxao, box, L_INSERT);
1144 }
1145 }
1146 if (!outfound) { /* nothing to do */
1147 numaDestroy(&naind);
1148 boxaDestroy(&boxae);
1149 boxaDestroy(&boxao);
1150 L_INFO("no height outlier boxes found\n", __func__);
1151 return boxaCopy(boxas, L_COPY);
1152 }
1153
1154 /* Get top/bot parameters from inliers. Handle the case
1155 * where there are no inliers for one of the sets. For example,
1156 * when all the even boxes have a different dimension from
1157 * the odd boxes, and the median arbitrarily gets assigned
1158 * to the even boxes, there are no odd inliers; in that case,
1159 * use the even inlier sides to decide whether to adjust
1160 * the top or the bottom sides of individual outliers. */
1161 L_INFO("fixing height of outlier boxes\n", __func__);
1162 medlefte = medtope = medbote = medtopo = medboto = 0;
1163 if ((ne = boxaGetValidCount(boxae)) > 0)
1164 boxaGetMedianVals(boxae, NULL, &medtope, NULL, &medbote,
1165 NULL, NULL);
1166 if ((no = boxaGetValidCount(boxao)) > 0)
1167 boxaGetMedianVals(boxao, NULL, &medtopo, NULL, &medboto,
1168 NULL, NULL);
1169 if (ne == 0) { /* use odd inliers values for both */
1170 medtope = medtopo;
1171 medbote = medboto;
1172 } else if (no == 0) { /* use even inliers values for both */
1173 medtopo = medtope;
1174 medboto = medbote;
1175 }
1176
1177 /* Adjust the top and/or bottom sides of outliers.
1178 * For each box that is a dimensional outlier, consider each side.
1179 * Any side that differs fractionally from the median value
1180 * by more than %sfract times the median height (medh) is
1181 * set to the median value for that that side. Then both
1182 * sides are moved an equal distance in or out to make
1183 * h = %factor * medh). */
1184 boxad = boxaCreate(n);
1185 maxdel = (l_int32)(sfract * medh + 0.5);
1186 for (i = 0; i < n; i++) {
1187 box = boxaGetBox(boxas, i, L_COPY);
1188 boxIsValid(box, &isvalid);
1189 numaGetIValue(naind, i, &ind);
1190 medtop = (i % 2 == 0) ? medtope : medtopo;
1191 medbot = (i % 2 == 0) ? medbote : medboto;
1192 if (ind == 1 && isvalid) { /* adjust sides */
1193 boxGetSideLocations(box, NULL, NULL, &top, &bot);
1194 if (L_ABS(top - medtop) > maxdel) top = medtop;
1195 if (L_ABS(bot - medbot) > maxdel) bot = medbot;
1196 del = (l_int32)(factor * medh - (bot - top)) / 2; /* typ > 0 */
1197 boxSetSide(box, L_SET_TOP, L_MAX(0, top - del), 0);
1198 boxSetSide(box, L_SET_BOT, bot + del, 0);
1199 }
1200 boxaAddBox(boxad, box, L_INSERT);
1201 }
1202 }
1203 numaDestroy(&naind);
1204 boxaDestroy(&boxae);
1205 boxaDestroy(&boxao);
1206 return boxad;
1207 }
1208
1209
1210 /*!
1211 * \brief boxaPlotSides()
1212 *
1213 * \param[in] boxa source boxa
1214 * \param[in] plotname [optional], can be NULL
1215 * \param[out] pnal [optional] na of left sides
1216 * \param[out] pnat [optional] na of top sides
1217 * \param[out] pnar [optional] na of right sides
1218 * \param[out] pnab [optional] na of bottom sides
1219 * \param[out] ppixd pix of the output plot
1220 * \return 0 if OK, 1 on error
1221 *
1222 * <pre>
1223 * Notes:
1224 * (1) This debugging function shows the progression of the four
1225 * sides in the boxa. There must be at least 2 boxes.
1226 * (2) If there are invalid boxes (e.g., if only even or odd
1227 * indices have valid boxes), this will fill them with the
1228 * nearest valid box before plotting.
1229 * (3) The plotfiles are put in /tmp/lept/plots/, and are named
1230 * either with %plotname or, if NULL, a default name. If
1231 * %plotname is used, make sure it has no whitespace characters.
1232 * </pre>
1233 */
1234 l_ok
1235 boxaPlotSides(BOXA *boxa,
1236 const char *plotname,
1237 NUMA **pnal,
1238 NUMA **pnat,
1239 NUMA **pnar,
1240 NUMA **pnab,
1241 PIX **ppixd)
1242 {
1243 char buf[128], titlebuf[128];
1244 char *dataname;
1245 static l_int32 plotid = 0;
1246 l_int32 n, i, w, h, left, top, right, bot;
1247 l_int32 debugprint = FALSE; /* change to TRUE to spam stderr */
1248 l_float32 med, dev;
1249 BOXA *boxat;
1250 GPLOT *gplot;
1251 NUMA *nal, *nat, *nar, *nab;
1252
1253 if (pnal) *pnal = NULL;
1254 if (pnat) *pnat = NULL;
1255 if (pnar) *pnar = NULL;
1256 if (pnab) *pnab = NULL;
1257 if (ppixd) *ppixd = NULL;
1258 if (!boxa)
1259 return ERROR_INT("boxa not defined", __func__, 1);
1260 if ((n = boxaGetCount(boxa)) < 2)
1261 return ERROR_INT("less than 2 boxes", __func__, 1);
1262 if (!ppixd)
1263 return ERROR_INT("&pixd not defined", __func__, 1);
1264
1265 boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1266
1267 /* Build the numas for each side */
1268 nal = numaCreate(n);
1269 nat = numaCreate(n);
1270 nar = numaCreate(n);
1271 nab = numaCreate(n);
1272
1273 for (i = 0; i < n; i++) {
1274 boxaGetBoxGeometry(boxat, i, &left, &top, &w, &h);
1275 right = left + w - 1;
1276 bot = top + h - 1;
1277 numaAddNumber(nal, left);
1278 numaAddNumber(nat, top);
1279 numaAddNumber(nar, right);
1280 numaAddNumber(nab, bot);
1281 }
1282 boxaDestroy(&boxat);
1283
1284 lept_mkdir("lept/plots");
1285 if (plotname) {
1286 snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%s", plotname);
1287 snprintf(titlebuf, sizeof(titlebuf), "%s: Box sides vs. box index",
1288 plotname);
1289 } else {
1290 snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%d", plotid++);
1291 snprintf(titlebuf, sizeof(titlebuf), "Box sides vs. box index");
1292 }
1293 gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1294 "box index", "side location");
1295 gplotAddPlot(gplot, NULL, nal, GPLOT_LINES, "left side");
1296 gplotAddPlot(gplot, NULL, nat, GPLOT_LINES, "top side");
1297 gplotAddPlot(gplot, NULL, nar, GPLOT_LINES, "right side");
1298 gplotAddPlot(gplot, NULL, nab, GPLOT_LINES, "bottom side");
1299 *ppixd = gplotMakeOutputPix(gplot);
1300 gplotDestroy(&gplot);
1301
1302 if (debugprint) {
1303 dataname = (plotname) ? stringNew(plotname) : stringNew("no_name");
1304 numaGetMedian(nal, &med);
1305 numaGetMeanDevFromMedian(nal, med, &dev);
1306 lept_stderr("%s left: med = %7.3f, meandev = %7.3f\n",
1307 dataname, med, dev);
1308 numaGetMedian(nat, &med);
1309 numaGetMeanDevFromMedian(nat, med, &dev);
1310 lept_stderr("%s top: med = %7.3f, meandev = %7.3f\n",
1311 dataname, med, dev);
1312 numaGetMedian(nar, &med);
1313 numaGetMeanDevFromMedian(nar, med, &dev);
1314 lept_stderr("%s right: med = %7.3f, meandev = %7.3f\n",
1315 dataname, med, dev);
1316 numaGetMedian(nab, &med);
1317 numaGetMeanDevFromMedian(nab, med, &dev);
1318 lept_stderr("%s bot: med = %7.3f, meandev = %7.3f\n",
1319 dataname, med, dev);
1320 LEPT_FREE(dataname);
1321 }
1322
1323 if (pnal)
1324 *pnal = nal;
1325 else
1326 numaDestroy(&nal);
1327 if (pnat)
1328 *pnat = nat;
1329 else
1330 numaDestroy(&nat);
1331 if (pnar)
1332 *pnar = nar;
1333 else
1334 numaDestroy(&nar);
1335 if (pnab)
1336 *pnab = nab;
1337 else
1338 numaDestroy(&nab);
1339 return 0;
1340 }
1341
1342
1343 /*!
1344 * \brief boxaPlotSizes()
1345 *
1346 * \param[in] boxa source boxa
1347 * \param[in] plotname [optional], can be NULL
1348 * \param[out] pnaw [optional] na of widths
1349 * \param[out] pnah [optional] na of heights
1350 * \param[out] ppixd pix of the output plot
1351 * \return 0 if OK, 1 on error
1352 *
1353 * <pre>
1354 * Notes:
1355 * (1) This debugging function shows the progression of box width
1356 * and height in the boxa. There must be at least 2 boxes.
1357 * (2) If there are invalid boxes (e.g., if only even or odd
1358 * indices have valid boxes), this will fill them with the
1359 * nearest valid box before plotting.
1360 * (3) The plotfiles are put in /tmp/lept/plots/, and are named
1361 * either with %plotname or, if NULL, a default name. If
1362 * %plotname is used, make sure it has no whitespace characters.
1363 * </pre>
1364 */
1365 l_ok
1366 boxaPlotSizes(BOXA *boxa,
1367 const char *plotname,
1368 NUMA **pnaw,
1369 NUMA **pnah,
1370 PIX **ppixd)
1371 {
1372 char buf[128], titlebuf[128];
1373 static l_int32 plotid = 0;
1374 l_int32 n, i, w, h;
1375 BOXA *boxat;
1376 GPLOT *gplot;
1377 NUMA *naw, *nah;
1378
1379 if (pnaw) *pnaw = NULL;
1380 if (pnah) *pnah = NULL;
1381 if (ppixd) *ppixd = NULL;
1382 if (!boxa)
1383 return ERROR_INT("boxa not defined", __func__, 1);
1384 if ((n = boxaGetCount(boxa)) < 2)
1385 return ERROR_INT("less than 2 boxes", __func__, 1);
1386 if (!ppixd)
1387 return ERROR_INT("&pixd not defined", __func__, 1);
1388
1389 boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1390
1391 /* Build the numas for the width and height */
1392 naw = numaCreate(n);
1393 nah = numaCreate(n);
1394 for (i = 0; i < n; i++) {
1395 boxaGetBoxGeometry(boxat, i, NULL, NULL, &w, &h);
1396 numaAddNumber(naw, w);
1397 numaAddNumber(nah, h);
1398 }
1399 boxaDestroy(&boxat);
1400
1401 lept_mkdir("lept/plots");
1402 if (plotname) {
1403 snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%s", plotname);
1404 snprintf(titlebuf, sizeof(titlebuf), "%s: Box size vs. box index",
1405 plotname);
1406 } else {
1407 snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%d", plotid++);
1408 snprintf(titlebuf, sizeof(titlebuf), "Box size vs. box index");
1409 }
1410 gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1411 "box index", "box dimension");
1412 gplotAddPlot(gplot, NULL, naw, GPLOT_LINES, "width");
1413 gplotAddPlot(gplot, NULL, nah, GPLOT_LINES, "height");
1414 *ppixd = gplotMakeOutputPix(gplot);
1415 gplotDestroy(&gplot);
1416
1417 if (pnaw)
1418 *pnaw = naw;
1419 else
1420 numaDestroy(&naw);
1421 if (pnah)
1422 *pnah = nah;
1423 else
1424 numaDestroy(&nah);
1425 return 0;
1426 }
1427
1428
1429 /*!
1430 * \brief boxaFillSequence()
1431 *
1432 * \param[in] boxas with at least 3 boxes
1433 * \param[in] useflag L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES
1434 * \param[in] debug 1 for debug output
1435 * \return boxad filled boxa, or NULL on error
1436 *
1437 * <pre>
1438 * Notes:
1439 * (1) This simple function replaces invalid boxes with a copy of
1440 * the nearest valid box, selected from either the entire
1441 * sequence (L_USE_ALL_BOXES) or from the boxes with the
1442 * same parity (L_USE_SAME_PARITY_BOXES). It returns a new boxa.
1443 * (2) This is useful if you expect boxes in the sequence to
1444 * vary slowly with index.
1445 * </pre>
1446 */
1447 BOXA *
1448 boxaFillSequence(BOXA *boxas,
1449 l_int32 useflag,
1450 l_int32 debug)
1451 {
1452 l_int32 n, nv;
1453 BOXA *boxae, *boxao, *boxad;
1454
1455 if (!boxas)
1456 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
1457 if (useflag != L_USE_ALL_BOXES && useflag != L_USE_SAME_PARITY_BOXES)
1458 return (BOXA *)ERROR_PTR("invalid useflag", __func__, NULL);
1459
1460 n = boxaGetCount(boxas);
1461 nv = boxaGetValidCount(boxas);
1462 if (n == nv)
1463 return boxaCopy(boxas, L_COPY); /* all valid */
1464 if (debug)
1465 L_INFO("%d valid boxes, %d invalid boxes\n", __func__, nv, n - nv);
1466 if (useflag == L_USE_SAME_PARITY_BOXES && n < 3) {
1467 L_WARNING("n < 3; some invalid\n", __func__);
1468 return boxaCopy(boxas, L_COPY);
1469 }
1470
1471 if (useflag == L_USE_ALL_BOXES) {
1472 boxad = boxaCopy(boxas, L_COPY);
1473 boxaFillAll(boxad);
1474 } else {
1475 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1476 boxaFillAll(boxae);
1477 boxaFillAll(boxao);
1478 boxad = boxaMergeEvenOdd(boxae, boxao, 0);
1479 boxaDestroy(&boxae);
1480 boxaDestroy(&boxao);
1481 }
1482
1483 nv = boxaGetValidCount(boxad);
1484 if (n != nv)
1485 L_WARNING("there are still %d invalid boxes\n", __func__, n - nv);
1486
1487 return boxad;
1488 }
1489
1490
1491 /*!
1492 * \brief boxaFillAll()
1493 *
1494 * \param[in] boxa
1495 * \return 0 if OK, 1 on error
1496 *
1497 * <pre>
1498 * Notes:
1499 * (1) This static function replaces every invalid box with the
1500 * nearest valid box. If there are no valid boxes, it
1501 * issues a warning.
1502 * </pre>
1503 */
1504 static l_int32
1505 boxaFillAll(BOXA *boxa)
1506 {
1507 l_int32 n, nv, i, j, spandown, spanup;
1508 l_int32 *indic;
1509 BOX *box, *boxt;
1510
1511 if (!boxa)
1512 return ERROR_INT("boxa not defined", __func__, 1);
1513 n = boxaGetCount(boxa);
1514 nv = boxaGetValidCount(boxa);
1515 if (n == nv) return 0;
1516 if (nv == 0) {
1517 L_WARNING("no valid boxes out of %d boxes\n", __func__, n);
1518 return 0;
1519 }
1520
1521 /* Make indicator array for valid boxes */
1522 if ((indic = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL)
1523 return ERROR_INT("indic not made", __func__, 1);
1524 for (i = 0; i < n; i++) {
1525 box = boxaGetValidBox(boxa, i, L_CLONE);
1526 if (box)
1527 indic[i] = 1;
1528 boxDestroy(&box);
1529 }
1530
1531 /* Replace invalid boxes with the nearest valid one */
1532 for (i = 0; i < n; i++) {
1533 box = boxaGetValidBox(boxa, i, L_CLONE);
1534 if (!box) {
1535 spandown = spanup = 10000000;
1536 for (j = i - 1; j >= 0; j--) {
1537 if (indic[j] == 1) {
1538 spandown = i - j;
1539 break;
1540 }
1541 }
1542 for (j = i + 1; j < n; j++) {
1543 if (indic[j] == 1) {
1544 spanup = j - i;
1545 break;
1546 }
1547 }
1548 if (spandown < spanup)
1549 boxt = boxaGetBox(boxa, i - spandown, L_COPY);
1550 else
1551 boxt = boxaGetBox(boxa, i + spanup, L_COPY);
1552 boxaReplaceBox(boxa, i, boxt);
1553 }
1554 boxDestroy(&box);
1555 }
1556
1557 LEPT_FREE(indic);
1558 return 0;
1559 }
1560
1561
1562 /*!
1563 * \brief boxaSizeVariation()
1564 *
1565 * \param[in] boxa at least 4 boxes
1566 * \param[in] type L_SELECT_WIDTH, L_SELECT_HEIGHT
1567 * \param[out] pdel_evenodd [optional] average absolute value of
1568 * (even - odd) size pairs
1569 * \param[out] prms_even [optional] rms deviation of even boxes
1570 * \param[out] prms_odd [optional] rms deviation of odd boxes
1571 * \param[out] prms_all [optional] rms deviation of all boxes
1572 * \return 0 if OK, 1 on error
1573 *
1574 * <pre>
1575 * Notes:
1576 * (1) This gives several measures of the smoothness of either the
1577 * width or height of a sequence of boxes.
1578 * See boxaMedianDimensions() for some other measures.
1579 * (2) Statistics can be found separately for even and odd boxes.
1580 * Additionally, the average pair-wise difference between
1581 * adjacent even and odd boxes can be returned.
1582 * (3) The use case is bounding boxes for scanned page images,
1583 * where ideally the sizes should have little variance.
1584 * </pre>
1585 */
1586 l_ok
1587 boxaSizeVariation(BOXA *boxa,
1588 l_int32 type,
1589 l_float32 *pdel_evenodd,
1590 l_float32 *prms_even,
1591 l_float32 *prms_odd,
1592 l_float32 *prms_all)
1593 {
1594 l_int32 n, ne, no, nmin, vale, valo, i;
1595 l_float32 sum;
1596 BOXA *boxae, *boxao;
1597 NUMA *nae, *nao, *na_all;
1598
1599 if (pdel_evenodd) *pdel_evenodd = 0.0;
1600 if (prms_even) *prms_even = 0.0;
1601 if (prms_odd) *prms_odd = 0.0;
1602 if (prms_all) *prms_all = 0.0;
1603 if (!boxa)
1604 return ERROR_INT("boxa not defined", __func__, 1);
1605 if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT)
1606 return ERROR_INT("invalid type", __func__, 1);
1607 if (!pdel_evenodd && !prms_even && !prms_odd && !prms_all)
1608 return ERROR_INT("nothing to do", __func__, 1);
1609 n = boxaGetCount(boxa);
1610 if (n < 4)
1611 return ERROR_INT("too few boxes", __func__, 1);
1612
1613 boxaSplitEvenOdd(boxa, 0, &boxae, &boxao);
1614 ne = boxaGetCount(boxae);
1615 no = boxaGetCount(boxao);
1616 nmin = L_MIN(ne, no);
1617 if (nmin == 0) {
1618 boxaDestroy(&boxae);
1619 boxaDestroy(&boxao);
1620 return ERROR_INT("either no even or no odd boxes", __func__, 1);
1621 }
1622
1623 if (type == L_SELECT_WIDTH) {
1624 boxaGetSizes(boxae, &nae, NULL);
1625 boxaGetSizes(boxao, &nao, NULL);
1626 boxaGetSizes(boxa, &na_all, NULL);
1627 } else { /* L_SELECT_HEIGHT) */
1628 boxaGetSizes(boxae, NULL, &nae);
1629 boxaGetSizes(boxao, NULL, &nao);
1630 boxaGetSizes(boxa, NULL, &na_all);
1631 }
1632
1633 if (pdel_evenodd) {
1634 sum = 0.0;
1635 for (i = 0; i < nmin; i++) {
1636 numaGetIValue(nae, i, &vale);
1637 numaGetIValue(nao, i, &valo);
1638 sum += L_ABS(vale - valo);
1639 }
1640 *pdel_evenodd = sum / nmin;
1641 }
1642 if (prms_even)
1643 numaSimpleStats(nae, 0, -1, NULL, NULL, prms_even);
1644 if (prms_odd)
1645 numaSimpleStats(nao, 0, -1, NULL, NULL, prms_odd);
1646 if (prms_all)
1647 numaSimpleStats(na_all, 0, -1, NULL, NULL, prms_all);
1648
1649 boxaDestroy(&boxae);
1650 boxaDestroy(&boxao);
1651 numaDestroy(&nae);
1652 numaDestroy(&nao);
1653 numaDestroy(&na_all);
1654 return 0;
1655 }
1656
1657
1658 /*!
1659 * \brief boxaMedianDimensions()
1660 *
1661 * \param[in] boxas containing at least 3 valid boxes in even and odd
1662 * \param[out] pmedw [optional] median width of all boxes
1663 * \param[out] pmedh [optional] median height of all boxes
1664 * \param[out] pmedwe [optional] median width of even boxes
1665 * \param[out] pmedwo [optional] median width of odd boxes
1666 * \param[out] pmedhe [optional] median height of even boxes
1667 * \param[out] pmedho [optional] median height of odd boxes
1668 * \param[out] pnadelw [optional] width diff of each box from median
1669 * \param[out] pnadelh [optional] height diff of each box from median
1670 * \return 0 if OK, 1 on error
1671 *
1672 * <pre>
1673 * Notes:
1674 * (1) This provides information that (1) allows identification of
1675 * boxes that have unusual (outlier) width or height, and (2) can
1676 * be used to regularize the sizes of the outlier boxes, assuming
1677 * that the boxes satisfy a fairly regular sequence and should
1678 * mostly have the same width and height.
1679 * (2) This finds the median width and height, as well as separate
1680 * median widths and heights of even and odd boxes. It also
1681 * generates arrays that give the difference in width and height
1682 * of each box from the median, which can be used to correct
1683 * individual boxes.
1684 * (3) All return values are optional.
1685 * </pre>
1686 */
1687 l_ok
1688 boxaMedianDimensions(BOXA *boxas,
1689 l_int32 *pmedw,
1690 l_int32 *pmedh,
1691 l_int32 *pmedwe,
1692 l_int32 *pmedwo,
1693 l_int32 *pmedhe,
1694 l_int32 *pmedho,
1695 NUMA **pnadelw,
1696 NUMA **pnadelh)
1697 {
1698 l_int32 i, n, bw, bh, medw, medh, medwe, medwo, medhe, medho;
1699 BOXA *boxae, *boxao;
1700 NUMA *nadelw, *nadelh;
1701
1702 if (pmedw) *pmedw = 0;
1703 if (pmedh) *pmedh = 0;
1704 if (pmedwe) *pmedwe= 0;
1705 if (pmedwo) *pmedwo= 0;
1706 if (pmedhe) *pmedhe= 0;
1707 if (pmedho) *pmedho= 0;
1708 if (pnadelw) *pnadelw = NULL;
1709 if (pnadelh) *pnadelh = NULL;
1710 if (!boxas)
1711 return ERROR_INT("boxas not defined", __func__, 1);
1712 if (boxaGetValidCount(boxas) < 6)
1713 return ERROR_INT("need at least 6 valid boxes", __func__, 1);
1714
1715 /* Require at least 3 valid boxes of both types */
1716 boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1717 if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) {
1718 boxaDestroy(&boxae);
1719 boxaDestroy(&boxao);
1720 return ERROR_INT("don't have 3+ valid boxes of each type", __func__, 1);
1721 }
1722
1723 /* Get the relevant median widths and heights */
1724 boxaGetMedianVals(boxas, NULL, NULL, NULL, NULL, &medw, &medh);
1725 boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, &medwe, &medhe);
1726 boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, &medwo, &medho);
1727 if (pmedw) *pmedw = medw;
1728 if (pmedh) *pmedh = medh;
1729 if (pmedwe) *pmedwe = medwe;
1730 if (pmedwo) *pmedwo = medwo;
1731 if (pmedhe) *pmedhe = medhe;
1732 if (pmedho) *pmedho = medho;
1733
1734 /* Find the variation from median dimension for each box */
1735 n = boxaGetCount(boxas);
1736 nadelw = numaCreate(n);
1737 nadelh = numaCreate(n);
1738 for (i = 0; i < n; i++) {
1739 boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw, &bh);
1740 if (bw == 0 || bh == 0) { /* invalid box */
1741 numaAddNumber(nadelw, 0);
1742 numaAddNumber(nadelh, 0);
1743 } else {
1744 numaAddNumber(nadelw, bw - medw);
1745 numaAddNumber(nadelh, bh - medh);
1746 }
1747 }
1748 if (pnadelw)
1749 *pnadelw = nadelw;
1750 else
1751 numaDestroy(&nadelw);
1752 if (pnadelh)
1753 *pnadelh = nadelh;
1754 else
1755 numaDestroy(&nadelh);
1756
1757 boxaDestroy(&boxae);
1758 boxaDestroy(&boxao);
1759 return 0;
1760 }
1761