comparison mupdf-source/thirdparty/leptonica/src/boxfunc2.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file boxfunc2.c
29 * <pre>
30 *
31 * Boxa/Box transform (shift, scale) and orthogonal rotation
32 * BOXA *boxaTransform()
33 * BOX *boxTransform()
34 * BOXA *boxaTransformOrdered()
35 * BOX *boxTransformOrdered()
36 * BOXA *boxaRotateOrth()
37 * BOX *boxRotateOrth()
38 * BOXA *boxaShiftWithPta()
39 *
40 * Boxa sort
41 * BOXA *boxaSort()
42 * BOXA *boxaBinSort()
43 * BOXA *boxaSortByIndex()
44 * BOXAA *boxaSort2d()
45 * BOXAA *boxaSort2dByIndex()
46 *
47 * Boxa statistics
48 * l_int32 boxaGetRankVals()
49 * l_int32 boxaGetMedianVals()
50 * l_int32 boxaGetAverageSize()
51 *
52 * Boxa array extraction
53 * l_int32 boxaExtractAsNuma()
54 * l_int32 boxaExtractAsPta()
55 * PTA *boxaExtractCorners()
56 *
57 * Other Boxaa functions
58 * l_int32 boxaaGetExtent()
59 * BOXA *boxaaFlattenToBoxa()
60 * BOXA *boxaaFlattenAligned()
61 * BOXAA *boxaEncapsulateAligned()
62 * BOXAA *boxaaTranspose()
63 * l_int32 boxaaAlignBox()
64 * </pre>
65 */
66
67 #ifdef HAVE_CONFIG_H
68 #include <config_auto.h>
69 #endif /* HAVE_CONFIG_H */
70
71 #include <math.h>
72 #include "allheaders.h"
73 #include "pix_internal.h"
74
75 /* For more than this number of c.c. in a binarized image of
76 * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
77 * is faster than the O(nlogn) shellsort. */
78 static const l_int32 MinCompsForBinSort = 200;
79
80 /*---------------------------------------------------------------------*
81 * Boxa/Box transform (shift, scale) and orthogonal rotation *
82 *---------------------------------------------------------------------*/
83 /*!
84 * \brief boxaTransform()
85 *
86 * \param[in] boxas
87 * \param[in] shiftx
88 * \param[in] shifty
89 * \param[in] scalex
90 * \param[in] scaley
91 * \return boxad, or NULL on error
92 *
93 * <pre>
94 * Notes:
95 * (1) This is a very simple function that first shifts, then scales.
96 * (2) The UL corner coordinates of all boxes in the output %boxad
97 * (3) For the boxes in the output %boxad, the UL corner coordinates
98 * must be non-negative, and the width and height of valid
99 * boxes must be at least 1.
100 * </pre>
101 */
102 BOXA *
103 boxaTransform(BOXA *boxas,
104 l_int32 shiftx,
105 l_int32 shifty,
106 l_float32 scalex,
107 l_float32 scaley)
108 {
109 l_int32 i, n;
110 BOX *boxs, *boxd;
111 BOXA *boxad;
112
113 if (!boxas)
114 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
115 n = boxaGetCount(boxas);
116 if ((boxad = boxaCreate(n)) == NULL)
117 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
118 for (i = 0; i < n; i++) {
119 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
120 boxaDestroy(&boxad);
121 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
122 }
123 boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
124 boxDestroy(&boxs);
125 boxaAddBox(boxad, boxd, L_INSERT);
126 }
127
128 return boxad;
129 }
130
131
132 /*!
133 * \brief boxTransform()
134 *
135 * \param[in] box
136 * \param[in] shiftx
137 * \param[in] shifty
138 * \param[in] scalex
139 * \param[in] scaley
140 * \return boxd, or NULL on error
141 *
142 * <pre>
143 * Notes:
144 * (1) This is a very simple function that first shifts, then scales.
145 * (2) If the box is invalid, a new invalid box is returned.
146 * (3) The UL corner coordinates must be non-negative, and the
147 * width and height of valid boxes must be at least 1.
148 * </pre>
149 */
150 BOX *
151 boxTransform(BOX *box,
152 l_int32 shiftx,
153 l_int32 shifty,
154 l_float32 scalex,
155 l_float32 scaley)
156 {
157 if (!box)
158 return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
159 if (box->w <= 0 || box->h <= 0)
160 return boxCreate(0, 0, 0, 0);
161 else
162 return boxCreate((l_int32)(L_MAX(0, scalex * (box->x + shiftx) + 0.5)),
163 (l_int32)(L_MAX(0, scaley * (box->y + shifty) + 0.5)),
164 (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
165 (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
166 }
167
168
169 /*!
170 * \brief boxaTransformOrdered()
171 *
172 * \param[in] boxas
173 * \param[in] shiftx
174 * \param[in] shifty
175 * \param[in] scalex
176 * \param[in] scaley
177 * \param[in] xcen, ycen center of rotation
178 * \param[in] angle in radians; clockwise is positive
179 * \param[in] order one of 6 combinations: L_TR_SC_RO, ...
180 * \return boxd, or NULL on error
181 *
182 * <pre>
183 * shift, scaling and rotation, and the order of the
184 * transforms is specified.
185 * (2) Although these operations appear to be on an infinite
186 * 2D plane, in practice the region of interest is clipped
187 * to a finite image. The center of rotation is usually taken
188 * with respect to the image (either the UL corner or the
189 * center). A translation can have two very different effects:
190 * (a) Moves the boxes across the fixed image region.
191 * (b) Moves the image origin, causing a change in the image
192 * region and an opposite effective translation of the boxes.
193 * This function should only be used for (a), where the image
194 * region is fixed on translation. If the image region is
195 * changed by the translation, use instead the functions
196 * in affinecompose.c, where the image region and rotation
197 * center can be computed from the actual clipping due to
198 * translation of the image origin.
199 * (3) See boxTransformOrdered() for usage and implementation details.
200 * </pre>
201 */
202 BOXA *
203 boxaTransformOrdered(BOXA *boxas,
204 l_int32 shiftx,
205 l_int32 shifty,
206 l_float32 scalex,
207 l_float32 scaley,
208 l_int32 xcen,
209 l_int32 ycen,
210 l_float32 angle,
211 l_int32 order)
212 {
213 l_int32 i, n;
214 BOX *boxs, *boxd;
215 BOXA *boxad;
216
217 if (!boxas)
218 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
219 n = boxaGetCount(boxas);
220 if ((boxad = boxaCreate(n)) == NULL)
221 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
222 for (i = 0; i < n; i++) {
223 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
224 boxaDestroy(&boxad);
225 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
226 }
227 boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
228 xcen, ycen, angle, order);
229 boxDestroy(&boxs);
230 boxaAddBox(boxad, boxd, L_INSERT);
231 }
232
233 return boxad;
234 }
235
236
237 /*!
238 * \brief boxTransformOrdered()
239 *
240 * \param[in] boxs
241 * \param[in] shiftx
242 * \param[in] shifty
243 * \param[in] scalex
244 * \param[in] scaley
245 * \param[in] xcen, ycen center of rotation
246 * \param[in] angle in radians; clockwise is positive
247 * \param[in] order one of 6 combinations: L_TR_SC_RO, ...
248 * \return boxd, or NULL on error
249 *
250 * <pre>
251 * Notes:
252 * (1) This allows a sequence of linear transforms, composed of
253 * shift, scaling and rotation, where the order of the
254 * transforms is specified.
255 * (2) The rotation is taken about a point specified by (xcen, ycen).
256 * Let the components of the vector from the center of rotation
257 * to the box center be (xdif, ydif):
258 * xdif = (bx + 0.5 * bw) - xcen
259 * ydif = (by + 0.5 * bh) - ycen
260 * Then the box center after rotation has new components:
261 * bxcen = xcen + xdif * cosa + ydif * sina
262 * bycen = ycen + ydif * cosa - xdif * sina
263 * where cosa and sina are the cos and sin of the angle,
264 * and the enclosing box for the rotated box has size:
265 * rw = |bw * cosa| + |bh * sina|
266 * rh = |bh * cosa| + |bw * sina|
267 * where bw and bh are the unrotated width and height.
268 * Then the box UL corner (rx, ry) is
269 * rx = bxcen - 0.5 * rw
270 * ry = bycen - 0.5 * rh
271 * (3) The center of rotation specified by args %xcen and %ycen
272 * is the point BEFORE any translation or scaling. If the
273 * rotation is not the first operation, this function finds
274 * the actual center at the time of rotation. It does this
275 * by making the following assumptions:
276 * (1) Any scaling is with respect to the UL corner, so
277 * that the center location scales accordingly.
278 * (2) A translation does not affect the center of
279 * the image; it just moves the boxes.
280 * We always use assumption (1). However, assumption (2)
281 * will be incorrect if the apparent translation is due
282 * to a clipping operation that, in effect, moves the
283 * origin of the image. In that case, you should NOT use
284 * these simple functions. Instead, use the functions
285 * in affinecompose.c, where the rotation center can be
286 * computed from the actual clipping due to translation
287 * of the image origin.
288 * </pre>
289 */
290 BOX *
291 boxTransformOrdered(BOX *boxs,
292 l_int32 shiftx,
293 l_int32 shifty,
294 l_float32 scalex,
295 l_float32 scaley,
296 l_int32 xcen,
297 l_int32 ycen,
298 l_float32 angle,
299 l_int32 order)
300 {
301 l_int32 bx, by, bw, bh, tx, ty, tw, th;
302 l_int32 xcent, ycent; /* transformed center of rotation due to scaling */
303 l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh;
304 BOX *boxd;
305
306 if (!boxs)
307 return (BOX *)ERROR_PTR("boxs not defined", __func__, NULL);
308 if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
309 order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
310 return (BOX *)ERROR_PTR("order invalid", __func__, NULL);
311
312 boxGetGeometry(boxs, &bx, &by, &bw, &bh);
313 if (bw <= 0 || bh <= 0) /* invalid */
314 return boxCreate(0, 0, 0, 0);
315 if (angle != 0.0) {
316 sina = sin(angle);
317 cosa = cos(angle);
318 }
319
320 if (order == L_TR_SC_RO) {
321 tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
322 ty = (l_int32)(scaley * (by + shifty) + 0.5);
323 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
324 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
325 xcent = (l_int32)(scalex * xcen + 0.5);
326 ycent = (l_int32)(scaley * ycen + 0.5);
327 if (angle == 0.0) {
328 boxd = boxCreate(tx, ty, tw, th);
329 } else {
330 xdif = tx + 0.5 * tw - xcent;
331 ydif = ty + 0.5 * th - ycent;
332 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
333 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
334 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
335 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
336 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
337 (l_int32)rh);
338 }
339 } else if (order == L_SC_TR_RO) {
340 tx = (l_int32)(scalex * bx + shiftx + 0.5);
341 ty = (l_int32)(scaley * by + shifty + 0.5);
342 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
343 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
344 xcent = (l_int32)(scalex * xcen + 0.5);
345 ycent = (l_int32)(scaley * ycen + 0.5);
346 if (angle == 0.0) {
347 boxd = boxCreate(tx, ty, tw, th);
348 } else {
349 xdif = tx + 0.5 * tw - xcent;
350 ydif = ty + 0.5 * th - ycent;
351 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
352 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
353 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
354 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
355 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
356 (l_int32)rh);
357 }
358 } else if (order == L_RO_TR_SC) {
359 if (angle == 0.0) {
360 rx = bx;
361 ry = by;
362 rw = bw;
363 rh = bh;
364 } else {
365 xdif = bx + 0.5 * bw - xcen;
366 ydif = by + 0.5 * bh - ycen;
367 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
368 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
369 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
370 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
371 }
372 tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
373 ty = (l_int32)(scaley * (ry + shifty) + 0.5);
374 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
375 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
376 boxd = boxCreate(tx, ty, tw, th);
377 } else if (order == L_RO_SC_TR) {
378 if (angle == 0.0) {
379 rx = bx;
380 ry = by;
381 rw = bw;
382 rh = bh;
383 } else {
384 xdif = bx + 0.5 * bw - xcen;
385 ydif = by + 0.5 * bh - ycen;
386 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
387 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
388 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
389 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
390 }
391 tx = (l_int32)(scalex * rx + shiftx + 0.5);
392 ty = (l_int32)(scaley * ry + shifty + 0.5);
393 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
394 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
395 boxd = boxCreate(tx, ty, tw, th);
396 } else if (order == L_TR_RO_SC) {
397 tx = bx + shiftx;
398 ty = by + shifty;
399 if (angle == 0.0) {
400 rx = tx;
401 ry = ty;
402 rw = bw;
403 rh = bh;
404 } else {
405 xdif = tx + 0.5 * bw - xcen;
406 ydif = ty + 0.5 * bh - ycen;
407 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
408 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
409 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
410 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
411 }
412 tx = (l_int32)(scalex * rx + 0.5);
413 ty = (l_int32)(scaley * ry + 0.5);
414 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
415 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
416 boxd = boxCreate(tx, ty, tw, th);
417 } else { /* order == L_SC_RO_TR) */
418 tx = (l_int32)(scalex * bx + 0.5);
419 ty = (l_int32)(scaley * by + 0.5);
420 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
421 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
422 xcent = (l_int32)(scalex * xcen + 0.5);
423 ycent = (l_int32)(scaley * ycen + 0.5);
424 if (angle == 0.0) {
425 rx = tx;
426 ry = ty;
427 rw = tw;
428 rh = th;
429 } else {
430 xdif = tx + 0.5 * tw - xcent;
431 ydif = ty + 0.5 * th - ycent;
432 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
433 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
434 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
435 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
436 }
437 tx = (l_int32)(rx + shiftx + 0.5);
438 ty = (l_int32)(ry + shifty + 0.5);
439 tw = (l_int32)(rw + 0.5);
440 th = (l_int32)(rh + 0.5);
441 boxd = boxCreate(tx, ty, tw, th);
442 }
443
444 return boxd;
445 }
446
447
448 /*!
449 * \brief boxaRotateOrth()
450 *
451 * \param[in] boxas
452 * \param[in] w, h of image in which the boxa is embedded
453 * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
454 * all rotations are clockwise
455 * \return boxad, or NULL on error
456 *
457 * <pre>
458 * Notes:
459 * (1) See boxRotateOrth() for details.
460 * </pre>
461 */
462 BOXA *
463 boxaRotateOrth(BOXA *boxas,
464 l_int32 w,
465 l_int32 h,
466 l_int32 rotation)
467 {
468 l_int32 i, n;
469 BOX *boxs, *boxd;
470 BOXA *boxad;
471
472 if (!boxas)
473 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
474 if (rotation < 0 || rotation > 3)
475 return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL);
476 if (rotation == 0)
477 return boxaCopy(boxas, L_COPY);
478
479 n = boxaGetCount(boxas);
480 if ((boxad = boxaCreate(n)) == NULL)
481 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
482 for (i = 0; i < n; i++) {
483 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
484 boxaDestroy(&boxad);
485 return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
486 }
487 boxd = boxRotateOrth(boxs, w, h, rotation);
488 boxDestroy(&boxs);
489 boxaAddBox(boxad, boxd, L_INSERT);
490 }
491
492 return boxad;
493 }
494
495
496 /*!
497 * \brief boxRotateOrth()
498 *
499 * \param[in] box
500 * \param[in] w, h of image in which the box is embedded
501 * \param[in] rotation 0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
502 * all rotations are clockwise
503 * \return boxd, or NULL on error
504 *
505 * <pre>
506 * Notes:
507 * (1) Rotate the image with the embedded box by the specified amount.
508 * (2) After rotation, the rotated box is always measured with
509 * respect to the UL corner of the image.
510 * </pre>
511 */
512 BOX *
513 boxRotateOrth(BOX *box,
514 l_int32 w,
515 l_int32 h,
516 l_int32 rotation)
517 {
518 l_int32 bx, by, bw, bh, xdist, ydist;
519
520 if (!box)
521 return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
522 if (rotation < 0 || rotation > 3)
523 return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL);
524 if (rotation == 0)
525 return boxCopy(box);
526
527 boxGetGeometry(box, &bx, &by, &bw, &bh);
528 if (bw <= 0 || bh <= 0) /* invalid */
529 return boxCreate(0, 0, 0, 0);
530 ydist = h - by - bh; /* below box */
531 xdist = w - bx - bw; /* to right of box */
532 if (rotation == 1) /* 90 deg cw */
533 return boxCreate(ydist, bx, bh, bw);
534 else if (rotation == 2) /* 180 deg cw */
535 return boxCreate(xdist, ydist, bw, bh);
536 else /* rotation == 3, 270 deg cw */
537 return boxCreate(by, xdist, bh, bw);
538 }
539
540
541 /*!
542 * \brief boxaShiftWithPta()
543 *
544 * \param[in] boxas
545 * \param[in] pta aligned with the boxes; determines shift amount
546 * \param[in] dir +1 to shift by the values in pta; -1 to shift
547 * by the negative of the values in the pta.
548 * \return boxad, or NULL on error
549 *
550 * <pre>
551 * Notes:
552 * (1) In use, %pta may come from the UL corners of of a boxa, each
553 * of whose boxes contains the corresponding box of %boxas
554 * within it. The output %boxad is then a boxa in the (global)
555 * coordinates of the containing boxa. So the input %pta
556 * could come from boxaExtractCorners().
557 * (2) The operations with %dir == 1 and %dir == -1 are inverses if
558 * called in order (1, -1). Starting with an input boxa and
559 * calling twice with these values of %dir results in a boxa
560 * identical to the input. However, because box parameters can
561 * never be negative, calling in the order (-1, 1) may result
562 * in clipping at the left side and the top.
563 * </pre>
564 */
565 BOXA *
566 boxaShiftWithPta(BOXA *boxas,
567 PTA *pta,
568 l_int32 dir)
569 {
570 l_int32 i, n, x, y, full;
571 BOX *box1, *box2;
572 BOXA *boxad;
573
574 if (!boxas)
575 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
576 boxaIsFull(boxas, &full);
577 if (!full)
578 return (BOXA *)ERROR_PTR("boxas not full", __func__, NULL);
579 if (!pta)
580 return (BOXA *)ERROR_PTR("pta not defined", __func__, NULL);
581 if (dir != 1 && dir != -1)
582 return (BOXA *)ERROR_PTR("invalid dir", __func__, NULL);
583 n = boxaGetCount(boxas);
584 if (n != ptaGetCount(pta))
585 return (BOXA *)ERROR_PTR("boxas and pta not same size", __func__, NULL);
586
587 if ((boxad = boxaCreate(n)) == NULL)
588 return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
589 for (i = 0; i < n; i++) {
590 box1 = boxaGetBox(boxas, i, L_COPY);
591 ptaGetIPt(pta, i, &x, &y);
592 box2 = boxTransform(box1, dir * x, dir * y, 1.0, 1.0);
593 boxaAddBox(boxad, box2, L_INSERT);
594 boxDestroy(&box1);
595 }
596 return boxad;
597 }
598
599
600 /*---------------------------------------------------------------------*
601 * Boxa sort *
602 *---------------------------------------------------------------------*/
603 /*!
604 * \brief boxaSort()
605 *
606 * \param[in] boxas
607 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y,
608 * L_SORT_BY_RIGHT, L_SORT_BY_BOT,
609 * L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
610 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
611 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA,
612 * L_SORT_BY_ASPECT_RATIO
613 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING
614 * \param[out] pnaindex [optional] index of sorted order into
615 * original array
616 * \return boxad sorted version of boxas, or NULL on error
617 *
618 * <pre>
619 * Notes:
620 * (1) An empty boxa returns a copy, with a warning.
621 * </pre>
622 */
623 BOXA *
624 boxaSort(BOXA *boxas,
625 l_int32 sorttype,
626 l_int32 sortorder,
627 NUMA **pnaindex)
628 {
629 l_int32 i, n, x, y, w, h, size;
630 BOXA *boxad;
631 NUMA *na, *naindex;
632
633 if (pnaindex) *pnaindex = NULL;
634 if (!boxas)
635 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
636 if ((n = boxaGetCount(boxas)) == 0) {
637 L_WARNING("boxas is empty\n", __func__);
638 return boxaCopy(boxas, L_COPY);
639 }
640 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
641 sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT &&
642 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
643 sorttype != L_SORT_BY_MIN_DIMENSION &&
644 sorttype != L_SORT_BY_MAX_DIMENSION &&
645 sorttype != L_SORT_BY_PERIMETER &&
646 sorttype != L_SORT_BY_AREA &&
647 sorttype != L_SORT_BY_ASPECT_RATIO)
648 return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL);
649 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
650 return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL);
651
652 /* Use O(n) binsort if possible */
653 if (n > MinCompsForBinSort &&
654 ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
655 (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
656 (sorttype == L_SORT_BY_PERIMETER)))
657 return boxaBinSort(boxas, sorttype, sortorder, pnaindex);
658
659 /* Build up numa of specific data */
660 if ((na = numaCreate(n)) == NULL)
661 return (BOXA *)ERROR_PTR("na not made", __func__, NULL);
662 for (i = 0; i < n; i++) {
663 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
664 switch (sorttype)
665 {
666 case L_SORT_BY_X:
667 numaAddNumber(na, x);
668 break;
669 case L_SORT_BY_Y:
670 numaAddNumber(na, y);
671 break;
672 case L_SORT_BY_RIGHT:
673 numaAddNumber(na, x + w - 1);
674 break;
675 case L_SORT_BY_BOT:
676 numaAddNumber(na, y + h - 1);
677 break;
678 case L_SORT_BY_WIDTH:
679 numaAddNumber(na, w);
680 break;
681 case L_SORT_BY_HEIGHT:
682 numaAddNumber(na, h);
683 break;
684 case L_SORT_BY_MIN_DIMENSION:
685 size = L_MIN(w, h);
686 numaAddNumber(na, size);
687 break;
688 case L_SORT_BY_MAX_DIMENSION:
689 size = L_MAX(w, h);
690 numaAddNumber(na, size);
691 break;
692 case L_SORT_BY_PERIMETER:
693 size = w + h;
694 numaAddNumber(na, size);
695 break;
696 case L_SORT_BY_AREA:
697 size = w * h;
698 numaAddNumber(na, size);
699 break;
700 case L_SORT_BY_ASPECT_RATIO:
701 numaAddNumber(na, (l_float32)w / (l_float32)h);
702 break;
703 default:
704 L_WARNING("invalid sort type\n", __func__);
705 }
706 }
707
708 /* Get the sort index for data array */
709 naindex = numaGetSortIndex(na, sortorder);
710 numaDestroy(&na);
711 if (!naindex)
712 return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL);
713
714 /* Build up sorted boxa using sort index */
715 boxad = boxaSortByIndex(boxas, naindex);
716
717 if (pnaindex)
718 *pnaindex = naindex;
719 else
720 numaDestroy(&naindex);
721 return boxad;
722 }
723
724
725 /*!
726 * \brief boxaBinSort()
727 *
728 * \param[in] boxas
729 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
730 * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER
731 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING
732 * \param[out] pnaindex [optional] index of sorted order into
733 * original array
734 * \return boxad sorted version of boxas, or NULL on error
735 *
736 * <pre>
737 * Notes:
738 * (1) For a large number of boxes (say, greater than 1000), this
739 * O(n) binsort is much faster than the O(nlogn) shellsort.
740 * For 5000 components, this is over 20x faster than boxaSort().
741 * (2) Consequently, boxaSort() calls this function if it will
742 * likely go much faster.
743 * </pre>
744 */
745 BOXA *
746 boxaBinSort(BOXA *boxas,
747 l_int32 sorttype,
748 l_int32 sortorder,
749 NUMA **pnaindex)
750 {
751 l_int32 i, n, x, y, w, h;
752 BOXA *boxad;
753 NUMA *na, *naindex;
754
755 if (pnaindex) *pnaindex = NULL;
756 if (!boxas)
757 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
758 if ((n = boxaGetCount(boxas)) == 0) {
759 L_WARNING("boxas is empty\n", __func__);
760 return boxaCopy(boxas, L_COPY);
761 }
762 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
763 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
764 sorttype != L_SORT_BY_PERIMETER)
765 return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL);
766 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
767 return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL);
768
769 /* Generate Numa of appropriate box dimensions */
770 if ((na = numaCreate(n)) == NULL)
771 return (BOXA *)ERROR_PTR("na not made", __func__, NULL);
772 for (i = 0; i < n; i++) {
773 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
774 switch (sorttype)
775 {
776 case L_SORT_BY_X:
777 numaAddNumber(na, x);
778 break;
779 case L_SORT_BY_Y:
780 numaAddNumber(na, y);
781 break;
782 case L_SORT_BY_WIDTH:
783 numaAddNumber(na, w);
784 break;
785 case L_SORT_BY_HEIGHT:
786 numaAddNumber(na, h);
787 break;
788 case L_SORT_BY_PERIMETER:
789 numaAddNumber(na, w + h);
790 break;
791 default:
792 L_WARNING("invalid sort type\n", __func__);
793 }
794 }
795
796 /* Get the sort index for data array */
797 naindex = numaGetBinSortIndex(na, sortorder);
798 numaDestroy(&na);
799 if (!naindex)
800 return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL);
801
802 /* Build up sorted boxa using the sort index */
803 boxad = boxaSortByIndex(boxas, naindex);
804
805 if (pnaindex)
806 *pnaindex = naindex;
807 else
808 numaDestroy(&naindex);
809 return boxad;
810 }
811
812
813 /*!
814 * \brief boxaSortByIndex()
815 *
816 * \param[in] boxas
817 * \param[in] naindex na that maps from the new boxa to the input boxa
818 * \return boxad sorted, or NULL on error
819 */
820 BOXA *
821 boxaSortByIndex(BOXA *boxas,
822 NUMA *naindex)
823 {
824 l_int32 i, n, index;
825 BOX *box;
826 BOXA *boxad;
827
828 if (!boxas)
829 return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
830 if ((n = boxaGetCount(boxas)) == 0) {
831 L_WARNING("boxas is empty\n", __func__);
832 return boxaCopy(boxas, L_COPY);
833 }
834 if (!naindex)
835 return (BOXA *)ERROR_PTR("naindex not defined", __func__, NULL);
836
837 boxad = boxaCreate(n);
838 for (i = 0; i < n; i++) {
839 numaGetIValue(naindex, i, &index);
840 box = boxaGetBox(boxas, index, L_COPY);
841 boxaAddBox(boxad, box, L_INSERT);
842 }
843
844 return boxad;
845 }
846
847
848 /*!
849 * \brief boxaSort2d()
850 *
851 * \param[in] boxas
852 * \param[out] pnaad [optional] numaa with sorted indices
853 * whose values are the indices of the input array
854 * \param[in] delta1 min separation that permits aggregation of a box
855 * onto a boxa of horizontally-aligned boxes; pass 1
856 * \param[in] delta2 min separation that permits aggregation of a box
857 * onto a boxa of horizontally-aligned boxes; pass 2
858 * \param[in] minh1 components less than this height either join an
859 * existing boxa or are set aside for pass 2
860 * \return baa 2d sorted version of boxa, or NULL on error
861 *
862 * <pre>
863 * Notes:
864 * (1) The final result is a sort where the 'fast scan' direction is
865 * left to right, and the 'slow scan' direction is from top
866 * to bottom. Each boxa in the baa represents a sorted set
867 * of boxes from left to right.
868 * (2) Three passes are used to aggregate the boxas, which can correspond
869 * to characters or words in a line of text. In pass 1, only
870 * taller components, which correspond to xheight or larger,
871 * are permitted to start a new boxa. In pass 2, the remaining
872 * vertically-challenged components are allowed to join an
873 * existing boxa or start a new one. In pass 3, boxa whose extent
874 * is overlapping are joined. After that, the boxes in each
875 * boxa are sorted horizontally, and finally the boxa are
876 * sorted vertically.
877 * (3) If %delta1 > 0, the first pass allows aggregation when
878 * boxes in the same boxa do not overlap vertically. In fact,
879 * %delta1 is the max distance by which they can miss and still
880 * be aggregated. If %delta1 < 0, the box must have vertical
881 * overlap of at least abs(%delta1) with the boxa before it
882 * can be merged. Similar for delta2 on the second pass.
883 * (4) On the first pass, any component of height less than minh1
884 * cannot start a new boxa; it's put aside for later insertion.
885 * (5) On the second pass, any small component that doesn't align
886 * with an existing boxa can start a new one.
887 * (6) This can be used to identify lines of text from
888 * character or word bounding boxes.
889 * (7) Typical values for the input parameters on 300 ppi text are:
890 * delta1 ~ 0
891 * delta2 ~ 0
892 * minh1 ~ 5
893 * </pre>
894 */
895 BOXAA *
896 boxaSort2d(BOXA *boxas,
897 NUMAA **pnaad,
898 l_int32 delta1,
899 l_int32 delta2,
900 l_int32 minh1)
901 {
902 l_int32 i, index, h, nt, ne, n, m, ival;
903 BOX *box;
904 BOXA *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs;
905 BOXAA *baa, *baa1, *baad;
906 NUMA *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap;
907 NUMAA *naa, *naa1, *naad;
908
909 if (pnaad) *pnaad = NULL;
910 if (!boxas)
911 return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL);
912 if (boxaGetCount(boxas) == 0)
913 return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL);
914
915 /* Sort from left to right */
916 if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
917 == NULL)
918 return (BOXAA *)ERROR_PTR("boxa not made", __func__, NULL);
919
920 /* First pass: assign taller boxes to boxa by row */
921 nt = boxaGetCount(boxa);
922 baa = boxaaCreate(0);
923 naa = numaaCreate(0);
924 boxae = boxaCreate(0); /* save small height boxes here */
925 nae = numaCreate(0); /* keep track of small height boxes */
926 for (i = 0; i < nt; i++) {
927 box = boxaGetBox(boxa, i, L_CLONE);
928 boxGetGeometry(box, NULL, NULL, NULL, &h);
929 if (h < minh1) { /* save for 2nd pass */
930 boxaAddBox(boxae, box, L_INSERT);
931 numaAddNumber(nae, i);
932 } else {
933 n = boxaaGetCount(baa);
934 boxaaAlignBox(baa, box, delta1, &index);
935 if (index < n) { /* append to an existing boxa */
936 boxaaAddBox(baa, index, box, L_INSERT);
937 } else { /* doesn't align, need new boxa */
938 boxan = boxaCreate(0);
939 boxaAddBox(boxan, box, L_INSERT);
940 boxaaAddBoxa(baa, boxan, L_INSERT);
941 nan = numaCreate(0);
942 numaaAddNuma(naa, nan, L_INSERT);
943 }
944 numaGetIValue(naindex, i, &ival);
945 numaaAddNumber(naa, index, ival);
946 }
947 }
948 boxaDestroy(&boxa);
949 numaDestroy(&naindex);
950
951 /* Second pass: feed in small height boxes */
952 ne = boxaGetCount(boxae);
953 for (i = 0; i < ne; i++) {
954 box = boxaGetBox(boxae, i, L_CLONE);
955 n = boxaaGetCount(baa);
956 boxaaAlignBox(baa, box, delta2, &index);
957 if (index < n) { /* append to an existing boxa */
958 boxaaAddBox(baa, index, box, L_INSERT);
959 } else { /* doesn't align, need new boxa */
960 boxan = boxaCreate(0);
961 boxaAddBox(boxan, box, L_INSERT);
962 boxaaAddBoxa(baa, boxan, L_INSERT);
963 nan = numaCreate(0);
964 numaaAddNuma(naa, nan, L_INSERT);
965 }
966 numaGetIValue(nae, i, &ival); /* location in original boxas */
967 numaaAddNumber(naa, index, ival);
968 }
969
970 /* Third pass: merge some boxa whose extent is overlapping.
971 * Think of these boxa as text lines, where the bounding boxes
972 * of the text lines can overlap, but likely won't have
973 * a huge overlap.
974 * First do a greedy find of pairs of overlapping boxa, where
975 * the two boxa overlap by at least 50% of the smaller, and
976 * the smaller is not more than half the area of the larger.
977 * For such pairs, call the larger one the primary boxa. The
978 * boxes in the smaller one are appended to those in the primary
979 * in pass 3a, and the primaries are extracted in pass 3b.
980 * In this way, all boxes in the original baa are saved. */
981 n = boxaaGetCount(baa);
982 boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3);
983 boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap);
984 boxaDestroy(&boxa1);
985 boxaDestroy(&boxa3);
986 for (i = 0; i < n; i++) { /* Pass 3a: join selected copies of boxa */
987 numaGetIValue(namap, i, &ival);
988 if (ival >= 0) { /* join current to primary boxa[ival] */
989 boxa1 = boxaaGetBoxa(baa, i, L_COPY);
990 boxa2 = boxaaGetBoxa(baa, ival, L_CLONE);
991 boxaJoin(boxa2, boxa1, 0, -1);
992 boxaDestroy(&boxa2);
993 boxaDestroy(&boxa1);
994 na1 = numaaGetNuma(naa, i, L_COPY);
995 na2 = numaaGetNuma(naa, ival, L_CLONE);
996 numaJoin(na2, na1, 0, -1);
997 numaDestroy(&na1);
998 numaDestroy(&na2);
999 }
1000 }
1001 baa1 = boxaaCreate(n);
1002 naa1 = numaaCreate(n);
1003 for (i = 0; i < n; i++) { /* Pass 3b: save primary boxa */
1004 numaGetIValue(namap, i, &ival);
1005 if (ival == -1) {
1006 boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1007 boxaaAddBoxa(baa1, boxa1, L_INSERT);
1008 na1 = numaaGetNuma(naa, i, L_CLONE);
1009 numaaAddNuma(naa1, na1, L_INSERT);
1010 }
1011 }
1012 numaDestroy(&namap);
1013 boxaaDestroy(&baa);
1014 baa = baa1;
1015 numaaDestroy(&naa);
1016 naa = naa1;
1017
1018 /* Sort the boxes in each boxa horizontally */
1019 m = boxaaGetCount(baa);
1020 for (i = 0; i < m; i++) {
1021 boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1022 boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
1023 boxaaReplaceBoxa(baa, i, boxa2);
1024 na1 = numaaGetNuma(naa, i, L_CLONE);
1025 na2 = numaSortByIndex(na1, nah);
1026 numaaReplaceNuma(naa, i, na2);
1027 boxaDestroy(&boxa1);
1028 numaDestroy(&na1);
1029 numaDestroy(&nah);
1030 }
1031
1032 /* Sort the boxa vertically within boxaa, using the first box
1033 * in each boxa. */
1034 m = boxaaGetCount(baa);
1035 boxav = boxaCreate(m); /* holds first box in each boxa in baa */
1036 naad = numaaCreate(m);
1037 if (pnaad)
1038 *pnaad = naad;
1039 baad = boxaaCreate(m);
1040 for (i = 0; i < m; i++) {
1041 boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1042 box = boxaGetBox(boxa1, 0, L_CLONE);
1043 boxaAddBox(boxav, box, L_INSERT);
1044 boxaDestroy(&boxa1);
1045 }
1046 boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
1047 for (i = 0; i < m; i++) {
1048 numaGetIValue(nav, i, &index);
1049 boxa = boxaaGetBoxa(baa, index, L_CLONE);
1050 boxaaAddBoxa(baad, boxa, L_INSERT);
1051 nad = numaaGetNuma(naa, index, L_CLONE);
1052 numaaAddNuma(naad, nad, L_INSERT);
1053 }
1054
1055
1056 /* lept_stderr("box count = %d, numaa count = %d\n", nt,
1057 numaaGetNumberCount(naad)); */
1058
1059 boxaaDestroy(&baa);
1060 boxaDestroy(&boxav);
1061 boxaDestroy(&boxavs);
1062 boxaDestroy(&boxae);
1063 numaDestroy(&nav);
1064 numaDestroy(&nae);
1065 numaaDestroy(&naa);
1066 if (!pnaad)
1067 numaaDestroy(&naad);
1068
1069 return baad;
1070 }
1071
1072
1073 /*!
1074 * \brief boxaSort2dByIndex()
1075 *
1076 * \param[in] boxas
1077 * \param[in] naa numaa that maps from the new baa to the input boxa
1078 * \return baa sorted boxaa, or NULL on error
1079 */
1080 BOXAA *
1081 boxaSort2dByIndex(BOXA *boxas,
1082 NUMAA *naa)
1083 {
1084 l_int32 ntot, boxtot, i, j, n, nn, index;
1085 BOX *box;
1086 BOXA *boxa;
1087 BOXAA *baa;
1088 NUMA *na;
1089
1090 if (!boxas)
1091 return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL);
1092 if ((boxtot = boxaGetCount(boxas)) == 0)
1093 return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL);
1094 if (!naa)
1095 return (BOXAA *)ERROR_PTR("naindex not defined", __func__, NULL);
1096
1097 /* Check counts */
1098 ntot = numaaGetNumberCount(naa);
1099 if (ntot != boxtot)
1100 return (BOXAA *)ERROR_PTR("element count mismatch", __func__, NULL);
1101
1102 n = numaaGetCount(naa);
1103 baa = boxaaCreate(n);
1104 for (i = 0; i < n; i++) {
1105 na = numaaGetNuma(naa, i, L_CLONE);
1106 nn = numaGetCount(na);
1107 boxa = boxaCreate(nn);
1108 for (j = 0; j < nn; j++) {
1109 numaGetIValue(na, i, &index);
1110 box = boxaGetBox(boxas, index, L_COPY);
1111 boxaAddBox(boxa, box, L_INSERT);
1112 }
1113 boxaaAddBoxa(baa, boxa, L_INSERT);
1114 numaDestroy(&na);
1115 }
1116
1117 return baa;
1118 }
1119
1120
1121 /*---------------------------------------------------------------------*
1122 * Boxa array extraction *
1123 *---------------------------------------------------------------------*/
1124 /*!
1125 * \brief boxaExtractAsNuma()
1126 *
1127 * \param[in] boxa
1128 * \param[out] pnal [optional] array of left locations
1129 * \param[out] pnat [optional] array of top locations
1130 * \param[out] pnar [optional] array of right locations
1131 * \param[out] pnab [optional] array of bottom locations
1132 * \param[out] pnaw [optional] array of widths
1133 * \param[out] pnah [optional] array of heights
1134 * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them
1135 * \return 0 if OK, 1 on error
1136 *
1137 * <pre>
1138 * Notes:
1139 * (1) If you are counting or sorting values, such as determining
1140 * rank order, you must remove invalid boxes.
1141 * (2) If you are parametrizing the values, or doing an evaluation
1142 * where the position in the boxa sequence is important, you
1143 * must replace the invalid boxes with valid ones before
1144 * doing the extraction. This is easily done with boxaFillSequence().
1145 * </pre>
1146 */
1147 l_ok
1148 boxaExtractAsNuma(BOXA *boxa,
1149 NUMA **pnal,
1150 NUMA **pnat,
1151 NUMA **pnar,
1152 NUMA **pnab,
1153 NUMA **pnaw,
1154 NUMA **pnah,
1155 l_int32 keepinvalid)
1156 {
1157 l_int32 i, n, left, top, right, bot, w, h;
1158
1159 if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah)
1160 return ERROR_INT("no output requested", __func__, 1);
1161 if (pnal) *pnal = NULL;
1162 if (pnat) *pnat = NULL;
1163 if (pnar) *pnar = NULL;
1164 if (pnab) *pnab = NULL;
1165 if (pnaw) *pnaw = NULL;
1166 if (pnah) *pnah = NULL;
1167 if (!boxa)
1168 return ERROR_INT("boxa not defined", __func__, 1);
1169 if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1170 return ERROR_INT("no valid boxes", __func__, 1);
1171
1172 n = boxaGetCount(boxa);
1173 if (pnal) *pnal = numaCreate(n);
1174 if (pnat) *pnat = numaCreate(n);
1175 if (pnar) *pnar = numaCreate(n);
1176 if (pnab) *pnab = numaCreate(n);
1177 if (pnaw) *pnaw = numaCreate(n);
1178 if (pnah) *pnah = numaCreate(n);
1179 for (i = 0; i < n; i++) {
1180 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1181 if (!keepinvalid && (w <= 0 || h <= 0))
1182 continue;
1183 right = left + w - 1;
1184 bot = top + h - 1;
1185 if (pnal) numaAddNumber(*pnal, left);
1186 if (pnat) numaAddNumber(*pnat, top);
1187 if (pnar) numaAddNumber(*pnar, right);
1188 if (pnab) numaAddNumber(*pnab, bot);
1189 if (pnaw) numaAddNumber(*pnaw, w);
1190 if (pnah) numaAddNumber(*pnah, h);
1191 }
1192
1193 return 0;
1194 }
1195
1196
1197 /*!
1198 * \brief boxaExtractAsPta()
1199 *
1200 * \param[in] boxa
1201 * \param[out] pptal [optional] array of left locations vs. index
1202 * \param[out] pptat [optional] array of top locations vs. index
1203 * \param[out] pptar [optional] array of right locations vs. index
1204 * \param[out] pptab [optional] array of bottom locations vs. index
1205 * \param[out] pptaw [optional] array of widths vs. index
1206 * \param[out] pptah [optional] array of heights vs. index
1207 * \param[in] keepinvalid 1 to keep invalid boxes; 0 to remove them
1208 * \return 0 if OK, 1 on error
1209 *
1210 * <pre>
1211 * Notes:
1212 * (1) For most applications, such as counting, sorting, fitting
1213 * to some parametrized form, plotting or filtering in general,
1214 * you should remove the invalid boxes. Each pta saves the
1215 * box index in the x array, so replacing invalid boxes by
1216 * filling with boxaFillSequence(), which is required for
1217 * boxaExtractAsNuma(), is not necessary.
1218 * (2) If invalid boxes are retained, each one will result in
1219 * entries (typically 0) in all selected output pta.
1220 * (3) Other boxa --> pta functions are:
1221 * * boxaExtractCorners(): extracts any of the four corners as a pta.
1222 * * boxaConvertToPta(): extracts sufficient number of corners
1223 * to allow reconstruction of the original boxa from the pta.
1224 * </pre>
1225 */
1226 l_ok
1227 boxaExtractAsPta(BOXA *boxa,
1228 PTA **pptal,
1229 PTA **pptat,
1230 PTA **pptar,
1231 PTA **pptab,
1232 PTA **pptaw,
1233 PTA **pptah,
1234 l_int32 keepinvalid)
1235 {
1236 l_int32 i, n, left, top, right, bot, w, h;
1237
1238 if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah)
1239 return ERROR_INT("no output requested", __func__, 1);
1240 if (pptal) *pptal = NULL;
1241 if (pptat) *pptat = NULL;
1242 if (pptar) *pptar = NULL;
1243 if (pptab) *pptab = NULL;
1244 if (pptaw) *pptaw = NULL;
1245 if (pptah) *pptah = NULL;
1246 if (!boxa)
1247 return ERROR_INT("boxa not defined", __func__, 1);
1248 if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1249 return ERROR_INT("no valid boxes", __func__, 1);
1250
1251 n = boxaGetCount(boxa);
1252 if (pptal) *pptal = ptaCreate(n);
1253 if (pptat) *pptat = ptaCreate(n);
1254 if (pptar) *pptar = ptaCreate(n);
1255 if (pptab) *pptab = ptaCreate(n);
1256 if (pptaw) *pptaw = ptaCreate(n);
1257 if (pptah) *pptah = ptaCreate(n);
1258 for (i = 0; i < n; i++) {
1259 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1260 if (!keepinvalid && (w <= 0 || h <= 0))
1261 continue;
1262 right = left + w - 1;
1263 bot = top + h - 1;
1264 if (pptal) ptaAddPt(*pptal, i, left);
1265 if (pptat) ptaAddPt(*pptat, i, top);
1266 if (pptar) ptaAddPt(*pptar, i, right);
1267 if (pptab) ptaAddPt(*pptab, i, bot);
1268 if (pptaw) ptaAddPt(*pptaw, i, w);
1269 if (pptah) ptaAddPt(*pptah, i, h);
1270 }
1271
1272 return 0;
1273 }
1274
1275
1276 /*!
1277 * \brief boxaExtractCorners()
1278 *
1279 * \param[in] boxa
1280 * \param[in] loc L_UPPER_LEFT, L_UPPER_RIGHT, L_LOWER_LEFT,
1281 * L_LOWER_RIGHT, L_BOX_CENTER
1282 * \return pta of requested coordinates, or NULL on error
1283 *
1284 * <pre>
1285 * Notes:
1286 * (1) Extracts (0,0) for invalid boxes.
1287 * (2) Other boxa --> pta functions are:
1288 * * boxaExtractAsPta(): allows extraction of any dimension
1289 * and/or side location, with each in a separate pta.
1290 * * boxaConvertToPta(): extracts sufficient number of corners
1291 * to allow reconstruction of the original boxa from the pta.
1292 * </pre>
1293 */
1294 PTA *
1295 boxaExtractCorners(BOXA *boxa,
1296 l_int32 loc)
1297 {
1298 l_int32 i, n, left, top, right, bot, w, h;
1299 PTA *pta;
1300
1301 if (!boxa)
1302 return (PTA *)ERROR_PTR("boxa not defined", __func__, NULL);
1303 if (loc != L_UPPER_LEFT && loc != L_UPPER_RIGHT && loc != L_LOWER_LEFT &&
1304 loc != L_LOWER_RIGHT && loc != L_BOX_CENTER)
1305 return (PTA *)ERROR_PTR("invalid location", __func__, NULL);
1306
1307 n = boxaGetCount(boxa);
1308 if ((pta = ptaCreate(n)) == NULL)
1309 return (PTA *)ERROR_PTR("pta not made", __func__, NULL);
1310
1311 for (i = 0; i < n; i++) {
1312 boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1313 right = left + w - 1;
1314 bot = top + h - 1;
1315 if (w == 0 || h == 0) { /* invalid */
1316 left = 0;
1317 top = 0;
1318 right = 0;
1319 bot = 0;
1320 }
1321 if (loc == L_UPPER_LEFT)
1322 ptaAddPt(pta, left, top);
1323 else if (loc == L_UPPER_RIGHT)
1324 ptaAddPt(pta, right, top);
1325 else if (loc == L_LOWER_LEFT)
1326 ptaAddPt(pta, left, bot);
1327 else if (loc == L_LOWER_RIGHT)
1328 ptaAddPt(pta, right, bot);
1329 else if (loc == L_BOX_CENTER)
1330 ptaAddPt(pta, (left + right) / 2, (top + bot) / 2);
1331 }
1332
1333 return pta;
1334 }
1335
1336
1337 /*---------------------------------------------------------------------*
1338 * Boxa statistics *
1339 *---------------------------------------------------------------------*/
1340 /*!
1341 * \brief boxaGetRankVals()
1342 *
1343 * \param[in] boxa
1344 * \param[in] fract use 0.0 for smallest, 1.0 for largest width and height
1345 * \param[out] px [optional] rank value of x (left side)
1346 * \param[out] py [optional] rank value of y (top side)
1347 * \param[out] pr [optional] rank value of right side
1348 * \param[out] pb [optional] rank value of bottom side
1349 * \param[out] pw [optional] rank value of width
1350 * \param[out] ph [optional] rank value of height
1351 * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1352 *
1353 * <pre>
1354 * Notes:
1355 * (1) This function does not assume that all boxes in the boxa are valid
1356 * (2) The six box parameters are sorted independently.
1357 * For rank order, the width and height are sorted in increasing
1358 * order. But what does it mean to sort x and y in "rank order"?
1359 * If the boxes are of comparable size and somewhat
1360 * aligned (e.g., from multiple images), it makes some sense
1361 * to give a "rank order" for x and y by sorting them in
1362 * decreasing order. (By the same argument, we choose to sort
1363 * the r and b sides in increasing order.) In general, the
1364 * interpretation of a rank order on x and y (or on r and b)
1365 * is highly application dependent. In summary:
1366 * ~ x and y are sorted in decreasing order
1367 * ~ r and b are sorted in increasing order
1368 * ~ w and h are sorted in increasing order
1369 * </pre>
1370 */
1371 l_ok
1372 boxaGetRankVals(BOXA *boxa,
1373 l_float32 fract,
1374 l_int32 *px,
1375 l_int32 *py,
1376 l_int32 *pr,
1377 l_int32 *pb,
1378 l_int32 *pw,
1379 l_int32 *ph)
1380 {
1381 l_float32 xval, yval, rval, bval, wval, hval;
1382 NUMA *nax, *nay, *nar, *nab, *naw, *nah;
1383
1384 if (px) *px = 0;
1385 if (py) *py = 0;
1386 if (pr) *pr = 0;
1387 if (pb) *pb = 0;
1388 if (pw) *pw = 0;
1389 if (ph) *ph = 0;
1390 if (!boxa)
1391 return ERROR_INT("boxa not defined", __func__, 1);
1392 if (fract < 0.0 || fract > 1.0)
1393 return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1);
1394 if (boxaGetValidCount(boxa) == 0)
1395 return ERROR_INT("no valid boxes in boxa", __func__, 1);
1396
1397 /* Use only the valid boxes */
1398 boxaExtractAsNuma(boxa, &nax, &nay, &nar, &nab, &naw, &nah, 0);
1399
1400 if (px) {
1401 numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval);
1402 *px = (l_int32)xval;
1403 }
1404 if (py) {
1405 numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval);
1406 *py = (l_int32)yval;
1407 }
1408 if (pr) {
1409 numaGetRankValue(nar, fract, NULL, 1, &rval);
1410 *pr = (l_int32)rval;
1411 }
1412 if (pb) {
1413 numaGetRankValue(nab, fract, NULL, 1, &bval);
1414 *pb = (l_int32)bval;
1415 }
1416 if (pw) {
1417 numaGetRankValue(naw, fract, NULL, 1, &wval);
1418 *pw = (l_int32)wval;
1419 }
1420 if (ph) {
1421 numaGetRankValue(nah, fract, NULL, 1, &hval);
1422 *ph = (l_int32)hval;
1423 }
1424 numaDestroy(&nax);
1425 numaDestroy(&nay);
1426 numaDestroy(&nar);
1427 numaDestroy(&nab);
1428 numaDestroy(&naw);
1429 numaDestroy(&nah);
1430 return 0;
1431 }
1432
1433
1434 /*!
1435 * \brief boxaGetMedianVals()
1436 *
1437 * \param[in] boxa
1438 * \param[out] px [optional] median value of x (left side)
1439 * \param[out] py [optional] median value of y (top side)
1440 * \param[out] pr [optional] median value of right side
1441 * \param[out] pb [optional] median value of bottom side
1442 * \param[out] pw [optional] median value of width
1443 * \param[out] ph [optional] median value of height
1444 * \return 0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1445 *
1446 * <pre>
1447 * Notes:
1448 * (1) See boxaGetRankVals()
1449 * </pre>
1450 */
1451 l_ok
1452 boxaGetMedianVals(BOXA *boxa,
1453 l_int32 *px,
1454 l_int32 *py,
1455 l_int32 *pr,
1456 l_int32 *pb,
1457 l_int32 *pw,
1458 l_int32 *ph)
1459 {
1460 if (!boxa)
1461 return ERROR_INT("boxa not defined", __func__, 1);
1462 if (boxaGetValidCount(boxa) == 0)
1463 return ERROR_INT("no valid boxes in boxa", __func__, 1);
1464
1465 return boxaGetRankVals(boxa, 0.5, px, py, pr, pb, pw, ph);
1466 }
1467
1468
1469 /*!
1470 * \brief boxaGetAverageSize()
1471 *
1472 * \param[in] boxa
1473 * \param[out] pw [optional] average width
1474 * \param[out] ph [optional] average height
1475 * \return 0 if OK, 1 on error or if the boxa is empty
1476 */
1477 l_ok
1478 boxaGetAverageSize(BOXA *boxa,
1479 l_float32 *pw,
1480 l_float32 *ph)
1481 {
1482 l_int32 i, n, bw, bh;
1483 l_float32 sumw, sumh;
1484
1485 if (pw) *pw = 0.0;
1486 if (ph) *ph = 0.0;
1487 if (!boxa)
1488 return ERROR_INT("boxa not defined", __func__, 1);
1489 if ((n = boxaGetCount(boxa)) == 0)
1490 return ERROR_INT("boxa is empty", __func__, 1);
1491
1492 sumw = sumh = 0.0;
1493 for (i = 0; i < n; i++) {
1494 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
1495 sumw += bw;
1496 sumh += bh;
1497 }
1498
1499 if (pw) *pw = sumw / n;
1500 if (ph) *ph = sumh / n;
1501 return 0;
1502 }
1503
1504
1505 /*---------------------------------------------------------------------*
1506 * Other Boxaa functions *
1507 *---------------------------------------------------------------------*/
1508 /*!
1509 * \brief boxaaGetExtent()
1510 *
1511 * \param[in] baa
1512 * \param[out] pw [optional] width
1513 * \param[out] ph [optional] height
1514 * \param[out] pbox [optional] minimum box containing all boxa
1515 * in boxaa
1516 * \param[out] pboxa [optional] boxa containing all boxes in each
1517 * boxa in the boxaa
1518 * \return 0 if OK, 1 on error
1519 *
1520 * <pre>
1521 * Notes:
1522 * (1) The returned w and h are the minimum size image
1523 * that would contain all boxes untranslated.
1524 * (2) Each box in the returned boxa is the minimum box required to
1525 * hold all the boxes in the respective boxa of baa.
1526 * (3) If there are no valid boxes in a boxa, the box corresponding
1527 * to its extent has all fields set to 0 (an invalid box).
1528 * </pre>
1529 */
1530 l_ok
1531 boxaaGetExtent(BOXAA *baa,
1532 l_int32 *pw,
1533 l_int32 *ph,
1534 BOX **pbox,
1535 BOXA **pboxa)
1536 {
1537 l_int32 i, n, x, y, w, h, xmax, ymax, xmin, ymin, found;
1538 BOX *box1;
1539 BOXA *boxa, *boxa1;
1540
1541 if (!pw && !ph && !pbox && !pboxa)
1542 return ERROR_INT("no ptrs defined", __func__, 1);
1543 if (pw) *pw = 0;
1544 if (ph) *ph = 0;
1545 if (pbox) *pbox = NULL;
1546 if (pboxa) *pboxa = NULL;
1547 if (!baa)
1548 return ERROR_INT("baa not defined", __func__, 1);
1549
1550 n = boxaaGetCount(baa);
1551 if (n == 0)
1552 return ERROR_INT("no boxa in baa", __func__, 1);
1553
1554 boxa = boxaCreate(n);
1555 xmax = ymax = 0;
1556 xmin = ymin = 100000000;
1557 found = FALSE;
1558 for (i = 0; i < n; i++) {
1559 boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1560 boxaGetExtent(boxa1, NULL, NULL, &box1);
1561 boxaDestroy(&boxa1);
1562 boxGetGeometry(box1, &x, &y, &w, &h);
1563 if (w > 0 && h > 0) { /* a valid extent box */
1564 found = TRUE; /* found at least one valid extent box */
1565 xmin = L_MIN(xmin, x);
1566 ymin = L_MIN(ymin, y);
1567 xmax = L_MAX(xmax, x + w);
1568 ymax = L_MAX(ymax, y + h);
1569 }
1570 boxaAddBox(boxa, box1, L_INSERT);
1571 }
1572 if (found == FALSE) /* no valid extent boxes */
1573 xmin = ymin = 0;
1574
1575 if (pw) *pw = xmax;
1576 if (ph) *ph = ymax;
1577 if (pbox)
1578 *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
1579 if (pboxa)
1580 *pboxa = boxa;
1581 else
1582 boxaDestroy(&boxa);
1583 return 0;
1584 }
1585
1586
1587 /*!
1588 * \brief boxaaFlattenToBoxa()
1589 *
1590 * \param[in] baa
1591 * \param[out] pnaindex [optional] the boxa index in the baa
1592 * \param[in] copyflag L_COPY or L_CLONE
1593 * \return boxa, or NULL on error
1594 *
1595 * <pre>
1596 * Notes:
1597 * (1) This 'flattens' the baa to a boxa, taking the boxes in
1598 * order in the first boxa, then the second, etc.
1599 * (2) If a boxa is empty, we generate an invalid, placeholder box
1600 * of zero size. This is useful when converting from a baa
1601 * where each boxa has either 0 or 1 boxes, and it is necessary
1602 * to maintain a 1:1 correspondence between the initial
1603 * boxa array and the resulting box array.
1604 * (3) If &naindex is defined, we generate a Numa that gives, for
1605 * each box in the baa, the index of the boxa to which it belongs.
1606 * </pre>
1607 */
1608 BOXA *
1609 boxaaFlattenToBoxa(BOXAA *baa,
1610 NUMA **pnaindex,
1611 l_int32 copyflag)
1612 {
1613 l_int32 i, j, m, n;
1614 BOXA *boxa, *boxat;
1615 BOX *box;
1616 NUMA *naindex = NULL;
1617
1618 if (pnaindex) *pnaindex = NULL;
1619 if (!baa)
1620 return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL);
1621 if (copyflag != L_COPY && copyflag != L_CLONE)
1622 return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1623 if (pnaindex) {
1624 naindex = numaCreate(0);
1625 *pnaindex = naindex;
1626 }
1627
1628 n = boxaaGetCount(baa);
1629 boxa = boxaCreate(n);
1630 for (i = 0; i < n; i++) {
1631 boxat = boxaaGetBoxa(baa, i, L_CLONE);
1632 m = boxaGetCount(boxat);
1633 if (m == 0) { /* placeholder box */
1634 box = boxCreate(0, 0, 0, 0);
1635 boxaAddBox(boxa, box, L_INSERT);
1636 if (pnaindex)
1637 numaAddNumber(naindex, i); /* save 'row' number */
1638 } else {
1639 for (j = 0; j < m; j++) {
1640 box = boxaGetBox(boxat, j, copyflag);
1641 boxaAddBox(boxa, box, L_INSERT);
1642 if (pnaindex)
1643 numaAddNumber(naindex, i); /* save 'row' number */
1644 }
1645 }
1646 boxaDestroy(&boxat);
1647 }
1648
1649 return boxa;
1650 }
1651
1652
1653 /*!
1654 * \brief boxaaFlattenAligned()
1655 *
1656 * \param[in] baa
1657 * \param[in] num number extracted from each
1658 * \param[in] fillerbox [optional] that fills if necessary
1659 * \param[in] copyflag L_COPY or L_CLONE
1660 * \return boxa, or NULL on error
1661 *
1662 * <pre>
1663 * Notes:
1664 * (1) This 'flattens' the baa to a boxa, taking the first %num
1665 * boxes from each boxa.
1666 * (2) In each boxa, if there are less than %num boxes, we preserve
1667 * the alignment between the input baa and the output boxa
1668 * by inserting one or more fillerbox(es) or, if %fillerbox == NULL,
1669 * one or more invalid placeholder boxes.
1670 * </pre>
1671 */
1672 BOXA *
1673 boxaaFlattenAligned(BOXAA *baa,
1674 l_int32 num,
1675 BOX *fillerbox,
1676 l_int32 copyflag)
1677 {
1678 l_int32 i, j, m, n, mval, nshort;
1679 BOXA *boxat, *boxad;
1680 BOX *box;
1681
1682 if (!baa)
1683 return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL);
1684 if (copyflag != L_COPY && copyflag != L_CLONE)
1685 return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1686
1687 n = boxaaGetCount(baa);
1688 boxad = boxaCreate(n);
1689 for (i = 0; i < n; i++) {
1690 boxat = boxaaGetBoxa(baa, i, L_CLONE);
1691 m = boxaGetCount(boxat);
1692 mval = L_MIN(m, num);
1693 nshort = num - mval;
1694 for (j = 0; j < mval; j++) { /* take the first %num if possible */
1695 box = boxaGetBox(boxat, j, copyflag);
1696 boxaAddBox(boxad, box, L_INSERT);
1697 }
1698 for (j = 0; j < nshort; j++) { /* add fillers if necessary */
1699 if (fillerbox) {
1700 boxaAddBox(boxad, fillerbox, L_COPY);
1701 } else {
1702 box = boxCreate(0, 0, 0, 0); /* invalid placeholder box */
1703 boxaAddBox(boxad, box, L_INSERT);
1704 }
1705 }
1706 boxaDestroy(&boxat);
1707 }
1708
1709 return boxad;
1710 }
1711
1712
1713 /*!
1714 * \brief boxaEncapsulateAligned()
1715 *
1716 * \param[in] boxa
1717 * \param[in] num number put into each boxa in the baa
1718 * \param[in] copyflag L_COPY or L_CLONE
1719 * \return baa, or NULL on error
1720 *
1721 * <pre>
1722 * Notes:
1723 * (1) This puts %num boxes from the input %boxa into each of a
1724 * set of boxa within an output baa.
1725 * (2) This assumes that the boxes in %boxa are in sets of %num each.
1726 * </pre>
1727 */
1728 BOXAA *
1729 boxaEncapsulateAligned(BOXA *boxa,
1730 l_int32 num,
1731 l_int32 copyflag)
1732 {
1733 l_int32 i, j, n, nbaa, index;
1734 BOX *box;
1735 BOXA *boxat;
1736 BOXAA *baa;
1737
1738 if (!boxa)
1739 return (BOXAA *)ERROR_PTR("boxa not defined", __func__, NULL);
1740 if (copyflag != L_COPY && copyflag != L_CLONE)
1741 return (BOXAA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1742
1743 n = boxaGetCount(boxa);
1744 nbaa = n / num;
1745 if (num * nbaa != n)
1746 L_ERROR("inconsistent alignment: num doesn't divide n\n", __func__);
1747 baa = boxaaCreate(nbaa);
1748 for (i = 0, index = 0; i < nbaa; i++) {
1749 boxat = boxaCreate(num);
1750 for (j = 0; j < num; j++, index++) {
1751 box = boxaGetBox(boxa, index, copyflag);
1752 boxaAddBox(boxat, box, L_INSERT);
1753 }
1754 boxaaAddBoxa(baa, boxat, L_INSERT);
1755 }
1756
1757 return baa;
1758 }
1759
1760
1761 /*!
1762 * \brief boxaaTranspose()
1763 *
1764 * \param[in] baas
1765 * \return baad, or NULL on error
1766 *
1767 * <pre>
1768 * Notes:
1769 * (1) If you think of a boxaa as a 2D array of boxes that is accessed
1770 * row major, then each row is represented by one of the boxa.
1771 * This function creates a new boxaa related to the input boxaa
1772 * as a column major traversal of the input boxaa.
1773 * (2) For example, if %baas has 2 boxa, each with 10 boxes, then
1774 * %baad will have 10 boxa, each with 2 boxes.
1775 * (3) Require for this transpose operation that each boxa in
1776 * %baas has the same number of boxes. This operation is useful
1777 * when the i-th boxes in each boxa are meaningfully related.
1778 * </pre>
1779 */
1780 BOXAA *
1781 boxaaTranspose(BOXAA *baas)
1782 {
1783 l_int32 i, j, ny, nb, nbox;
1784 BOX *box;
1785 BOXA *boxa;
1786 BOXAA *baad;
1787
1788 if (!baas)
1789 return (BOXAA *)ERROR_PTR("baas not defined", __func__, NULL);
1790 if ((ny = boxaaGetCount(baas)) == 0)
1791 return (BOXAA *)ERROR_PTR("baas empty", __func__, NULL);
1792
1793 /* Make sure that each boxa in baas has the same number of boxes */
1794 for (i = 0; i < ny; i++) {
1795 if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL)
1796 return (BOXAA *)ERROR_PTR("baas is missing a boxa", __func__, NULL);
1797 nb = boxaGetCount(boxa);
1798 boxaDestroy(&boxa);
1799 if (i == 0)
1800 nbox = nb;
1801 else if (nb != nbox)
1802 return (BOXAA *)ERROR_PTR("boxa are not all the same size",
1803 __func__, NULL);
1804 }
1805
1806 /* baad[i][j] = baas[j][i] */
1807 baad = boxaaCreate(nbox);
1808 for (i = 0; i < nbox; i++) {
1809 boxa = boxaCreate(ny);
1810 for (j = 0; j < ny; j++) {
1811 box = boxaaGetBox(baas, j, i, L_COPY);
1812 boxaAddBox(boxa, box, L_INSERT);
1813 }
1814 boxaaAddBoxa(baad, boxa, L_INSERT);
1815 }
1816 return baad;
1817 }
1818
1819
1820 /*!
1821 * \brief boxaaAlignBox()
1822 *
1823 * \param[in] baa
1824 * \param[in] box to be aligned with bext boxa in the baa, if possible
1825 * \param[in] delta amount by which consecutive components can miss
1826 * in overlap and still be included in the array
1827 * \param[out] pindex index of boxa with best overlap, or if none match,
1828 * this is the index of the next boxa to be generated
1829 * \return 0 if OK, 1 on error
1830 *
1831 * <pre>
1832 * Notes:
1833 * (1) This is not greedy. It finds the boxa whose vertical
1834 * extent has the closest overlap with the input box.
1835 * </pre>
1836 */
1837 l_ok
1838 boxaaAlignBox(BOXAA *baa,
1839 BOX *box,
1840 l_int32 delta,
1841 l_int32 *pindex)
1842 {
1843 l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
1844 BOX *boxt;
1845 BOXA *boxa;
1846
1847 if (pindex) *pindex = 0;
1848 if (!baa)
1849 return ERROR_INT("baa not defined", __func__, 1);
1850 if (!box)
1851 return ERROR_INT("box not defined", __func__, 1);
1852 if (!pindex)
1853 return ERROR_INT("&index not defined", __func__, 1);
1854
1855 n = boxaaGetCount(baa);
1856 boxGetGeometry(box, NULL, &y, NULL, &h);
1857 maxovlp = -10000000;
1858 for (i = 0; i < n; i++) {
1859 boxa = boxaaGetBoxa(baa, i, L_CLONE);
1860 if ((m = boxaGetCount(boxa)) == 0) {
1861 boxaDestroy(&boxa);
1862 L_WARNING("no boxes in boxa\n", __func__);
1863 continue;
1864 }
1865 boxaGetExtent(boxa, NULL, NULL, &boxt);
1866 boxGetGeometry(boxt, NULL, &yt, NULL, &ht);
1867 boxDestroy(&boxt);
1868 boxaDestroy(&boxa);
1869
1870 /* Overlap < 0 means the components do not overlap vertically */
1871 if (yt >= y)
1872 ovlp = y + h - 1 - yt;
1873 else
1874 ovlp = yt + ht - 1 - y;
1875 if (ovlp > maxovlp) {
1876 maxovlp = ovlp;
1877 maxindex = i;
1878 }
1879 }
1880
1881 if (maxovlp + delta >= 0)
1882 *pindex = maxindex;
1883 else
1884 *pindex = n;
1885 return 0;
1886 }