comparison mupdf-source/thirdparty/leptonica/src/boxfunc3.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file boxfunc3.c
29 * <pre>
30 *
31 * Boxa/Boxaa painting into pix
32 * PIX *pixMaskConnComp()
33 * PIX *pixMaskBoxa()
34 * PIX *pixPaintBoxa()
35 * PIX *pixSetBlackOrWhiteBoxa()
36 * PIX *pixPaintBoxaRandom()
37 * PIX *pixBlendBoxaRandom()
38 * PIX *pixDrawBoxa()
39 * PIX *pixDrawBoxaRandom()
40 * PIX *boxaaDisplay()
41 * PIXA *pixaDisplayBoxaa()
42 *
43 * Split mask components into Boxa
44 * BOXA *pixSplitIntoBoxa()
45 * BOXA *pixSplitComponentIntoBoxa()
46 * static l_int32 pixSearchForRectangle()
47 *
48 * Represent horizontal or vertical mosaic strips
49 * BOXA *makeMosaicStrips()
50 *
51 * Comparison between boxa
52 * l_int32 boxaCompareRegions()
53 *
54 * Reliable selection of a single large box
55 * BOX *pixSelectLargeULComp()
56 * BOX *boxaSelectLargeULBox()
57 *
58 * See summary in pixPaintBoxa() of various ways to paint and draw
59 * boxes on images.
60 * </pre>
61 */
62
63 #ifdef HAVE_CONFIG_H
64 #include <config_auto.h>
65 #endif /* HAVE_CONFIG_H */
66
67 #include "allheaders.h"
68
69 static l_int32 pixSearchForRectangle(PIX *pixs, BOX *boxs, l_int32 minsum,
70 l_int32 skipdist, l_int32 delta,
71 l_int32 maxbg, l_int32 sideflag,
72 BOXA *boxat, NUMA *nascore);
73
74 #ifndef NO_CONSOLE_IO
75 #define DEBUG_SPLIT 0
76 #endif /* ~NO_CONSOLE_IO */
77
78 /*---------------------------------------------------------------------*
79 * Boxa/Boxaa painting into Pix *
80 *---------------------------------------------------------------------*/
81 /*!
82 * \brief pixMaskConnComp()
83 *
84 * \param[in] pixs 1 bpp
85 * \param[in] connectivity 4 or 8
86 * \param[out] pboxa [optional] bounding boxes of c.c.
87 * \return pixd 1 bpp mask over the c.c., or NULL on error
88 *
89 * <pre>
90 * Notes:
91 * (1) This generates a mask image with ON pixels over the
92 * b.b. of the c.c. in pixs. If there are no ON pixels in pixs,
93 * pixd will also have no ON pixels.
94 * </pre>
95 */
96 PIX *
97 pixMaskConnComp(PIX *pixs,
98 l_int32 connectivity,
99 BOXA **pboxa)
100 {
101 BOXA *boxa;
102 PIX *pixd;
103
104 if (pboxa) *pboxa = NULL;
105 if (!pixs || pixGetDepth(pixs) != 1)
106 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
107 if (connectivity != 4 && connectivity != 8)
108 return (PIX *)ERROR_PTR("connectivity not 4 or 8", __func__, NULL);
109
110 boxa = pixConnComp(pixs, NULL, connectivity);
111 pixd = pixCreateTemplate(pixs);
112 if (boxaGetCount(boxa) != 0)
113 pixMaskBoxa(pixd, pixd, boxa, L_SET_PIXELS);
114 if (pboxa)
115 *pboxa = boxa;
116 else
117 boxaDestroy(&boxa);
118 return pixd;
119 }
120
121
122 /*!
123 * \brief pixMaskBoxa()
124 *
125 * \param[in] pixd [optional] may be NULL
126 * \param[in] pixs any depth; not cmapped
127 * \param[in] boxa of boxes, to paint
128 * \param[in] op L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS
129 * \return pixd with masking op over the boxes, or NULL on error
130 *
131 * <pre>
132 * Notes:
133 * (1) This can be used with:
134 * pixd = NULL (makes a new pixd)
135 * pixd = pixs (in-place)
136 * (2) If pixd == NULL, this first makes a copy of pixs, and then
137 * bit-twiddles over the boxes. Otherwise, it operates directly
138 * on pixs.
139 * (3) This simple function is typically used with 1 bpp images.
140 * It uses the 1-image rasterop function, rasteropUniLow(),
141 * to set, clear or flip the pixels in pixd.
142 * (4) If you want to generate a 1 bpp mask of ON pixels from the boxes
143 * in a Boxa, in a pix of size (w,h):
144 * pix = pixCreate(w, h, 1);
145 * pixMaskBoxa(pix, pix, boxa, L_SET_PIXELS);
146 * </pre>
147 */
148 PIX *
149 pixMaskBoxa(PIX *pixd,
150 PIX *pixs,
151 BOXA *boxa,
152 l_int32 op)
153 {
154 l_int32 i, n, x, y, w, h;
155 BOX *box;
156
157 if (!pixs)
158 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
159 if (pixGetColormap(pixs))
160 return (PIX *)ERROR_PTR("pixs is cmapped", __func__, NULL);
161 if (pixd && (pixd != pixs))
162 return (PIX *)ERROR_PTR("if pixd, must be in-place", __func__, NULL);
163 if (!boxa)
164 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
165 if (op != L_SET_PIXELS && op != L_CLEAR_PIXELS && op != L_FLIP_PIXELS)
166 return (PIX *)ERROR_PTR("invalid op", __func__, NULL);
167
168 pixd = pixCopy(pixd, pixs);
169 if ((n = boxaGetCount(boxa)) == 0) {
170 L_WARNING("no boxes to mask\n", __func__);
171 return pixd;
172 }
173
174 for (i = 0; i < n; i++) {
175 box = boxaGetBox(boxa, i, L_CLONE);
176 boxGetGeometry(box, &x, &y, &w, &h);
177 if (op == L_SET_PIXELS)
178 pixRasterop(pixd, x, y, w, h, PIX_SET, NULL, 0, 0);
179 else if (op == L_CLEAR_PIXELS)
180 pixRasterop(pixd, x, y, w, h, PIX_CLR, NULL, 0, 0);
181 else /* op == L_FLIP_PIXELS */
182 pixRasterop(pixd, x, y, w, h, PIX_NOT(PIX_DST), NULL, 0, 0);
183 boxDestroy(&box);
184 }
185
186 return pixd;
187 }
188
189
190 /*!
191 * \brief pixPaintBoxa()
192 *
193 * \param[in] pixs any depth, can be cmapped
194 * \param[in] boxa of boxes, to paint
195 * \param[in] val rgba color to paint
196 * \return pixd with painted boxes, or NULL on error
197 *
198 * <pre>
199 * Notes:
200 * (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp
201 * and the boxa is painted using a colormap; otherwise,
202 * it is converted to 32 bpp rgb.
203 * (2) There are several ways to display a box on an image:
204 * * Paint it as a solid color
205 * * Draw the outline
206 * * Blend the outline or region with the existing image
207 * We provide painting and drawing here; blending is in blend.c.
208 * When painting or drawing, the result can be either a
209 * cmapped image or an rgb image. The dest will be cmapped
210 * if the src is either 1 bpp or has a cmap that is not full.
211 * To force RGB output, use pixConvertTo8(pixs, FALSE)
212 * before calling any of these paint and draw functions.
213 * </pre>
214 */
215 PIX *
216 pixPaintBoxa(PIX *pixs,
217 BOXA *boxa,
218 l_uint32 val)
219 {
220 l_int32 i, n, d, rval, gval, bval, newindex;
221 l_int32 mapvacancy; /* true only if cmap and not full */
222 BOX *box;
223 PIX *pixd;
224 PIXCMAP *cmap;
225
226 if (!pixs)
227 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
228 if (!boxa)
229 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
230
231 if ((n = boxaGetCount(boxa)) == 0) {
232 L_WARNING("no boxes to paint; returning a copy\n", __func__);
233 return pixCopy(NULL, pixs);
234 }
235
236 mapvacancy = FALSE;
237 if ((cmap = pixGetColormap(pixs)) != NULL) {
238 if (pixcmapGetCount(cmap) < 256)
239 mapvacancy = TRUE;
240 }
241 if (pixGetDepth(pixs) == 1 || mapvacancy)
242 pixd = pixConvertTo8(pixs, TRUE);
243 else
244 pixd = pixConvertTo32(pixs);
245 if (!pixd)
246 return (PIX *)ERROR_PTR("pixd not made", __func__, NULL);
247
248 d = pixGetDepth(pixd);
249 if (d == 8) { /* colormapped */
250 cmap = pixGetColormap(pixd);
251 extractRGBValues(val, &rval, &gval, &bval);
252 if (pixcmapAddNewColor(cmap, rval, gval, bval, &newindex)) {
253 pixDestroy(&pixd);
254 return (PIX *)ERROR_PTR("cmap full; can't add", __func__, NULL);
255 }
256 }
257
258 for (i = 0; i < n; i++) {
259 box = boxaGetBox(boxa, i, L_CLONE);
260 if (d == 8)
261 pixSetInRectArbitrary(pixd, box, newindex);
262 else
263 pixSetInRectArbitrary(pixd, box, val);
264 boxDestroy(&box);
265 }
266
267 return pixd;
268 }
269
270
271 /*!
272 * \brief pixSetBlackOrWhiteBoxa()
273 *
274 * \param[in] pixs any depth, can be cmapped
275 * \param[in] boxa [optional] of boxes, to clear or set
276 * \param[in] op L_SET_BLACK, L_SET_WHITE
277 * \return pixd with boxes filled with white or black, or NULL on error
278 */
279 PIX *
280 pixSetBlackOrWhiteBoxa(PIX *pixs,
281 BOXA *boxa,
282 l_int32 op)
283 {
284 l_int32 i, n, d, index;
285 l_uint32 color;
286 BOX *box;
287 PIX *pixd;
288 PIXCMAP *cmap;
289
290 if (!pixs)
291 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
292 if (!boxa)
293 return pixCopy(NULL, pixs);
294 if ((n = boxaGetCount(boxa)) == 0)
295 return pixCopy(NULL, pixs);
296
297 pixd = pixCopy(NULL, pixs);
298 d = pixGetDepth(pixd);
299 if (d == 1) {
300 for (i = 0; i < n; i++) {
301 box = boxaGetBox(boxa, i, L_CLONE);
302 if (op == L_SET_WHITE)
303 pixClearInRect(pixd, box);
304 else
305 pixSetInRect(pixd, box);
306 boxDestroy(&box);
307 }
308 return pixd;
309 }
310
311 cmap = pixGetColormap(pixs);
312 if (cmap) {
313 color = (op == L_SET_WHITE) ? 1 : 0;
314 pixcmapAddBlackOrWhite(cmap, color, &index);
315 } else if (d == 8) {
316 color = (op == L_SET_WHITE) ? 0xff : 0x0;
317 } else if (d == 32) {
318 color = (op == L_SET_WHITE) ? 0xffffff00 : 0x0;
319 } else if (d == 2) {
320 color = (op == L_SET_WHITE) ? 0x3 : 0x0;
321 } else if (d == 4) {
322 color = (op == L_SET_WHITE) ? 0xf : 0x0;
323 } else if (d == 16) {
324 color = (op == L_SET_WHITE) ? 0xffff : 0x0;
325 } else {
326 pixDestroy(&pixd);
327 return (PIX *)ERROR_PTR("invalid depth", __func__, NULL);
328 }
329
330 for (i = 0; i < n; i++) {
331 box = boxaGetBox(boxa, i, L_CLONE);
332 if (cmap)
333 pixSetInRectArbitrary(pixd, box, index);
334 else
335 pixSetInRectArbitrary(pixd, box, color);
336 boxDestroy(&box);
337 }
338
339 return pixd;
340 }
341
342
343 /*!
344 * \brief pixPaintBoxaRandom()
345 *
346 * \param[in] pixs any depth, can be cmapped
347 * \param[in] boxa of boxes, to paint
348 * \return pixd with painted boxes, or NULL on error
349 *
350 * <pre>
351 * Notes:
352 * (1) If pixs is 1 bpp, we paint the boxa using a colormap;
353 * otherwise, we convert to 32 bpp.
354 * (2) We use up to 254 different colors for painting the regions.
355 * (3) If boxes overlap, the later ones paint over earlier ones.
356 * </pre>
357 */
358 PIX *
359 pixPaintBoxaRandom(PIX *pixs,
360 BOXA *boxa)
361 {
362 l_int32 i, n, d, rval, gval, bval, index;
363 l_uint32 val;
364 BOX *box;
365 PIX *pixd;
366 PIXCMAP *cmap;
367
368 if (!pixs)
369 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
370 if (!boxa)
371 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
372
373 if ((n = boxaGetCount(boxa)) == 0) {
374 L_WARNING("no boxes to paint; returning a copy\n", __func__);
375 return pixCopy(NULL, pixs);
376 }
377
378 if (pixGetDepth(pixs) == 1)
379 pixd = pixConvert1To8(NULL, pixs, 255, 0);
380 else
381 pixd = pixConvertTo32(pixs);
382 if (!pixd)
383 return (PIX *)ERROR_PTR("pixd not made", __func__, NULL);
384
385 cmap = pixcmapCreateRandom(8, 1, 1);
386 d = pixGetDepth(pixd); /* either 8 or 32 */
387 if (d == 8) /* colormapped */
388 pixSetColormap(pixd, cmap);
389
390 for (i = 0; i < n; i++) {
391 box = boxaGetBox(boxa, i, L_CLONE);
392 index = 1 + (i % 254);
393 if (d == 8) {
394 pixSetInRectArbitrary(pixd, box, index);
395 } else { /* d == 32 */
396 pixcmapGetColor(cmap, index, &rval, &gval, &bval);
397 composeRGBPixel(rval, gval, bval, &val);
398 pixSetInRectArbitrary(pixd, box, val);
399 }
400 boxDestroy(&box);
401 }
402
403 if (d == 32)
404 pixcmapDestroy(&cmap);
405 return pixd;
406 }
407
408
409 /*!
410 * \brief pixBlendBoxaRandom()
411 *
412 * \param[in] pixs any depth; can be cmapped
413 * \param[in] boxa of boxes, to blend/paint
414 * \param[in] fract of box color to use
415 * \return pixd 32 bpp, with blend/painted boxes, or NULL on error
416 *
417 * <pre>
418 * Notes:
419 * (1) pixs is converted to 32 bpp.
420 * (2) This differs from pixPaintBoxaRandom(), in that the
421 * colors here are blended with the color of pixs.
422 * (3) We use up to 254 different colors for painting the regions.
423 * (4) If boxes overlap, the final color depends only on the last
424 * rect that is used.
425 * </pre>
426 */
427 PIX *
428 pixBlendBoxaRandom(PIX *pixs,
429 BOXA *boxa,
430 l_float32 fract)
431 {
432 l_int32 i, n, rval, gval, bval, index;
433 l_uint32 val;
434 BOX *box;
435 PIX *pixd;
436 PIXCMAP *cmap;
437
438 if (!pixs)
439 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
440 if (!boxa)
441 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
442 if (fract < 0.0 || fract > 1.0) {
443 L_WARNING("fract must be in [0.0, 1.0]; setting to 0.5\n", __func__);
444 fract = 0.5;
445 }
446
447 if ((n = boxaGetCount(boxa)) == 0) {
448 L_WARNING("no boxes to paint; returning a copy\n", __func__);
449 return pixCopy(NULL, pixs);
450 }
451
452 if ((pixd = pixConvertTo32(pixs)) == NULL)
453 return (PIX *)ERROR_PTR("pixd not defined", __func__, NULL);
454
455 cmap = pixcmapCreateRandom(8, 1, 1);
456 for (i = 0; i < n; i++) {
457 box = boxaGetBox(boxa, i, L_CLONE);
458 index = 1 + (i % 254);
459 pixcmapGetColor(cmap, index, &rval, &gval, &bval);
460 composeRGBPixel(rval, gval, bval, &val);
461 pixBlendInRect(pixd, box, val, fract);
462 boxDestroy(&box);
463 }
464
465 pixcmapDestroy(&cmap);
466 return pixd;
467 }
468
469
470 /*!
471 * \brief pixDrawBoxa()
472 *
473 * \param[in] pixs any depth; can be cmapped
474 * \param[in] boxa of boxes, to draw
475 * \param[in] width of lines
476 * \param[in] val rgba color to draw
477 * \return pixd with outlines of boxes added, or NULL on error
478 *
479 * <pre>
480 * Notes:
481 * (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp
482 * and the boxa is drawn using a colormap; otherwise,
483 * it is converted to 32 bpp rgb.
484 * </pre>
485 */
486 PIX *
487 pixDrawBoxa(PIX *pixs,
488 BOXA *boxa,
489 l_int32 width,
490 l_uint32 val)
491 {
492 l_int32 rval, gval, bval, newindex;
493 l_int32 mapvacancy; /* true only if cmap and not full */
494 PIX *pixd;
495 PIXCMAP *cmap;
496
497 if (!pixs)
498 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
499 if (!boxa)
500 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
501 if (width < 1)
502 return (PIX *)ERROR_PTR("width must be >= 1", __func__, NULL);
503
504 if (boxaGetCount(boxa) == 0) {
505 L_WARNING("no boxes to draw; returning a copy\n", __func__);
506 return pixCopy(NULL, pixs);
507 }
508
509 mapvacancy = FALSE;
510 if ((cmap = pixGetColormap(pixs)) != NULL) {
511 if (pixcmapGetCount(cmap) < 256)
512 mapvacancy = TRUE;
513 }
514 if (pixGetDepth(pixs) == 1 || mapvacancy)
515 pixd = pixConvertTo8(pixs, TRUE);
516 else
517 pixd = pixConvertTo32(pixs);
518 if (!pixd)
519 return (PIX *)ERROR_PTR("pixd not made", __func__, NULL);
520
521 extractRGBValues(val, &rval, &gval, &bval);
522 if (pixGetDepth(pixd) == 8) { /* colormapped */
523 cmap = pixGetColormap(pixd);
524 pixcmapAddNewColor(cmap, rval, gval, bval, &newindex);
525 }
526
527 pixRenderBoxaArb(pixd, boxa, width, rval, gval, bval);
528 return pixd;
529 }
530
531
532 /*!
533 * \brief pixDrawBoxaRandom()
534 *
535 * \param[in] pixs any depth, can be cmapped
536 * \param[in] boxa of boxes, to draw
537 * \param[in] width thickness of line
538 * \return pixd with box outlines drawn, or NULL on error
539 *
540 * <pre>
541 * Notes:
542 * (1) If pixs is 1 bpp, we draw the boxa using a colormap;
543 * otherwise, we convert to 32 bpp.
544 * (2) We use up to 254 different colors for drawing the boxes.
545 * (3) If boxes overlap, the later ones draw over earlier ones.
546 * </pre>
547 */
548 PIX *
549 pixDrawBoxaRandom(PIX *pixs,
550 BOXA *boxa,
551 l_int32 width)
552 {
553 l_int32 i, n, rval, gval, bval, index;
554 BOX *box;
555 PIX *pixd;
556 PIXCMAP *cmap;
557 PTAA *ptaa;
558
559 if (!pixs)
560 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
561 if (!boxa)
562 return (PIX *)ERROR_PTR("boxa not defined", __func__, NULL);
563 if (width < 1)
564 return (PIX *)ERROR_PTR("width must be >= 1", __func__, NULL);
565
566 if ((n = boxaGetCount(boxa)) == 0) {
567 L_WARNING("no boxes to draw; returning a copy\n", __func__);
568 return pixCopy(NULL, pixs);
569 }
570
571 /* Input depth = 1 bpp; generate cmapped output */
572 if (pixGetDepth(pixs) == 1) {
573 ptaa = generatePtaaBoxa(boxa);
574 pixd = pixRenderRandomCmapPtaa(pixs, ptaa, 1, width, 1);
575 ptaaDestroy(&ptaa);
576 return pixd;
577 }
578
579 /* Generate rgb output */
580 pixd = pixConvertTo32(pixs);
581 cmap = pixcmapCreateRandom(8, 1, 1);
582 for (i = 0; i < n; i++) {
583 box = boxaGetBox(boxa, i, L_CLONE);
584 index = 1 + (i % 254);
585 pixcmapGetColor(cmap, index, &rval, &gval, &bval);
586 pixRenderBoxArb(pixd, box, width, rval, gval, bval);
587 boxDestroy(&box);
588 }
589 pixcmapDestroy(&cmap);
590 return pixd;
591 }
592
593
594 /*!
595 * \brief boxaaDisplay()
596 *
597 * \param[in] pixs [optional] 1 bpp
598 * \param[in] baa boxaa, typically from a 2d sort
599 * \param[in] linewba line width to display outline of each boxa
600 * \param[in] linewb line width to display outline of each box
601 * \param[in] colorba color to display boxa
602 * \param[in] colorb color to display box
603 * \param[in] w width of output pix; use 0 if determined by %pixs or %baa
604 * \param[in] h height of output pix; use 0 if determined by %pixs or %baa
605 * \return 0 if OK, 1 on error
606 *
607 * <pre>
608 * Notes:
609 * (1) If %pixs exists, this renders the boxes over an 8 bpp version
610 * of it. Otherwise, it renders the boxes over an empty image
611 * with a white background.
612 * (2) If %pixs exists, the dimensions of %pixd are the same,
613 * and input values of %w and %h are ignored.
614 * If %pixs is NULL, the dimensions of %pixd are determined by
615 * - %w and %h if both are > 0, or
616 * - the minimum size required using all boxes in %baa.
617 *
618 * </pre>
619 */
620 PIX *
621 boxaaDisplay(PIX *pixs,
622 BOXAA *baa,
623 l_int32 linewba,
624 l_int32 linewb,
625 l_uint32 colorba,
626 l_uint32 colorb,
627 l_int32 w,
628 l_int32 h)
629 {
630 l_int32 i, j, n, m, rbox, gbox, bbox, rboxa, gboxa, bboxa;
631 BOX *box;
632 BOXA *boxa;
633 PIX *pixd;
634 PIXCMAP *cmap;
635
636 if (!baa)
637 return (PIX *)ERROR_PTR("baa not defined", __func__, NULL);
638
639 if (w <= 0 || h <= 0) {
640 if (pixs)
641 pixGetDimensions(pixs, &w, &h, NULL);
642 else
643 boxaaGetExtent(baa, &w, &h, NULL, NULL);
644 }
645
646 if (pixs) {
647 pixd = pixConvertTo8(pixs, 1);
648 cmap = pixGetColormap(pixd);
649 } else {
650 pixd = pixCreate(w, h, 8);
651 cmap = pixcmapCreate(8);
652 pixSetColormap(pixd, cmap);
653 pixcmapAddColor(cmap, 255, 255, 255);
654 }
655 extractRGBValues(colorb, &rbox, &gbox, &bbox);
656 extractRGBValues(colorba, &rboxa, &gboxa, &bboxa);
657 pixcmapAddColor(cmap, rbox, gbox, bbox);
658 pixcmapAddColor(cmap, rboxa, gboxa, bboxa);
659
660 n = boxaaGetCount(baa);
661 for (i = 0; i < n; i++) {
662 boxa = boxaaGetBoxa(baa, i, L_CLONE);
663 boxaGetExtent(boxa, NULL, NULL, &box);
664 pixRenderBoxArb(pixd, box, linewba, rboxa, gboxa, bboxa);
665 boxDestroy(&box);
666 m = boxaGetCount(boxa);
667 for (j = 0; j < m; j++) {
668 box = boxaGetBox(boxa, j, L_CLONE);
669 pixRenderBoxArb(pixd, box, linewb, rbox, gbox, bbox);
670 boxDestroy(&box);
671 }
672 boxaDestroy(&boxa);
673 }
674
675 return pixd;
676 }
677
678
679 /*!
680 * \brief pixaDisplayBoxaa()
681 *
682 * \param[in] pixas any depth, can be cmapped
683 * \param[in] baa boxes to draw on input pixa
684 * \param[in] colorflag L_DRAW_RED, L_DRAW_GREEN, etc
685 * \param[in] width thickness of lines
686 * \return pixa with box outlines drawn on each pix, or NULL on error
687 *
688 * <pre>
689 * Notes:
690 * (1) All pix in %pixas that are not rgb are converted to rgb.
691 * (2) Each boxa in %baa contains boxes that will be drawn on
692 * the corresponding pix in %pixas.
693 * (3) The color of the boxes drawn on each pix are selected with
694 * %colorflag:
695 * * For red, green or blue: use L_DRAW_RED, etc.
696 * * For sequential r, g, b: use L_DRAW_RGB
697 * * For random colors: use L_DRAW_RANDOM
698 * </pre>
699 */
700 PIXA *
701 pixaDisplayBoxaa(PIXA *pixas,
702 BOXAA *baa,
703 l_int32 colorflag,
704 l_int32 width)
705 {
706 l_int32 i, j, nba, n, nbox, rval, gval, bval;
707 l_uint32 color;
708 l_uint32 colors[255];
709 BOXA *boxa;
710 BOX *box;
711 PIX *pix;
712 PIXA *pixad;
713
714 if (!pixas)
715 return (PIXA *)ERROR_PTR("pixas not defined", __func__, NULL);
716 if (!baa)
717 return (PIXA *)ERROR_PTR("baa not defined", __func__, NULL);
718 if (width < 1)
719 return (PIXA *)ERROR_PTR("width must be >= 1", __func__, NULL);
720 if ((nba = boxaaGetCount(baa)) < 1)
721 return (PIXA *)ERROR_PTR("no boxa in baa", __func__, NULL);
722 if ((n = pixaGetCount(pixas)) == 0)
723 return (PIXA *)ERROR_PTR("no pix in pixas", __func__, NULL);
724 if (n != nba)
725 return (PIXA *)ERROR_PTR("num pix != num boxa", __func__, NULL);
726 if (colorflag == L_DRAW_RED)
727 color = 0xff000000;
728 else if (colorflag == L_DRAW_GREEN)
729 color = 0x00ff0000;
730 else if (colorflag == L_DRAW_BLUE)
731 color = 0x0000ff00;
732 else if (colorflag == L_DRAW_RGB)
733 color = 0x000000ff;
734 else if (colorflag == L_DRAW_RANDOM)
735 color = 0x00000000;
736 else
737 return (PIXA *)ERROR_PTR("invalid colorflag", __func__, NULL);
738
739 if (colorflag == L_DRAW_RED || colorflag == L_DRAW_GREEN ||
740 colorflag == L_DRAW_BLUE) {
741 for (i = 0; i < 255; i++)
742 colors[i] = color;
743 } else if (colorflag == L_DRAW_RGB) {
744 for (i = 0; i < 255; i++) {
745 if (i % 3 == L_DRAW_RED)
746 colors[i] = 0xff000000;
747 else if (i % 3 == L_DRAW_GREEN)
748 colors[i] = 0x00ff0000;
749 else /* i % 3 == L_DRAW_BLUE) */
750 colors[i] = 0x0000ff00;
751 }
752 } else if (colorflag == L_DRAW_RANDOM) {
753 for (i = 0; i < 255; i++) {
754 rval = (l_uint32)rand() & 0xff;
755 gval = (l_uint32)rand() & 0xff;
756 bval = (l_uint32)rand() & 0xff;
757 composeRGBPixel(rval, gval, bval, &colors[i]);
758 }
759 }
760
761 pixad = pixaCreate(n);
762 for (i = 0; i < n; i++) {
763 pix = pixaGetPix(pixas, i, L_COPY);
764 boxa = boxaaGetBoxa(baa, i, L_CLONE);
765 nbox = boxaGetCount(boxa);
766 for (j = 0; j < nbox; j++) {
767 box = boxaGetBox(boxa, j, L_CLONE);
768 extractRGBValues(colors[j % 255], &rval, &gval, &bval);
769 pixRenderBoxArb(pix, box, width, rval, gval, bval);
770 boxDestroy(&box);
771 }
772 boxaDestroy(&boxa);
773 pixaAddPix(pixad, pix, L_INSERT);
774 }
775
776 return pixad;
777 }
778
779
780 /*---------------------------------------------------------------------*
781 * Split mask components into Boxa *
782 *---------------------------------------------------------------------*/
783 /*!
784 * \brief pixSplitIntoBoxa()
785 *
786 * \param[in] pixs 1 bpp
787 * \param[in] minsum minimum pixels to trigger propagation
788 * \param[in] skipdist distance before computing sum for propagation
789 * \param[in] delta difference required to stop propagation
790 * \param[in] maxbg maximum number of allowed bg pixels in ref scan
791 * \param[in] maxcomps use 0 for unlimited number of subdivided components
792 * \param[in] remainder set to 1 to get b.b. of remaining stuff
793 * \return boxa of rectangles covering the fg of pixs, or NULL on error
794 *
795 * <pre>
796 * Notes:
797 * (1) This generates a boxa of rectangles that covers
798 * the fg of a mask. For each 8-connected component in pixs,
799 * it does a greedy partitioning, choosing the largest
800 * rectangle found from each of the four directions at each iter.
801 * See pixSplitComponentIntoBoxa() for details.
802 * (2) The input parameters give some flexibility for boundary
803 * noise. The resulting set of rectangles may cover some
804 * bg pixels.
805 * (3) This should be used when there are a small number of
806 * mask components, each of which has sides that are close
807 * to horizontal and vertical. The input parameters %delta
808 * and %maxbg determine whether or not holes in the mask are covered.
809 * (4) The parameter %maxcomps gives the maximum number of allowed
810 * rectangles extracted from any single connected component.
811 * Use 0 if no limit is to be applied.
812 * (5) The flag %remainder specifies whether we take a final bounding
813 * box for anything left after the maximum number of allowed
814 * rectangle is extracted.
815 * </pre>
816 */
817 BOXA *
818 pixSplitIntoBoxa(PIX *pixs,
819 l_int32 minsum,
820 l_int32 skipdist,
821 l_int32 delta,
822 l_int32 maxbg,
823 l_int32 maxcomps,
824 l_int32 remainder)
825 {
826 l_int32 i, n;
827 BOX *box;
828 BOXA *boxa, *boxas, *boxad;
829 PIX *pix;
830 PIXA *pixas;
831
832 if (!pixs || pixGetDepth(pixs) != 1)
833 return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
834
835 boxas = pixConnComp(pixs, &pixas, 8);
836 n = boxaGetCount(boxas);
837 boxad = boxaCreate(0);
838 for (i = 0; i < n; i++) {
839 pix = pixaGetPix(pixas, i, L_CLONE);
840 box = boxaGetBox(boxas, i, L_CLONE);
841 boxa = pixSplitComponentIntoBoxa(pix, box, minsum, skipdist,
842 delta, maxbg, maxcomps, remainder);
843 boxaJoin(boxad, boxa, 0, -1);
844 pixDestroy(&pix);
845 boxDestroy(&box);
846 boxaDestroy(&boxa);
847 }
848
849 pixaDestroy(&pixas);
850 boxaDestroy(&boxas);
851 return boxad;
852 }
853
854
855 /*!
856 * \brief pixSplitComponentIntoBoxa()
857 *
858 * \param[in] pix 1 bpp
859 * \param[in] box [optional] location of pix w/rt an origin
860 * \param[in] minsum minimum pixels to trigger propagation
861 * \param[in] skipdist distance before computing sum for propagation
862 * \param[in] delta difference required to stop propagation
863 * \param[in] maxbg maximum number of allowed bg pixels in ref scan
864 * \param[in] maxcomps use 0 for unlimited number of subdivided components
865 * \param[in] remainder set to 1 to get b.b. of remaining stuff
866 * \return boxa of rectangles covering the fg of pix, or NULL on error
867 *
868 * <pre>
869 * Notes:
870 * (1) This generates a boxa of rectangles that covers
871 * the fg of a mask. It does so by a greedy partitioning of
872 * the mask, choosing the largest rectangle found from
873 * each of the four directions at each step.
874 * (2) The input parameters give some flexibility for boundary
875 * noise. The resulting set of rectangles must cover all
876 * the fg pixels and, in addition, may cover some bg pixels.
877 * Using small input parameters on a noiseless mask (i.e., one
878 * that has only large vertical and horizontal edges) will
879 * result in a proper covering of only the fg pixels of the mask.
880 * (3) The input is assumed to be a single connected component, that
881 * may have holes. From each side, sweep inward, counting
882 * the pixels. If the count becomes greater than %minsum,
883 * and we have moved forward a further amount %skipdist,
884 * record that count ('countref'), but don't accept if the scan
885 * contains more than %maxbg bg pixels. Continue the scan
886 * until we reach a count that differs from countref by at
887 * least %delta, at which point the propagation stops. The box
888 * swept out gets a score, which is the sum of fg pixels
889 * minus a penalty. The penalty is the number of bg pixels
890 * in the box. This is done from all four sides, and the
891 * side with the largest score is saved as a rectangle.
892 * The process repeats until there is either no rectangle
893 * left, or there is one that can't be captured from any
894 * direction. For the latter case, we simply accept the
895 * last rectangle.
896 * (4) The input box is only used to specify the location of
897 * the UL corner of pix, with respect to an origin that
898 * typically represents the UL corner of an underlying image,
899 * of which pix is one component. If %box is null,
900 * the UL corner is taken to be (0, 0).
901 * (5) The parameter %maxcomps gives the maximum number of allowed
902 * rectangles extracted from any single connected component.
903 * Use 0 if no limit is to be applied.
904 * (6) The flag %remainder specifies whether we take a final bounding
905 * box for anything left after the maximum number of allowed
906 * rectangle is extracted.
907 * (7) So if %maxcomps > 0, it specifies that we want no more than
908 * the first %maxcomps rectangles that satisfy the input
909 * criteria. After this, we can get a final rectangle that
910 * bounds everything left over by setting %remainder == 1.
911 * If %remainder == 0, we only get rectangles that satisfy
912 * the input criteria.
913 * (8) It should be noted that the removal of rectangles can
914 * break the original c.c. into several c.c.
915 * (9) Summing up:
916 * * If %maxcomp == 0, the splitting proceeds as far as possible.
917 * * If %maxcomp > 0, the splitting stops when %maxcomps are
918 * found, or earlier if no more components can be selected.
919 * * If %remainder == 1 and components remain that cannot be
920 * selected, they are returned as a single final rectangle;
921 * otherwise, they are ignored.
922 * </pre>
923 */
924 BOXA *
925 pixSplitComponentIntoBoxa(PIX *pix,
926 BOX *box,
927 l_int32 minsum,
928 l_int32 skipdist,
929 l_int32 delta,
930 l_int32 maxbg,
931 l_int32 maxcomps,
932 l_int32 remainder)
933 {
934 l_int32 i, w, h, boxx, boxy, bx, by, bw, bh, maxdir, maxscore;
935 l_int32 iter;
936 BOX *boxs; /* shrinks as rectangular regions are removed */
937 BOX *boxt1, *boxt2, *boxt3;
938 BOXA *boxat; /* stores rectangle data for each side in an iteration */
939 BOXA *boxad;
940 NUMA *nascore, *nas;
941 PIX *pixs;
942
943 if (!pix || pixGetDepth(pix) != 1)
944 return (BOXA *)ERROR_PTR("pix undefined or not 1 bpp", __func__, NULL);
945
946 pixs = pixCopy(NULL, pix);
947 pixGetDimensions(pixs, &w, &h, NULL);
948 if (box)
949 boxGetGeometry(box, &boxx, &boxy, NULL, NULL);
950 else
951 boxx = boxy = 0;
952 boxs = boxCreate(0, 0, w, h);
953 boxad = boxaCreate(0);
954
955 iter = 0;
956 while (boxs != NULL) {
957 boxGetGeometry(boxs, &bx, &by, &bw, &bh);
958 boxat = boxaCreate(4); /* potential rectangular regions */
959 nascore = numaCreate(4);
960 for (i = 0; i < 4; i++) {
961 pixSearchForRectangle(pixs, boxs, minsum, skipdist, delta, maxbg,
962 i, boxat, nascore);
963 }
964 nas = numaGetSortIndex(nascore, L_SORT_DECREASING);
965 numaGetIValue(nas, 0, &maxdir);
966 numaGetIValue(nascore, maxdir, &maxscore);
967 #if DEBUG_SPLIT
968 lept_stderr("Iteration: %d\n", iter);
969 boxPrintStreamInfo(stderr, boxs);
970 boxaWriteStderr(boxat);
971 lept_stderr("\nmaxdir = %d, maxscore = %d\n\n", maxdir, maxscore);
972 #endif /* DEBUG_SPLIT */
973 if (maxscore > 0) { /* accept this */
974 boxt1 = boxaGetBox(boxat, maxdir, L_CLONE);
975 boxt2 = boxTransform(boxt1, boxx, boxy, 1.0, 1.0);
976 boxaAddBox(boxad, boxt2, L_INSERT);
977 pixClearInRect(pixs, boxt1);
978 boxDestroy(&boxt1);
979 pixClipBoxToForeground(pixs, boxs, NULL, &boxt3);
980 boxDestroy(&boxs);
981 boxs = boxt3;
982 if (boxs) {
983 boxGetGeometry(boxs, NULL, NULL, &bw, &bh);
984 if (bw < 2 || bh < 2)
985 boxDestroy(&boxs); /* we're done */
986 }
987 } else { /* no more valid rectangles can be found */
988 if (remainder == 1) { /* save the last box */
989 boxt1 = boxTransform(boxs, boxx, boxy, 1.0, 1.0);
990 boxaAddBox(boxad, boxt1, L_INSERT);
991 }
992 boxDestroy(&boxs); /* we're done */
993 }
994 boxaDestroy(&boxat);
995 numaDestroy(&nascore);
996 numaDestroy(&nas);
997
998 iter++;
999 if ((iter == maxcomps) && boxs) {
1000 if (remainder == 1) { /* save the last box */
1001 boxt1 = boxTransform(boxs, boxx, boxy, 1.0, 1.0);
1002 boxaAddBox(boxad, boxt1, L_INSERT);
1003 }
1004 boxDestroy(&boxs); /* we're done */
1005 }
1006 }
1007
1008 pixDestroy(&pixs);
1009 return boxad;
1010 }
1011
1012
1013 /*!
1014 * \brief pixSearchForRectangle()
1015 *
1016 * \param[in] pixs 1 bpp
1017 * \param[in] boxs current region to investigate
1018 * \param[in] minsum minimum pixels to trigger propagation
1019 * \param[in] skipdist distance before computing sum for propagation
1020 * \param[in] delta difference required to stop propagation
1021 * \param[in] maxbg maximum number of allowed bg pixels in ref scan
1022 * \param[in] sideflag side to search from
1023 * \param[in] boxat add result of rectangular region found here
1024 * \param[in] nascore add score for this rectangle here
1025 * \return 0 if OK, 1 on error
1026 *
1027 * <pre>
1028 * Notes:
1029 * (1) See pixSplitComponentIntoBoxa() for an explanation of the algorithm.
1030 * This does the sweep from a single side. For each iteration
1031 * in pixSplitComponentIntoBoxa(), this will be called 4 times,
1032 * for %sideflag = {0, 1, 2, 3}.
1033 * (2) If a valid rectangle is not found, add a score of 0 and
1034 * input a minimum box.
1035 * </pre>
1036 */
1037 static l_int32
1038 pixSearchForRectangle(PIX *pixs,
1039 BOX *boxs,
1040 l_int32 minsum,
1041 l_int32 skipdist,
1042 l_int32 delta,
1043 l_int32 maxbg,
1044 l_int32 sideflag,
1045 BOXA *boxat,
1046 NUMA *nascore)
1047 {
1048 l_int32 bx, by, bw, bh, width, height, setref, atref;
1049 l_int32 minincol, maxincol, mininrow, maxinrow, minval, maxval, bgref;
1050 l_int32 x, y, x0, y0, xref, yref, colsum, rowsum, score, countref, diff;
1051 void **lines1;
1052 BOX *boxr;
1053
1054 if (!pixs || pixGetDepth(pixs) != 1)
1055 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
1056 if (!boxs)
1057 return ERROR_INT("boxs not defined", __func__, 1);
1058 if (!boxat)
1059 return ERROR_INT("boxat not defined", __func__, 1);
1060 if (!nascore)
1061 return ERROR_INT("nascore not defined", __func__, 1);
1062
1063 lines1 = pixGetLinePtrs(pixs, NULL);
1064 boxGetGeometry(boxs, &bx, &by, &bw, &bh);
1065 boxr = NULL;
1066 setref = 0;
1067 atref = 0;
1068 maxval = 0;
1069 minval = 100000;
1070 score = 0; /* sum of all (fg - bg) pixels seen in the scan */
1071 xref = yref = 100000; /* init to impossibly big number */
1072 if (sideflag == L_FROM_LEFT) {
1073 for (x = bx; x < bx + bw; x++) {
1074 colsum = 0;
1075 maxincol = 0;
1076 minincol = 100000;
1077 for (y = by; y < by + bh; y++) {
1078 if (GET_DATA_BIT(lines1[y], x)) {
1079 colsum++;
1080 if (y > maxincol) maxincol = y;
1081 if (y < minincol) minincol = y;
1082 }
1083 }
1084 score += colsum;
1085
1086 /* Enough fg to sweep out a rectangle? */
1087 if (!setref && colsum >= minsum) {
1088 setref = 1;
1089 xref = x + 10;
1090 if (xref >= bx + bw)
1091 goto failure;
1092 }
1093
1094 /* Reached the reference line; save the count;
1095 * if there is too much bg, the rectangle is invalid. */
1096 if (setref && x == xref) {
1097 atref = 1;
1098 countref = colsum;
1099 bgref = maxincol - minincol + 1 - countref;
1100 if (bgref > maxbg)
1101 goto failure;
1102 }
1103
1104 /* Have we left the rectangle? If so, save it along
1105 * with the score. */
1106 if (atref) {
1107 diff = L_ABS(colsum - countref);
1108 if (diff >= delta || x == bx + bw - 1) {
1109 height = maxval - minval + 1;
1110 width = x - bx;
1111 if (x == bx + bw - 1) width = x - bx + 1;
1112 boxr = boxCreate(bx, minval, width, height);
1113 score = 2 * score - width * height;
1114 goto success;
1115 }
1116 }
1117 maxval = L_MAX(maxval, maxincol);
1118 minval = L_MIN(minval, minincol);
1119 }
1120 goto failure;
1121 } else if (sideflag == L_FROM_RIGHT) {
1122 for (x = bx + bw - 1; x >= bx; x--) {
1123 colsum = 0;
1124 maxincol = 0;
1125 minincol = 100000;
1126 for (y = by; y < by + bh; y++) {
1127 if (GET_DATA_BIT(lines1[y], x)) {
1128 colsum++;
1129 if (y > maxincol) maxincol = y;
1130 if (y < minincol) minincol = y;
1131 }
1132 }
1133 score += colsum;
1134 if (!setref && colsum >= minsum) {
1135 setref = 1;
1136 xref = x - 10;
1137 if (xref < bx)
1138 goto failure;
1139 }
1140 if (setref && x == xref) {
1141 atref = 1;
1142 countref = colsum;
1143 bgref = maxincol - minincol + 1 - countref;
1144 if (bgref > maxbg)
1145 goto failure;
1146 }
1147 if (atref) {
1148 diff = L_ABS(colsum - countref);
1149 if (diff >= delta || x == bx) {
1150 height = maxval - minval + 1;
1151 x0 = x + 1;
1152 if (x == bx) x0 = x;
1153 width = bx + bw - x0;
1154 boxr = boxCreate(x0, minval, width, height);
1155 score = 2 * score - width * height;
1156 goto success;
1157 }
1158 }
1159 maxval = L_MAX(maxval, maxincol);
1160 minval = L_MIN(minval, minincol);
1161 }
1162 goto failure;
1163 } else if (sideflag == L_FROM_TOP) {
1164 for (y = by; y < by + bh; y++) {
1165 rowsum = 0;
1166 maxinrow = 0;
1167 mininrow = 100000;
1168 for (x = bx; x < bx + bw; x++) {
1169 if (GET_DATA_BIT(lines1[y], x)) {
1170 rowsum++;
1171 if (x > maxinrow) maxinrow = x;
1172 if (x < mininrow) mininrow = x;
1173 }
1174 }
1175 score += rowsum;
1176 if (!setref && rowsum >= minsum) {
1177 setref = 1;
1178 yref = y + 10;
1179 if (yref >= by + bh)
1180 goto failure;
1181 }
1182 if (setref && y == yref) {
1183 atref = 1;
1184 countref = rowsum;
1185 bgref = maxinrow - mininrow + 1 - countref;
1186 if (bgref > maxbg)
1187 goto failure;
1188 }
1189 if (atref) {
1190 diff = L_ABS(rowsum - countref);
1191 if (diff >= delta || y == by + bh - 1) {
1192 width = maxval - minval + 1;
1193 height = y - by;
1194 if (y == by + bh - 1) height = y - by + 1;
1195 boxr = boxCreate(minval, by, width, height);
1196 score = 2 * score - width * height;
1197 goto success;
1198 }
1199 }
1200 maxval = L_MAX(maxval, maxinrow);
1201 minval = L_MIN(minval, mininrow);
1202 }
1203 goto failure;
1204 } else if (sideflag == L_FROM_BOT) {
1205 for (y = by + bh - 1; y >= by; y--) {
1206 rowsum = 0;
1207 maxinrow = 0;
1208 mininrow = 100000;
1209 for (x = bx; x < bx + bw; x++) {
1210 if (GET_DATA_BIT(lines1[y], x)) {
1211 rowsum++;
1212 if (x > maxinrow) maxinrow = x;
1213 if (x < mininrow) mininrow = x;
1214 }
1215 }
1216 score += rowsum;
1217 if (!setref && rowsum >= minsum) {
1218 setref = 1;
1219 yref = y - 10;
1220 if (yref < by)
1221 goto failure;
1222 }
1223 if (setref && y == yref) {
1224 atref = 1;
1225 countref = rowsum;
1226 bgref = maxinrow - mininrow + 1 - countref;
1227 if (bgref > maxbg)
1228 goto failure;
1229 }
1230 if (atref) {
1231 diff = L_ABS(rowsum - countref);
1232 if (diff >= delta || y == by) {
1233 width = maxval - minval + 1;
1234 y0 = y + 1;
1235 if (y == by) y0 = y;
1236 height = by + bh - y0;
1237 boxr = boxCreate(minval, y0, width, height);
1238 score = 2 * score - width * height;
1239 goto success;
1240 }
1241 }
1242 maxval = L_MAX(maxval, maxinrow);
1243 minval = L_MIN(minval, mininrow);
1244 }
1245 goto failure;
1246 }
1247
1248 failure:
1249 numaAddNumber(nascore, 0);
1250 boxaAddBox(boxat, boxCreate(0, 0, 1, 1), L_INSERT); /* min box */
1251 LEPT_FREE(lines1);
1252 return 0;
1253
1254 success:
1255 numaAddNumber(nascore, score);
1256 boxaAddBox(boxat, boxr, L_INSERT);
1257 LEPT_FREE(lines1);
1258 return 0;
1259 }
1260
1261
1262 /*---------------------------------------------------------------------*
1263 * Represent horizontal or vertical mosaic strips *
1264 *---------------------------------------------------------------------*/
1265 /*!
1266 * \brief makeMosaicStrips()
1267 *
1268 * \param[in] w, h
1269 * \param[in] direction L_SCAN_HORIZONTAL or L_SCAN_VERTICAL
1270 * \param[in] size of strips in the scan direction
1271 * \return boxa, or NULL on error
1272 *
1273 * <pre>
1274 * Notes:
1275 * (1) For example, this can be used to generate a pixa of
1276 * vertical strips of width 10 from an image, using:
1277 * pixGetDimensions(pix, &w, &h, NULL);
1278 * boxa = makeMosaicStrips(w, h, L_SCAN_HORIZONTAL, 10);
1279 * pixa = pixClipRectangles(pix, boxa);
1280 * All strips except the last will be the same width. The
1281 * last strip will have width w % 10.
1282 * </pre>
1283 */
1284 BOXA *
1285 makeMosaicStrips(l_int32 w,
1286 l_int32 h,
1287 l_int32 direction,
1288 l_int32 size)
1289 {
1290 l_int32 i, nstrips, extra;
1291 BOX *box;
1292 BOXA *boxa;
1293
1294 if (w < 1 || h < 1)
1295 return (BOXA *)ERROR_PTR("invalid w or h", __func__, NULL);
1296 if (direction != L_SCAN_HORIZONTAL && direction != L_SCAN_VERTICAL)
1297 return (BOXA *)ERROR_PTR("invalid direction", __func__, NULL);
1298 if (size < 1)
1299 return (BOXA *)ERROR_PTR("size < 1", __func__, NULL);
1300
1301 boxa = boxaCreate(0);
1302 if (direction == L_SCAN_HORIZONTAL) {
1303 nstrips = w / size;
1304 for (i = 0; i < nstrips; i++) {
1305 box = boxCreate(i * size, 0, size, h);
1306 boxaAddBox(boxa, box, L_INSERT);
1307 }
1308 if ((extra = w % size) > 0) {
1309 box = boxCreate(nstrips * size, 0, extra, h);
1310 boxaAddBox(boxa, box, L_INSERT);
1311 }
1312 } else {
1313 nstrips = h / size;
1314 for (i = 0; i < nstrips; i++) {
1315 box = boxCreate(0, i * size, w, size);
1316 boxaAddBox(boxa, box, L_INSERT);
1317 }
1318 if ((extra = h % size) > 0) {
1319 box = boxCreate(0, nstrips * size, w, extra);
1320 boxaAddBox(boxa, box, L_INSERT);
1321 }
1322 }
1323 return boxa;
1324 }
1325
1326
1327 /*---------------------------------------------------------------------*
1328 * Comparison between boxa *
1329 *---------------------------------------------------------------------*/
1330 /*!
1331 * \brief boxaCompareRegions()
1332 *
1333 * \param[in] boxa1, boxa2
1334 * \param[in] areathresh minimum area of boxes to be considered
1335 * \param[out] pnsame true if same number of boxes
1336 * \param[out] pdiffarea fractional difference in total area
1337 * \param[out] pdiffxor [optional] fractional difference in xor of regions
1338 * \param[out] ppixdb [optional] debug pix showing two boxa
1339 * \return 0 if OK, 1 on error
1340 *
1341 * <pre>
1342 * Notes:
1343 * (1) This takes 2 boxa, removes all boxes smaller than a given area,
1344 * and compares the remaining boxes between the boxa.
1345 * (2) The area threshold is introduced to help remove noise from
1346 * small components. Any box with a smaller value of w * h
1347 * will be removed from consideration.
1348 * (3) The xor difference is the most stringent test, requiring alignment
1349 * of the corresponding boxes. It is also more computationally
1350 * intensive and is optionally returned. Alignment is to the
1351 * UL corner of each region containing all boxes, as given by
1352 * boxaGetExtent().
1353 * (4) Both fractional differences are with respect to the total
1354 * area in the two boxa. They range from 0.0 to 1.0.
1355 * A perfect match has value 0.0. If both boxa are empty,
1356 * we return 0.0; if one is empty we return 1.0.
1357 * (5) An example input might be the rectangular regions of a
1358 * segmentation mask for text or images from two pages.
1359 * </pre>
1360 */
1361 l_ok
1362 boxaCompareRegions(BOXA *boxa1,
1363 BOXA *boxa2,
1364 l_int32 areathresh,
1365 l_int32 *pnsame,
1366 l_float32 *pdiffarea,
1367 l_float32 *pdiffxor,
1368 PIX **ppixdb)
1369 {
1370 l_int32 w, h, x3, y3, w3, h3, x4, y4, w4, h4, n3, n4, area1, area2;
1371 l_int32 count3, count4, countxor;
1372 l_int32 *tab;
1373 BOX *box3, *box4;
1374 BOXA *boxa3, *boxa4, *boxa3t, *boxa4t;
1375 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1376 PIXA *pixa;
1377
1378 if (pdiffxor) *pdiffxor = 1.0;
1379 if (ppixdb) *ppixdb = NULL;
1380 if (pnsame) *pnsame = FALSE;
1381 if (pdiffarea) *pdiffarea = 1.0;
1382 if (!boxa1 || !boxa2)
1383 return ERROR_INT("boxa1 and boxa2 not both defined", __func__, 1);
1384 if (!pnsame)
1385 return ERROR_INT("&nsame not defined", __func__, 1);
1386 if (!pdiffarea)
1387 return ERROR_INT("&diffarea not defined", __func__, 1);
1388
1389 boxa3 = boxaSelectByArea(boxa1, areathresh, L_SELECT_IF_GTE, NULL);
1390 boxa4 = boxaSelectByArea(boxa2, areathresh, L_SELECT_IF_GTE, NULL);
1391 n3 = boxaGetCount(boxa3);
1392 n4 = boxaGetCount(boxa4);
1393 if (n3 == n4)
1394 *pnsame = TRUE;
1395
1396 /* There are no boxes in one or both */
1397 if (n3 == 0 || n4 == 0) {
1398 boxaDestroy(&boxa3);
1399 boxaDestroy(&boxa4);
1400 if (n3 == 0 && n4 == 0) { /* they are both empty: we say they are the
1401 * same; otherwise, they differ maximally
1402 * and retain the default value. */
1403 *pdiffarea = 0.0;
1404 if (pdiffxor) *pdiffxor = 0.0;
1405 }
1406 return 0;
1407 }
1408
1409 /* There are boxes in both */
1410 boxaGetArea(boxa3, &area1);
1411 boxaGetArea(boxa4, &area2);
1412 *pdiffarea = (l_float32)L_ABS(area1 - area2) / (l_float32)(area1 + area2);
1413 if (!pdiffxor) {
1414 boxaDestroy(&boxa3);
1415 boxaDestroy(&boxa4);
1416 return 0;
1417 }
1418
1419 /* The easiest way to get the xor of aligned boxes is to work
1420 * with images of each boxa. This is done by translating each
1421 * boxa so that the UL corner of the region that includes all
1422 * boxes in the boxa is placed at the origin of each pix. */
1423 boxaGetExtent(boxa3, &w, &h, &box3);
1424 boxaGetExtent(boxa4, &w, &h, &box4);
1425 boxGetGeometry(box3, &x3, &y3, &w3, &h3);
1426 boxGetGeometry(box4, &x4, &y4, &w4, &h4);
1427 boxa3t = boxaTransform(boxa3, -x3, -y3, 1.0, 1.0);
1428 boxa4t = boxaTransform(boxa4, -x4, -y4, 1.0, 1.0);
1429 w = L_MAX(x3 + w3, x4 + w4);
1430 h = L_MAX(y3 + h3, y4 + h4);
1431 pix3 = pixCreate(w, h, 1); /* use the max to keep everything in the xor */
1432 pix4 = pixCreate(w, h, 1);
1433 pixMaskBoxa(pix3, pix3, boxa3t, L_SET_PIXELS);
1434 pixMaskBoxa(pix4, pix4, boxa4t, L_SET_PIXELS);
1435 tab = makePixelSumTab8();
1436 pixCountPixels(pix3, &count3, tab);
1437 pixCountPixels(pix4, &count4, tab);
1438 pix5 = pixXor(NULL, pix3, pix4);
1439 pixCountPixels(pix5, &countxor, tab);
1440 LEPT_FREE(tab);
1441 *pdiffxor = (l_float32)countxor / (l_float32)(count3 + count4);
1442
1443 if (ppixdb) {
1444 pixa = pixaCreate(2);
1445 pix1 = pixCreate(w, h, 32);
1446 pixSetAll(pix1);
1447 pixRenderHashBoxaBlend(pix1, boxa3, 5, 1, L_POS_SLOPE_LINE, 2,
1448 255, 0, 0, 0.5);
1449 pixRenderHashBoxaBlend(pix1, boxa4, 5, 1, L_NEG_SLOPE_LINE, 2,
1450 0, 255, 0, 0.5);
1451 pixaAddPix(pixa, pix1, L_INSERT);
1452 pix2 = pixCreate(w, h, 32);
1453 pixPaintThroughMask(pix2, pix3, x3, y3, 0xff000000);
1454 pixPaintThroughMask(pix2, pix4, x4, y4, 0x00ff0000);
1455 pixAnd(pix3, pix3, pix4);
1456 pixPaintThroughMask(pix2, pix3, x3, y3, 0x0000ff00);
1457 pixaAddPix(pixa, pix2, L_INSERT);
1458 *ppixdb = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 30, 2);
1459 pixaDestroy(&pixa);
1460 }
1461
1462 boxDestroy(&box3);
1463 boxDestroy(&box4);
1464 boxaDestroy(&boxa3);
1465 boxaDestroy(&boxa3t);
1466 boxaDestroy(&boxa4);
1467 boxaDestroy(&boxa4t);
1468 pixDestroy(&pix3);
1469 pixDestroy(&pix4);
1470 pixDestroy(&pix5);
1471 return 0;
1472 }
1473
1474
1475 /*---------------------------------------------------------------------*
1476 * Reliable selection of a single large box *
1477 *---------------------------------------------------------------------*/
1478 /*!
1479 * \brief pixSelectLargeULComp()
1480 *
1481 * \param[in] pixs 1 bpp
1482 * \param[in] areaslop fraction near but less than 1.0
1483 * \param[in] yslop number of pixels in y direction
1484 * \param[in] connectivity 4 or 8
1485 * \return box, or NULL on error
1486 *
1487 * <pre>
1488 * Notes:
1489 * (1) This selects a box near the top (first) and left (second)
1490 * of the image, from the set of all boxes that have
1491 * area >= %areaslop * (area of biggest box),
1492 * where %areaslop is some fraction; say ~ 0.9.
1493 * (2) For all boxes satisfying the above condition, select
1494 * the left-most box that is within %yslop (say, 20) pixels
1495 * of the box nearest the top.
1496 * (3) This can be used to reliably select a specific one of
1497 * the largest regions in an image, for applications where
1498 * there are expected to be small variations in region size
1499 * and location.
1500 * (4) See boxSelectLargeULBox() for implementation details.
1501 * </pre>
1502 */
1503 BOX *
1504 pixSelectLargeULComp(PIX *pixs,
1505 l_float32 areaslop,
1506 l_int32 yslop,
1507 l_int32 connectivity)
1508 {
1509 BOX *box;
1510 BOXA *boxa1;
1511
1512 if (!pixs)
1513 return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
1514 if (areaslop < 0.0 || areaslop > 1.0)
1515 return (BOX *)ERROR_PTR("invalid value for areaslop", __func__, NULL);
1516 yslop = L_MAX(0, yslop);
1517
1518 boxa1 = pixConnCompBB(pixs, connectivity);
1519 if (boxaGetCount(boxa1) == 0) {
1520 boxaDestroy(&boxa1);
1521 return NULL;
1522 }
1523 box = boxaSelectLargeULBox(boxa1, areaslop, yslop);
1524 boxaDestroy(&boxa1);
1525 return box;
1526 }
1527
1528
1529 /*!
1530 * \brief boxaSelectLargeULBox()
1531 *
1532 * \param[in] boxas 1 bpp
1533 * \param[in] areaslop fraction near but less than 1.0
1534 * \param[in] yslop number of pixels in y direction
1535 * \return box, or NULL on error
1536 *
1537 * <pre>
1538 * Notes:
1539 * (1) See usage notes in pixSelectLargeULComp().
1540 * </pre>
1541 */
1542 BOX *
1543 boxaSelectLargeULBox(BOXA *boxas,
1544 l_float32 areaslop,
1545 l_int32 yslop)
1546 {
1547 l_int32 w, h, i, n, x1, y1, x2, y2, select;
1548 l_float32 area, max_area;
1549 BOX *box;
1550 BOXA *boxa1, *boxa2, *boxa3;
1551
1552 if (!boxas)
1553 return (BOX *)ERROR_PTR("boxas not defined", __func__, NULL);
1554 if (boxaGetCount(boxas) == 0)
1555 return (BOX *)ERROR_PTR("no boxes in boxas", __func__, NULL);
1556 if (areaslop < 0.0 || areaslop > 1.0)
1557 return (BOX *)ERROR_PTR("invalid value for areaslop", __func__, NULL);
1558 yslop = L_MAX(0, yslop);
1559
1560 boxa1 = boxaSort(boxas, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
1561 boxa2 = boxaSort(boxa1, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
1562 n = boxaGetCount(boxa2);
1563 boxaGetBoxGeometry(boxa1, 0, NULL, NULL, &w, &h); /* biggest box by area */
1564 max_area = (l_float32)(w * h);
1565
1566 /* boxa3 collects all boxes eligible by area, sorted top-down */
1567 boxa3 = boxaCreate(4);
1568 for (i = 0; i < n; i++) {
1569 boxaGetBoxGeometry(boxa2, i, NULL, NULL, &w, &h);
1570 area = (l_float32)(w * h);
1571 if (area / max_area >= areaslop) {
1572 box = boxaGetBox(boxa2, i, L_COPY);
1573 boxaAddBox(boxa3, box, L_INSERT);
1574 }
1575 }
1576
1577 /* Take the first (top-most box) unless the second (etc) has
1578 * nearly the same y value but a smaller x value. */
1579 n = boxaGetCount(boxa3);
1580 boxaGetBoxGeometry(boxa3, 0, &x1, &y1, NULL, NULL);
1581 select = 0;
1582 for (i = 1; i < n; i++) {
1583 boxaGetBoxGeometry(boxa3, i, &x2, &y2, NULL, NULL);
1584 if (y2 - y1 < yslop && x2 < x1) {
1585 select = i;
1586 x1 = x2; /* but always compare against y1 */
1587 }
1588 }
1589
1590 box = boxaGetBox(boxa3, select, L_COPY);
1591 boxaDestroy(&boxa1);
1592 boxaDestroy(&boxa2);
1593 boxaDestroy(&boxa3);
1594 return box;
1595 }