comparison mupdf-source/thirdparty/leptonica/src/pageseg.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file pageseg.c
29 * <pre>
30 *
31 * Top level page segmentation
32 * l_int32 pixGetRegionsBinary()
33 *
34 * Halftone region extraction
35 * PIX *pixGenHalftoneMask() **Deprecated wrapper**
36 * PIX *pixGenerateHalftoneMask()
37 *
38 * Textline extraction
39 * PIX *pixGenTextlineMask()
40 *
41 * Textblock extraction
42 * PIX *pixGenTextblockMask()
43 *
44 * Location and extraction of page foreground; cleaning pages
45 * PIX *pixCropImage()
46 * static l_int32 pixMaxCompAfterVClosing()
47 * static l_int32 pixFindPageInsideBlackBorder()
48 * static PIX *pixRescaleForCropping()
49 * PIX *pixCleanImage()
50 * BOX *pixFindPageForeground()
51 *
52 * Extraction of characters from image with only text
53 * l_int32 pixSplitIntoCharacters()
54 * BOXA *pixSplitComponentWithProfile()
55 *
56 * Extraction of lines of text
57 * PIXA *pixExtractTextlines()
58 * PIXA *pixExtractRawTextlines()
59 *
60 * How many text columns
61 * l_int32 pixCountTextColumns()
62 *
63 * Decision: text vs photo
64 * l_int32 pixDecideIfText()
65 * l_int32 pixFindThreshFgExtent()
66 *
67 * Decision: table vs text
68 * l_int32 pixDecideIfTable()
69 * Pix *pixPrepare1bpp()
70 *
71 * Estimate the grayscale background value
72 * l_int32 pixEstimateBackground()
73 *
74 * Largest white or black rectangles in an image
75 * l_int32 pixFindLargeRectangles()
76 * l_int32 pixFindLargestRectangle()
77 *
78 * Generate rectangle inside connected component
79 * BOX *pixFindRectangleInCC()
80 *
81 * Automatic photoinvert for OCR
82 * PIX *pixAutoPhotoinvert()
83 * </pre>
84 */
85
86 #ifdef HAVE_CONFIG_H
87 #include <config_auto.h>
88 #endif /* HAVE_CONFIG_H */
89
90 #include <math.h>
91 #include "allheaders.h"
92 #include "pix_internal.h"
93
94 /* These functions are not intended to work on very low-res images */
95 static const l_int32 MinWidth = 100;
96 static const l_int32 MinHeight = 100;
97
98 static l_ok pixMaxCompAfterVClosing(PIX *pixs, BOX **pbox);
99 static l_ok pixFindPageInsideBlackBorder(PIX *pixs, BOX **pbox);
100 static PIX *pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h,
101 l_int32 lr_border, l_int32 tb_border,
102 l_float32 maxwiden, PIX **ppixsc);
103
104 /*------------------------------------------------------------------*
105 * Top level page segmentation *
106 *------------------------------------------------------------------*/
107 /*!
108 * \brief pixGetRegionsBinary()
109 *
110 * \param[in] pixs 1 bpp, assumed to be 300 to 400 ppi
111 * \param[out] ppixhm [optional] halftone mask
112 * \param[out] ppixtm [optional] textline mask
113 * \param[out] ppixtb [optional] textblock mask
114 * \param[in] pixadb input for collecting debug pix; use NULL to skip
115 * \return 0 if OK, 1 on error
116 *
117 * <pre>
118 * Notes:
119 * (1) It is best to deskew the image before segmenting.
120 * (2) Passing in %pixadb enables debug output.
121 * </pre>
122 */
123 l_ok
124 pixGetRegionsBinary(PIX *pixs,
125 PIX **ppixhm,
126 PIX **ppixtm,
127 PIX **ppixtb,
128 PIXA *pixadb)
129 {
130 l_int32 w, h, htfound, tlfound;
131 PIX *pixr, *pix1, *pix2;
132 PIX *pixtext; /* text pixels only */
133 PIX *pixhm2; /* halftone mask; 2x reduction */
134 PIX *pixhm; /* halftone mask; */
135 PIX *pixtm2; /* textline mask; 2x reduction */
136 PIX *pixtm; /* textline mask */
137 PIX *pixvws; /* vertical white space mask */
138 PIX *pixtb2; /* textblock mask; 2x reduction */
139 PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */
140 PIX *pixtb; /* textblock mask */
141
142 if (ppixhm) *ppixhm = NULL;
143 if (ppixtm) *ppixtm = NULL;
144 if (ppixtb) *ppixtb = NULL;
145 if (!pixs || pixGetDepth(pixs) != 1)
146 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
147 pixGetDimensions(pixs, &w, &h, NULL);
148 if (w < MinWidth || h < MinHeight) {
149 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
150 return 1;
151 }
152
153 /* 2x reduce, to 150 -200 ppi */
154 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
155 if (pixadb) pixaAddPix(pixadb, pixr, L_COPY);
156
157 /* Get the halftone mask */
158 pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb);
159
160 /* Get the textline mask from the text pixels */
161 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb);
162
163 /* Get the textblock mask from the textline mask */
164 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb);
165 pixDestroy(&pixr);
166 pixDestroy(&pixtext);
167 pixDestroy(&pixvws);
168
169 /* Remove small components from the mask, where a small
170 * component is defined as one with both width and height < 60 */
171 pixtbf2 = NULL;
172 if (pixtb2) {
173 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
174 L_SELECT_IF_GTE, NULL);
175 pixDestroy(&pixtb2);
176 if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY);
177 }
178
179 /* Expand all masks to full resolution, and do filling or
180 * small dilations for better coverage. */
181 pixhm = pixExpandReplicate(pixhm2, 2);
182 pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
183 pixOr(pixhm, pixhm, pix1);
184 pixDestroy(&pixhm2);
185 pixDestroy(&pix1);
186 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
187
188 pix1 = pixExpandReplicate(pixtm2, 2);
189 pixtm = pixDilateBrick(NULL, pix1, 3, 3);
190 pixDestroy(&pixtm2);
191 pixDestroy(&pix1);
192 if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY);
193
194 if (pixtbf2) {
195 pix1 = pixExpandReplicate(pixtbf2, 2);
196 pixtb = pixDilateBrick(NULL, pix1, 3, 3);
197 pixDestroy(&pixtbf2);
198 pixDestroy(&pix1);
199 if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY);
200 } else {
201 pixtb = pixCreateTemplate(pixs); /* empty mask */
202 }
203
204 /* Debug: identify objects that are neither text nor halftone image */
205 if (pixadb) {
206 pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */
207 pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */
208 pixaAddPix(pixadb, pix2, L_INSERT);
209 pixDestroy(&pix1);
210 }
211
212 /* Debug: display textline components with random colors */
213 if (pixadb) {
214 l_int32 w, h;
215 BOXA *boxa;
216 PIXA *pixa;
217 boxa = pixConnComp(pixtm, &pixa, 8);
218 pixGetDimensions(pixtm, &w, &h, NULL);
219 pix1 = pixaDisplayRandomCmap(pixa, w, h);
220 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
221 pixaAddPix(pixadb, pix1, L_INSERT);
222 pixaDestroy(&pixa);
223 boxaDestroy(&boxa);
224 }
225
226 /* Debug: identify the outlines of each textblock */
227 if (pixadb) {
228 PIXCMAP *cmap;
229 PTAA *ptaa;
230 ptaa = pixGetOuterBordersPtaa(pixtb);
231 lept_mkdir("lept/pageseg");
232 ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
233 pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
234 cmap = pixGetColormap(pix1);
235 pixcmapResetColor(cmap, 0, 130, 130, 130);
236 pixaAddPix(pixadb, pix1, L_INSERT);
237 ptaaDestroy(&ptaa);
238 }
239
240 /* Debug: get b.b. for all mask components */
241 if (pixadb) {
242 BOXA *bahm, *batm, *batb;
243 bahm = pixConnComp(pixhm, NULL, 4);
244 batm = pixConnComp(pixtm, NULL, 4);
245 batb = pixConnComp(pixtb, NULL, 4);
246 boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm);
247 boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm);
248 boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb);
249 boxaDestroy(&bahm);
250 boxaDestroy(&batm);
251 boxaDestroy(&batb);
252 }
253 if (pixadb) {
254 pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation",
255 "/tmp/lept/pageseg/debug.pdf");
256 L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
257 }
258
259 if (ppixhm)
260 *ppixhm = pixhm;
261 else
262 pixDestroy(&pixhm);
263 if (ppixtm)
264 *ppixtm = pixtm;
265 else
266 pixDestroy(&pixtm);
267 if (ppixtb)
268 *ppixtb = pixtb;
269 else
270 pixDestroy(&pixtb);
271
272 return 0;
273 }
274
275
276 /*------------------------------------------------------------------*
277 * Halftone region extraction *
278 *------------------------------------------------------------------*/
279 /*!
280 * \brief pixGenHalftoneMask()
281 *
282 * <pre>
283 * Deprecated:
284 * This wrapper avoids an ABI change with tesseract 3.0.4.
285 * It should be removed when we no longer need to support 3.0.4.
286 * The debug parameter is ignored (assumed 0).
287 * </pre>
288 */
289 PIX *
290 pixGenHalftoneMask(PIX *pixs,
291 PIX **ppixtext,
292 l_int32 *phtfound,
293 l_int32 debug)
294 {
295 return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL);
296 }
297
298
299 /*!
300 * \brief pixGenerateHalftoneMask()
301 *
302 * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi
303 * \param[out] ppixtext [optional] text part of pixs
304 * \param[out] phtfound [optional] 1 if the mask is not empty
305 * \param[in] pixadb input for collecting debug pix; use NULL to skip
306 * \return pixd halftone mask, or NULL on error
307 *
308 * <pre>
309 * Notes:
310 * (1) This is not intended to work on small thumbnails. The
311 * dimensions of pixs must be at least MinWidth x MinHeight.
312 * </pre>
313 */
314 PIX *
315 pixGenerateHalftoneMask(PIX *pixs,
316 PIX **ppixtext,
317 l_int32 *phtfound,
318 PIXA *pixadb)
319 {
320 l_int32 w, h, empty;
321 PIX *pix1, *pix2, *pixhs, *pixhm, *pixd;
322
323 if (ppixtext) *ppixtext = NULL;
324 if (phtfound) *phtfound = 0;
325 if (!pixs || pixGetDepth(pixs) != 1)
326 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
327 pixGetDimensions(pixs, &w, &h, NULL);
328 if (w < MinWidth || h < MinHeight) {
329 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
330 return NULL;
331 }
332
333 /* Compute seed for halftone parts at 8x reduction */
334 pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
335 pix2 = pixOpenBrick(NULL, pix1, 5, 5);
336 pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */
337 pixDestroy(&pix1);
338 pixDestroy(&pix2);
339 if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY);
340
341 /* Compute mask for connected regions */
342 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
343 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
344
345 /* Fill seed into mask to get halftone mask */
346 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
347 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
348
349 #if 0
350 pixOpenBrick(pixd, pixd, 9, 9);
351 #endif
352
353 /* Check if mask is empty */
354 pixZero(pixd, &empty);
355 if (phtfound && !empty)
356 *phtfound = 1;
357
358 /* Optionally, get all pixels that are not under the halftone mask */
359 if (ppixtext) {
360 if (empty)
361 *ppixtext = pixCopy(NULL, pixs);
362 else
363 *ppixtext = pixSubtract(NULL, pixs, pixd);
364 if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY);
365 }
366
367 pixDestroy(&pixhs);
368 pixDestroy(&pixhm);
369 return pixd;
370 }
371
372
373 /*------------------------------------------------------------------*
374 * Textline extraction *
375 *------------------------------------------------------------------*/
376 /*!
377 * \brief pixGenTextlineMask()
378 *
379 * \param[in] pixs 1 bpp, assumed to be 150 to 200 ppi
380 * \param[out] ppixvws vertical whitespace mask
381 * \param[out] ptlfound [optional] 1 if the mask is not empty
382 * \param[in] pixadb input for collecting debug pix; use NULL to skip
383 * \return pixd textline mask, or NULL on error
384 *
385 * <pre>
386 * Notes:
387 * (1) The input pixs should be deskewed.
388 * (2) pixs should have no halftone pixels.
389 * (3) This is not intended to work on small thumbnails. The
390 * dimensions of pixs must be at least MinWidth x MinHeight.
391 * (4) Both the input image and the returned textline mask
392 * are at the same resolution.
393 * </pre>
394 */
395 PIX *
396 pixGenTextlineMask(PIX *pixs,
397 PIX **ppixvws,
398 l_int32 *ptlfound,
399 PIXA *pixadb)
400 {
401 l_int32 w, h, empty;
402 PIX *pix1, *pix2, *pixvws, *pixd;
403
404 if (ptlfound) *ptlfound = 0;
405 if (!ppixvws)
406 return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL);
407 *ppixvws = NULL;
408 if (!pixs || pixGetDepth(pixs) != 1)
409 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
410 pixGetDimensions(pixs, &w, &h, NULL);
411 if (w < MinWidth || h < MinHeight) {
412 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
413 return NULL;
414 }
415
416 /* First we need a vertical whitespace mask. Invert the image. */
417 pix1 = pixInvert(NULL, pixs);
418
419 /* The whitespace mask will break textlines where there
420 * is a large amount of white space below or above.
421 * This can be prevented by identifying regions of the
422 * inverted image that have large horizontal extent (bigger than
423 * the separation between columns) and significant
424 * vertical extent (bigger than the separation between
425 * textlines), and subtracting this from the bg. */
426 pix2 = pixMorphCompSequence(pix1, "o80.60", 0);
427 pixSubtract(pix1, pix1, pix2);
428 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
429 pixDestroy(&pix2);
430
431 /* Identify vertical whitespace by opening the remaining bg.
432 * o5.1 removes thin vertical bg lines and o1.200 extracts
433 * long vertical bg lines. */
434 pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0);
435 *ppixvws = pixvws;
436 if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY);
437 pixDestroy(&pix1);
438
439 /* Three steps to getting text line mask:
440 * (1) close the characters and words in the textlines
441 * (2) open the vertical whitespace corridors back up
442 * (3) small opening to remove noise */
443 pix1 = pixMorphSequence(pixs, "c30.1", 0);
444 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
445 pixd = pixSubtract(NULL, pix1, pixvws);
446 pixOpenBrick(pixd, pixd, 3, 3);
447 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
448 pixDestroy(&pix1);
449
450 /* Check if text line mask is empty */
451 if (ptlfound) {
452 pixZero(pixd, &empty);
453 if (!empty)
454 *ptlfound = 1;
455 }
456
457 return pixd;
458 }
459
460
461 /*------------------------------------------------------------------*
462 * Textblock extraction *
463 *------------------------------------------------------------------*/
464 /*!
465 * \brief pixGenTextblockMask()
466 *
467 * \param[in] pixs 1 bpp, textline mask, assumed to be 150 to 200 ppi
468 * \param[in] pixvws vertical white space mask
469 * \param[in] pixadb input for collecting debug pix; use NULL to skip
470 * \return pixd textblock mask, or NULL if empty or on error
471 *
472 * <pre>
473 * Notes:
474 * (1) Both the input masks (textline and vertical white space) and
475 * the returned textblock mask are at the same resolution.
476 * (2) This is not intended to work on small thumbnails. The
477 * dimensions of pixs must be at least MinWidth x MinHeight.
478 * (3) The result is somewhat noisy, in that small "blocks" of
479 * text may be included. These can be removed by post-processing,
480 * using, e.g.,
481 * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
482 * L_SELECT_IF_GTE, NULL);
483 * </pre>
484 */
485 PIX *
486 pixGenTextblockMask(PIX *pixs,
487 PIX *pixvws,
488 PIXA *pixadb)
489 {
490 l_int32 w, h, empty;
491 PIX *pix1, *pix2, *pix3, *pixd;
492
493 if (!pixs || pixGetDepth(pixs) != 1)
494 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
495 pixGetDimensions(pixs, &w, &h, NULL);
496 if (w < MinWidth || h < MinHeight) {
497 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
498 return NULL;
499 }
500 if (!pixvws)
501 return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL);
502
503 /* Join pixels vertically to make a textblock mask */
504 pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
505 pixZero(pix1, &empty);
506 if (empty) {
507 pixDestroy(&pix1);
508 L_INFO("no fg pixels in textblock mask\n", __func__);
509 return NULL;
510 }
511 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
512
513 /* Solidify the textblock mask and remove noise:
514 * (1) For each cc, close the blocks and dilate slightly
515 * to form a solid mask.
516 * (2) Small horizontal closing between components.
517 * (3) Open the white space between columns, again.
518 * (4) Remove small components. */
519 pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL);
520 pixCloseSafeBrick(pix2, pix2, 10, 1);
521 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
522 pix3 = pixSubtract(NULL, pix2, pixvws);
523 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
524 pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH,
525 L_SELECT_IF_GTE, NULL);
526 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
527
528 pixDestroy(&pix1);
529 pixDestroy(&pix2);
530 pixDestroy(&pix3);
531 return pixd;
532 }
533
534
535 /*------------------------------------------------------------------*
536 * Location and extraction of page foreground; cleaning pages *
537 *------------------------------------------------------------------*/
538 /*!
539 * \brief pixCropImage()
540 *
541 * \param[in] pixs full resolution (any type or depth)
542 * \param[in] lr_clear full res pixels cleared at left and right sides
543 * \param[in] tb_clear full res pixels cleared at top and bottom sides
544 * \param[in] edgeclean parameter for removing edge noise (-1 to 15)
545 * default = 0 (no removal);
546 * 15 is maximally aggressive for random noise
547 * -1 for aggressively removing side noise
548 * -2 to extract page embedded in black background
549 * \param[in] lr_border full res final "added" pixels on left and right
550 * \param[in] tb_border full res final "added" pixels on top and bottom
551 * \param[in] maxwiden max fractional horizontal stretch allowed
552 * \param[in] printwiden 0 to skip, 1 for 8.5x11, 2 for A4
553 * \param[in] *debugfile [optional] usually is NULL
554 * \param[out] *pcropbox [optional] crop box at full resolution
555 * \return cropped pix, or NULL on error
556 *
557 * <pre>
558 * Notes:
559 * (1) This binarizes and crops a page image.
560 * (a) Binarizes if necessary and does 2x reduction.
561 * (b) Clears near the border by %lr_clear and %tb_clear full
562 * resolution pixels. (This is done at 2x reduction.)
563 * (c) If %edgeclean > 0, it removes isolated sets of pixels,
564 * using a close/open operation of size %edgeclean + 1.
565 * If %edgeclean == -1, it uses a large vertical morphological
566 * close/open and the extraction of either the largest
567 * resulting connected component (or the largest two components
568 * if the page has 2 columns), to eliminate noise on left
569 * and right sides.
570 * If %edgeclean == -2, it extracts the page region from a
571 * possible exterior black surround.
572 * (d) Find the bounding box of remaining fg pixels and scales
573 * the box up 2x back to full resolution.
574 * (e) Crops the binarized image to the bounding box.
575 * (f) Slightly thickens long horizontal lines.
576 * (g) Rescales this image to fit within the original image,
577 * less lr_border on the sides and tb_border above and below.
578 * The rescaling is done isomorphically with a (possible)
579 * optional additional widening. Suggest the additional
580 * widening factor not exceed 1.15.
581 * (h) Optionally do additional horizontal stretch if needed to
582 * better fill a printed page. Default is 0 to skip; 1 to
583 * widen for 8.5x11 page, 2 for A4 page.
584 * Note that (b) - (d) are done at 2x reduction for efficiency.
585 * (2) Side clearing must not exceed 1/6 of the dimension on that side.
586 * (3) The clear and border pixel parameters must be >= 0.
587 * (4) The "clear" parameters act on the input image, whereas the
588 * "border" parameters act to give a white border to the final
589 * image. They are not literally added, because the input and final
590 * images are the same size. If the resulting images are to be
591 * printed, it is useful to have border pixel parameters of at
592 * least 60 at 300 ppi, to avoid losing content at the edges.
593 * (5) This is not intended to work on small thumbnails. The
594 * dimensions of pixs must be at least MinWidth x MinHeight.
595 * (6) Step (f) above helps with orthographically-produced music notation,
596 * where the horizontal staff lines can be very thin and thus
597 * subject to printer alias.
598 * (7) If you are not concerned with printing on paper, use the
599 * default value 0 for %printwiden. Widening only takes place
600 * if the ratio h/w exceeds the specified paper size by 3%,
601 * and the horizontal scaling factor will not exceed 1.25.
602 * </pre>
603 */
604 PIX *
605 pixCropImage(PIX *pixs,
606 l_int32 lr_clear,
607 l_int32 tb_clear,
608 l_int32 edgeclean,
609 l_int32 lr_border,
610 l_int32 tb_border,
611 l_float32 maxwiden,
612 l_int32 printwiden,
613 const char *debugfile,
614 BOX **pcropbox)
615 {
616 char cmd[64];
617 l_int32 w, h, val, ret;
618 l_float32 r1, r2;
619 BOX *box1, *box2;
620 PIX *pix1, *pix2, *pix3, *pix4;
621 PIXA *pixa1;
622
623 if (pcropbox) *pcropbox = NULL;
624 if (!pixs)
625 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
626 if (edgeclean > 15) {
627 L_WARNING("edgeclean > 15; setting to 15\n", __func__);
628 edgeclean = 15;
629 }
630 if (edgeclean < -1) {
631 lept_stderr("Using edgeclean = -2\n");
632 edgeclean = -2;
633 }
634 pixGetDimensions(pixs, &w, &h, NULL);
635 if (w < MinWidth || h < MinHeight) {
636 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
637 return NULL;
638 }
639 if (lr_clear < 0) lr_clear = 0;
640 if (tb_clear < 0) tb_clear = 0;
641 if (lr_border < 0) lr_border = 0;
642 if (tb_border < 0) tb_border = 0;
643 if (lr_clear > w / 6 || tb_clear > h / 6) {
644 L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n",
645 __func__, w / 6, h / 6);
646 return NULL;
647 }
648 if (maxwiden > 1.15)
649 L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
650 __func__, maxwiden);
651 if (printwiden < 0 || printwiden > 2) printwiden = 0;
652 pixa1 = (debugfile) ? pixaCreate(5) : NULL;
653 if (pixa1) pixaAddPix(pixa1, pixs, L_COPY);
654
655 /* Binarize if necessary and 2x reduction */
656 pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1);
657 pix2 = pixReduceRankBinary2(pix1, 2, NULL);
658
659 /* Clear out pixels near the image edges */
660 pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2,
661 tb_clear / 2, PIX_CLR);
662 if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT);
663
664 /* Choose one of three methods for extracting foreground pixels:
665 * (1) Include all foreground pixels
666 * (2) Do a morphological close/open to remove noise throughout
667 * the image before finding a b.b. for remaining f.g. pixels
668 * (3) Do a large vertical closing and choose the largest (by area)
669 * component to avoid foreground noise on left and right sides */
670 if (edgeclean == 0) {
671 ret = pixClipToForeground(pix2, NULL, &box1);
672 } else if (edgeclean > 0) {
673 val = edgeclean + 1;
674 snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val);
675 pix3 = pixMorphSequence(pix2, cmd, 0);
676 ret = pixClipToForeground(pix3, NULL, &box1);
677 pixDestroy(&pix3);
678 } else if (edgeclean == -1) {
679 ret = pixMaxCompAfterVClosing(pix2, &box1);
680 } else { /* edgeclean == -2 */
681 ret = pixFindPageInsideBlackBorder(pix2, &box1);
682 }
683 pixDestroy(&pix2);
684 if (ret) {
685 L_ERROR("no returned b.b. for foreground\n", __func__);
686 boxDestroy(&box1);
687 pixDestroy(&pix1);
688 pixaDestroy(&pixa1);
689 return NULL;
690 }
691
692 /* Transform to full resolution */
693 box2 = boxTransform(box1, 0, 0, 2.0, 2.0); /* full res */
694 boxDestroy(&box1);
695 if (pixa1) {
696 pix2 = pixCopy(NULL, pix1);
697 pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
698 pixaAddPix(pixa1, pix2, L_INSERT);
699 }
700
701 /* Grab the foreground region */
702 pix2 = pixClipRectangle(pix1, box2, NULL);
703 pixDestroy(&pix1);
704
705 /* Slightly thicken long horizontal lines. This prevents loss of
706 * printed thin music staff lines due to aliasing. */
707 pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0);
708 pixOr(pix2, pix2, pix3);
709 pixDestroy(&pix3);
710
711 /* Rescale the fg and paste into the input-sized image */
712 pix3 = pixRescaleForCropping(pix2, w, h, lr_border, tb_border,
713 maxwiden, NULL);
714 pixDestroy(&pix2);
715 if (pixa1) {
716 pix2 = pixCopy(NULL, pix3);
717 pixaAddPix(pixa1, pix2, L_INSERT);
718 }
719
720 /* Optionally widen image if possible, for printing on 8.5 x 11 inch
721 * or A4 paper. Specifically, widen the image if the h/w asperity
722 * ratio of the input image exceeds that of the selected paper by
723 * more than 3%. Do not widen by more than 20%. */
724 r1 = (l_float32)h / (l_float32)w;
725 r2 = 0.0; /* for default case */
726 if (printwiden == 1) /* standard */
727 r2 = r1 / 1.294;
728 else if (printwiden == 2) /* A4 */
729 r2 = r1 / 1.414;
730 if (r2 > 1.03) {
731 r2 = L_MIN(r2, 1.20);
732 lept_stderr("oversize h/w ratio by factor %6.3f\n", r2);
733 pix4 = pixScale(pix3, r2, 1.0);
734 } else {
735 pix4 = pixClone(pix3);
736 }
737 pixDestroy(&pix3);
738
739 if (pcropbox)
740 *pcropbox = box2;
741 else
742 boxDestroy(&box2);
743 if (pixa1) {
744 pixaAddPix(pixa1, pix4, L_COPY);
745 lept_stderr("Writing debug file: %s\n", debugfile);
746 pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile);
747 pixaDestroy(&pixa1);
748 }
749 return pix4;
750 }
751
752
753 /*!
754 * \brief pixMaxCompAfterVClosing()
755 *
756 * \param[in] pixs 1 bpp (input at 2x reduction)
757 * \param[out] **pbox main region at input resolution (2x reduction)
758 * \return 0 if OK, 1 on error
759 *
760 * <pre>
761 * Notes:
762 * (1) This removes foreground noise along left and right edges,
763 * returning a bounding box for the remaining foreground pixels
764 * at the input resolution.
765 * (2) The input %pixs should be at a resolution 100 - 150 ppi.
766 * (3) It does two 2x level1 rank binary reductions, followed
767 * by a large vertical close/open, with a very small horizontal
768 * close/oopen, and then a 4x expansion back to the input resolution.
769 * (4) To work properly with 2-column layout, if the largest and
770 * second-largest regions are comparable in size, both are included.
771 * (5) This is used as an option to pixCropImage(), when given
772 * an %edgecrop parameter of -1.
773 * </pre>
774 */
775 static l_ok
776 pixMaxCompAfterVClosing(PIX *pixs,
777 BOX **pbox)
778 {
779 l_int32 w1, h1, w2, h2, n, empty;
780 BOX *box1, *box2;
781 BOXA *boxa1, *boxa2;
782 PIX *pix1;
783
784 if (!pbox)
785 return ERROR_INT("pbox not defined", __func__, 1);
786 *pbox = NULL;
787 if (!pixs || pixGetDepth(pixs) != 1)
788 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
789
790 /* Strong vertical closing */
791 pix1 = pixMorphSequence(pixs, "r11 + c3.80 + o3.80 + x4", 0);
792 pixZero(pix1, &empty);
793 if (empty) {
794 pixDestroy(&pix1);
795 return ERROR_INT("pix1 is empty", __func__, 1);
796 }
797
798 /* Find the two c.c. with largest area. If they are not comparable
799 * in area, return the bounding box of the largest; otherwise,
800 * return the bounding box of both regions. */
801 boxa1 = pixConnCompBB(pix1, 8);
802 pixDestroy(&pix1);
803 boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
804 if ((n = boxaGetCount(boxa2)) == 1) {
805 *pbox = boxaGetBox(boxa2, 0, L_COPY);
806 } else { /* 2 or more */
807 box1 = boxaGetBox(boxa2, 0, L_COPY);
808 box2 = boxaGetBox(boxa2, 1, L_COPY);
809 boxGetGeometry(box1, NULL, NULL, &w1, &h1);
810 boxGetGeometry(box2, NULL, NULL, &w2, &h2);
811 if (((l_float32)(w2 * h2) / (l_float32)(w1 * h1)) > 0.7) {
812 *pbox = boxBoundingRegion(box1, box2);
813 boxDestroy(&box1);
814 } else {
815 *pbox = box1;
816 }
817 boxDestroy(&box2);
818 }
819 boxaDestroy(&boxa1);
820 boxaDestroy(&boxa2);
821 return 0;
822 }
823
824
825 /*!
826 * \brief pixFindPageInsideBlackBorder()
827 *
828 * \param[in] pixs 1 bpp (input at 2x reduction)
829 * \param[out] **pbox page region at input resolution (2x reduction)
830 * \return 0 if OK, 1 on error
831 *
832 * <pre>
833 * Notes:
834 * (1) This extracts the page region from the image. It is designed
835 * to work when the page is within a fairly solid black border.
836 * (2) It returns a bounding box for the page region at the input res.
837 * (3) The input %pixs is expected to be at a resolution 100 - 150 ppi.
838 * (4) This is used as an option to pixCropImage(), when given an
839 * %edgecrop parameter of -2.
840 * </pre>
841 */
842 static l_ok
843 pixFindPageInsideBlackBorder(PIX *pixs,
844 BOX **pbox)
845 {
846 l_int32 empty;
847 BOX *box1;
848 BOXA *boxa1, *boxa2;
849 PIX *pix1, *pix2;
850
851 if (!pbox)
852 return ERROR_INT("pbox not defined", __func__, 1);
853 *pbox = NULL;
854 if (!pixs || pixGetDepth(pixs) != 1)
855 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
856
857 /* Reduce 4x and remove some remaining small foreground */
858 pix1 = pixMorphSequence(pixs, "r22 + c5.5 + o7.7", 0);
859 pixZero(pix1, &empty);
860 if (empty) {
861 pixDestroy(&pix1);
862 return ERROR_INT("pix1 is empty", __func__, 1);
863 }
864
865 /* Photoinvert image and Find the c.c. with largest area. */
866 pixInvert(pix1, pix1);
867 pix2 = pixMorphSequence(pix1, "c11.11 + o11.11", 0);
868 pixDestroy(&pix1);
869 boxa1 = pixConnCompBB(pix2, 8);
870 pixDestroy(&pix2);
871 boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
872 box1 = boxaGetBox(boxa2, 0, L_COPY); /* largest by area */
873 boxAdjustSides(box1, box1, 5, -5, 5, -5);
874 *pbox = boxTransform(box1, 0, 0, 4.0, 4.0);
875 boxaDestroy(&boxa1);
876 boxaDestroy(&boxa2);
877 boxDestroy(&box1);
878 return 0;
879 }
880
881
882 /*!
883 * \brief pixRescaleForCropping()
884 *
885 * \param[in] pixs 1 bpp
886 * \param[in] w width of output image
887 * \param[in] h height of output image
888 * \param[in] lr_border cleared final border pixels on left and right
889 * \param[in] tb_border cleared final border pixels on top and bottom
890 * \param[in] maxwiden max fractional horizontal stretch allowed; >= 1.0
891 * \param[out] *ppixsc [optional] rescaled foreground region
892 * \return pixd output image, or NULL on error
893 *
894 * <pre>
895 * Notes:
896 * (1) This rescales %pixs to fit maximally within an image of
897 * size (w x h), under two conditions:
898 * (a) the final image has cleared border regions given by the
899 * input parameters %lr_border and %tb_border, and
900 * (b) the input image is first isotropically scaled to fit
901 * maximally within the allowed final region, and then further
902 * maxiximally widened, subject to the constraints of the
903 * cleared border and the %maxwiden parameter.
904 * (2) The cleared border pixel parameters must be >= 0.
905 * (3) If there is extra horizontal stretching by a factor
906 * %maxwiden larger than about 1.15, the appearance may be
907 * unpleasingly distorted; hence the suggestion not to exceed it.
908 * </pre>
909 */
910 static PIX *
911 pixRescaleForCropping(PIX *pixs,
912 l_int32 w,
913 l_int32 h,
914 l_int32 lr_border,
915 l_int32 tb_border,
916 l_float32 maxwiden,
917 PIX **ppixsc)
918 {
919 static l_int32 first_time = TRUE;
920 l_int32 wi, hi, wmax, hmax, wn, wf, hf, xf;
921 l_float32 ratio, scaleh, scalew, scalewid;
922 PIX *pix1, *pixd;
923
924 if (ppixsc) *ppixsc = NULL;
925 if (!pixs || pixGetDepth(pixs) != 1)
926 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
927 if (lr_border < 0) lr_border = 0;
928 if (tb_border < 0) tb_border = 0;
929 maxwiden = L_MAX(1.0, maxwiden);
930 if (maxwiden > 1.15)
931 L_WARNING("maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
932 __func__, maxwiden);
933
934 /* Rescale the foreground region.
935 * First, decide if scaling is to full width or full height.
936 * If scaling to full height, determine how much additional
937 * width widening is possible, given the maxwiden constraint.
938 * If scaling to full width, both width and height are
939 * scaled isotropically. Scaling is done so that the resulting
940 * foreground is maximally widened, so it can be horizontally
941 * centered in an image of size (w x h), less %lr_border
942 * on each side. */
943 pixGetDimensions(pixs, &wi, &hi, NULL);
944 wmax = w - 2 * lr_border;
945 hmax = h - 2 * tb_border;
946 ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi);
947 if (ratio >= 1) { /* width can be widened after isotropic scaling */
948 scaleh = (l_float32)hmax / (l_float32)hi;
949 wn = scaleh * wi; /* scaled but not widened */
950 scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn);
951 scalew = scaleh * scalewid;
952 wf = scalew * wi;
953 hf = hmax; /* scale to full height */
954 pix1 = pixScale(pixs, scalew, scaleh);
955 if (first_time == TRUE) {
956 lept_stderr("Width stretched by factor %5.3f\n", scalewid);
957 first_time = FALSE;
958 }
959 xf = (w - wf) / 2.0;
960 } else { /* width cannot be widened after isotropic scaling */
961 scalew = (l_float32)wmax / (l_float32)wi;
962 pix1 = pixScale(pixs, scalew, scalew);
963 wf = wmax; /* scale to full width */
964 hf = scalew * hi; /* no extra vertical stretching allowed */
965 xf = lr_border;
966 }
967
968 /* Paste it, horizontally centered and vertically placed as
969 * high as allowed (by %tb_border) into the final page image. */
970 pixd = pixCreate(w, h, 1);
971 pixRasterop(pixd, xf, tb_border, wf, hf, PIX_SRC, pix1, 0, 0);
972
973 if (ppixsc)
974 *ppixsc = pix1;
975 else
976 pixDestroy(&pix1);
977 return pixd;
978 }
979
980
981 /*!
982 * \brief pixCleanImage()
983 *
984 * \param[in] pixs full resolution (any type or depth)
985 * \param[in] contrast vary contrast: 1 = lightest; 10 = darkest;
986 * suggest 1 unless light features are being lost
987 * \param[in] rotation cw by 90 degrees: {0,1,2,3} represent
988 * 0, 90, 180 and 270 degree cw rotations
989 * \param[in] scale 1 (no scaling) or 2 (2x upscaling)
990 * \param[in] opensize opening size of structuring element for noise
991 * removal: {0 or 1 to skip; 2, 3 for opening}
992 * \return cleaned pix, or NULL on error
993 *
994 * <pre>
995 * Notes:
996 * (1) This deskews, optionally rotates and darkens, cleans background
997 * to white, binarizes and optionally removes small noise.
998 * (2) For color and grayscale input, local background normalization is
999 * done to 200, and a threshold of 180 sets the maximum foreground
1000 * value in the normalized image.
1001 * (3) The %contrast parameter adjusts the binarization to avoid losing
1002 * lighter input pixels. Contrast is increased as %contrast increases
1003 * from 1 to 10.
1004 * (4) The %scale parameter controls the thresholding to 1 bpp. Two values:
1005 * 1 = threshold
1006 * 2 = linear interpolated 2x upscaling before threshold.
1007 * (5) The #opensize parameter is the size of a square SEL used with
1008 * opening to remove small speckle noise. Allowed open sizes are 2,3.
1009 * If this is to be used, try 2 before 3.
1010 * (6) This does the image processing for cleanTo1bppFilesToPdf() and
1011 * prog/cleanpdf.c.
1012 * </pre>
1013 */
1014 PIX *
1015 pixCleanImage(PIX *pixs,
1016 l_int32 contrast,
1017 l_int32 rotation,
1018 l_int32 scale,
1019 l_int32 opensize)
1020 {
1021 char sequence[32];
1022 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1023
1024 if (!pixs)
1025 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
1026 if (rotation < 0 || rotation > 3) {
1027 L_ERROR("invalid rotation = %d; rotation must be in {0,1,2,3}\n",
1028 __func__, rotation);
1029 return NULL;
1030 }
1031 if (contrast < 1 || contrast > 10) {
1032 L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n",
1033 __func__, contrast);
1034 return NULL;
1035 }
1036 if (scale != 1 && scale != 2) {
1037 L_ERROR("invalid scale = %d; scale must be 1 or 2\n",
1038 __func__, opensize);
1039 return NULL;
1040 }
1041 if (opensize > 3) {
1042 L_ERROR("invalid opensize = %d; opensize must be <= 3\n",
1043 __func__, opensize);
1044 return NULL;
1045 }
1046
1047 if (pixGetDepth(pixs) == 1) {
1048 if (rotation > 0)
1049 pix1 = pixRotateOrth(pixs, rotation);
1050 else
1051 pix1 = pixClone(pixs);
1052 pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL);
1053 if (scale == 2)
1054 pix4 = pixExpandBinaryReplicate(pix2, 2, 2);
1055 else /* scale == 1 */
1056 pix4 = pixClone(pix2);
1057 } else {
1058 pix1 = pixConvertTo8MinMax(pixs);
1059 if (rotation > 0)
1060 pix2 = pixRotateOrth(pix1, rotation);
1061 else
1062 pix2 = pixClone(pix1);
1063 pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL);
1064 pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale);
1065 pixDestroy(&pix3);
1066 }
1067
1068 if (opensize == 2 || opensize == 3) {
1069 snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize);
1070 pix5 = pixMorphSequence(pix4, sequence, 0);
1071 } else {
1072 pix5 = pixClone(pix4);
1073 }
1074
1075 pixDestroy(&pix1);
1076 pixDestroy(&pix2);
1077 pixDestroy(&pix4);
1078 return pix5;
1079 }
1080
1081
1082 /*!
1083 * \brief pixFindPageForeground()
1084 *
1085 * \param[in] pixs full resolution (any type or depth)
1086 * \param[in] threshold for binarization; typically about 128
1087 * \param[in] mindist min distance of text from border to allow
1088 * cleaning near border; at 2x reduction, this
1089 * should be larger than 50; typically about 70
1090 * \param[in] erasedist when conditions are satisfied, erase anything
1091 * within this distance of the edge;
1092 * typically 20-30 at 2x reduction
1093 * \param[in] showmorph debug: set to a negative integer to show steps
1094 * in generating masks; this is typically used
1095 * for debugging region extraction
1096 * \param[in] pixac debug: allocate outside and pass this in to
1097 * accumulate results of each call to this function,
1098 * which can be displayed in a mosaic or a pdf.
1099 * \return box region including foreground, with some pixel noise
1100 * removed, or NULL if not found
1101 *
1102 * <pre>
1103 * Notes:
1104 * (1) This doesn't simply crop to the fg. It attempts to remove
1105 * pixel noise and junk at the edge of the image before cropping.
1106 * The input %threshold is used if pixs is not 1 bpp.
1107 * (2) This is not intended to work on small thumbnails. The
1108 * dimensions of pixs must be at least MinWidth x MinHeight.
1109 * (3) Debug: set showmorph to display the intermediate image in
1110 * the morphological operations on this page.
1111 * (4) Debug: to get pdf output of results when called repeatedly,
1112 * call with an existing pixac, which will add an image of this page,
1113 * with the fg outlined. If no foreground is found, there is
1114 * no output for this page image.
1115 * </pre>
1116 */
1117 BOX *
1118 pixFindPageForeground(PIX *pixs,
1119 l_int32 threshold,
1120 l_int32 mindist,
1121 l_int32 erasedist,
1122 l_int32 showmorph,
1123 PIXAC *pixac)
1124 {
1125 l_int32 flag, nbox, intersects;
1126 l_int32 w, h, bx, by, bw, bh, left, right, top, bottom;
1127 PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
1128 BOX *box, *boxfg, *boxin, *boxd;
1129 BOXA *ba1, *ba2;
1130
1131 if (!pixs)
1132 return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
1133 pixGetDimensions(pixs, &w, &h, NULL);
1134 if (w < MinWidth || h < MinHeight) {
1135 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
1136 return NULL;
1137 }
1138
1139 /* Binarize, downscale by 0.5, remove the noise to generate a seed,
1140 * and do a seedfill back from the seed into those 8-connected
1141 * components of the binarized image for which there was at least
1142 * one seed pixel. */
1143 flag = (showmorph) ? 100 : 0;
1144 pixb = pixConvertTo1(pixs, threshold);
1145 pixb2 = pixScale(pixb, 0.5, 0.5);
1146 pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag);
1147 pix1 = pixMorphSequence(pixb2, "o50.1", 0);
1148 pixOr(pixseed, pixseed, pix1);
1149 pixDestroy(&pix1);
1150 pix1 = pixMorphSequence(pixb2, "o1.50", 0);
1151 pixOr(pixseed, pixseed, pix1);
1152 pixDestroy(&pix1);
1153 pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
1154 pixm = pixRemoveBorderConnComps(pixsf, 8);
1155
1156 /* Now, where is the main block of text? We want to remove noise near
1157 * the edge of the image, but to do that, we have to be convinced that
1158 * (1) there is noise and (2) it is far enough from the text block
1159 * and close enough to the edge. For each edge, if the block
1160 * is more than mindist from that edge, then clean 'erasedist'
1161 * pixels from the edge. */
1162 pix1 = pixMorphSequence(pixm, "c50.50", flag);
1163 ba1 = pixConnComp(pix1, NULL, 8);
1164 ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
1165 pixGetDimensions(pix1, &w, &h, NULL);
1166 nbox = boxaGetCount(ba2);
1167 if (nbox > 1) {
1168 box = boxaGetBox(ba2, 0, L_CLONE);
1169 boxGetGeometry(box, &bx, &by, &bw, &bh);
1170 left = (bx > mindist) ? erasedist : 0;
1171 right = (w - bx - bw > mindist) ? erasedist : 0;
1172 top = (by > mindist) ? erasedist : 0;
1173 bottom = (h - by - bh > mindist) ? erasedist : 0;
1174 pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
1175 boxDestroy(&box);
1176 }
1177 pixDestroy(&pix1);
1178 boxaDestroy(&ba1);
1179 boxaDestroy(&ba2);
1180
1181 /* Locate the foreground region; don't bother cropping */
1182 pixClipToForeground(pixm, NULL, &boxfg);
1183
1184 /* Sanity check the fg region. Make sure it's not confined
1185 * to a thin boundary on the left and right sides of the image,
1186 * in which case it is likely to be noise. */
1187 if (boxfg) {
1188 boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
1189 boxIntersects(boxfg, boxin, &intersects);
1190 boxDestroy(&boxin);
1191 if (!intersects) boxDestroy(&boxfg);
1192 }
1193
1194 boxd = NULL;
1195 if (boxfg) {
1196 boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */
1197 boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
1198
1199 /* Save the debug image showing the box for this page */
1200 if (pixac) {
1201 pixg2 = pixConvert1To4Cmap(pixb);
1202 pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
1203 pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
1204 pixDestroy(&pixg2);
1205 }
1206 }
1207
1208 pixDestroy(&pixb);
1209 pixDestroy(&pixb2);
1210 pixDestroy(&pixseed);
1211 pixDestroy(&pixsf);
1212 pixDestroy(&pixm);
1213 boxDestroy(&boxfg);
1214 return boxd;
1215 }
1216
1217
1218 /*------------------------------------------------------------------*
1219 * Extraction of characters from image with only text *
1220 *------------------------------------------------------------------*/
1221 /*!
1222 * \brief pixSplitIntoCharacters()
1223 *
1224 * \param[in] pixs 1 bpp, contains only deskewed text
1225 * \param[in] minw min component width for initial filtering; typ. 4
1226 * \param[in] minh min component height for initial filtering; typ. 4
1227 * \param[out] pboxa [optional] character bounding boxes
1228 * \param[out] ppixa [optional] character images
1229 * \param[out] ppixdebug [optional] showing splittings
1230 *
1231 * \return 0 if OK, 1 on error
1232 *
1233 * <pre>
1234 * Notes:
1235 * (1) This is a simple function that attempts to find split points
1236 * based on vertical pixel profiles.
1237 * (2) It should be given an image that has an arbitrary number
1238 * of text characters.
1239 * (3) The returned pixa includes the boxes from which the
1240 * (possibly split) components are extracted.
1241 * </pre>
1242 */
1243 l_ok
1244 pixSplitIntoCharacters(PIX *pixs,
1245 l_int32 minw,
1246 l_int32 minh,
1247 BOXA **pboxa,
1248 PIXA **ppixa,
1249 PIX **ppixdebug)
1250 {
1251 l_int32 ncomp, i, xoff, yoff;
1252 BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
1253 BOXAA *baa;
1254 PIX *pix, *pix1, *pix2, *pixdb;
1255 PIXA *pixa1, *pixadb;
1256
1257 if (pboxa) *pboxa = NULL;
1258 if (ppixa) *ppixa = NULL;
1259 if (ppixdebug) *ppixdebug = NULL;
1260 if (!pixs || pixGetDepth(pixs) != 1)
1261 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1262
1263 /* Remove the small stuff */
1264 pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
1265 L_SELECT_IF_GT, NULL);
1266
1267 /* Small vertical close for consolidation */
1268 pix2 = pixMorphSequence(pix1, "c1.10", 0);
1269 pixDestroy(&pix1);
1270
1271 /* Get the 8-connected components */
1272 boxa1 = pixConnComp(pix2, &pixa1, 8);
1273 pixDestroy(&pix2);
1274 boxaDestroy(&boxa1);
1275
1276 /* Split the components if obvious */
1277 ncomp = pixaGetCount(pixa1);
1278 boxa2 = boxaCreate(ncomp);
1279 pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
1280 for (i = 0; i < ncomp; i++) {
1281 pix = pixaGetPix(pixa1, i, L_CLONE);
1282 if (ppixdebug) {
1283 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
1284 if (pixdb)
1285 pixaAddPix(pixadb, pixdb, L_INSERT);
1286 } else {
1287 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
1288 }
1289 pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
1290 boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
1291 boxaJoin(boxa2, boxat2, 0, -1);
1292 pixDestroy(&pix);
1293 boxaDestroy(&boxat1);
1294 boxaDestroy(&boxat2);
1295 }
1296 pixaDestroy(&pixa1);
1297
1298 /* Generate the debug image */
1299 if (ppixdebug) {
1300 if (pixaGetCount(pixadb) > 0) {
1301 *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
1302 1.0, 0, 20, 1);
1303 }
1304 pixaDestroy(&pixadb);
1305 }
1306
1307 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
1308 baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
1309 boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
1310 boxaaDestroy(&baa);
1311 boxaDestroy(&boxa2);
1312
1313 /* Optionally extract the pieces from the input image */
1314 if (ppixa)
1315 *ppixa = pixClipRectangles(pixs, boxad);
1316 if (pboxa)
1317 *pboxa = boxad;
1318 else
1319 boxaDestroy(&boxad);
1320 return 0;
1321 }
1322
1323
1324 /*!
1325 * \brief pixSplitComponentWithProfile()
1326 *
1327 * \param[in] pixs 1 bpp, exactly one connected component
1328 * \param[in] delta distance used in extrema finding in a numa; typ. 10
1329 * \param[in] mindel minimum required difference between profile
1330 * minimum and profile values +2 and -2 away; typ. 7
1331 * \param[out] ppixdebug [optional] debug image of splitting
1332 * \return boxa of c.c. after splitting, or NULL on error
1333 *
1334 * <pre>
1335 * Notes:
1336 * (1) This will split the most obvious cases of touching characters.
1337 * The split points it is searching for are narrow and deep
1338 * minimima in the vertical pixel projection profile, after a
1339 * large vertical closing has been applied to the component.
1340 * </pre>
1341 */
1342 BOXA *
1343 pixSplitComponentWithProfile(PIX *pixs,
1344 l_int32 delta,
1345 l_int32 mindel,
1346 PIX **ppixdebug)
1347 {
1348 l_int32 w, h, n2, i, firstmin, xmin, xshift;
1349 l_int32 nmin, nleft, nright, nsplit, isplit, ncomp;
1350 l_int32 *array1, *array2;
1351 BOX *box;
1352 BOXA *boxad;
1353 NUMA *na1, *na2, *nasplit;
1354 PIX *pix1, *pixdb;
1355
1356 if (ppixdebug) *ppixdebug = NULL;
1357 if (!pixs || pixGetDepth(pixs) != 1)
1358 return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL);
1359 pixGetDimensions(pixs, &w, &h, NULL);
1360
1361 /* Closing to consolidate characters vertically */
1362 pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
1363
1364 /* Get extrema of column projections */
1365 boxad = boxaCreate(2);
1366 na1 = pixCountPixelsByColumn(pix1); /* w elements */
1367 pixDestroy(&pix1);
1368 na2 = numaFindExtrema(na1, delta, NULL);
1369 n2 = numaGetCount(na2);
1370 if (n2 < 3) { /* no split possible */
1371 box = boxCreate(0, 0, w, h);
1372 boxaAddBox(boxad, box, L_INSERT);
1373 numaDestroy(&na1);
1374 numaDestroy(&na2);
1375 return boxad;
1376 }
1377
1378 /* Look for sufficiently deep and narrow minima.
1379 * All minima of of interest must be surrounded by max on each
1380 * side. firstmin is the index of first possible minimum. */
1381 array1 = numaGetIArray(na1);
1382 array2 = numaGetIArray(na2);
1383 if (ppixdebug) numaWriteStderr(na2);
1384 firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
1385 nasplit = numaCreate(n2); /* will hold split locations */
1386 for (i = firstmin; i < n2 - 1; i+= 2) {
1387 xmin = array2[i];
1388 nmin = array1[xmin];
1389 if (xmin + 2 >= w) break; /* no more splits possible */
1390 nleft = array1[xmin - 2];
1391 nright = array1[xmin + 2];
1392 if (ppixdebug) {
1393 lept_stderr(
1394 "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
1395 xmin, w, nleft, nmin, nright);
1396 }
1397 if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */
1398 numaAddNumber(nasplit, xmin);
1399 }
1400 nsplit = numaGetCount(nasplit);
1401
1402 #if 0
1403 if (ppixdebug && nsplit > 0) {
1404 lept_mkdir("lept/split");
1405 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL);
1406 }
1407 #endif
1408
1409 numaDestroy(&na1);
1410 numaDestroy(&na2);
1411 LEPT_FREE(array1);
1412 LEPT_FREE(array2);
1413
1414 if (nsplit == 0) { /* no splitting */
1415 numaDestroy(&nasplit);
1416 box = boxCreate(0, 0, w, h);
1417 boxaAddBox(boxad, box, L_INSERT);
1418 return boxad;
1419 }
1420
1421 /* Use split points to generate b.b. after splitting */
1422 for (i = 0, xshift = 0; i < nsplit; i++) {
1423 numaGetIValue(nasplit, i, &isplit);
1424 box = boxCreate(xshift, 0, isplit - xshift, h);
1425 boxaAddBox(boxad, box, L_INSERT);
1426 xshift = isplit + 1;
1427 }
1428 box = boxCreate(xshift, 0, w - xshift, h);
1429 boxaAddBox(boxad, box, L_INSERT);
1430 numaDestroy(&nasplit);
1431
1432 if (ppixdebug) {
1433 pixdb = pixConvertTo32(pixs);
1434 ncomp = boxaGetCount(boxad);
1435 for (i = 0; i < ncomp; i++) {
1436 box = boxaGetBox(boxad, i, L_CLONE);
1437 pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
1438 boxDestroy(&box);
1439 }
1440 *ppixdebug = pixdb;
1441 }
1442
1443 return boxad;
1444 }
1445
1446
1447 /*------------------------------------------------------------------*
1448 * Extraction of lines of text *
1449 *------------------------------------------------------------------*/
1450 /*!
1451 * \brief pixExtractTextlines()
1452 *
1453 * \param[in] pixs any depth, assumed to have nearly horizontal text
1454 * \param[in] maxw, maxh initial filtering: remove any components in pixs
1455 * with components larger than maxw or maxh
1456 * \param[in] minw, minh final filtering: remove extracted 'lines'
1457 * with sizes smaller than minw or minh; use
1458 * 0 for default.
1459 * \param[in] adjw, adjh final adjustment of boxes representing each
1460 * text line. If > 0, these increase the box
1461 * size at each edge by this amount.
1462 * \param[in] pixadb pixa for saving intermediate steps; NULL to omit
1463 * \return pixa of textline images, including bounding boxes, or
1464 * NULL on error
1465 *
1466 * <pre>
1467 * Notes:
1468 * (1) This function assumes that textline fragments have sufficient
1469 * vertical separation and small enough skew so that a
1470 * horizontal dilation sufficient to join words will not join
1471 * textlines. It does not guarantee that horizontally adjacent
1472 * textline fragments on the same line will be joined.
1473 * (2) For images with multiple columns, it attempts to avoid joining
1474 * textlines across the space between columns. If that is not
1475 * a concern, you can also use pixExtractRawTextlines(),
1476 * which will join them with alacrity.
1477 * (3) This first removes components from pixs that are either
1478 * wide (> %maxw) or tall (> %maxh).
1479 * (4) A final filtering operation removes small components, such
1480 * that width < %minw or height < %minh.
1481 * (5) For reasonable accuracy, the resolution of pixs should be
1482 * at least 100 ppi. For reasonable efficiency, the resolution
1483 * should not exceed 600 ppi.
1484 * (6) This can be used to determine if some region of a scanned
1485 * image is horizontal text.
1486 * (7) As an example, for a pix with resolution 300 ppi, a reasonable
1487 * set of parameters is:
1488 * pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
1489 * The defaults minw and minh for 300 ppi are about 36 and 20,
1490 * so the same result is obtained with:
1491 * pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
1492 * (8) The output pixa is composed of subimages, one for each textline,
1493 * and the boxa in the pixa tells where in %pixs each textline goes.
1494 * </pre>
1495 */
1496 PIXA *
1497 pixExtractTextlines(PIX *pixs,
1498 l_int32 maxw,
1499 l_int32 maxh,
1500 l_int32 minw,
1501 l_int32 minh,
1502 l_int32 adjw,
1503 l_int32 adjh,
1504 PIXA *pixadb)
1505 {
1506 char buf[64];
1507 l_int32 res, csize, empty;
1508 BOXA *boxa1, *boxa2, *boxa3;
1509 PIX *pix1, *pix2, *pix3;
1510 PIXA *pixa1, *pixa2, *pixa3;
1511
1512 if (!pixs)
1513 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1514
1515 /* Binarize carefully, if necessary */
1516 if (pixGetDepth(pixs) > 1) {
1517 pix2 = pixConvertTo8(pixs, FALSE);
1518 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1519 pix1 = pixThresholdToBinary(pix3, 150);
1520 pixDestroy(&pix2);
1521 pixDestroy(&pix3);
1522 } else {
1523 pix1 = pixClone(pixs);
1524 }
1525 pixZero(pix1, &empty);
1526 if (empty) {
1527 pixDestroy(&pix1);
1528 L_INFO("no fg pixels in input image\n", __func__);
1529 return NULL;
1530 }
1531 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1532
1533 /* Remove any very tall or very wide connected components */
1534 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1535 L_SELECT_IF_LT, NULL);
1536 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1537 pixDestroy(&pix1);
1538
1539 /* Filter to solidify the text lines within the x-height region.
1540 * The closing (csize) bridges gaps between words. The opening
1541 * removes isolated bridges between textlines. */
1542 if ((res = pixGetXRes(pixs)) == 0) {
1543 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1544 res = 300;
1545 }
1546 csize = L_MIN(120., 60.0 * res / 300.0);
1547 snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3);
1548 pix3 = pixMorphCompSequence(pix2, buf, 0);
1549 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1550
1551 /* Extract the connected components. These should be dilated lines */
1552 boxa1 = pixConnComp(pix3, &pixa1, 4);
1553 if (pixadb) {
1554 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1555 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1556 pixaAddPix(pixadb, pix1, L_INSERT);
1557 }
1558
1559 /* Set minw, minh if default is requested */
1560 minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1561 minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1562
1563 /* Remove line components that are too small */
1564 pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH,
1565 L_SELECT_IF_GTE, NULL);
1566 if (pixadb) {
1567 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1568 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1569 pixaAddPix(pixadb, pix1, L_INSERT);
1570 pix1 = pixConvertTo32(pix2);
1571 pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0);
1572 pixaAddPix(pixadb, pix1, L_INSERT);
1573 }
1574
1575 /* Selectively AND with the version before dilation, and save */
1576 boxa2 = pixaGetBoxa(pixa2, L_CLONE);
1577 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1578 pixa3 = pixClipRectangles(pix2, boxa3);
1579 if (pixadb) {
1580 pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1581 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1582 pixaAddPix(pixadb, pix1, L_INSERT);
1583 }
1584
1585 pixDestroy(&pix2);
1586 pixDestroy(&pix3);
1587 pixaDestroy(&pixa1);
1588 pixaDestroy(&pixa2);
1589 boxaDestroy(&boxa1);
1590 boxaDestroy(&boxa2);
1591 boxaDestroy(&boxa3);
1592 return pixa3;
1593 }
1594
1595
1596 /*!
1597 * \brief pixExtractRawTextlines()
1598 *
1599 * \param[in] pixs any depth, assumed to have nearly horizontal text
1600 * \param[in] maxw, maxh initial filtering: remove any components in pixs
1601 * with components larger than maxw or maxh;
1602 * use 0 for default values.
1603 * \param[in] adjw, adjh final adjustment of boxes representing each
1604 * text line. If > 0, these increase the box
1605 * size at each edge by this amount.
1606 * \param[in] pixadb pixa for saving intermediate steps; NULL to omit
1607 * \return pixa of textline images, including bounding boxes, or
1608 * NULL on error
1609 *
1610 * <pre>
1611 * Notes:
1612 * (1) This function assumes that textlines have sufficient
1613 * vertical separation and small enough skew so that a
1614 * horizontal dilation sufficient to join words will not join
1615 * textlines. It aggressively joins textlines across multiple
1616 * columns, so if that is not desired, you must either (a) make
1617 * sure that %pixs is a single column of text or (b) use instead
1618 * pixExtractTextlines(), which is more conservative
1619 * about joining text fragments that have vertical overlap.
1620 * (2) This first removes components from pixs that are either
1621 * very wide (> %maxw) or very tall (> %maxh).
1622 * (3) For reasonable accuracy, the resolution of pixs should be
1623 * at least 100 ppi. For reasonable efficiency, the resolution
1624 * should not exceed 600 ppi.
1625 * (4) This can be used to determine if some region of a scanned
1626 * image is horizontal text.
1627 * (5) As an example, for a pix with resolution 300 ppi, a reasonable
1628 * set of parameters is:
1629 * pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
1630 * (6) The output pixa is composed of subimages, one for each textline,
1631 * and the boxa in the pixa tells where in %pixs each textline goes.
1632 * </pre>
1633 */
1634 PIXA *
1635 pixExtractRawTextlines(PIX *pixs,
1636 l_int32 maxw,
1637 l_int32 maxh,
1638 l_int32 adjw,
1639 l_int32 adjh,
1640 PIXA *pixadb)
1641 {
1642 char buf[64];
1643 l_int32 res, csize, empty;
1644 BOXA *boxa1, *boxa2, *boxa3;
1645 BOXAA *baa1;
1646 PIX *pix1, *pix2, *pix3;
1647 PIXA *pixa1, *pixa2;
1648
1649 if (!pixs)
1650 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1651
1652 /* Set maxw, maxh if default is requested */
1653 if ((res = pixGetXRes(pixs)) == 0) {
1654 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1655 res = 300;
1656 }
1657 maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1658 maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1659
1660 /* Binarize carefully, if necessary */
1661 if (pixGetDepth(pixs) > 1) {
1662 pix2 = pixConvertTo8(pixs, FALSE);
1663 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1664 pix1 = pixThresholdToBinary(pix3, 150);
1665 pixDestroy(&pix2);
1666 pixDestroy(&pix3);
1667 } else {
1668 pix1 = pixClone(pixs);
1669 }
1670 pixZero(pix1, &empty);
1671 if (empty) {
1672 pixDestroy(&pix1);
1673 L_INFO("no fg pixels in input image\n", __func__);
1674 return NULL;
1675 }
1676 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1677
1678 /* Remove any very tall or very wide connected components */
1679 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1680 L_SELECT_IF_LT, NULL);
1681 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1682 pixDestroy(&pix1);
1683
1684 /* Filter to solidify the text lines within the x-height region.
1685 * The closing (csize) bridges gaps between words. */
1686 csize = L_MIN(120., 60.0 * res / 300.0);
1687 snprintf(buf, sizeof(buf), "c%d.1", csize);
1688 pix3 = pixMorphCompSequence(pix2, buf, 0);
1689 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1690
1691 /* Extract the connected components. These should be dilated lines */
1692 boxa1 = pixConnComp(pix3, &pixa1, 4);
1693 if (pixadb) {
1694 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1695 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1696 pixaAddPix(pixadb, pix1, L_INSERT);
1697 }
1698
1699 /* Do a 2-d sort, and generate a bounding box for each set of text
1700 * line segments that is aligned horizontally (i.e., has vertical
1701 * overlap) into a box representing a single text line. */
1702 baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1703 boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1704 if (pixadb) {
1705 pix1 = pixConvertTo32(pix2);
1706 pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1707 pixaAddPix(pixadb, pix1, L_INSERT);
1708 }
1709
1710 /* Optionally adjust the sides of each text line box, and then
1711 * use the boxes to generate a pixa of the text lines. */
1712 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1713 pixa2 = pixClipRectangles(pix2, boxa3);
1714 if (pixadb) {
1715 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1716 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1717 pixaAddPix(pixadb, pix1, L_INSERT);
1718 }
1719
1720 pixDestroy(&pix2);
1721 pixDestroy(&pix3);
1722 pixaDestroy(&pixa1);
1723 boxaDestroy(&boxa1);
1724 boxaDestroy(&boxa2);
1725 boxaDestroy(&boxa3);
1726 boxaaDestroy(&baa1);
1727 return pixa2;
1728 }
1729
1730
1731 /*------------------------------------------------------------------*
1732 * How many text columns *
1733 *------------------------------------------------------------------*/
1734 /*!
1735 * \brief pixCountTextColumns()
1736 *
1737 * \param[in] pixs 1 bpp
1738 * \param[in] deltafract fraction of (max - min) to be used in the delta
1739 * for extrema finding; typ 0.3
1740 * \param[in] peakfract fraction of (max - min) to be used to threshold
1741 * the peak value; typ. 0.5
1742 * \param[in] clipfract fraction of image dimension removed on each side;
1743 * typ. 0.1, which leaves w and h reduced by 0.8
1744 * \param[out] pncols number of columns; -1 if not determined
1745 * \param[in] pixadb [optional] pre-allocated, for showing
1746 * intermediate computation; use null to skip
1747 * \return 0 if OK, 1 on error
1748 *
1749 * <pre>
1750 * Notes:
1751 * (1) It is assumed that pixs has the correct resolution set.
1752 * If the resolution is 0, we set to 300 and issue a warning.
1753 * (2) If necessary, the image is scaled to between 37 and 75 ppi;
1754 * most of the processing is done at this resolution.
1755 * (3) If no text is found (essentially a blank page),
1756 * this returns ncols = 0.
1757 * (4) For debug output, input a pre-allocated pixa.
1758 * </pre>
1759 */
1760 l_ok
1761 pixCountTextColumns(PIX *pixs,
1762 l_float32 deltafract,
1763 l_float32 peakfract,
1764 l_float32 clipfract,
1765 l_int32 *pncols,
1766 PIXA *pixadb)
1767 {
1768 l_int32 w, h, res, i, n, npeak;
1769 l_float32 scalefact, redfact, minval, maxval, val4, val5, fract;
1770 BOX *box;
1771 NUMA *na1, *na2, *na3, *na4, *na5;
1772 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1773
1774 if (!pncols)
1775 return ERROR_INT("&ncols not defined", __func__, 1);
1776 *pncols = -1; /* init */
1777 if (!pixs || pixGetDepth(pixs) != 1)
1778 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1779 if (deltafract < 0.15 || deltafract > 0.75)
1780 L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__);
1781 if (peakfract < 0.25 || peakfract > 0.9)
1782 L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__);
1783 if (clipfract < 0.0 || clipfract >= 0.5)
1784 return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1785 if (pixadb) pixaAddPix(pixadb, pixs, L_COPY);
1786
1787 /* Scale to between 37.5 and 75 ppi */
1788 if ((res = pixGetXRes(pixs)) == 0) {
1789 L_WARNING("resolution undefined; set to 300\n", __func__);
1790 pixSetResolution(pixs, 300, 300);
1791 res = 300;
1792 }
1793 if (res < 37) {
1794 L_WARNING("resolution %d very low\n", __func__, res);
1795 scalefact = 37.5 / res;
1796 pix1 = pixScale(pixs, scalefact, scalefact);
1797 } else {
1798 redfact = (l_float32)res / 37.5;
1799 if (redfact < 2.0)
1800 pix1 = pixClone(pixs);
1801 else if (redfact < 4.0)
1802 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1803 else if (redfact < 8.0)
1804 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1805 else if (redfact < 16.0)
1806 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1807 else
1808 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1809 }
1810 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1811
1812 /* Crop inner 80% of image */
1813 pixGetDimensions(pix1, &w, &h, NULL);
1814 box = boxCreate(clipfract * w, clipfract * h,
1815 (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1816 pix2 = pixClipRectangle(pix1, box, NULL);
1817 pixGetDimensions(pix2, &w, &h, NULL);
1818 boxDestroy(&box);
1819 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1820
1821 /* Deskew */
1822 pix3 = pixDeskew(pix2, 0);
1823 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1824
1825 /* Close to increase column counts for text */
1826 pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1827 if (pixadb) pixaAddPix(pixadb, pix4, L_COPY);
1828 pixInvert(pix4, pix4);
1829 na1 = pixCountByColumn(pix4, NULL);
1830
1831 if (pixadb) {
1832 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL);
1833 pix5 = pixRead("/tmp/lept/plot.png");
1834 pixaAddPix(pixadb, pix5, L_INSERT);
1835 }
1836
1837 /* Analyze the column counts. na4 gives the locations of
1838 * the extrema in normalized units (0.0 to 1.0) across the
1839 * cropped image. na5 gives the magnitude of the
1840 * extrema, normalized to the dynamic range. The peaks
1841 * are values that are at least peakfract of (max - min). */
1842 numaGetMax(na1, &maxval, NULL);
1843 numaGetMin(na1, &minval, NULL);
1844 fract = (l_float32)(maxval - minval) / h; /* is there much at all? */
1845 if (fract < 0.05) {
1846 L_INFO("very little content on page; 0 text columns\n", __func__);
1847 *pncols = 0;
1848 } else {
1849 na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1850 na4 = numaTransform(na2, 0, 1.0 / w);
1851 na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1852 n = numaGetCount(na4);
1853 for (i = 0, npeak = 0; i < n; i++) {
1854 numaGetFValue(na4, i, &val4);
1855 numaGetFValue(na5, i, &val5);
1856 if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1857 npeak++;
1858 L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1859 }
1860 }
1861 *pncols = npeak + 1;
1862 numaDestroy(&na2);
1863 numaDestroy(&na3);
1864 numaDestroy(&na4);
1865 numaDestroy(&na5);
1866 }
1867
1868 pixDestroy(&pix1);
1869 pixDestroy(&pix2);
1870 pixDestroy(&pix3);
1871 pixDestroy(&pix4);
1872 numaDestroy(&na1);
1873 return 0;
1874 }
1875
1876
1877 /*------------------------------------------------------------------*
1878 * Decision text vs photo *
1879 *------------------------------------------------------------------*/
1880 /*!
1881 * \brief pixDecideIfText()
1882 *
1883 * \param[in] pixs any depth
1884 * \param[in] box [optional] if null, use entire pixs
1885 * \param[out] pistext 1 if text; 0 if photo; -1 if not determined or empty
1886 * \param[in] pixadb [optional] pre-allocated, for showing intermediate
1887 * computation; use NULL to skip
1888 * \return 0 if OK, 1 on error
1889 *
1890 * <pre>
1891 * Notes:
1892 * (1) It is assumed that pixs has the correct resolution set.
1893 * If the resolution is 0, we set to 300 and issue a warning.
1894 * (2) If necessary, the image is scaled to 300 ppi; most of the
1895 * processing is done at this resolution.
1896 * (3) Text is assumed to be in horizontal lines.
1897 * (4) Because thin vertical lines are removed before filtering for
1898 * text lines, this should identify tables as text.
1899 * (5) If %box is null and pixs contains both text lines and line art,
1900 * this function might return %istext == true.
1901 * (6) If the input pixs is empty, or for some other reason the
1902 * result can not be determined, return -1.
1903 * (7) For debug output, input a pre-allocated pixa.
1904 * </pre>
1905 */
1906 l_ok
1907 pixDecideIfText(PIX *pixs,
1908 BOX *box,
1909 l_int32 *pistext,
1910 PIXA *pixadb)
1911 {
1912 l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1913 l_float32 ratio1, ratio2;
1914 L_BMF *bmf;
1915 BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1916 PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1917 PIXA *pixa1;
1918 SEL *sel1;
1919
1920 if (!pistext)
1921 return ERROR_INT("&istext not defined", __func__, 1);
1922 *pistext = -1;
1923 if (!pixs)
1924 return ERROR_INT("pixs not defined", __func__, 1);
1925
1926 /* Crop, convert to 1 bpp, 300 ppi */
1927 if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL)
1928 return ERROR_INT("pix1 not made", __func__, 1);
1929
1930 pixZero(pix1, &empty);
1931 if (empty) {
1932 pixDestroy(&pix1);
1933 L_INFO("pix is empty\n", __func__);
1934 return 0;
1935 }
1936 w = pixGetWidth(pix1);
1937
1938 /* Identify and remove tall, thin vertical lines (as found in tables)
1939 * that are up to 9 pixels wide. Make a hit-miss sel with an
1940 * 81 pixel vertical set of hits and with 3 pairs of misses that
1941 * are 10 pixels apart horizontally. It is necessary to use a
1942 * hit-miss transform; if we only opened with a vertical line of
1943 * hits, we would remove solid regions of pixels that are not
1944 * text or vertical lines. */
1945 pix2 = pixCreate(11, 81, 1);
1946 for (i = 0; i < 81; i++)
1947 pixSetPixel(pix2, 5, i, 1);
1948 sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1949 selSetElement(sel1, 20, 0, SEL_MISS);
1950 selSetElement(sel1, 20, 10, SEL_MISS);
1951 selSetElement(sel1, 40, 0, SEL_MISS);
1952 selSetElement(sel1, 40, 10, SEL_MISS);
1953 selSetElement(sel1, 60, 0, SEL_MISS);
1954 selSetElement(sel1, 60, 10, SEL_MISS);
1955 pix3 = pixHMT(NULL, pix1, sel1);
1956 pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1957 pix5 = pixXor(NULL, pix1, pix4);
1958 pixDestroy(&pix2);
1959 selDestroy(&sel1);
1960
1961 /* Convert the text lines to separate long horizontal components */
1962 pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0);
1963
1964 /* Estimate the distance to the bottom of the significant region */
1965 if (box) { /* use full height */
1966 pixGetDimensions(pix6, NULL, &h, NULL);
1967 } else { /* use height of region that has text lines */
1968 pixFindThreshFgExtent(pix6, 400, NULL, &h);
1969 }
1970
1971 if (pixadb) {
1972 bmf = bmfCreate(NULL, 6);
1973 pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary",
1974 0x0000ff00, L_ADD_BELOW);
1975 pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line",
1976 0x0000ff00, L_ADD_BELOW);
1977 pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill",
1978 0x0000ff00, L_ADD_BELOW);
1979 pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor",
1980 0x0000ff00, L_ADD_BELOW);
1981 pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components",
1982 0x0000ff00, L_ADD_BELOW);
1983 }
1984
1985 /* Extract the connected components */
1986 if (pixadb) {
1987 boxa1 = pixConnComp(pix6, &pixa1, 8);
1988 pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
1989 pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
1990 pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components",
1991 0x0000ff00, L_ADD_BELOW);
1992 pixDestroy(&pix7);
1993 pixaDestroy(&pixa1);
1994 bmfDestroy(&bmf);
1995 } else {
1996 boxa1 = pixConnComp(pix6, NULL, 8);
1997 }
1998
1999 /* Analyze the connected components. The following conditions
2000 * at 300 ppi must be satisfied if the image is text:
2001 * (1) There are no components that are wider than 400 pixels and
2002 * taller than 175 pixels.
2003 * (2) The second longest component is at least 60% of the
2004 * (possibly cropped) image width. This catches images
2005 * that don't have any significant content.
2006 * (3) Of the components that are at least 40% of the length
2007 * of the longest (n2), at least 80% of them must not exceed
2008 * 60 pixels in height.
2009 * (4) The number of those long, thin components (n3) must
2010 * equal or exceed a minimum that scales linearly with the
2011 * image height.
2012 * Most images that are not text fail more than one of these
2013 * conditions. */
2014 boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL);
2015 boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */
2016 boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH,
2017 L_SELECT_IF_GTE, NULL);
2018 boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT,
2019 L_SELECT_IF_LTE, NULL);
2020 boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH,
2021 L_SELECT_IF_GT, NULL);
2022 big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
2023 n1 = boxaGetCount(boxa1);
2024 n2 = boxaGetCount(boxa3);
2025 n3 = boxaGetCount(boxa4);
2026 ratio1 = (l_float32)maxw / (l_float32)w;
2027 ratio2 = (l_float32)n3 / (l_float32)n2;
2028 minlines = L_MAX(2, h / 125);
2029 if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
2030 *pistext = 0;
2031 else
2032 *pistext = 1;
2033 if (pixadb) {
2034 if (*pistext == 1) {
2035 L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, "
2036 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
2037 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2038 maxw, ratio1, h, big_comp);
2039 } else {
2040 L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, "
2041 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
2042 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2043 maxw, ratio1, h, big_comp);
2044 }
2045 }
2046
2047 boxaDestroy(&boxa1);
2048 boxaDestroy(&boxa2);
2049 boxaDestroy(&boxa3);
2050 boxaDestroy(&boxa4);
2051 boxaDestroy(&boxa5);
2052 pixDestroy(&pix1);
2053 pixDestroy(&pix3);
2054 pixDestroy(&pix4);
2055 pixDestroy(&pix5);
2056 pixDestroy(&pix6);
2057 return 0;
2058 }
2059
2060
2061 /*!
2062 * \brief pixFindThreshFgExtent()
2063 *
2064 * \param[in] pixs 1 bpp
2065 * \param[in] thresh threshold number of pixels in row
2066 * \param[out] ptop [optional] location of top of region
2067 * \param[out] pbot [optional] location of bottom of region
2068 * \return 0 if OK, 1 on error
2069 */
2070 l_ok
2071 pixFindThreshFgExtent(PIX *pixs,
2072 l_int32 thresh,
2073 l_int32 *ptop,
2074 l_int32 *pbot)
2075 {
2076 l_int32 i, n;
2077 l_int32 *array;
2078 NUMA *na;
2079
2080 if (ptop) *ptop = 0;
2081 if (pbot) *pbot = 0;
2082 if (!ptop && !pbot)
2083 return ERROR_INT("nothing to determine", __func__, 1);
2084 if (!pixs || pixGetDepth(pixs) != 1)
2085 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2086
2087 na = pixCountPixelsByRow(pixs, NULL);
2088 n = numaGetCount(na);
2089 array = numaGetIArray(na);
2090 if (ptop) {
2091 for (i = 0; i < n; i++) {
2092 if (array[i] >= thresh) {
2093 *ptop = i;
2094 break;
2095 }
2096 }
2097 }
2098 if (pbot) {
2099 for (i = n - 1; i >= 0; i--) {
2100 if (array[i] >= thresh) {
2101 *pbot = i;
2102 break;
2103 }
2104 }
2105 }
2106 LEPT_FREE(array);
2107 numaDestroy(&na);
2108 return 0;
2109 }
2110
2111
2112 /*------------------------------------------------------------------*
2113 * Decision: table vs text *
2114 *------------------------------------------------------------------*/
2115 /*!
2116 * \brief pixDecideIfTable()
2117 *
2118 * \param[in] pixs any depth, any resolution >= 75 ppi
2119 * \param[in] box [optional] if null, use entire pixs
2120 * \param[in] orient L_PORTRAIT_MODE, L_LANDSCAPE_MODE
2121 * \param[out] pscore 0 - 4; -1 if not determined
2122 * \param[in] pixadb [optional] pre-allocated, for showing intermediate
2123 * computation; use NULL to skip
2124 * \return 0 if OK, 1 on error
2125 *
2126 * <pre>
2127 * Notes:
2128 * (1) It is assumed that pixs has the correct resolution set.
2129 * If the resolution is 0, we assume it is 300 ppi and issue a warning.
2130 * (2) If %orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
2131 * clockwise before being analyzed.
2132 * (3) The interpretation of the returned score:
2133 * -1 undetermined
2134 * 0 no table
2135 * 1 unlikely to have a table
2136 * 2 likely to have a table
2137 * 3 even more likely to have a table
2138 * 4 extremely likely to have a table
2139 * * Setting the condition for finding a table at score >= 2 works
2140 * well, except for false positives on kanji and landscape text.
2141 * * These false positives can be removed by setting the condition
2142 * at score >= 3, but recall is lowered because it will not find
2143 * tables without either horizontal or vertical lines.
2144 * (4) Most of the processing takes place at 75 ppi.
2145 * (5) Internally, three numbers are determined, for horizontal and
2146 * vertical fg lines, and for vertical bg lines. From these,
2147 * four tests are made to decide if there is a table occupying
2148 * a significant part of the image.
2149 * (6) Images have arbitrary content and would be likely to trigger
2150 * this detector, so they are checked for first, and if found,
2151 * return with a 0 (no table) score.
2152 * (7) Musical scores (tablature) are likely to trigger the detector.
2153 * (8) Tables of content with more than 2 columns are likely to
2154 * trigger the detector.
2155 * (9) For debug output, input a pre-allocated pixa.
2156 * </pre>
2157 */
2158 l_ok
2159 pixDecideIfTable(PIX *pixs,
2160 BOX *box,
2161 l_int32 orient,
2162 l_int32 *pscore,
2163 PIXA *pixadb)
2164 {
2165 l_int32 empty, nhb, nvb, nvw, score, htfound;
2166 PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
2167
2168 if (!pscore)
2169 return ERROR_INT("&score not defined", __func__, 1);
2170 *pscore = -1;
2171 if (!pixs)
2172 return ERROR_INT("pixs not defined", __func__, 1);
2173
2174 /* Check if there is an image region. First convert to 1 bpp
2175 * at 175 ppi. If an image is found, assume there is no table. */
2176 pix1 = pixPrepare1bpp(pixs, box, 0.1f, 175);
2177 pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL);
2178 if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY);
2179 pixDestroy(&pix1);
2180 pixDestroy(&pix2);
2181 if (htfound) {
2182 *pscore = 0;
2183 L_INFO("pix has an image region\n", __func__);
2184 return 0;
2185 }
2186
2187 /* Crop, convert to 1 bpp, 75 ppi */
2188 if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL)
2189 return ERROR_INT("pix1 not made", __func__, 1);
2190
2191 pixZero(pix1, &empty);
2192 if (empty) {
2193 *pscore = 0;
2194 pixDestroy(&pix1);
2195 L_INFO("pix is empty\n", __func__);
2196 return 0;
2197 }
2198
2199 /* The 2x2 dilation on 75 ppi makes these two approaches very similar:
2200 * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution
2201 * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
2202 * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution
2203 * pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2204 * But (2) is more efficient if the input image to pixPrepare1bpp()
2205 * is not at 300 ppi. */
2206 pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2207
2208 /* Deskew both horizontally and vertically; rotate by 90
2209 * degrees if in landscape mode. */
2210 pix3 = pixDeskewBoth(pix2, 1);
2211 if (pixadb) {
2212 pixaAddPix(pixadb, pix2, L_COPY);
2213 pixaAddPix(pixadb, pix3, L_COPY);
2214 }
2215 if (orient == L_LANDSCAPE_MODE)
2216 pix4 = pixRotate90(pix3, 1);
2217 else
2218 pix4 = pixClone(pix3);
2219 pixDestroy(&pix1);
2220 pixDestroy(&pix2);
2221 pixDestroy(&pix3);
2222 pix1 = pixClone(pix4);
2223 pixDestroy(&pix4);
2224
2225 /* Look for horizontal and vertical lines */
2226 pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0);
2227 pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
2228 pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0);
2229 pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
2230 pix6 = pixOr(NULL, pix3, pix5);
2231 if (pixadb) {
2232 pixaAddPix(pixadb, pix2, L_COPY);
2233 pixaAddPix(pixadb, pix4, L_COPY);
2234 pixaAddPix(pixadb, pix3, L_COPY);
2235 pixaAddPix(pixadb, pix5, L_COPY);
2236 pixaAddPix(pixadb, pix6, L_COPY);
2237 }
2238 pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */
2239 pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */
2240
2241 /* Remove the lines */
2242 pixSubtract(pix1, pix1, pix6);
2243 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2244
2245 /* Remove noise pixels */
2246 pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0);
2247 if (pixadb) pixaAddPix(pixadb, pix7, L_COPY);
2248
2249 /* Look for vertical white space. Invert to convert white bg
2250 * to fg. Use a single rank-1 2x reduction, which closes small
2251 * fg holes, for the final processing at 37.5 ppi.
2252 * The vertical opening is then about 3 inches on a 300 ppi image.
2253 * We also remove vertical whitespace that is less than 5 pixels
2254 * wide at this resolution (about 0.1 inches) */
2255 pixInvert(pix7, pix7);
2256 pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0);
2257 pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH,
2258 L_SELECT_IF_GTE, NULL);
2259 pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */
2260 if (pixadb) {
2261 pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT);
2262 pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT);
2263 }
2264
2265 /* Require at least 2 of the following 4 conditions for a table.
2266 * Some tables do not have black (fg) lines, and for those we
2267 * require more than 6 long vertical whitespace (bg) lines. */
2268 score = 0;
2269 if (nhb > 1) score++;
2270 if (nvb > 2) score++;
2271 if (nvw > 3) score++;
2272 if (nvw > 6) score++;
2273 *pscore = score;
2274
2275 pixDestroy(&pix1);
2276 pixDestroy(&pix2);
2277 pixDestroy(&pix3);
2278 pixDestroy(&pix4);
2279 pixDestroy(&pix5);
2280 pixDestroy(&pix6);
2281 pixDestroy(&pix7);
2282 pixDestroy(&pix8);
2283 pixDestroy(&pix9);
2284 return 0;
2285 }
2286
2287
2288 /*!
2289 * \brief pixPrepare1bpp()
2290 *
2291 * \param[in] pixs any depth
2292 * \param[in] box [optional] if null, use entire pixs
2293 * \param[in] cropfract fraction to be removed from the boundary;
2294 * use 0.0 to retain the entire image
2295 * \param[in] outres desired resolution of output image; if the
2296 * input image resolution is not set, assume
2297 * 300 ppi; use 0 to skip scaling.
2298 * \return pixd if OK, NULL on error
2299 *
2300 * <pre>
2301 * Notes:
2302 * (1) This handles some common pre-processing operations,
2303 * where the page segmentation algorithm takes a 1 bpp image.
2304 * </pre>
2305 */
2306 PIX *
2307 pixPrepare1bpp(PIX *pixs,
2308 BOX *box,
2309 l_float32 cropfract,
2310 l_int32 outres)
2311 {
2312 l_int32 w, h, res;
2313 l_float32 factor;
2314 BOX *box1;
2315 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2316
2317 if (!pixs)
2318 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2319
2320 /* Crop the image. If no box is given, use %cropfract to remove
2321 * pixels near the image boundary; this helps avoid false
2322 * negatives from noise that is often found there. */
2323 if (box) {
2324 pix1 = pixClipRectangle(pixs, box, NULL);
2325 } else {
2326 pixGetDimensions(pixs, &w, &h, NULL);
2327 box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
2328 (l_int32)((1.0 - 2 * cropfract) * w),
2329 (l_int32)((1.0 - 2 * cropfract) * h));
2330 pix1 = pixClipRectangle(pixs, box1, NULL);
2331 boxDestroy(&box1);
2332 }
2333
2334 /* Convert to 1 bpp with adaptive background cleaning */
2335 if (pixGetDepth(pixs) > 1) {
2336 pix2 = pixConvertTo8(pix1, 0);
2337 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160);
2338 pixDestroy(&pix1);
2339 pixDestroy(&pix2);
2340 if (!pix3) {
2341 L_INFO("pix cleaning failed\n", __func__);
2342 return NULL;
2343 }
2344 pix4 = pixThresholdToBinary(pix3, 200);
2345 pixDestroy(&pix3);
2346 } else {
2347 pix4 = pixClone(pix1);
2348 pixDestroy(&pix1);
2349 }
2350
2351 /* Scale the image to the requested output resolution;
2352 do not scale if %outres <= 0 */
2353 if (outres <= 0)
2354 return pix4;
2355 if ((res = pixGetXRes(pixs)) == 0) {
2356 L_WARNING("Resolution is not set: using 300 ppi\n", __func__);
2357 res = 300;
2358 }
2359 if (res != outres) {
2360 factor = (l_float32)outres / (l_float32)res;
2361 pix5 = pixScale(pix4, factor, factor);
2362 } else {
2363 pix5 = pixClone(pix4);
2364 }
2365 pixDestroy(&pix4);
2366 return pix5;
2367 }
2368
2369
2370 /*------------------------------------------------------------------*
2371 * Estimate the grayscale background value *
2372 *------------------------------------------------------------------*/
2373 /*!
2374 * \brief pixEstimateBackground()
2375 *
2376 * \param[in] pixs 8 bpp, with or without colormap
2377 * \param[in] darkthresh pixels below this value are never considered
2378 * part of the background; typ. 70; use 0 to skip
2379 * \param[in] edgecrop fraction of half-width on each side, and of
2380 * half-height at top and bottom, that are cropped
2381 * \param[out] pbg estimated background, or 0 on error
2382 * \return 0 if OK, 1 on error
2383 *
2384 * <pre>
2385 * Notes:
2386 * (1) Caller should check that return bg value is > 0.
2387 * </pre>
2388 */
2389 l_ok
2390 pixEstimateBackground(PIX *pixs,
2391 l_int32 darkthresh,
2392 l_float32 edgecrop,
2393 l_int32 *pbg)
2394 {
2395 l_int32 w, h, sampling;
2396 l_float32 fbg;
2397 BOX *box;
2398 PIX *pix1, *pix2, *pixm;
2399
2400 if (!pbg)
2401 return ERROR_INT("&bg not defined", __func__, 1);
2402 *pbg = 0;
2403 if (!pixs || pixGetDepth(pixs) != 8)
2404 return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
2405 if (darkthresh > 128)
2406 L_WARNING("darkthresh unusually large\n", __func__);
2407 if (edgecrop < 0.0 || edgecrop >= 1.0)
2408 return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1);
2409
2410 pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
2411 pixGetDimensions(pix1, &w, &h, NULL);
2412
2413 /* Optionally crop inner part of image */
2414 if (edgecrop > 0.0) {
2415 box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
2416 (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
2417 pix2 = pixClipRectangle(pix1, box, NULL);
2418 boxDestroy(&box);
2419 } else {
2420 pix2 = pixClone(pix1);
2421 }
2422
2423 /* We will use no more than 50K samples */
2424 sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
2425
2426 /* Optionally make a mask over all pixels lighter than %darkthresh */
2427 pixm = NULL;
2428 if (darkthresh > 0) {
2429 pixm = pixThresholdToBinary(pix2, darkthresh);
2430 pixInvert(pixm, pixm);
2431 }
2432
2433 pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
2434 *pbg = (l_int32)(fbg + 0.5);
2435 pixDestroy(&pix1);
2436 pixDestroy(&pix2);
2437 pixDestroy(&pixm);
2438 return 0;
2439 }
2440
2441
2442 /*---------------------------------------------------------------------*
2443 * Largest white or black rectangles in an image *
2444 *---------------------------------------------------------------------*/
2445 /*!
2446 * \brief pixFindLargeRectangles()
2447 *
2448 * \param[in] pixs 1 bpp
2449 * \param[in] polarity 0 within background, 1 within foreground
2450 * \param[in] nrect number of rectangles to be found
2451 * \param[out] pboxa largest rectangles, sorted by decreasing area
2452 * \param[in,out] ppixdb optional return output with rectangles drawn on it
2453 * \return 0 if OK, 1 on error
2454 *
2455 * <pre>
2456 * Notes:
2457 * (1) This does a greedy search to find the largest rectangles,
2458 * either black or white and without overlaps, in %pix.
2459 * (2) See pixFindLargestRectangle(), which is called multiple
2460 * times, for details. On each call, the largest rectangle
2461 * found is painted, so that none of its pixels can be
2462 * used later, before calling it again.
2463 * (3) This function is surprisingly fast. Although
2464 * pixFindLargestRectangle() runs at about 50 MPix/sec, when it
2465 * is run multiple times by pixFindLargeRectangles(), it processes
2466 * at 150 - 250 MPix/sec, and the time is approximately linear
2467 * in %nrect. For example, for a 1 MPix image, searching for
2468 * the largest 50 boxes takes about 0.2 seconds.
2469 * </pre>
2470 */
2471 l_ok
2472 pixFindLargeRectangles(PIX *pixs,
2473 l_int32 polarity,
2474 l_int32 nrect,
2475 BOXA **pboxa,
2476 PIX **ppixdb)
2477 {
2478 l_int32 i, op, bx, by, bw, bh;
2479 BOX *box;
2480 BOXA *boxa;
2481 PIX *pix;
2482
2483 if (ppixdb) *ppixdb = NULL;
2484 if (!pboxa)
2485 return ERROR_INT("&boxa not defined", __func__, 1);
2486 *pboxa = NULL;
2487 if (!pixs || pixGetDepth(pixs) != 1)
2488 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2489 if (polarity != 0 && polarity != 1)
2490 return ERROR_INT("invalid polarity", __func__, 1);
2491 if (nrect > 1000) {
2492 L_WARNING("large num rectangles = %d requested; using 1000\n",
2493 __func__, nrect);
2494 nrect = 1000;
2495 }
2496
2497 pix = pixCopy(NULL, pixs);
2498 boxa = boxaCreate(nrect);
2499 *pboxa = boxa;
2500
2501 /* Sequentially find largest rectangle and fill with opposite color */
2502 for (i = 0; i < nrect; i++) {
2503 if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) {
2504 boxDestroy(&box);
2505 L_ERROR("failure in pixFindLargestRectangle\n", __func__);
2506 break;
2507 }
2508 boxaAddBox(boxa, box, L_INSERT);
2509 op = (polarity == 0) ? PIX_SET : PIX_CLR;
2510 boxGetGeometry(box, &bx, &by, &bw, &bh);
2511 pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
2512 }
2513
2514 if (ppixdb)
2515 *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
2516
2517 pixDestroy(&pix);
2518 return 0;
2519 }
2520
2521
2522 /*!
2523 * \brief pixFindLargestRectangle()
2524 *
2525 * \param[in] pixs 1 bpp
2526 * \param[in] polarity 0 within background, 1 within foreground
2527 * \param[out] pbox largest area rectangle
2528 * \param[in,out] ppixdb optional return output with rectangle drawn on it
2529 * \return 0 if OK, 1 on error
2530 *
2531 * <pre>
2532 * Notes:
2533 * (1) This is a simple and elegant solution to a problem in
2534 * computational geometry that at first appears to be quite
2535 * difficult: what is the largest rectangle that can be
2536 * placed in the image, covering only pixels of one polarity
2537 * (bg or fg)? The solution is O(n), where n is the number
2538 * of pixels in the image, and it requires nothing more than
2539 * using a simple recursion relation in a single sweep of the image.
2540 * (2) In a sweep from UL to LR with left-to-right being the fast
2541 * direction, calculate the largest white rectangle at (x, y),
2542 * using previously calculated values at pixels #1 and #2:
2543 * #1: (x, y - 1)
2544 * #2: (x - 1, y)
2545 * We also need the most recent "black" pixels that were seen
2546 * in the current row and column.
2547 * Consider the largest area. There are only two possibilities:
2548 * (a) Min(w(1), horizdist) * (h(1) + 1)
2549 * (b) Min(h(2), vertdist) * (w(2) + 1)
2550 * where
2551 * horizdist: the distance from the rightmost "black" pixel seen
2552 * in the current row across to the current pixel
2553 * vertdist: the distance from the lowest "black" pixel seen
2554 * in the current column down to the current pixel
2555 * and we choose the Max of (a) and (b).
2556 * (3) To convince yourself that these recursion relations are correct,
2557 * it helps to draw the maximum rectangles at #1 and #2.
2558 * Then for #1, you try to extend the rectangle down one line,
2559 * so that the height is h(1) + 1. Do you get the full
2560 * width of #1, w(1)? It depends on where the black pixels are
2561 * in the current row. You know the final width is bounded by w(1)
2562 * and w(2) + 1, but the actual value depends on the distribution
2563 * of black pixels in the current row that are at a distance
2564 * from the current pixel that is between these limits.
2565 * We call that value "horizdist", and the area is then given
2566 * by the expression (a) above. Using similar reasoning for #2,
2567 * where you attempt to extend the rectangle to the right
2568 * by 1 pixel, you arrive at (b). The largest rectangle is
2569 * then found by taking the Max.
2570 * </pre>
2571 */
2572 l_ok
2573 pixFindLargestRectangle(PIX *pixs,
2574 l_int32 polarity,
2575 BOX **pbox,
2576 PIX **ppixdb)
2577 {
2578 l_int32 i, j, w, h, d, wpls, val;
2579 l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2580 l_int32 xmax, ymax; /* LR corner of the largest rectangle */
2581 l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2582 l_int32 *lowestfg;
2583 l_uint32 *datas, *lines;
2584 l_uint32 **linew, **lineh;
2585 BOX *box;
2586 PIX *pixw, *pixh; /* keeps the width and height for the largest */
2587 /* rectangles whose LR corner is located there. */
2588
2589 if (ppixdb) *ppixdb = NULL;
2590 if (!pbox)
2591 return ERROR_INT("&box not defined", __func__, 1);
2592 *pbox = NULL;
2593 if (!pixs)
2594 return ERROR_INT("pixs not defined", __func__, 1);
2595 pixGetDimensions(pixs, &w, &h, &d);
2596 if (d != 1)
2597 return ERROR_INT("pixs not 1 bpp", __func__, 1);
2598 if (polarity != 0 && polarity != 1)
2599 return ERROR_INT("invalid polarity", __func__, 1);
2600
2601 /* Initialize lowest "fg" seen so far for each column */
2602 lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
2603 for (i = 0; i < w; i++)
2604 lowestfg[i] = -1;
2605
2606 /* The combination (val ^ polarity) is the color for which we
2607 * are searching for the maximum rectangle. For polarity == 0,
2608 * we search in the bg (white). */
2609 pixw = pixCreate(w, h, 32); /* stores width */
2610 pixh = pixCreate(w, h, 32); /* stores height */
2611 linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2612 lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2613 datas = pixGetData(pixs);
2614 wpls = pixGetWpl(pixs);
2615 maxarea = xmax = ymax = wmax = hmax = 0;
2616 for (i = 0; i < h; i++) {
2617 lines = datas + i * wpls;
2618 prevfg = -1;
2619 for (j = 0; j < w; j++) {
2620 val = GET_DATA_BIT(lines, j);
2621 if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */
2622 if (i == 0 && j == 0) {
2623 wp = hp = 1;
2624 } else if (i == 0) {
2625 wp = linew[i][j - 1] + 1;
2626 hp = 1;
2627 } else if (j == 0) {
2628 wp = 1;
2629 hp = lineh[i - 1][j] + 1;
2630 } else {
2631 /* Expand #1 prev rectangle down */
2632 w1 = linew[i - 1][j];
2633 h1 = lineh[i - 1][j];
2634 horizdist = j - prevfg;
2635 wmin = L_MIN(w1, horizdist); /* width of new rectangle */
2636 area1 = wmin * (h1 + 1);
2637
2638 /* Expand #2 prev rectangle to right */
2639 w2 = linew[i][j - 1];
2640 h2 = lineh[i][j - 1];
2641 vertdist = i - lowestfg[j];
2642 hmin = L_MIN(h2, vertdist); /* height of new rectangle */
2643 area2 = hmin * (w2 + 1);
2644
2645 if (area1 > area2) {
2646 wp = wmin;
2647 hp = h1 + 1;
2648 } else {
2649 wp = w2 + 1;
2650 hp = hmin;
2651 }
2652 }
2653 } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */
2654 prevfg = j;
2655 lowestfg[j] = i;
2656 wp = hp = 0;
2657 }
2658 linew[i][j] = wp;
2659 lineh[i][j] = hp;
2660 if (wp * hp > maxarea) {
2661 maxarea = wp * hp;
2662 xmax = j;
2663 ymax = i;
2664 wmax = wp;
2665 hmax = hp;
2666 }
2667 }
2668 }
2669
2670 /* Translate from LR corner to Box coords (UL corner, w, h) */
2671 box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2672 *pbox = box;
2673
2674 if (ppixdb) {
2675 *ppixdb = pixConvertTo8(pixs, TRUE);
2676 pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0);
2677 }
2678
2679 LEPT_FREE(linew);
2680 LEPT_FREE(lineh);
2681 LEPT_FREE(lowestfg);
2682 pixDestroy(&pixw);
2683 pixDestroy(&pixh);
2684 return 0;
2685 }
2686
2687
2688 /*---------------------------------------------------------------------*
2689 * Generate rectangle inside connected component *
2690 *---------------------------------------------------------------------*/
2691 /*!
2692 * \brief pixFindRectangleInCC()
2693 *
2694 * \param[in] pixs 1 bpp, with sufficient closings to make the fg be
2695 * a single c.c. that is a convex hull
2696 * \param[in] boxs [optional] if NULL, %pixs should be a minimum
2697 * container of a single c.c.
2698 * \param[in] fract first and all consecutive lines found must be at
2699 * least this fraction of the fast scan dimension
2700 * \param[in] dir L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of
2701 * fast scan
2702 * \param[in] select L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
2703 * L_LARGEST_AREA, L_SMALEST_AREA
2704 * \param[in] debug if 1, generates output pdf showing intermediate
2705 * computation and final result
2706 * \return box of included rectangle, or NULL on error
2707 *
2708 * <pre>
2709 * Notes:
2710 * (1) Computation is similar to pixFindLargestRectangle(), but allows
2711 * a different set of results to choose from.
2712 * (2) Select the fast scan direction. Then, scanning in the slow
2713 * direction, find the longest run of ON pixels in the fast
2714 * scan direction and look for the first run that is longer
2715 * than %fract of the dimension. Continue until a shorter run
2716 * is found. This generates a box of ON pixels fitting into the c.c.
2717 * (3) Do this from both slow scan directions and use %select to get
2718 * a resulting box from these two.
2719 * (4) The extracted rectangle is not necessarily the largest that
2720 * can fit in the c.c. To get that, use pixFindLargestRectangle().
2721 */
2722 BOX *
2723 pixFindRectangleInCC(PIX *pixs,
2724 BOX *boxs,
2725 l_float32 fract,
2726 l_int32 dir,
2727 l_int32 select,
2728 l_int32 debug)
2729 {
2730 l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res;
2731 l_int32 xfirst, xlast, xstart, yfirst, ylast, length;
2732 BOX *box1, *box2, *box3, *box4, *box5;
2733 PIX *pix1, *pix2, *pixdb1, *pixdb2;
2734 PIXA *pixadb;
2735
2736 if (!pixs || pixGetDepth(pixs) != 1)
2737 return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
2738 if (fract <= 0.0 || fract > 1.0)
2739 return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
2740 if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
2741 return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
2742 if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
2743 select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
2744 return (BOX *)ERROR_PTR("invalid select", __func__, NULL);
2745
2746 /* Extract the c.c. if necessary */
2747 x = y = 0;
2748 if (boxs) {
2749 pix1 = pixClipRectangle(pixs, boxs, NULL);
2750 boxGetGeometry(boxs, &x, &y, NULL, NULL);
2751 } else {
2752 pix1 = pixClone(pixs);
2753 }
2754
2755 /* All fast scans are horizontal; rotate 90 deg cw if necessary */
2756 if (dir == L_SCAN_VERTICAL)
2757 pix2 = pixRotate90(pix1, 1);
2758 else /* L_SCAN_HORIZONTAL */
2759 pix2 = pixClone(pix1);
2760 pixGetDimensions(pix2, &w, &h, NULL);
2761
2762 pixadb = (debug) ? pixaCreate(0) : NULL;
2763 pixdb1 = NULL;
2764 if (pixadb) {
2765 lept_mkdir("lept/rect");
2766 pixaAddPix(pixadb, pix1, L_CLONE);
2767 pixdb1 = pixConvertTo32(pix2);
2768 }
2769 pixDestroy(&pix1);
2770
2771 /* Scanning down, find the first scanline with a long enough run.
2772 * That run goes from (xfirst, yfirst) to (xlast, yfirst). */
2773 found = FALSE;
2774 for (i = 0; i < h; i++) {
2775 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2776 if (length >= (l_int32)(fract * w + 0.5)) {
2777 yfirst = i;
2778 xfirst = xstart;
2779 xlast = xfirst + length - 1;
2780 found = TRUE;
2781 break;
2782 }
2783 }
2784 if (!found) {
2785 L_WARNING("no run of sufficient size was found\n", __func__);
2786 pixDestroy(&pix2);
2787 pixDestroy(&pixdb1);
2788 pixaDestroy(&pixadb);
2789 return NULL;
2790 }
2791
2792 /* Continue down until the condition fails */
2793 w1 = xlast - xfirst + 1;
2794 h1 = h - yfirst; /* init */
2795 ylast = h - 1; /* init */
2796 for (i = yfirst + 1; i < h; i++) {
2797 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2798 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2799 i == h - 1) {
2800 ylast = i - 1;
2801 h1 = ylast - yfirst + 1;
2802 break;
2803 }
2804 }
2805 box1 = boxCreate(xfirst, yfirst, w1, h1);
2806
2807 /* Scanning up, find the first scanline with a long enough run.
2808 * That run goes from (xfirst, ylast) to (xlast, ylast). */
2809 for (i = h - 1; i >= 0; i--) {
2810 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2811 if (length >= (l_int32)(fract * w + 0.5)) {
2812 ylast = i;
2813 xfirst = xstart;
2814 xlast = xfirst + length - 1;
2815 break;
2816 }
2817 }
2818
2819 /* Continue up until the condition fails */
2820 w2 = xlast - xfirst + 1;
2821 h2 = ylast + 1; /* initialize */
2822 for (i = ylast - 1; i >= 0; i--) {
2823 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2824 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2825 i == 0) {
2826 yfirst = i + 1;
2827 h2 = ylast - yfirst + 1;
2828 break;
2829 }
2830 }
2831 box2 = boxCreate(xfirst, yfirst, w2, h2);
2832 pixDestroy(&pix2);
2833
2834 if (pixadb) {
2835 pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2836 pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2837 pixaAddPix(pixadb, pixdb1, L_INSERT);
2838 }
2839
2840 /* Select the final result from the two boxes */
2841 if (select == L_GEOMETRIC_UNION)
2842 box3 = boxBoundingRegion(box1, box2);
2843 else if (select == L_GEOMETRIC_INTERSECTION)
2844 box3 = boxOverlapRegion(box1, box2);
2845 else if (select == L_LARGEST_AREA)
2846 box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2847 else /* select == L_SMALLEST_AREA) */
2848 box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2849 boxDestroy(&box1);
2850 boxDestroy(&box2);
2851
2852 /* Rotate the box 90 degrees ccw if necessary */
2853 box4 = NULL;
2854 if (box3) {
2855 if (dir == L_SCAN_VERTICAL)
2856 box4 = boxRotateOrth(box3, w, h, 3);
2857 else
2858 box4 = boxCopy(box3);
2859 }
2860
2861 /* Transform back to global coordinates if %boxs exists */
2862 box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2863 boxDestroy(&box3);
2864 boxDestroy(&box4);
2865
2866 /* Debug output */
2867 if (pixadb) {
2868 pixdb1 = pixConvertTo8(pixs, 0);
2869 pixAddConstantGray(pixdb1, 190);
2870 pixdb2 = pixConvertTo32(pixdb1);
2871 if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2872 pixaAddPix(pixadb, pixdb2, L_INSERT);
2873 res = pixGetXRes(pixs);
2874 L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
2875 pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
2876 "/tmp/lept/rect/fitrect.pdf");
2877 pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2878 pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2879 pixDestroy(&pix1);
2880 pixDestroy(&pixdb1);
2881 pixaDestroy(&pixadb);
2882 }
2883
2884 return box5;
2885 }
2886
2887 /*------------------------------------------------------------------*
2888 * Automatic photoinvert for OCR *
2889 *------------------------------------------------------------------*/
2890 /*!
2891 * \brief pixAutoPhotoinvert()
2892 *
2893 * \param[in] pixs any depth, colormap ok
2894 * \param[in] thresh binarization threshold; use 0 for default
2895 * \param[out] ppixm [optional] image regions to be inverted
2896 * \param[out] pixadb [optional] debug; input NULL to skip
2897 * \return pixd 1 bpp image to be sent to OCR, or NULL on error
2898 *
2899 * <pre>
2900 * Notes:
2901 * (1) A 1 bpp image is returned, where pixels in image regions are
2902 * photo-inverted.
2903 * (2) If there is light text with a dark background, this will
2904 * identify the region and photoinvert the pixels there if
2905 * there are at least 60% fg pixels in the region.
2906 * (3) For debug output, input a (typically empty) %pixadb.
2907 * </pre>
2908 */
2909 PIX *
2910 pixAutoPhotoinvert(PIX *pixs,
2911 l_int32 thresh,
2912 PIX **ppixm,
2913 PIXA *pixadb)
2914 {
2915 l_int32 i, n, empty, x, y, w, h;
2916 l_float32 fgfract;
2917 BOX *box1;
2918 BOXA *boxa1;
2919 PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2920
2921 if (ppixm) *ppixm = NULL;
2922 if (!pixs)
2923 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2924 if (thresh == 0) thresh = 128;
2925
2926 if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2927 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
2928 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2929
2930 /* Identify regions for photo-inversion:
2931 * (1) Start with the halftone mask.
2932 * (2) Eliminate ordinary text and halftones in the mask.
2933 * (3) Some regions of inverted text may have been removed in
2934 * steps (1) and (2). Conditionally fill holes in the mask,
2935 * but do not fill out to the bounding rect. */
2936 pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb);
2937 pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */
2938 pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2939 if (pixadb) {
2940 pixaAddPix(pixadb, pix2, L_CLONE);
2941 pixaAddPix(pixadb, pix3, L_CLONE);
2942 pixaAddPix(pixadb, pix4, L_COPY);
2943 }
2944 pixDestroy(&pix2);
2945 pixDestroy(&pix3);
2946 pixZero(pix4, &empty);
2947 if (empty) {
2948 pixDestroy(&pix4);
2949 return pix1;
2950 }
2951
2952 /* Examine each component and validate the inversion.
2953 * Require at least 60% of pixels under each component to be FG. */
2954 boxa1 = pixConnCompBB(pix4, 8);
2955 n = boxaGetCount(boxa1);
2956 for (i = 0; i < n; i++) {
2957 box1 = boxaGetBox(boxa1, i, L_COPY);
2958 pix5 = pixClipRectangle(pix1, box1, NULL);
2959 pixForegroundFraction(pix5, &fgfract);
2960 if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract);
2961 boxGetGeometry(box1, &x, &y, &w, &h);
2962 if (fgfract < 0.6) /* erase from the mask */
2963 pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0);
2964 pixDestroy(&pix5);
2965 boxDestroy(&box1);
2966 }
2967 boxaDestroy(&boxa1);
2968 pixZero(pix4, &empty);
2969 if (empty) {
2970 pixDestroy(&pix4);
2971 return pix1;
2972 }
2973
2974 /* Combine pixels of the photo-inverted pix with the binarized input */
2975 pix5 = pixInvert(NULL, pix1);
2976 pixCombineMasked(pix1, pix5, pix4);
2977
2978 if (pixadb) {
2979 pixaAddPix(pixadb, pix5, L_CLONE);
2980 pixaAddPix(pixadb, pix1, L_COPY);
2981 }
2982 pixDestroy(&pix5);
2983 if (ppixm)
2984 *ppixm = pix4;
2985 else
2986 pixDestroy(&pix4);
2987 return pix1;
2988 }