comparison mupdf-source/thirdparty/leptonica/src/flipdetect.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file flipdetect.c
29 * <pre>
30 *
31 * High-level interface for detection and correction
32 * PIX *pixOrientCorrect()
33 *
34 * Page orientation detection (pure rotation by 90 degree increments):
35 * l_int32 pixOrientDetect()
36 * l_int32 makeOrientDecision()
37 * l_int32 pixUpDownDetect()
38 *
39 * Page mirror detection (flip 180 degrees about line in plane of image):
40 * l_int32 pixMirrorDetect()
41 *
42 * Static debug helper
43 * static void pixDebugFlipDetect()
44 *
45 * ===================================================================
46 *
47 * Page transformation detection:
48 *
49 * Once a page is deskewed, there are 8 possible states that it
50 * can be in, shown symbolically below. Suppose state 0 is correct.
51 *
52 *           0: correct     1          2          3
53 *           +------+   +------+   +------+   +------+
54 *           | **** |   | *    |   | **** |   |    * |
55 *           | *    |   | *    |   |    * |   |    * |
56 *           | *    |   | **** |   |    * |   | **** |
57 *           +------+   +------+   +------+   +------+
58 *
59 *              4          5          6          7
60 *           +-----+    +-----+    +-----+    +-----+
61 *           | *** |    |   * |    | *** |    | *   |
62 *           |   * |    |   * |    | *   |    | *   |
63 *           |   * |    |   * |    | *   |    | *   |
64 *           |   * |    | *** |    | *   |    | *** |
65 *           +-----+    +-----+    +-----+    +-----+
66 *
67 * Each of the other seven can be derived from state 0 by applying some
68 * combination of a 90 degree clockwise rotation, a flip about
69 * a horizontal line, and a flip about a vertical line,
70 * all abbreviated as:
71 * R = Rotation (about a line perpendicular to the image)
72 * H = Horizontal flip (about a vertical line in the plane of the image)
73 * V = Vertical flip (about a horizontal line in the plane of the image)
74 *
75 * We get these transformations:
76 * RHV
77 * 000 -> 0
78 * 001 -> 1
79 * 010 -> 2
80 * 011 -> 3
81 * 100 -> 4
82 * 101 -> 5
83 * 110 -> 6
84 * 111 -> 7
85 *
86 * Note that in four of these, the sum of H and V is 1 (odd).
87 * For these four, we have a change in parity (handedness) of
88 * the image, and the transformation cannot be performed by
89 * rotation about a vertical line out of the page. Under
90 * rotation R, the set of 8 transformations decomposes into
91 * two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently.
92 *
93 * pixOrientDetect() tests for a pure rotation (0, 90, 180, 270 degrees).
94 * It doesn't change parity.
95 *
96 * pixMirrorDetect() tests for a horizontal flip about the vertical axis.
97 * It changes parity.
98 *
99 * The landscape/portrait rotation can be detected in two ways:
100 *
101 * (1) Compute the deskew confidence for an image segment,
102 * both as is and rotated 90 degrees (see skew.c).
103 *
104 * (2) Compute the ascender/descender signal for the image,
105 * both as is and rotated 90 degrees (implemented here).
106 *
107 * The ascender/descender signal is useful for determining text
108 * orientation in Roman alphabets because the incidence of letters
109 * with straight-line ascenders (b, d, h, k, l, 't') outnumber
110 * those with descenders ('g', p, q). The letters 't' and 'g'
111 * will respond variably to the filter, depending on the type face.
112 *
113 * What about the mirror image situations? These aren't common
114 * unless you're dealing with film, for example.
115 * But you can reliably test if the image has undergone a
116 * parity-changing flip once about some axis in the plane
117 * of the image, using pixMirrorDetect*(). This works ostensibly by
118 * counting the number of characters with ascenders that
119 * stick out to the left and right of the ascender. Characters
120 * that are not mirror flipped are more likely to extend to the
121 * right (b, h, k) than to the left (d). Of course, that is for
122 * text that is rightside-up. So before you apply the mirror
123 * test, it is necessary to ensure that the text has the ascenders
124 * going up, and not down or to the left or right. But here's
125 * what *really* happens. It turns out that the pre-filtering before
126 * the hit-miss transform (HMT) is crucial, and surprisingly, when
127 * the pre-filtering is chosen to generate a large signal, the majority
128 * of the signal comes from open regions of common lower-case
129 * letters such as 'e', 'c' and 'f'.
130 *
131 * The set of operations you actually use depends on your prior knowledge:
132 *
133 * (1) If the page is known to be either rightside-up or upside-down, use
134 * either pixOrientDetect() with pleftconf = NULL, or
135 * pixUpDownDetect().
136 *
137 * (2) If any of the four orientations are possible, use pixOrientDetect().
138 *
139 * (3) If the text is horizontal and rightside-up, the only remaining
140 * degree of freedom is a left-right mirror flip: use pixMirrorDetect().
141 *
142 * (4) If you have a relatively large amount of numbers on the page,
143 * use the slower pixUpDownDetect().
144 *
145 * We summarize the full orientation and mirror flip detection process:
146 *
147 * (1) First determine which of the four 90 degree rotations
148 * causes the text to be rightside-up. This can be done
149 * with either skew confidence or the pixOrientDetect()
150 * signals. For the latter, see the table for pixOrientDetect().
151 *
152 * (2) Then, with ascenders pointing up, apply pixMirrorDetect().
153 * In the normal situation the confidence will be
154 * large and positive. However, if mirror flipped, the
155 * confidence will be large and negative.
156 *
157 * A high-level interface, pixOrientCorrect() combines the detection
158 * of the orientation with the rotation decision and the rotation itself.
159 *
160 * The structuring elements used for text orientation detection require text
161 * with ascenders and descenders. They have been designed to work best
162 * with normal sized text (about 10 pt font), scanned with a resolution
163 * between 150 and 300 ppi.
164 *
165 * For pedagogical reasons, we have included a dwa implementation of
166 * this functionality, in flipdetectdwa.c.notused. It shows by example
167 * how to make a dwa implementation of an application that uses binary
168 * morphological operations. It is faster than the rasterop implementation,
169 * but not by a large amount.
170 *
171 * The generation of flipdetectdwa.c.notused was achieved as follows:
172 * (1) The program flipselgen.c.notused generates the DWA code, in two C files
173 * (2) The low-level DWA code in those two files was put into a single
174 * file, fliphmtgen.c.notused, for clarity. We didn't want the two
175 * files (fmorphgen.3.c and fmorphgenlow.3.c) sitting around and
176 * possibly causing confusion.
177 * (3) This low-level code was directly incorporated into flipdetectdwa.c,
178 * where it substitutes for the basic rasterop code in flipdetect.c.
179 *
180 * Finally, use can be made of programs such as exiftool and convert to
181 * read exif camera orientation data in jpeg files and conditionally rotate.
182 * Here is an example shell script, made by Dan9er:
183 * ==================================================================
184 * #!/bin/sh
185 * # orientByExif.sh
186 * # Dependencies: exiftool (exiflib) and convert (ImageMagick)
187 * # Note: if there is no exif orientation data in the jpeg file,
188 * # this simply copies the input file.
189 * #
190 * if [[ -z $(command -v exiftool) || -z $(command -v convert) ]]; then
191 * echo "You need to install dependencies; e.g.:"
192 * echo " sudo apt install libimage-exiftool-perl"
193 * echo " sudo apt install imagemagick"
194 * exit 1
195 * fi
196 * if [[ $# != 2 ]]; then
197 * echo "Syntax: orientByExif infile outfile"
198 * exit 2
199 * fi
200 * if [[ ${1: -4} != ".jpg" ]]; then
201 * echo "File is not a jpeg"
202 * exit 3
203 * fi
204 * if [[ $(exiftool -s3 -n -Orientation "$1") = 1 ]]; then
205 * echo "Image is already upright"
206 * exit 0
207 * fi
208 * convert "$1" -auto-orient "$2"
209 * echo "Done"
210 * exit 0
211 * ==================================================================
212 * </pre>
213 */
214
215 #ifdef HAVE_CONFIG_H
216 #include <config_auto.h>
217 #endif /* HAVE_CONFIG_H */
218
219 #include <math.h>
220 #include "allheaders.h"
221
222 /* Sels for pixOrientDetect() and pixMirrorDetect() */
223 static const char *textsel1 = "x oo "
224 "x oOo "
225 "x o "
226 "x "
227 "xxxxxx";
228
229 static const char *textsel2 = " oo x"
230 " oOo x"
231 " o x"
232 " x"
233 "xxxxxx";
234
235 static const char *textsel3 = "xxxxxx"
236 "x "
237 "x o "
238 "x oOo "
239 "x oo ";
240
241 static const char *textsel4 = "xxxxxx"
242 " x"
243 " o x"
244 " oOo x"
245 " oo x";
246
    /* Parameters for determining orientation.
     *
     * DefaultMinUpDownCount: substituted when the caller passes
     *     mincount == 0 to pixOrientDetect() or pixUpDownDetect().
     *     In pixUpDownDetect() the larger of the up and down counts
     *     must exceed this value for a nonzero confidence to be returned.
     * DefaultMinUpDownConf: substituted when the caller passes
     *     minupconf == 0.0 to makeOrientDecision(); also the threshold
     *     for the debug messages printed by pixUpDownDetect().
     * DefaultMinUpDownRatio: substituted when the caller passes
     *     minratio == 0.0 to makeOrientDecision(). */
static const l_int32 DefaultMinUpDownCount = 70;
static const l_float32 DefaultMinUpDownConf = 8.0;
static const l_float32 DefaultMinUpDownRatio = 2.5;

    /* Parameters for determining mirror flip.
     *
     * DefaultMinMirrorFlipCount: substituted when the caller passes
     *     mincount == 0 to pixMirrorDetect().
     * DefaultMinMirrorFlipConf: threshold for the debug messages
     *     printed by pixMirrorDetect(). */
static const l_int32 DefaultMinMirrorFlipCount = 100;
static const l_float32 DefaultMinMirrorFlipConf = 5.0;

    /* Static debug function: writes a debug image of the hit-miss
     * result overlaid on the source; does nothing unless enable != 0 */
static void pixDebugFlipDetect(const char *filename, PIX *pixs,
                               PIX *pixhm, l_int32 enable);
259
260
261 /*----------------------------------------------------------------*
262 * High-level interface for detection and correction *
263 *----------------------------------------------------------------*/
264 /*!
265 * \brief pixOrientCorrect()
266 *
267 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi
268 * \param[in] minupconf minimum value for which a decision can be made
269 * \param[in] minratio minimum conf ratio required for a decision
270 * \param[out] pupconf [optional] ; use NULL to skip
271 * \param[out] pleftconf [optional] ; use NULL to skip
272 * \param[out] protation [optional] ; use NULL to skip
273 * \param[in] debug 1 for debug output; 0 otherwise
274 * \return pixd may be rotated by 90, 180 or 270; null on error
275 *
276 * <pre>
277 * Notes:
278 * (1) Simple top-level function to detect if Roman text is in
279 * reading orientation, and to rotate the image accordingly if not.
280 * (2) Returns a copy if no rotation is needed.
281 * (3) See notes for pixOrientDetect() and pixOrientDecision().
282 * Use 0.0 for default values for %minupconf and %minratio
283 * (4) Optional output of intermediate confidence results and
284 * the rotation performed on pixs.
285 * (5) Use on text images with a resolution between 150 and 300 ppi.
286 * </pre>
287 */
288 PIX *
289 pixOrientCorrect(PIX *pixs,
290 l_float32 minupconf,
291 l_float32 minratio,
292 l_float32 *pupconf,
293 l_float32 *pleftconf,
294 l_int32 *protation,
295 l_int32 debug)
296 {
297 l_int32 orient;
298 l_float32 upconf, leftconf;
299 PIX *pix1;
300
301 if (!pixs || pixGetDepth(pixs) != 1)
302 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
303
304 /* Get confidences for orientation */
305 pixUpDownDetect(pixs, &upconf, 0, 0, debug);
306 pix1 = pixRotate90(pixs, 1);
307 pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
308 pixDestroy(&pix1);
309 if (pupconf) *pupconf = upconf;
310 if (pleftconf) *pleftconf = leftconf;
311
312 /* Decide what to do */
313 makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
314
315 /* Do it */
316 switch (orient)
317 {
318 case L_TEXT_ORIENT_UNKNOWN:
319 L_INFO("text orientation not determined; no rotation\n", __func__);
320 if (protation) *protation = 0;
321 return pixCopy(NULL, pixs);
322 break;
323 case L_TEXT_ORIENT_UP:
324 L_INFO("text is oriented up; no rotation\n", __func__);
325 if (protation) *protation = 0;
326 return pixCopy(NULL, pixs);
327 break;
328 case L_TEXT_ORIENT_LEFT:
329 L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__);
330 if (protation) *protation = 90;
331 return pixRotateOrth(pixs, 1);
332 break;
333 case L_TEXT_ORIENT_DOWN:
334 L_INFO("text oriented down; 180 cw rotation\n", __func__);
335 if (protation) *protation = 180;
336 return pixRotateOrth(pixs, 2);
337 break;
338 case L_TEXT_ORIENT_RIGHT:
339 L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__);
340 if (protation) *protation = 270;
341 return pixRotateOrth(pixs, 3);
342 break;
343 default:
344 L_ERROR("invalid orient flag!\n", __func__);
345 return pixCopy(NULL, pixs);
346 }
347 }
348
349
350 /*----------------------------------------------------------------*
351 * Orientation detection (four 90 degree angles) *
352 *----------------------------------------------------------------*/
353 /*!
354 * \brief pixOrientDetect()
355 *
356 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi
357 * \param[out] pupconf [optional] ; may be NULL
358 * \param[out] pleftconf [optional] ; may be NULL
359 * \param[in] mincount min number of up + down; use 0 for default
360 * \param[in] debug 1 for debug output; 0 otherwise
361 * \return 0 if OK, 1 on error
362 *
363 * <pre>
364 * Notes:
365 * (1) See "Measuring document image skew and orientation"
366 * Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari
367 * IS&T/SPIE EI'95, Conference 2422: Document Recognition II
368 * pp 302-316, Feb 6-7, 1995, San Jose, CA
369 * (2) upconf is the normalized difference between up ascenders
370 * and down ascenders. The image is analyzed without rotation
371 * for being rightside-up or upside-down. Set &upconf to null
372 * to skip this operation.
373 * (3) leftconf is the normalized difference between up ascenders
374 * and down ascenders in the image after it has been
375 * rotated 90 degrees clockwise. With that rotation, ascenders
376 * projecting to the left in the source image will project up
377 * in the rotated image. We compute this by rotating 90 degrees
378 * clockwise and testing for up and down ascenders. Set
379 * &leftconf to null to skip this operation.
380 * (4) Note that upconf and leftconf are not linear measures of
381 * confidence, e.g., in a range between 0 and 100. They
382 * measure how far you are out on the tail of a (presumably)
383 * normal distribution. For example, a confidence of 10 means
384 * that it is nearly certain that the difference did not
385 * happen at random. However, these values must be interpreted
386 * cautiously, taking into consideration the estimated prior
387 * for a particular orientation or mirror flip. The up-down
388 * signal is very strong if applied to text with ascenders
389 * up and down, and relatively weak for text at 90 degrees,
390 * but even at 90 degrees, the difference can look significant.
391 * For example, suppose the ascenders are oriented horizontally,
392 * but the test is done vertically. Then upconf can
393 * be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be
394 * upside-down. However, if instead the test were done
395 * horizontally, leftconf will be very much larger
396 * (in absolute value), giving the correct orientation.
397 * (5) If you compute both upconf and leftconf, and there is
398 * sufficient signal, the following table determines the
399 * cw angle necessary to rotate pixs so that the text is
400 * rightside-up:
401 * 0 deg : upconf >> 1, abs(upconf) >> abs(leftconf)
402 * 90 deg : leftconf >> 1, abs(leftconf) >> abs(upconf)
403 * 180 deg : upconf << -1, abs(upconf) >> abs(leftconf)
404 * 270 deg : leftconf << -1, abs(leftconf) >> abs(upconf)
405 * (6) One should probably not interpret the direction unless
406 * there are a sufficient number of counts for both orientations,
407 * in which case neither upconf nor leftconf will be 0.0.
408 * (7) Use on text images with a resolution between 150 and 300 ppi.
409 * (8) This algorithm will fail on some images, such as tables,
410 * where most of the characters are numbers and appear as
411 * uppercase, but there are some repeated words that give a
412 * biased signal. It may be advisable to run a table detector
413 * first (e.g., pixDecideIfTable()), and not run the orientation
414 * detector if it is a table.
415 * (9) Uses rasterop implementation of HMT.
416 * </pre>
417 */
418 l_ok
419 pixOrientDetect(PIX *pixs,
420 l_float32 *pupconf,
421 l_float32 *pleftconf,
422 l_int32 mincount,
423 l_int32 debug)
424 {
425 PIX *pix1;
426
427 if (!pixs || pixGetDepth(pixs) != 1)
428 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
429 if (!pupconf && !pleftconf)
430 return ERROR_INT("nothing to do", __func__, 1);
431 if (mincount == 0)
432 mincount = DefaultMinUpDownCount;
433
434 if (pupconf)
435 pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
436 if (pleftconf) {
437 pix1 = pixRotate90(pixs, 1);
438 pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
439 pixDestroy(&pix1);
440 }
441
442 return 0;
443 }
444
445
446 /*!
447 * \brief makeOrientDecision()
448 *
449 * \param[in] upconf nonzero
450 * \param[in] leftconf nonzero
451 * \param[in] minupconf minimum value for which a decision can be made
452 * \param[in] minratio minimum conf ratio required for a decision
453 * \param[out] porient text orientation enum {0,1,2,3,4}
454 * \param[in] debug 1 for debug output; 0 otherwise
455 * \return 0 if OK, 1 on error
456 *
457 * <pre>
458 * Notes:
459 * (1) This can be run after pixOrientDetect()
460 * (2) Both upconf and leftconf must be nonzero; otherwise the
461 * orientation cannot be determined.
462 * (3) The abs values of the input confidences are compared to
463 * minupconf.
464 * (4) The abs value of the largest of (upconf/leftconf) and
465 * (leftconf/upconf) is compared with minratio.
466 * (5) Input 0.0 for the default values for minupconf and minratio.
467 * (6) The return value of orient is interpreted thus:
468 * L_TEXT_ORIENT_UNKNOWN: not enough evidence to determine
469 * L_TEXT_ORIENT_UP: text rightside-up
470 * L_TEXT_ORIENT_LEFT: landscape, text up facing left
471 * L_TEXT_ORIENT_DOWN: text upside-down
472 * L_TEXT_ORIENT_RIGHT: landscape, text up facing right
473 * </pre>
474 */
475 l_ok
476 makeOrientDecision(l_float32 upconf,
477 l_float32 leftconf,
478 l_float32 minupconf,
479 l_float32 minratio,
480 l_int32 *porient,
481 l_int32 debug)
482 {
483 l_float32 absupconf, absleftconf;
484
485 if (!porient)
486 return ERROR_INT("&orient not defined", __func__, 1);
487 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
488 if (upconf == 0.0 || leftconf == 0.0) {
489 L_INFO("not enough confidence to get orientation\n", __func__);
490 return 0;
491 }
492
493 if (minupconf == 0.0)
494 minupconf = DefaultMinUpDownConf;
495 if (minratio == 0.0)
496 minratio = DefaultMinUpDownRatio;
497 absupconf = L_ABS(upconf);
498 absleftconf = L_ABS(leftconf);
499
500 /* Here are the four possible orientation decisions, based
501 * on satisfaction of two threshold constraints. */
502 if (upconf > minupconf && absupconf > minratio * absleftconf)
503 *porient = L_TEXT_ORIENT_UP;
504 else if (leftconf > minupconf && absleftconf > minratio * absupconf)
505 *porient = L_TEXT_ORIENT_LEFT;
506 else if (upconf < -minupconf && absupconf > minratio * absleftconf)
507 *porient = L_TEXT_ORIENT_DOWN;
508 else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
509 *porient = L_TEXT_ORIENT_RIGHT;
510
511 if (debug) {
512 lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
513 if (*porient == L_TEXT_ORIENT_UNKNOWN)
514 lept_stderr("Confidence is low; no determination is made\n");
515 else if (*porient == L_TEXT_ORIENT_UP)
516 lept_stderr("Text is rightside-up\n");
517 else if (*porient == L_TEXT_ORIENT_LEFT)
518 lept_stderr("Text is rotated 90 deg ccw\n");
519 else if (*porient == L_TEXT_ORIENT_DOWN)
520 lept_stderr("Text is upside-down\n");
521 else /* *porient == L_TEXT_ORIENT_RIGHT */
522 lept_stderr("Text is rotated 90 deg cw\n");
523 }
524
525 return 0;
526 }
527
528
529 /*!
530 * \brief pixUpDownDetect()
531 *
532 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi
533 * \param[out] pconf confidence that text is rightside-up
534 * \param[in] mincount min number of up + down; use 0 for default
535 * \param[in] npixels number of pixels removed from each side of word box
536 * \param[in] debug 1 for debug output; 0 otherwise
537 * \return 0 if OK, 1 on error
538 *
539 * <pre>
540 * Notes:
541 * (1) See pixOrientDetect() for other details.
542 * (2) The detected confidence %conf is the normalized difference
543 * between the number of detected up and down ascenders,
544 * assuming that the text is either rightside-up or upside-down
545 * and not rotated at a 90 degree angle.
546 * (3) The typical mode of operation is %npixels == 0.
547 * If %npixels > 0, this removes HMT matches at the
548 * beginning and ending of "words." This is useful for
549 * pages that may have mostly digits, because if npixels == 0,
550 * leading "1" and "3" digits can register as having
551 * ascenders or descenders, and "7" digits can match descenders.
552 * Consequently, a page image of only digits may register
553 * as being upside-down.
554 * (4) We want to count the number of instances found using the HMT.
555 * An expensive way to do this would be to count the
556 * number of connected components. A cheap way is to do a rank
557 * reduction cascade that reduces each component to a single
558 * pixel, and results (after two or three 2x reductions)
559 * in one pixel for each of the original components.
560 * After the reduction, you have a much smaller pix over
561 * which to count pixels. We do only 2 reductions, because
562 * this function is designed to work for input pix between
563 * 150 and 300 ppi, and an 8x reduction on a 150 ppi image
564 * is going too far -- components will get merged.
565 * (5) Use on text images with a resolution between 150 and 300 ppi.
566 * </pre>
567 */
568 l_ok
569 pixUpDownDetect(PIX *pixs,
570 l_float32 *pconf,
571 l_int32 mincount,
572 l_int32 npixels,
573 l_int32 debug)
574 {
575 l_int32 countup, countdown, nmax;
576 l_float32 nup, ndown;
577 PIX *pix0, *pix1, *pix2, *pix3, *pixm;
578 SEL *sel1, *sel2, *sel3, *sel4;
579
580 if (!pconf)
581 return ERROR_INT("&conf not defined", __func__, 1);
582 *pconf = 0.0;
583 if (!pixs || pixGetDepth(pixs) != 1)
584 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
585 if (mincount == 0)
586 mincount = DefaultMinUpDownCount;
587 if (npixels < 0)
588 npixels = 0;
589
590 if (debug) {
591 lept_mkdir("lept/orient");
592 }
593
594 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
595 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
596 sel3 = selCreateFromString(textsel3, 5, 6, NULL);
597 sel4 = selCreateFromString(textsel4, 5, 6, NULL);
598
599 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
600 * This closes holes in x-height characters and joins them at
601 * the x-height. There is more noise in the descender detection
602 * from this, but it works fairly well. */
603 pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
604
605 /* Optionally, make a mask of the word bounding boxes, shortening
606 * each of them by a fixed amount at each end. */
607 pixm = NULL;
608 if (npixels > 0) {
609 l_int32 i, nbox, x, y, w, h;
610 BOX *box;
611 BOXA *boxa;
612 pix1 = pixMorphSequence(pix0, "o10.1", 0);
613 boxa = pixConnComp(pix1, NULL, 8);
614 pixm = pixCreateTemplate(pix1);
615 pixDestroy(&pix1);
616 nbox = boxaGetCount(boxa);
617 for (i = 0; i < nbox; i++) {
618 box = boxaGetBox(boxa, i, L_CLONE);
619 boxGetGeometry(box, &x, &y, &w, &h);
620 if (w > 2 * npixels)
621 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
622 PIX_SET, NULL, 0, 0);
623 boxDestroy(&box);
624 }
625 boxaDestroy(&boxa);
626 }
627
628 /* Find the ascenders and optionally filter with pixm.
629 * For an explanation of the procedure used for counting the result
630 * of the HMT, see comments at the beginning of this function. */
631 pix1 = pixHMT(NULL, pix0, sel1);
632 pix2 = pixHMT(NULL, pix0, sel2);
633 pixOr(pix1, pix1, pix2);
634 if (pixm)
635 pixAnd(pix1, pix1, pixm);
636 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
637 pixCountPixels(pix3, &countup, NULL);
638 pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
639 pixDestroy(&pix1);
640 pixDestroy(&pix2);
641 pixDestroy(&pix3);
642
643 /* Find the ascenders and optionally filter with pixm. */
644 pix1 = pixHMT(NULL, pix0, sel3);
645 pix2 = pixHMT(NULL, pix0, sel4);
646 pixOr(pix1, pix1, pix2);
647 if (pixm)
648 pixAnd(pix1, pix1, pixm);
649 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
650 pixCountPixels(pix3, &countdown, NULL);
651 pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
652 pixDestroy(&pix1);
653 pixDestroy(&pix2);
654 pixDestroy(&pix3);
655
656 /* Evaluate statistically, generating a confidence that is
657 * related to the probability with a gaussian distribution. */
658 nup = (l_float32)(countup);
659 ndown = (l_float32)(countdown);
660 nmax = L_MAX(countup, countdown);
661 if (nmax > mincount)
662 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
663
664 if (debug) {
665 if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
666 lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
667 nup, ndown, *pconf);
668 if (*pconf > DefaultMinUpDownConf)
669 lept_stderr("Text is rightside-up\n");
670 if (*pconf < -DefaultMinUpDownConf)
671 lept_stderr("Text is upside-down\n");
672 }
673
674 pixDestroy(&pix0);
675 pixDestroy(&pixm);
676 selDestroy(&sel1);
677 selDestroy(&sel2);
678 selDestroy(&sel3);
679 selDestroy(&sel4);
680 return 0;
681 }
682
683
684 /*----------------------------------------------------------------*
685 * Left-right mirror detection *
686 *----------------------------------------------------------------*/
687 /*!
688 * \brief pixMirrorDetect()
689 *
690 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi
691 * \param[out] pconf confidence that text is not LR mirror reversed
692 * \param[in] mincount min number of left + right; use 0 for default
693 * \param[in] debug 1 for debug output; 0 otherwise
694 * \return 0 if OK, 1 on error
695 *
696 * <pre>
697 * Notes:
698 * (1) For this test, it is necessary that the text is horizontally
699 * oriented, with ascenders going up.
700 * (2) conf is the normalized difference between the number of
701 * right and left facing characters with ascenders.
702 * Left-facing are {d}; right-facing are {b, h, k}.
703 * At least that was the expectation. In practice, we can
704 * really just say that it is the normalized difference in
705 * hits using two specific hit-miss filters, textsel1 and textsel2,
706 * after the image has been suitably pre-filtered so that
707 * these filters are effective. See (4) for what's really happening.
708 * (3) A large positive conf value indicates normal text, whereas
709 * a large negative conf value means the page is mirror reversed.
710 * (4) The implementation is a bit tricky. The general idea is
711 * to fill the x-height part of characters, but not the space
712 * between them, before doing the HMT. This is done by
713 * finding pixels added using two different operations -- a
714 * horizontal close and a vertical dilation -- and adding
715 * the intersection of these sets to the original. It turns
716 * out that the original intuition about the signal was largely
717 * in error: much of the signal for right-facing characters
718 * comes from the lower part of common x-height characters, like
719 * the e and c, that remain open after these operations.
720 * So it's important that the operations to close the x-height
721 * parts of the characters are purposely weakened sufficiently
722 * to allow these characters to remain open. The wonders
723 * of morphology!
724 * (5) Use on text images with a resolution between 150 and 300 ppi.
725 * </pre>
726 */
727 l_ok
728 pixMirrorDetect(PIX *pixs,
729 l_float32 *pconf,
730 l_int32 mincount,
731 l_int32 debug)
732 {
733 l_int32 count1, count2, nmax;
734 l_float32 nleft, nright;
735 PIX *pix0, *pix1, *pix2, *pix3;
736 SEL *sel1, *sel2;
737
738 if (!pconf)
739 return ERROR_INT("&conf not defined", __func__, 1);
740 *pconf = 0.0;
741 if (!pixs || pixGetDepth(pixs) != 1)
742 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
743 if (mincount == 0)
744 mincount = DefaultMinMirrorFlipCount;
745
746 if (debug) {
747 lept_mkdir("lept/orient");
748 }
749
750 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
751 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
752
753 /* Fill x-height characters but not space between them, sort of. */
754 pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
755 pixXor(pix3, pix3, pixs);
756 pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
757 pixXor(pix0, pix0, pixs);
758 pixAnd(pix0, pix0, pix3);
759 pixOr(pix0, pix0, pixs);
760 pixDestroy(&pix3);
761
762 /* Filter the right-facing characters. */
763 pix1 = pixHMT(NULL, pix0, sel1);
764 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
765 pixCountPixels(pix3, &count1, NULL);
766 pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
767 pixDestroy(&pix1);
768 pixDestroy(&pix3);
769
770 /* Filter the left-facing characters. */
771 pix2 = pixHMT(NULL, pix0, sel2);
772 pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
773 pixCountPixels(pix3, &count2, NULL);
774 pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
775 pixDestroy(&pix2);
776 pixDestroy(&pix3);
777
778 nright = (l_float32)count1;
779 nleft = (l_float32)count2;
780 nmax = L_MAX(count1, count2);
781 pixDestroy(&pix0);
782 selDestroy(&sel1);
783 selDestroy(&sel2);
784
785 if (nmax > mincount)
786 *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
787
788 if (debug) {
789 lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
790 if (*pconf > DefaultMinMirrorFlipConf)
791 lept_stderr("Text is not mirror reversed\n");
792 if (*pconf < -DefaultMinMirrorFlipConf)
793 lept_stderr("Text is mirror reversed\n");
794 }
795
796 return 0;
797 }
798
799
800 /*----------------------------------------------------------------*
801 * Static debug helper *
802 *----------------------------------------------------------------*/
803 /*
804 * \brief pixDebugFlipDetect()
805 *
806 * \param[in] filename for output debug file
807 * \param[in] pixs input to pix*Detect
808 * \param[in] pixhm hit-miss result from ascenders or descenders
809 * \param[in] enable 1 to enable this function; 0 to disable
810 * \return void
811 */
812 static void
813 pixDebugFlipDetect(const char *filename,
814 PIX *pixs,
815 PIX *pixhm,
816 l_int32 enable)
817 {
818 PIX *pixt, *pixthm;
819
820 if (!enable) return;
821
822 /* Display with red dot at counted locations */
823 pixt = pixConvert1To4Cmap(pixs);
824 pixthm = pixMorphSequence(pixhm, "d5.5", 0);
825 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
826
827 pixWriteDebug(filename, pixt, IFF_PNG);
828 pixDestroy(&pixthm);
829 pixDestroy(&pixt);
830 return;
831 }