Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/flipdetect.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file flipdetect.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * High-level interface for detection and correction | |
| 32 * PIX *pixOrientCorrect() | |
| 33 * | |
| 34 * Page orientation detection (pure rotation by 90 degree increments): | |
| 35 * l_int32 pixOrientDetect() | |
| 36 * l_int32 makeOrientDecision() | |
| 37 * l_int32 pixUpDownDetect() | |
| 38 * | |
| 39 * Page mirror detection (flip 180 degrees about line in plane of image): | |
| 40 * l_int32 pixMirrorDetect() | |
| 41 * | |
| 42 * Static debug helper | |
| 43 * static void pixDebugFlipDetect() | |
| 44 * | |
| 45 * =================================================================== | |
| 46 * | |
| 47 * Page transformation detection: | |
| 48 * | |
| 49 * Once a page is deskewed, there are 8 possible states that it | |
| 50 * can be in, shown symbolically below. Suppose state 0 is correct. | |
| 51 * | |
| 52 * 0: correct 1 2 3 | |
| 53 * +------+ +------+ +------+ +------+ | |
| 54 * | **** | | * | | **** | | * | | |
| 55 * | * | | * | | * | | * | | |
| 56 * | * | | **** | | * | | **** | | |
| 57 * +------+ +------+ +------+ +------+ | |
| 58 * | |
| 59 * 4 5 6 7 | |
| 60 * +-----+ +-----+ +-----+ +-----+ | |
| 61 * | *** | | * | | *** | | * | | |
| 62 * | * | | * | | * | | * | | |
| 63 * | * | | * | | * | | * | | |
| 64 * | * | | *** | | * | | *** | | |
| 65 * +-----+ +-----+ +-----+ +-----+ | |
| 66 * | |
| 67 * Each of the other seven can be derived from state 0 by applying some | |
| 68 * combination of a 90 degree clockwise rotation, a flip about | |
| 69 * a horizontal line, and a flip about a vertical line, | |
| 70 * all abbreviated as: | |
| 71 * R = Rotation (about a line perpendicular to the image) | |
| 72 * H = Horizontal flip (about a vertical line in the plane of the image) | |
| 73 * V = Vertical flip (about a horizontal line in the plane of the image) | |
| 74 * | |
| 75 * We get these transformations: | |
| 76 * RHV | |
| 77 * 000 -> 0 | |
| 78 * 001 -> 1 | |
| 79 * 010 -> 2 | |
| 80 * 011 -> 3 | |
| 81 * 100 -> 4 | |
| 82 * 101 -> 5 | |
| 83 * 110 -> 6 | |
| 84 * 111 -> 7 | |
| 85 * | |
| 86 * Note that in four of these, the sum of H and V is 1 (odd). | |
| 87 * For these four, we have a change in parity (handedness) of | |
| 88 * the image, and the transformation cannot be performed by | |
| 89 * rotation about a vertical line out of the page. Under | |
| 90 * rotation R, the set of 8 transformations decomposes into | |
| 91 * two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently. | |
| 92 * | |
| 93 * pixOrientDetect() tests for a pure rotation (0, 90, 180, 270 degrees). | |
| 94 * It doesn't change parity. | |
| 95 * | |
| 96 * pixMirrorDetect() tests for a horizontal flip about the vertical axis. | |
| 97 * It changes parity. | |
| 98 * | |
| 99 * The landscape/portrait rotation can be detected in two ways: | |
| 100 * | |
| 101 * (1) Compute the deskew confidence for an image segment, | |
| 102 * both as is and rotated 90 degrees (see skew.c). | |
| 103 * | |
| 104 * (2) Compute the ascender/descender signal for the image, | |
| 105 * both as is and rotated 90 degrees (implemented here). | |
| 106 * | |
| 107 * The ascender/descender signal is useful for determining text | |
| 108 * orientation in Roman alphabets because letters with | |
| 109 * straight-line ascenders (b, d, h, k, l, 't') outnumber | |
| 110 * those with descenders ('g', p, q). The letters 't' and 'g' | |
| 111 * will respond variably to the filter, depending on the type face. | |
| 112 * | |
| 113 * What about the mirror image situations? These aren't common | |
| 114 * unless you're dealing with film, for example. | |
| 115 * But you can reliably test if the image has undergone a | |
| 116 * parity-changing flip once about some axis in the plane | |
| 117 * of the image, using pixMirrorDetect*(). This works ostensibly by | |
| 118 * counting the number of characters with ascenders that | |
| 119 * stick out to the left and right of the ascender. Characters | |
| 120 * that are not mirror flipped are more likely to extend to the | |
| 121 * right (b, h, k) than to the left (d). Of course, that is for | |
| 122 * text that is rightside-up. So before you apply the mirror | |
| 123 * test, it is necessary to ensure that the text has the ascenders | |
| 124 * going up, and not down or to the left or right. But here's | |
| 125 * what *really* happens. It turns out that the pre-filtering before | |
| 126 * the hit-miss transform (HMT) is crucial, and surprisingly, when | |
| 127 * the pre-filtering is chosen to generate a large signal, the majority | |
| 128 * of the signal comes from open regions of common lower-case | |
| 129 * letters such as 'e', 'c' and 'f'. | |
| 130 * | |
| 131 * The set of operations you actually use depends on your prior knowledge: | |
| 132 * | |
| 133 * (1) If the page is known to be either rightside-up or upside-down, use | |
| 134 * either pixOrientDetect() with pleftconf = NULL, or | |
| 135 * pixUpDownDetect(). | |
| 136 * | |
| 137 * (2) If any of the four orientations are possible, use pixOrientDetect(). | |
| 138 * | |
| 139 * (3) If the text is horizontal and rightside-up, the only remaining | |
| 140 * degree of freedom is a left-right mirror flip: use pixMirrorDetect(). | |
| 141 * | |
| 142 * (4) If you have a relatively large amount of numbers on the page, | |
| 143 * use the slower pixUpDownDetect(). | |
| 144 * | |
| 145 * We summarize the full orientation and mirror flip detection process: | |
| 146 * | |
| 147 * (1) First determine which of the four 90 degree rotations | |
| 148 * causes the text to be rightside-up. This can be done | |
| 149 * with either skew confidence or the pixOrientDetect() | |
| 150 * signals. For the latter, see the table for pixOrientDetect(). | |
| 151 * | |
| 152 * (2) Then, with ascenders pointing up, apply pixMirrorDetect(). | |
| 153 * In the normal situation the confidence will be | |
| 154 * large and positive. However, if mirror flipped, the | |
| 155 * confidence will be large and negative. | |
| 156 * | |
| 157 * A high-level interface, pixOrientCorrect() combines the detection | |
| 158 * of the orientation with the rotation decision and the rotation itself. | |
| 159 * | |
| 160 * The structuring elements used for text orientation detection require text | |
| 161 * with ascenders and descenders. They have been designed to work best | |
| 162 * with normal sized text (about 10 pt font), scanned with a resolution | |
| 163 * between 150 and 300 ppi. | |
| 164 * | |
| 165 * For pedagogical reasons, we have included a dwa implementation of | |
| 166 * this functionality, in flipdetectdwa.c.notused. It shows by example | |
| 167 * how to make a dwa implementation of an application that uses binary | |
| 168 * morphological operations. It is faster than the rasterop implementation, | |
| 169 * but not by a large amount. | |
| 170 * | |
| 171 * The generation of flipdetectdwa.c.notused was achieved as follows: | |
| 172 * (1) The program flipselgen.c.notused generates the DWA code, in two C files | |
| 173 * (2) The low-level DWA code in those two files was put into a single | |
| 174 * file, fliphmtgen.c.notused, for clarity. We didn't want the two | |
| 175 * files (fmorphgen.3.c and fmorphgenlow.3.c) sitting around and | |
| 176 * possibly causing confusion. | |
| 177 * (3) This low-level code was directly incorporated into flipdetectdwa.c, | |
| 178 * where it substitutes for the basic rasterop code in flipdetect.c. | |
| 179 * | |
| 180 * Finally, use can be made of programs such as exiftool and convert to | |
| 181 * read exif camera orientation data in jpeg files and conditionally rotate. | |
| 182 * Here is an example shell script, made by Dan9er: | |
| 183 * ================================================================== | |
| 184 * #!/bin/sh | |
| 185 * # orientByExif.sh | |
| 186 * # Dependencies: exiftool (exiflib) and convert (ImageMagick) | |
| 187 * # Note: if there is no exif orientation data in the jpeg file, | |
| 188 * # this simply copies the input file. | |
| 189 * # | |
| 190 * if [[ -z $(command -v exiftool) || -z $(command -v convert) ]]; then | |
| 191 * echo "You need to install dependencies; e.g.:" | |
| 192 * echo " sudo apt install libimage-exiftool-perl" | |
| 193 * echo " sudo apt install imagemagick" | |
| 194 * exit 1 | |
| 195 * fi | |
| 196 * if [[ $# != 2 ]]; then | |
| 197 * echo "Syntax: orientByExif infile outfile" | |
| 198 * exit 2 | |
| 199 * fi | |
| 200 * if [[ ${1: -4} != ".jpg" ]]; then | |
| 201 * echo "File is not a jpeg" | |
| 202 * exit 3 | |
| 203 * fi | |
| 204 * if [[ $(exiftool -s3 -n -Orientation "$1") = 1 ]]; then | |
| 205 * echo "Image is already upright" | |
| 206 * exit 0 | |
| 207 * fi | |
| 208 * convert "$1" -auto-orient "$2" | |
| 209 * echo "Done" | |
| 210 * exit 0 | |
| 211 * ================================================================== | |
| 212 * </pre> | |
| 213 */ | |
| 214 | |
| 215 #ifdef HAVE_CONFIG_H | |
| 216 #include <config_auto.h> | |
| 217 #endif /* HAVE_CONFIG_H */ | |
| 218 | |
| 219 #include <math.h> | |
| 220 #include "allheaders.h" | |
| 221 | |
| 222 /* Sels for pixOrientDetect() and pixMirrorDetect() */ | |
| 223 static const char *textsel1 = "x oo " | |
| 224 "x oOo " | |
| 225 "x o " | |
| 226 "x " | |
| 227 "xxxxxx"; | |
| 228 | |
| 229 static const char *textsel2 = " oo x" | |
| 230 " oOo x" | |
| 231 " o x" | |
| 232 " x" | |
| 233 "xxxxxx"; | |
| 234 | |
| 235 static const char *textsel3 = "xxxxxx" | |
| 236 "x " | |
| 237 "x o " | |
| 238 "x oOo " | |
| 239 "x oo "; | |
| 240 | |
| 241 static const char *textsel4 = "xxxxxx" | |
| 242 " x" | |
| 243 " o x" | |
| 244 " oOo x" | |
| 245 " oo x"; | |
| 246 | |
| 247 /* Parameters for determining orientation */ | |
| 248 static const l_int32 DefaultMinUpDownCount = 70; | |
| 249 static const l_float32 DefaultMinUpDownConf = 8.0; | |
| 250 static const l_float32 DefaultMinUpDownRatio = 2.5; | |
| 251 | |
| 252 /* Parameters for determining mirror flip */ | |
| 253 static const l_int32 DefaultMinMirrorFlipCount = 100; | |
| 254 static const l_float32 DefaultMinMirrorFlipConf = 5.0; | |
| 255 | |
| 256 /* Static debug function */ | |
| 257 static void pixDebugFlipDetect(const char *filename, PIX *pixs, | |
| 258 PIX *pixhm, l_int32 enable); | |
| 259 | |
| 260 | |
| 261 /*----------------------------------------------------------------* | |
| 262 * High-level interface for detection and correction * | |
| 263 *----------------------------------------------------------------*/ | |
| 264 /*! | |
| 265 * \brief pixOrientCorrect() | |
| 266 * | |
| 267 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi | |
| 268 * \param[in] minupconf minimum value for which a decision can be made | |
| 269 * \param[in] minratio minimum conf ratio required for a decision | |
| 270 * \param[out] pupconf [optional] ; use NULL to skip | |
| 271 * \param[out] pleftconf [optional] ; use NULL to skip | |
| 272 * \param[out] protation [optional] ; use NULL to skip | |
| 273 * \param[in] debug 1 for debug output; 0 otherwise | |
| 274 * \return pixd may be rotated by 90, 180 or 270; null on error | |
| 275 * | |
| 276 * <pre> | |
| 277 * Notes: | |
| 278 * (1) Simple top-level function to detect if Roman text is in | |
| 279 * reading orientation, and to rotate the image accordingly if not. | |
| 280 * (2) Returns a copy if no rotation is needed. | |
| 281 * (3) See notes for pixOrientDetect() and pixOrientDecision(). | |
| 282 * Use 0.0 for default values for %minupconf and %minratio | |
| 283 * (4) Optional output of intermediate confidence results and | |
| 284 * the rotation performed on pixs. | |
| 285 * (5) Use on text images with a resolution between 150 and 300 ppi. | |
| 286 * </pre> | |
| 287 */ | |
| 288 PIX * | |
| 289 pixOrientCorrect(PIX *pixs, | |
| 290 l_float32 minupconf, | |
| 291 l_float32 minratio, | |
| 292 l_float32 *pupconf, | |
| 293 l_float32 *pleftconf, | |
| 294 l_int32 *protation, | |
| 295 l_int32 debug) | |
| 296 { | |
| 297 l_int32 orient; | |
| 298 l_float32 upconf, leftconf; | |
| 299 PIX *pix1; | |
| 300 | |
| 301 if (!pixs || pixGetDepth(pixs) != 1) | |
| 302 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 303 | |
| 304 /* Get confidences for orientation */ | |
| 305 pixUpDownDetect(pixs, &upconf, 0, 0, debug); | |
| 306 pix1 = pixRotate90(pixs, 1); | |
| 307 pixUpDownDetect(pix1, &leftconf, 0, 0, debug); | |
| 308 pixDestroy(&pix1); | |
| 309 if (pupconf) *pupconf = upconf; | |
| 310 if (pleftconf) *pleftconf = leftconf; | |
| 311 | |
| 312 /* Decide what to do */ | |
| 313 makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug); | |
| 314 | |
| 315 /* Do it */ | |
| 316 switch (orient) | |
| 317 { | |
| 318 case L_TEXT_ORIENT_UNKNOWN: | |
| 319 L_INFO("text orientation not determined; no rotation\n", __func__); | |
| 320 if (protation) *protation = 0; | |
| 321 return pixCopy(NULL, pixs); | |
| 322 break; | |
| 323 case L_TEXT_ORIENT_UP: | |
| 324 L_INFO("text is oriented up; no rotation\n", __func__); | |
| 325 if (protation) *protation = 0; | |
| 326 return pixCopy(NULL, pixs); | |
| 327 break; | |
| 328 case L_TEXT_ORIENT_LEFT: | |
| 329 L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__); | |
| 330 if (protation) *protation = 90; | |
| 331 return pixRotateOrth(pixs, 1); | |
| 332 break; | |
| 333 case L_TEXT_ORIENT_DOWN: | |
| 334 L_INFO("text oriented down; 180 cw rotation\n", __func__); | |
| 335 if (protation) *protation = 180; | |
| 336 return pixRotateOrth(pixs, 2); | |
| 337 break; | |
| 338 case L_TEXT_ORIENT_RIGHT: | |
| 339 L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__); | |
| 340 if (protation) *protation = 270; | |
| 341 return pixRotateOrth(pixs, 3); | |
| 342 break; | |
| 343 default: | |
| 344 L_ERROR("invalid orient flag!\n", __func__); | |
| 345 return pixCopy(NULL, pixs); | |
| 346 } | |
| 347 } | |
| 348 | |
| 349 | |
| 350 /*----------------------------------------------------------------* | |
| 351 * Orientation detection (four 90 degree angles) * | |
| 352 *----------------------------------------------------------------*/ | |
| 353 /*! | |
| 354 * \brief pixOrientDetect() | |
| 355 * | |
| 356 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi | |
| 357 * \param[out] pupconf [optional] ; may be NULL | |
| 358 * \param[out] pleftconf [optional] ; may be NULL | |
| 359 * \param[in] mincount min number of up + down; use 0 for default | |
| 360 * \param[in] debug 1 for debug output; 0 otherwise | |
| 361 * \return 0 if OK, 1 on error | |
| 362 * | |
| 363 * <pre> | |
| 364 * Notes: | |
| 365 * (1) See "Measuring document image skew and orientation" | |
| 366 * Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari | |
| 367 * IS&T/SPIE EI'95, Conference 2422: Document Recognition II | |
| 368 * pp 302-316, Feb 6-7, 1995, San Jose, CA | |
| 369 * (2) upconf is the normalized difference between up ascenders | |
| 370 * and down ascenders. The image is analyzed without rotation | |
| 371 * for being rightside-up or upside-down. Set &upconf to null | |
| 372 * to skip this operation. | |
| 373 * (3) leftconf is the normalized difference between up ascenders | |
| 374 * and down ascenders in the image after it has been | |
| 375 * rotated 90 degrees clockwise. With that rotation, ascenders | |
| 376 * projecting to the left in the source image will project up | |
| 377 * in the rotated image. We compute this by rotating 90 degrees | |
| 378 * clockwise and testing for up and down ascenders. Set | |
| 379 * &leftconf to null to skip this operation. | |
| 380 * (4) Note that upconf and leftconf are not linear measures of | |
| 381 * confidence, e.g., in a range between 0 and 100. They | |
| 382 * measure how far you are out on the tail of a (presumably) | |
| 383 * normal distribution. For example, a confidence of 10 means | |
| 384 * that it is nearly certain that the difference did not | |
| 385 * happen at random. However, these values must be interpreted | |
| 386 * cautiously, taking into consideration the estimated prior | |
| 387 * for a particular orientation or mirror flip. The up-down | |
| 388 * signal is very strong if applied to text with ascenders | |
| 389 * up and down, and relatively weak for text at 90 degrees, | |
| 390 * but even at 90 degrees, the difference can look significant. | |
| 391 * For example, suppose the ascenders are oriented horizontally, | |
| 392 * but the test is done vertically. Then upconf can | |
| 393 * be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be | |
| 394 * upside-down. However, if instead the test were done | |
| 395 * horizontally, leftconf will be very much larger | |
| 396 * (in absolute value), giving the correct orientation. | |
| 397 * (5) If you compute both upconf and leftconf, and there is | |
| 398 * sufficient signal, the following table determines the | |
| 399 * cw angle necessary to rotate pixs so that the text is | |
| 400 * rightside-up: | |
| 401 * 0 deg : upconf >> 1, abs(upconf) >> abs(leftconf) | |
| 402 * 90 deg : leftconf >> 1, abs(leftconf) >> abs(upconf) | |
| 403 * 180 deg : upconf << -1, abs(upconf) >> abs(leftconf) | |
| 404 * 270 deg : leftconf << -1, abs(leftconf) >> abs(upconf) | |
| 405 * (6) One should probably not interpret the direction unless | |
| 406 * there are a sufficient number of counts for both orientations, | |
| 407 * in which case neither upconf nor leftconf will be 0.0. | |
| 408 * (7) Use on text images with a resolution between 150 and 300 ppi. | |
| 409 * (8) This algorithm will fail on some images, such as tables, | |
| 410 * where most of the characters are numbers and appear as | |
| 411 * uppercase, but there are some repeated words that give a | |
| 412 * biased signal. It may be advisable to run a table detector | |
| 413 * first (e.g., pixDecideIfTable()), and not run the orientation | |
| 414 * detector if it is a table. | |
| 415 * (9) Uses rasterop implementation of HMT. | |
| 416 * </pre> | |
| 417 */ | |
| 418 l_ok | |
| 419 pixOrientDetect(PIX *pixs, | |
| 420 l_float32 *pupconf, | |
| 421 l_float32 *pleftconf, | |
| 422 l_int32 mincount, | |
| 423 l_int32 debug) | |
| 424 { | |
| 425 PIX *pix1; | |
| 426 | |
| 427 if (!pixs || pixGetDepth(pixs) != 1) | |
| 428 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 429 if (!pupconf && !pleftconf) | |
| 430 return ERROR_INT("nothing to do", __func__, 1); | |
| 431 if (mincount == 0) | |
| 432 mincount = DefaultMinUpDownCount; | |
| 433 | |
| 434 if (pupconf) | |
| 435 pixUpDownDetect(pixs, pupconf, mincount, 0, debug); | |
| 436 if (pleftconf) { | |
| 437 pix1 = pixRotate90(pixs, 1); | |
| 438 pixUpDownDetect(pix1, pleftconf, mincount, 0, debug); | |
| 439 pixDestroy(&pix1); | |
| 440 } | |
| 441 | |
| 442 return 0; | |
| 443 } | |
| 444 | |
| 445 | |
| 446 /*! | |
| 447 * \brief makeOrientDecision() | |
| 448 * | |
| 449 * \param[in] upconf nonzero | |
| 450 * \param[in] leftconf nonzero | |
| 451 * \param[in] minupconf minimum value for which a decision can be made | |
| 452 * \param[in] minratio minimum conf ratio required for a decision | |
| 453 * \param[out] porient text orientation enum {0,1,2,3,4} | |
| 454 * \param[in] debug 1 for debug output; 0 otherwise | |
| 455 * \return 0 if OK, 1 on error | |
| 456 * | |
| 457 * <pre> | |
| 458 * Notes: | |
| 459 * (1) This can be run after pixOrientDetect() | |
| 460 * (2) Both upconf and leftconf must be nonzero; otherwise the | |
| 461 * orientation cannot be determined. | |
| 462 * (3) The abs values of the input confidences are compared to | |
| 463 * minupconf. | |
| 464 * (4) The abs value of the largest of (upconf/leftconf) and | |
| 465 * (leftconf/upconf) is compared with minratio. | |
| 466 * (5) Input 0.0 for the default values for minupconf and minratio. | |
| 467 * (6) The return value of orient is interpreted thus: | |
| 468 * L_TEXT_ORIENT_UNKNOWN: not enough evidence to determine | |
| 469 * L_TEXT_ORIENT_UP: text rightside-up | |
| 470 * L_TEXT_ORIENT_LEFT: landscape, text up facing left | |
| 471 * L_TEXT_ORIENT_DOWN: text upside-down | |
| 472 * L_TEXT_ORIENT_RIGHT: landscape, text up facing right | |
| 473 * </pre> | |
| 474 */ | |
| 475 l_ok | |
| 476 makeOrientDecision(l_float32 upconf, | |
| 477 l_float32 leftconf, | |
| 478 l_float32 minupconf, | |
| 479 l_float32 minratio, | |
| 480 l_int32 *porient, | |
| 481 l_int32 debug) | |
| 482 { | |
| 483 l_float32 absupconf, absleftconf; | |
| 484 | |
| 485 if (!porient) | |
| 486 return ERROR_INT("&orient not defined", __func__, 1); | |
| 487 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */ | |
| 488 if (upconf == 0.0 || leftconf == 0.0) { | |
| 489 L_INFO("not enough confidence to get orientation\n", __func__); | |
| 490 return 0; | |
| 491 } | |
| 492 | |
| 493 if (minupconf == 0.0) | |
| 494 minupconf = DefaultMinUpDownConf; | |
| 495 if (minratio == 0.0) | |
| 496 minratio = DefaultMinUpDownRatio; | |
| 497 absupconf = L_ABS(upconf); | |
| 498 absleftconf = L_ABS(leftconf); | |
| 499 | |
| 500 /* Here are the four possible orientation decisions, based | |
| 501 * on satisfaction of two threshold constraints. */ | |
| 502 if (upconf > minupconf && absupconf > minratio * absleftconf) | |
| 503 *porient = L_TEXT_ORIENT_UP; | |
| 504 else if (leftconf > minupconf && absleftconf > minratio * absupconf) | |
| 505 *porient = L_TEXT_ORIENT_LEFT; | |
| 506 else if (upconf < -minupconf && absupconf > minratio * absleftconf) | |
| 507 *porient = L_TEXT_ORIENT_DOWN; | |
| 508 else if (leftconf < -minupconf && absleftconf > minratio * absupconf) | |
| 509 *porient = L_TEXT_ORIENT_RIGHT; | |
| 510 | |
| 511 if (debug) { | |
| 512 lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf); | |
| 513 if (*porient == L_TEXT_ORIENT_UNKNOWN) | |
| 514 lept_stderr("Confidence is low; no determination is made\n"); | |
| 515 else if (*porient == L_TEXT_ORIENT_UP) | |
| 516 lept_stderr("Text is rightside-up\n"); | |
| 517 else if (*porient == L_TEXT_ORIENT_LEFT) | |
| 518 lept_stderr("Text is rotated 90 deg ccw\n"); | |
| 519 else if (*porient == L_TEXT_ORIENT_DOWN) | |
| 520 lept_stderr("Text is upside-down\n"); | |
| 521 else /* *porient == L_TEXT_ORIENT_RIGHT */ | |
| 522 lept_stderr("Text is rotated 90 deg cw\n"); | |
| 523 } | |
| 524 | |
| 525 return 0; | |
| 526 } | |
| 527 | |
| 528 | |
| 529 /*! | |
| 530 * \brief pixUpDownDetect() | |
| 531 * | |
| 532 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi | |
| 533 * \param[out] pconf confidence that text is rightside-up | |
| 534 * \param[in] mincount min number of up + down; use 0 for default | |
| 535 * \param[in] npixels number of pixels removed from each side of word box | |
| 536 * \param[in] debug 1 for debug output; 0 otherwise | |
| 537 * \return 0 if OK, 1 on error | |
| 538 * | |
| 539 * <pre> | |
| 540 * Notes: | |
| 541 * (1) See pixOrientDetect() for other details. | |
| 542 * (2) The detected confidence %conf is the normalized difference | |
| 543 * between the number of detected up and down ascenders, | |
| 544 * assuming that the text is either rightside-up or upside-down | |
| 545 * and not rotated at a 90 degree angle. | |
| 546 * (3) The typical mode of operation is %npixels == 0. | |
| 547 * If %npixels > 0, this removes HMT matches at the | |
| 548 * beginning and ending of "words." This is useful for | |
| 549 * pages that may have mostly digits, because if npixels == 0, | |
| 550 * leading "1" and "3" digits can register as having | |
| 551 * ascenders or descenders, and "7" digits can match descenders. | |
| 552 * Consequently, a page image of only digits may register | |
| 553 * as being upside-down. | |
| 554 * (4) We want to count the number of instances found using the HMT. | |
| 555 * An expensive way to do this would be to count the | |
| 556 * number of connected components. A cheap way is to do a rank | |
| 557 * reduction cascade that reduces each component to a single | |
| 558 * pixel, and results (after two or three 2x reductions) | |
| 559 * in one pixel for each of the original components. | |
| 560 * After the reduction, you have a much smaller pix over | |
| 561 * which to count pixels. We do only 2 reductions, because | |
| 562 * this function is designed to work for input pix between | |
| 563 * 150 and 300 ppi, and an 8x reduction on a 150 ppi image | |
| 564 * is going too far -- components will get merged. | |
| 565 * (5) Use on text images with a resolution between 150 and 300 ppi. | |
| 566 * </pre> | |
| 567 */ | |
| 568 l_ok | |
| 569 pixUpDownDetect(PIX *pixs, | |
| 570 l_float32 *pconf, | |
| 571 l_int32 mincount, | |
| 572 l_int32 npixels, | |
| 573 l_int32 debug) | |
| 574 { | |
| 575 l_int32 countup, countdown, nmax; | |
| 576 l_float32 nup, ndown; | |
| 577 PIX *pix0, *pix1, *pix2, *pix3, *pixm; | |
| 578 SEL *sel1, *sel2, *sel3, *sel4; | |
| 579 | |
| 580 if (!pconf) | |
| 581 return ERROR_INT("&conf not defined", __func__, 1); | |
| 582 *pconf = 0.0; | |
| 583 if (!pixs || pixGetDepth(pixs) != 1) | |
| 584 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 585 if (mincount == 0) | |
| 586 mincount = DefaultMinUpDownCount; | |
| 587 if (npixels < 0) | |
| 588 npixels = 0; | |
| 589 | |
| 590 if (debug) { | |
| 591 lept_mkdir("lept/orient"); | |
| 592 } | |
| 593 | |
| 594 sel1 = selCreateFromString(textsel1, 5, 6, NULL); | |
| 595 sel2 = selCreateFromString(textsel2, 5, 6, NULL); | |
| 596 sel3 = selCreateFromString(textsel3, 5, 6, NULL); | |
| 597 sel4 = selCreateFromString(textsel4, 5, 6, NULL); | |
| 598 | |
| 599 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). | |
| 600 * This closes holes in x-height characters and joins them at | |
| 601 * the x-height. There is more noise in the descender detection | |
| 602 * from this, but it works fairly well. */ | |
| 603 pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0); | |
| 604 | |
| 605 /* Optionally, make a mask of the word bounding boxes, shortening | |
| 606 * each of them by a fixed amount at each end. */ | |
| 607 pixm = NULL; | |
| 608 if (npixels > 0) { | |
| 609 l_int32 i, nbox, x, y, w, h; | |
| 610 BOX *box; | |
| 611 BOXA *boxa; | |
| 612 pix1 = pixMorphSequence(pix0, "o10.1", 0); | |
| 613 boxa = pixConnComp(pix1, NULL, 8); | |
| 614 pixm = pixCreateTemplate(pix1); | |
| 615 pixDestroy(&pix1); | |
| 616 nbox = boxaGetCount(boxa); | |
| 617 for (i = 0; i < nbox; i++) { | |
| 618 box = boxaGetBox(boxa, i, L_CLONE); | |
| 619 boxGetGeometry(box, &x, &y, &w, &h); | |
| 620 if (w > 2 * npixels) | |
| 621 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, | |
| 622 PIX_SET, NULL, 0, 0); | |
| 623 boxDestroy(&box); | |
| 624 } | |
| 625 boxaDestroy(&boxa); | |
| 626 } | |
| 627 | |
| 628 /* Find the ascenders and optionally filter with pixm. | |
| 629 * For an explanation of the procedure used for counting the result | |
| 630 * of the HMT, see comments at the beginning of this function. */ | |
| 631 pix1 = pixHMT(NULL, pix0, sel1); | |
| 632 pix2 = pixHMT(NULL, pix0, sel2); | |
| 633 pixOr(pix1, pix1, pix2); | |
| 634 if (pixm) | |
| 635 pixAnd(pix1, pix1, pixm); | |
| 636 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); | |
| 637 pixCountPixels(pix3, &countup, NULL); | |
| 638 pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug); | |
| 639 pixDestroy(&pix1); | |
| 640 pixDestroy(&pix2); | |
| 641 pixDestroy(&pix3); | |
| 642 | |
| 643 /* Find the ascenders and optionally filter with pixm. */ | |
| 644 pix1 = pixHMT(NULL, pix0, sel3); | |
| 645 pix2 = pixHMT(NULL, pix0, sel4); | |
| 646 pixOr(pix1, pix1, pix2); | |
| 647 if (pixm) | |
| 648 pixAnd(pix1, pix1, pixm); | |
| 649 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); | |
| 650 pixCountPixels(pix3, &countdown, NULL); | |
| 651 pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug); | |
| 652 pixDestroy(&pix1); | |
| 653 pixDestroy(&pix2); | |
| 654 pixDestroy(&pix3); | |
| 655 | |
| 656 /* Evaluate statistically, generating a confidence that is | |
| 657 * related to the probability with a gaussian distribution. */ | |
| 658 nup = (l_float32)(countup); | |
| 659 ndown = (l_float32)(countdown); | |
| 660 nmax = L_MAX(countup, countdown); | |
| 661 if (nmax > mincount) | |
| 662 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); | |
| 663 | |
| 664 if (debug) { | |
| 665 if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG); | |
| 666 lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", | |
| 667 nup, ndown, *pconf); | |
| 668 if (*pconf > DefaultMinUpDownConf) | |
| 669 lept_stderr("Text is rightside-up\n"); | |
| 670 if (*pconf < -DefaultMinUpDownConf) | |
| 671 lept_stderr("Text is upside-down\n"); | |
| 672 } | |
| 673 | |
| 674 pixDestroy(&pix0); | |
| 675 pixDestroy(&pixm); | |
| 676 selDestroy(&sel1); | |
| 677 selDestroy(&sel2); | |
| 678 selDestroy(&sel3); | |
| 679 selDestroy(&sel4); | |
| 680 return 0; | |
| 681 } | |
| 682 | |
| 683 | |
| 684 /*----------------------------------------------------------------* | |
| 685 * Left-right mirror detection * | |
| 686 *----------------------------------------------------------------*/ | |
| 687 /*! | |
| 688 * \brief pixMirrorDetect() | |
| 689 * | |
| 690 * \param[in] pixs 1 bpp, deskewed, Roman text, 150 - 300 ppi | |
| 691 * \param[out] pconf confidence that text is not LR mirror reversed | |
| 692 * \param[in] mincount min number of left + right; use 0 for default | |
| 693 * \param[in] debug 1 for debug output; 0 otherwise | |
| 694 * \return 0 if OK, 1 on error | |
| 695 * | |
| 696 * <pre> | |
| 697 * Notes: | |
| 698 * (1) For this test, it is necessary that the text is horizontally | |
| 699 * oriented, with ascenders going up. | |
| 700 * (2) conf is the normalized difference between the number of | |
| 701 * right and left facing characters with ascenders. | |
| 702 * Left-facing are {d}; right-facing are {b, h, k}. | |
| 703 * At least that was the expectation. In practice, we can | |
| 704 * really just say that it is the normalized difference in | |
| 705 * hits using two specific hit-miss filters, textsel1 and textsel2, | |
| 706 * after the image has been suitably pre-filtered so that | |
| 707 * these filters are effective. See (4) for what's really happening. | |
| 708 * (3) A large positive conf value indicates normal text, whereas | |
| 709 * a large negative conf value means the page is mirror reversed. | |
| 710 * (4) The implementation is a bit tricky. The general idea is | |
| 711 * to fill the x-height part of characters, but not the space | |
| 712 * between them, before doing the HMT. This is done by | |
| 713 * finding pixels added using two different operations -- a | |
| 714 * horizontal close and a vertical dilation -- and adding | |
| 715 * the intersection of these sets to the original. It turns | |
| 716 * out that the original intuition about the signal was largely | |
| 717 * in error: much of the signal for right-facing characters | |
| 718 * comes from the lower part of common x-height characters, like | |
| 719 * the e and c, that remain open after these operations. | |
| 720 * So it's important that the operations to close the x-height | |
| 721 * parts of the characters are purposely weakened sufficiently | |
| 722 * to allow these characters to remain open. The wonders | |
| 723 * of morphology! | |
| 724 * (5) Use on text images with a resolution between 150 and 300 ppi. | |
| 725 * </pre> | |
| 726 */ | |
| 727 l_ok | |
| 728 pixMirrorDetect(PIX *pixs, | |
| 729 l_float32 *pconf, | |
| 730 l_int32 mincount, | |
| 731 l_int32 debug) | |
| 732 { | |
| 733 l_int32 count1, count2, nmax; | |
| 734 l_float32 nleft, nright; | |
| 735 PIX *pix0, *pix1, *pix2, *pix3; | |
| 736 SEL *sel1, *sel2; | |
| 737 | |
| 738 if (!pconf) | |
| 739 return ERROR_INT("&conf not defined", __func__, 1); | |
| 740 *pconf = 0.0; | |
| 741 if (!pixs || pixGetDepth(pixs) != 1) | |
| 742 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 743 if (mincount == 0) | |
| 744 mincount = DefaultMinMirrorFlipCount; | |
| 745 | |
| 746 if (debug) { | |
| 747 lept_mkdir("lept/orient"); | |
| 748 } | |
| 749 | |
| 750 sel1 = selCreateFromString(textsel1, 5, 6, NULL); | |
| 751 sel2 = selCreateFromString(textsel2, 5, 6, NULL); | |
| 752 | |
| 753 /* Fill x-height characters but not space between them, sort of. */ | |
| 754 pix3 = pixMorphCompSequence(pixs, "d1.30", 0); | |
| 755 pixXor(pix3, pix3, pixs); | |
| 756 pix0 = pixMorphCompSequence(pixs, "c15.1", 0); | |
| 757 pixXor(pix0, pix0, pixs); | |
| 758 pixAnd(pix0, pix0, pix3); | |
| 759 pixOr(pix0, pix0, pixs); | |
| 760 pixDestroy(&pix3); | |
| 761 | |
| 762 /* Filter the right-facing characters. */ | |
| 763 pix1 = pixHMT(NULL, pix0, sel1); | |
| 764 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); | |
| 765 pixCountPixels(pix3, &count1, NULL); | |
| 766 pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug); | |
| 767 pixDestroy(&pix1); | |
| 768 pixDestroy(&pix3); | |
| 769 | |
| 770 /* Filter the left-facing characters. */ | |
| 771 pix2 = pixHMT(NULL, pix0, sel2); | |
| 772 pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0); | |
| 773 pixCountPixels(pix3, &count2, NULL); | |
| 774 pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug); | |
| 775 pixDestroy(&pix2); | |
| 776 pixDestroy(&pix3); | |
| 777 | |
| 778 nright = (l_float32)count1; | |
| 779 nleft = (l_float32)count2; | |
| 780 nmax = L_MAX(count1, count2); | |
| 781 pixDestroy(&pix0); | |
| 782 selDestroy(&sel1); | |
| 783 selDestroy(&sel2); | |
| 784 | |
| 785 if (nmax > mincount) | |
| 786 *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft)); | |
| 787 | |
| 788 if (debug) { | |
| 789 lept_stderr("nright = %f, nleft = %f\n", nright, nleft); | |
| 790 if (*pconf > DefaultMinMirrorFlipConf) | |
| 791 lept_stderr("Text is not mirror reversed\n"); | |
| 792 if (*pconf < -DefaultMinMirrorFlipConf) | |
| 793 lept_stderr("Text is mirror reversed\n"); | |
| 794 } | |
| 795 | |
| 796 return 0; | |
| 797 } | |
| 798 | |
| 799 | |
| 800 /*----------------------------------------------------------------* | |
| 801 * Static debug helper * | |
| 802 *----------------------------------------------------------------*/ | |
| 803 /* | |
| 804 * \brief pixDebugFlipDetect() | |
| 805 * | |
| 806 * \param[in] filename for output debug file | |
| 807 * \param[in] pixs input to pix*Detect | |
| 808 * \param[in] pixhm hit-miss result from ascenders or descenders | |
| 809 * \param[in] enable 1 to enable this function; 0 to disable | |
| 810 * \return void | |
| 811 */ | |
| 812 static void | |
| 813 pixDebugFlipDetect(const char *filename, | |
| 814 PIX *pixs, | |
| 815 PIX *pixhm, | |
| 816 l_int32 enable) | |
| 817 { | |
| 818 PIX *pixt, *pixthm; | |
| 819 | |
| 820 if (!enable) return; | |
| 821 | |
| 822 /* Display with red dot at counted locations */ | |
| 823 pixt = pixConvert1To4Cmap(pixs); | |
| 824 pixthm = pixMorphSequence(pixhm, "d5.5", 0); | |
| 825 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0); | |
| 826 | |
| 827 pixWriteDebug(filename, pixt, IFF_PNG); | |
| 828 pixDestroy(&pixthm); | |
| 829 pixDestroy(&pixt); | |
| 830 return; | |
| 831 } |
