Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/leptonica/src/flipdetect.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/flipdetect.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,831 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file flipdetect.c
+ * <pre>
+ *
+ *      High-level interface for detection and correction
+ *          PIX         *pixOrientCorrect()
+ *
+ *      Page orientation detection (pure rotation by 90 degree increments):
+ *          l_int32      pixOrientDetect()
+ *          l_int32      makeOrientDecision()
+ *          l_int32      pixUpDownDetect()
+ *
+ *      Page mirror detection (flip 180 degrees about line in plane of image):
+ *          l_int32      pixMirrorDetect()
+ *
+ *      Static debug helper
+ *          static void  pixDebugFlipDetect()
+ *
+ *  ===================================================================
+ *
+ *  Page transformation detection:
+ *
+ *  Once a page is deskewed, there are 8 possible states that it
+ *  can be in, shown symbolically below.  Suppose state 0 is correct.
+ *
+ *      0: correct     1          2          3
+ *      +------+   +------+   +------+   +------+
+ *      | **** |   | *    |   | **** |   |    * |
+ *      | *    |   | *    |   |    * |   |    * |
+ *      | *    |   | **** |   |    * |   | **** |
+ *      +------+   +------+   +------+   +------+
+ *
+ *         4          5          6          7
+ *      +-----+    +-----+    +-----+    +-----+
+ *      | *** |    |   * |    | *** |    | *   |
+ *      |   * |    |   * |    | *   |    | *   |
+ *      |   * |    |   * |    | *   |    | *   |
+ *      |   * |    | *** |    | *   |    | *** |
+ *      +-----+    +-----+    +-----+    +-----+
+ *
+ *  Each of the other seven can be derived from state 0 by applying some
+ *  combination of a 90 degree clockwise rotation, a flip about
+ *  a horizontal line, and a flip about a vertical line,
+ *  all abbreviated as:
+ *      R = Rotation (about a line perpendicular to the image)
+ *      H = Horizontal flip (about a vertical line in the plane of the image)
+ *      V = Vertical flip (about a horizontal line in the plane of the image)
+ *
+ *  We get these transformations:
+ *      RHV
+ *      000  -> 0
+ *      001  -> 1
+ *      010  -> 2
+ *      011  -> 3
+ *      100  -> 4
+ *      101  -> 5
+ *      110  -> 6
+ *      111  -> 7
+ *
+ *  Note that in four of these, the sum of H and V is 1 (odd).
+ *  For these four, we have a change in parity (handedness) of
+ *  the image, and the transformation cannot be performed by
+ *  rotation about a vertical line out of the page.   Under
+ *  rotation R, the set of 8 transformations decomposes into
+ *  two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently.
+ *
+ *  pixOrientDetect() tests for a pure rotation (0, 90, 180, 270 degrees).
+ *  It doesn't change parity.
+ *
+ *  pixMirrorDetect() tests for a horizontal flip about the vertical axis.
+ *  It changes parity.
+ *
+ *  The landscape/portrait rotation can be detected in two ways:
+ *
+ *    (1) Compute the deskew confidence for an image segment,
+ *        both as is and rotated 90 degrees  (see skew.c).
+ *
+ *    (2) Compute the ascender/descender signal for the image,
+ *        both as is and rotated 90 degrees  (implemented here).
+ *
+ *  The ascender/descender signal is useful for determining text
+ *  orientation in Roman alphabets because letters with
+ *  straight-line ascenders (b, d, h, k, l, 't') outnumber
+ *  those with descenders ('g', p, q).  The letters 't' and 'g'
+ *  will respond variably to the filter, depending on the type face.
+ *
+ *  What about the mirror image situations?  These aren't common
+ *  unless you're dealing with film, for example.
+ *  But you can reliably test if the image has undergone a
+ *  parity-changing flip once about some axis in the plane
+ *  of the image, using pixMirrorDetect*().  This works ostensibly by
+ *  counting the number of characters with ascenders that
+ *  stick out to the left and right of the ascender.  Characters
+ *  that are not mirror flipped are more likely to extend to the
+ *  right (b, h, k) than to the left (d).  Of course, that is for
+ *  text that is rightside-up.  So before you apply the mirror
+ *  test, it is necessary to ensure that the text has the ascenders
+ *  going up, and not down or to the left or right.  But here's
+ *  what *really* happens.  It turns out that the pre-filtering before
+ *  the hit-miss transform (HMT) is crucial, and surprisingly, when
+ *  the pre-filtering is chosen to generate a large signal, the majority
+ *  of the signal comes from open regions of common lower-case
+ *  letters such as 'e', 'c' and 'f'.
+ *
+ *  The set of operations you actually use depends on your prior knowledge:
+ *
+ *  (1) If the page is known to be either rightside-up or upside-down, use
+ *      either pixOrientDetect() with pleftconf = NULL, or
+ *      pixUpDownDetect().
+ *
+ *  (2) If any of the four orientations are possible, use pixOrientDetect().
+ *
+ *  (3) If the text is horizontal and rightside-up, the only remaining
+ *      degree of freedom is a left-right mirror flip: use pixMirrorDetect().
+ *
+ *  (4) If you have a relatively large amount of numbers on the page,
+ *      use the slower pixUpDownDetect().
+ *
+ *  We summarize the full orientation and mirror flip detection process:
+ *
+ *  (1) First determine which of the four 90 degree rotations
+ *      causes the text to be rightside-up.  This can be done
+ *      with either skew confidence or the pixOrientDetect()
+ *      signals.  For the latter, see the table for pixOrientDetect().
+ *
+ *  (2) Then, with ascenders pointing up, apply pixMirrorDetect().
+ *      In the normal situation the confidence will be
+ *      large and positive.  However, if mirror flipped, the
+ *      confidence will be large and negative.
+ *
+ *  A high-level interface, pixOrientCorrect() combines the detection
+ *  of the orientation with the rotation decision and the rotation itself.
+ *
+ *  The structuring elements used for text orientation detection require text
+ *  with ascenders and descenders.  They have been designed to work best
+ *  with normal sized text (about 10 pt font), scanned with a resolution
+ *  between 150 and 300 ppi.
+ *
+ *  For pedagogical reasons, we have included a dwa implementation of
+ *  this functionality, in flipdetectdwa.c.notused.  It shows by example
+ *  how to make a dwa implementation of an application that uses binary
+ *  morphological operations.  It is faster than the rasterop implementation,
+ *  but not by a large amount.
+ *
+ *  The generation of flipdetectdwa.c.notused was achieved as follows:
+ *  (1) The program flipselgen.c.notused generates the DWA code, in two C files
+ *  (2) The low-level DWA code in those two files was put into a single
+ *      file, fliphmtgen.c.notused, for clarity.  We didn't want the two
+ *      files (fmorphgen.3.c and fmorphgenlow.3.c) sitting around and
+ *      possibly causing confusion.
+ *  (3) This low-level code was directly incorporated into flipdetectdwa.c,
+ *      where it substitutes for the basic rasterop code in flipdetect.c.
+ *
+ *  Finally, use can be made of programs such as exiftool and convert to
+ *  read exif camera orientation data in jpeg files and conditionally rotate.
+ *  Here is an example shell script, made by Dan9er:
+ *  ==================================================================
+ *  #!/bin/sh
+ *  #   orientByExif.sh
+ *  #   Dependencies: exiftool (exiflib) and convert (ImageMagick)
+ *  #   Note: if there is no exif orientation data in the jpeg file,
+ *  #         this simply copies the input file.
+ *  #
+ *  if [[ -z $(command -v exiftool) || -z $(command -v convert) ]]; then
+ *      echo "You need to install dependencies; e.g.:"
+ *      echo "   sudo apt install libimage-exiftool-perl"
+ *      echo "   sudo apt install imagemagick"
+ *      exit 1
+ *  fi
+ *  if [[ $# != 2 ]]; then
+ *      echo "Syntax: orientByExif infile outfile"
+ *      exit 2
+ *  fi
+ *  if [[ ${1: -4} != ".jpg" ]]; then
+ *      echo "File is not a jpeg"
+ *      exit 3
+ *  fi
+ *  if [[ $(exiftool -s3 -n -Orientation "$1") = 1 ]]; then
+ *      echo "Image is already upright"
+ *      exit 0
+ *  fi
+ *  convert "$1" -auto-orient "$2"
+ *  echo "Done"
+ *  exit 0
+ *  ==================================================================
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#include <math.h>
+#include "allheaders.h"
+
+    /* Sels for pixOrientDetect() and pixMirrorDetect().
+     *
+     * Each string is handed to selCreateFromString() with a height of 5
+     * and a width of 6, so every literal below is one row of 6 characters.
+     *   - textsel1 and textsel2 are left-right mirror images of each other.
+     *     Together they give the "up" count in pixUpDownDetect(); separately
+     *     they give the right-facing (textsel1) and left-facing (textsel2)
+     *     counts in pixMirrorDetect().
+     *   - textsel3 and textsel4 are textsel1 and textsel2 with the row
+     *     order reversed (flipped top-to-bottom).  Together they give the
+     *     "down" count in pixUpDownDetect().
+     * NOTE(review): presumably 'x' is a hit, 'o' is a miss, ' ' is a
+     * don't-care and the upper-case 'O' marks the sel origin -- confirm
+     * against the selCreateFromString() documentation. */
+static const char *textsel1 = "x  oo "
+                              "x oOo "
+                              "x  o  "
+                              "x     "
+                              "xxxxxx";
+
+static const char *textsel2 = " oo  x"
+                              " oOo x"
+                              "  o  x"
+                              "     x"
+                              "xxxxxx";
+
+static const char *textsel3 = "xxxxxx"
+                              "x     "
+                              "x  o  "
+                              "x oOo "
+                              "x  oo ";
+
+static const char *textsel4 = "xxxxxx"
+                              "     x"
+                              "  o  x"
+                              " oOo x"
+                              " oo  x";
+
+    /* Parameters for determining orientation.
+     *   DefaultMinUpDownCount: substituted when the caller passes
+     *       mincount == 0 to pixOrientDetect() or pixUpDownDetect().
+     *   DefaultMinUpDownConf:  substituted when the caller passes
+     *       minupconf == 0.0 to makeOrientDecision(); also the threshold
+     *       for the debug messages printed by pixUpDownDetect().
+     *   DefaultMinUpDownRatio: substituted when the caller passes
+     *       minratio == 0.0 to makeOrientDecision(). */
+static const l_int32  DefaultMinUpDownCount = 70;
+static const l_float32  DefaultMinUpDownConf = 8.0;
+static const l_float32  DefaultMinUpDownRatio = 2.5;
+
+    /* Parameters for determining mirror flip.
+     *   DefaultMinMirrorFlipCount: substituted when the caller passes
+     *       mincount == 0 to pixMirrorDetect().
+     *   DefaultMinMirrorFlipConf:  threshold for the debug messages
+     *       printed by pixMirrorDetect(). */
+static const l_int32  DefaultMinMirrorFlipCount = 100;
+static const l_float32  DefaultMinMirrorFlipConf = 5.0;
+
+    /* Static debug function; does nothing unless its last argument
+     * (enable) is nonzero. */
+static void pixDebugFlipDetect(const char *filename, PIX *pixs,
+                               PIX *pixhm, l_int32 enable);
+
+
+/*----------------------------------------------------------------*
+ *        High-level interface for detection and correction       *
+ *----------------------------------------------------------------*/
+/*!
+ * \brief   pixOrientCorrect()
+ *
+ * \param[in]    pixs        1 bpp, deskewed, Roman text, 150 - 300 ppi
+ * \param[in]    minupconf   minimum value for which a decision can be made
+ * \param[in]    minratio    minimum conf ratio required for a decision
+ * \param[out]   pupconf     [optional] ; use NULL to skip
+ * \param[out]   pleftconf   [optional] ; use NULL to skip
+ * \param[out]   protation   [optional] ; use NULL to skip
+ * \param[in]    debug       1 for debug output; 0 otherwise
+ * \return  pixd  may be rotated by 90, 180 or 270; null on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Simple top-level function to detect if Roman text is in
+ *          reading orientation, and to rotate the image accordingly if not.
+ *      (2) Returns a copy if no rotation is needed.
+ *      (3) See notes for pixOrientDetect() and makeOrientDecision().
+ *          Use 0.0 for default values for %minupconf and %minratio
+ *      (4) Optional output of intermediate confidence results and
+ *          the rotation performed on pixs.  %protation is the clockwise
+ *          rotation in degrees (0, 90, 180 or 270) applied to make pixd.
+ *      (5) The optional outputs are set to 0 before any input checking,
+ *          so they hold defined values even when this returns null.
+ *      (6) Use on text images with a resolution between 150 and 300 ppi.
+ * </pre>
+ */
+PIX *
+pixOrientCorrect(PIX        *pixs,
+                 l_float32   minupconf,
+                 l_float32   minratio,
+                 l_float32  *pupconf,
+                 l_float32  *pleftconf,
+                 l_int32    *protation,
+                 l_int32     debug)
+{
+l_int32    orient;
+l_float32  upconf, leftconf;
+PIX       *pix1;
+
+        /* Give every optional output a defined value on all paths,
+         * in the same way pixUpDownDetect() zeroes *pconf up front. */
+    if (pupconf) *pupconf = 0.0;
+    if (pleftconf) *pleftconf = 0.0;
+    if (protation) *protation = 0;
+    if (!pixs || pixGetDepth(pixs) != 1)
+        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
+
+        /* Get confidences for orientation.  pixUpDownDetect() zeroes
+         * its output before doing anything else, so upconf and leftconf
+         * are always defined afterwards; a zero value leads to
+         * L_TEXT_ORIENT_UNKNOWN in makeOrientDecision(). */
+    pixUpDownDetect(pixs, &upconf, 0, 0, debug);
+    pix1 = pixRotate90(pixs, 1);
+    pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
+    pixDestroy(&pix1);
+    if (pupconf) *pupconf = upconf;
+    if (pleftconf) *pleftconf = leftconf;
+
+        /* Decide what to do */
+    makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
+
+        /* Do it.  *protation is already 0, so only the cases that
+         * actually rotate need to update it. */
+    switch (orient)
+    {
+    case L_TEXT_ORIENT_UNKNOWN:
+        L_INFO("text orientation not determined; no rotation\n", __func__);
+        return pixCopy(NULL, pixs);
+    case L_TEXT_ORIENT_UP:
+        L_INFO("text is oriented up; no rotation\n", __func__);
+        return pixCopy(NULL, pixs);
+    case L_TEXT_ORIENT_LEFT:
+        L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__);
+        if (protation) *protation = 90;
+        return pixRotateOrth(pixs, 1);
+    case L_TEXT_ORIENT_DOWN:
+        L_INFO("text oriented down; 180 cw rotation\n", __func__);
+        if (protation) *protation = 180;
+        return pixRotateOrth(pixs, 2);
+    case L_TEXT_ORIENT_RIGHT:
+        L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__);
+        if (protation) *protation = 270;
+        return pixRotateOrth(pixs, 3);
+    default:
+        L_ERROR("invalid orient flag!\n", __func__);
+        return pixCopy(NULL, pixs);
+    }
+}
+
+
+/*----------------------------------------------------------------*
+ *         Orientation detection (four 90 degree angles)          *
+ *----------------------------------------------------------------*/
+/*!
+ * \brief   pixOrientDetect()
+ *
+ * \param[in]    pixs       1 bpp, deskewed, Roman text, 150 - 300 ppi
+ * \param[out]   pupconf    [optional] ; may be NULL
+ * \param[out]   pleftconf  [optional] ; may be NULL
+ * \param[in]    mincount   count threshold passed on to pixUpDownDetect();
+ *                          use 0 for default
+ * \param[in]    debug      1 for debug output; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) See "Measuring document image skew and orientation"
+ *          Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari
+ *          IS&T/SPIE EI'95, Conference 2422: Document Recognition II
+ *          pp 302-316, Feb 6-7, 1995, San Jose, CA
+ *      (2) upconf is the normalized difference between up ascenders
+ *          and down ascenders.  The image is analyzed without rotation
+ *          for being rightside-up or upside-down.  Set &upconf to null
+ *          to skip this operation.
+ *      (3) leftconf is the normalized difference between up ascenders
+ *          and down ascenders in the image after it has been
+ *          rotated 90 degrees clockwise.  With that rotation, ascenders
+ *          projecting to the left in the source image will project up
+ *          in the rotated image.  We compute this by rotating 90 degrees
+ *          clockwise and testing for up and down ascenders.  Set
+ *          &leftconf to null to skip this operation.
+ *      (4) Note that upconf and leftconf are not linear measures of
+ *          confidence, e.g., in a range between 0 and 100.  They
+ *          measure how far you are out on the tail of a (presumably)
+ *          normal distribution.  For example, a confidence of 10 means
+ *          that it is nearly certain that the difference did not
+ *          happen at random.  However, these values must be interpreted
+ *          cautiously, taking into consideration the estimated prior
+ *          for a particular orientation or mirror flip.   The up-down
+ *          signal is very strong if applied to text with ascenders
+ *          up and down, and relatively weak for text at 90 degrees,
+ *          but even at 90 degrees, the difference can look significant.
+ *          For example, suppose the ascenders are oriented horizontally,
+ *          but the test is done vertically.  Then upconf can
+ *          be < -DefaultMinUpDownConf, suggesting the text may be
+ *          upside-down.  However, if instead the test were done
+ *          horizontally, leftconf will be very much larger
+ *          (in absolute value), giving the correct orientation.
+ *      (5) If you compute both upconf and leftconf, and there is
+ *          sufficient signal, the following table determines the
+ *          cw angle necessary to rotate pixs so that the text is
+ *          rightside-up:
+ *             0 deg :           upconf >> 1,    abs(upconf) >> abs(leftconf)
+ *             90 deg :          leftconf >> 1,  abs(leftconf) >> abs(upconf)
+ *             180 deg :         upconf << -1,   abs(upconf) >> abs(leftconf)
+ *             270 deg :         leftconf << -1, abs(leftconf) >> abs(upconf)
+ *      (6) One should probably not interpret the direction unless
+ *          there are a sufficient number of counts for both orientations,
+ *          in which case neither upconf nor leftconf will be 0.0.
+ *      (7) Use on text images with a resolution between 150 and 300 ppi.
+ *      (8) This algorithm will fail on some images, such as tables,
+ *          where most of the characters are numbers and appear as
+ *          uppercase, but there are some repeated words that give a
+ *          biased signal.  It may be advisable to run a table detector
+ *          first (e.g., pixDecideIfTable()), and not run the orientation
+ *          detector if it is a table.
+ *      (9) Uses rasterop implementation of HMT.
+ *      (10) Any requested output is set to 0.0 before the inputs are
+ *           checked, so it holds a defined value on every return path.
+ * </pre>
+ */
+l_ok
+pixOrientDetect(PIX        *pixs,
+                l_float32  *pupconf,
+                l_float32  *pleftconf,
+                l_int32     mincount,
+                l_int32     debug)
+{
+PIX  *pix1;
+
+        /* Zero the outputs first so that callers never read an
+         * indeterminate value after an error return. */
+    if (pupconf) *pupconf = 0.0;
+    if (pleftconf) *pleftconf = 0.0;
+    if (!pixs || pixGetDepth(pixs) != 1)
+        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
+    if (!pupconf && !pleftconf)
+        return ERROR_INT("nothing to do", __func__, 1);
+    if (mincount == 0)
+        mincount = DefaultMinUpDownCount;
+
+    if (pupconf)
+        pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
+    if (pleftconf) {
+            /* Rotate 90 degrees cw so that ascenders pointing left in
+             * pixs point up in pix1; report a failed rotation instead
+             * of returning success with a meaningless 0.0 confidence. */
+        if ((pix1 = pixRotate90(pixs, 1)) == NULL)
+            return ERROR_INT("pix1 not made", __func__, 1);
+        pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
+        pixDestroy(&pix1);
+    }
+
+    return 0;
+}
+
+
+/*!
+ * \brief   makeOrientDecision()
+ *
+ * \param[in]    upconf      nonzero
+ * \param[in]    leftconf    nonzero
+ * \param[in]    minupconf   minimum value for which a decision can be made
+ * \param[in]    minratio    minimum conf ratio required for a decision
+ * \param[out]   porient     text orientation enum {0,1,2,3,4}
+ * \param[in]    debug       1 for debug output; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Intended to be run on the confidences produced by
+ *          pixOrientDetect().
+ *      (2) If either upconf or leftconf is exactly 0.0, no decision is
+ *          attempted: *porient is L_TEXT_ORIENT_UNKNOWN and 0 is returned.
+ *      (3) A decision for a direction requires two things: the signed
+ *          confidence for that direction must be beyond minupconf
+ *          (above +minupconf, or below -minupconf for the opposite
+ *          direction), and its absolute value must exceed minratio
+ *          times the absolute value of the other confidence.
+ *      (4) Input 0.0 for the default values for minupconf and minratio.
+ *      (5) The candidates are tried in the order up, left, down, right;
+ *          the first one that passes both tests is chosen.
+ *      (6) The value returned in *porient is interpreted thus:
+ *            L_TEXT_ORIENT_UNKNOWN:  not enough evidence to determine
+ *            L_TEXT_ORIENT_UP:       text rightside-up
+ *            L_TEXT_ORIENT_LEFT:     landscape, text up facing left
+ *            L_TEXT_ORIENT_DOWN:     text upside-down
+ *            L_TEXT_ORIENT_RIGHT:    landscape, text up facing right
+ * </pre>
+ */
+l_ok
+makeOrientDecision(l_float32  upconf,
+                   l_float32  leftconf,
+                   l_float32  minupconf,
+                   l_float32  minratio,
+                   l_int32   *porient,
+                   l_int32    debug)
+{
+l_int32    updominates, leftdominates;
+l_float32  absup, absleft;
+
+    if (!porient)
+        return ERROR_INT("&orient not defined", __func__, 1);
+
+        /* Until both tests are passed, there is no decision */
+    *porient = L_TEXT_ORIENT_UNKNOWN;
+    if (upconf == 0.0 || leftconf == 0.0) {
+        L_INFO("not enough confidence to get orientation\n", __func__);
+        return 0;
+    }
+
+        /* A zero threshold selects the built-in default */
+    if (minupconf == 0.0)
+        minupconf = DefaultMinUpDownConf;
+    if (minratio == 0.0)
+        minratio = DefaultMinUpDownRatio;
+
+        /* Ratio test, evaluated once for each axis */
+    absup = L_ABS(upconf);
+    absleft = L_ABS(leftconf);
+    updominates = absup > minratio * absleft;
+    leftdominates = absleft > minratio * absup;
+
+        /* First candidate that also passes the magnitude test wins;
+         * the order matters if minratio is small enough for both
+         * ratio tests to succeed. */
+    if (updominates && upconf > minupconf) {
+        *porient = L_TEXT_ORIENT_UP;
+    } else if (leftdominates && leftconf > minupconf) {
+        *porient = L_TEXT_ORIENT_LEFT;
+    } else if (updominates && upconf < -minupconf) {
+        *porient = L_TEXT_ORIENT_DOWN;
+    } else if (leftdominates && leftconf < -minupconf) {
+        *porient = L_TEXT_ORIENT_RIGHT;
+    }
+
+    if (!debug)
+        return 0;
+
+    lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
+    switch (*porient)
+    {
+    case L_TEXT_ORIENT_UNKNOWN:
+        lept_stderr("Confidence is low; no determination is made\n");
+        break;
+    case L_TEXT_ORIENT_UP:
+        lept_stderr("Text is rightside-up\n");
+        break;
+    case L_TEXT_ORIENT_LEFT:
+        lept_stderr("Text is rotated 90 deg ccw\n");
+        break;
+    case L_TEXT_ORIENT_DOWN:
+        lept_stderr("Text is upside-down\n");
+        break;
+    default:   /* only L_TEXT_ORIENT_RIGHT remains */
+        lept_stderr("Text is rotated 90 deg cw\n");
+        break;
+    }
+
+    return 0;
+}
+
+
+/*!
+ * \brief   pixUpDownDetect()
+ *
+ * \param[in]    pixs       1 bpp, deskewed, Roman text, 150 - 300 ppi
+ * \param[out]   pconf      confidence that text is rightside-up
+ * \param[in]    mincount   threshold that the larger of the up and down
+ *                          counts must exceed; use 0 for default
+ * \param[in]    npixels    number of pixels removed from each side of word box
+ * \param[in]    debug      1 for debug output; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) See pixOrientDetect() for other details.
+ *      (2) The detected confidence %conf is the normalized difference
+ *          between the number of detected up and down ascenders,
+ *          assuming that the text is either rightside-up or upside-down
+ *          and not rotated at a 90 degree angle.
+ *      (3) The typical mode of operation is %npixels == 0.
+ *          If %npixels > 0, this removes HMT matches at the
+ *          beginning and ending of "words."  This is useful for
+ *          pages that may have mostly digits, because if npixels == 0,
+ *          leading "1" and "3" digits can register as having
+ *          ascenders or descenders, and "7" digits can match descenders.
+ *          Consequently, a page image of only digits may register
+ *          as being upside-down.
+ *      (4) We want to count the number of instances found using the HMT.
+ *          An expensive way to do this would be to count the
+ *          number of connected components.  A cheap way is to do a rank
+ *          reduction cascade that reduces each component to a single
+ *          pixel, and results (after two or three 2x reductions)
+ *          in one pixel for each of the original components.
+ *          After the reduction, you have a much smaller pix over
+ *          which to count pixels.  We do only 2 reductions, because
+ *          this function is designed to work for input pix between
+ *          150 and 300 ppi, and an 8x reduction on a 150 ppi image
+ *          is going too far -- components will get merged.
+ *      (5) Use on text images with a resolution between 150 and 300 ppi.
+ *      (6) %conf is left at 0.0 unless the larger of the two counts
+ *          exceeds %mincount and there is at least one hit.  The second
+ *          condition only matters for a negative %mincount, where it
+ *          prevents a 0/0 division.
+ *      (7) With %debug on, images are written under /tmp/lept/orient/.
+ * </pre>
+ */
+l_ok
+pixUpDownDetect(PIX        *pixs,
+                l_float32  *pconf,
+                l_int32     mincount,
+                l_int32     npixels,
+                l_int32     debug)
+{
+l_int32    countup, countdown, nmax;
+l_float32  nup, ndown;
+PIX       *pix0, *pix1, *pix2, *pix3, *pixm;
+SEL       *sel1, *sel2, *sel3, *sel4;
+
+    if (!pconf)
+        return ERROR_INT("&conf not defined", __func__, 1);
+    *pconf = 0.0;
+    if (!pixs || pixGetDepth(pixs) != 1)
+        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
+    if (mincount == 0)
+        mincount = DefaultMinUpDownCount;
+    if (npixels < 0)
+        npixels = 0;
+
+    if (debug) {
+        lept_mkdir("lept/orient");
+    }
+
+        /* sel1 and sel2 are used for the up count; sel3 and sel4
+         * (the same patterns with the rows reversed) for the down count. */
+    sel1 = selCreateFromString(textsel1, 5, 6, NULL);
+    sel2 = selCreateFromString(textsel2, 5, 6, NULL);
+    sel3 = selCreateFromString(textsel3, 5, 6, NULL);
+    sel4 = selCreateFromString(textsel4, 5, 6, NULL);
+
+        /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
+         * This closes holes in x-height characters and joins them at
+         * the x-height.  There is more noise in the descender detection
+         * from this, but it works fairly well. */
+    pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
+
+        /* Optionally, make a mask of the word bounding boxes, shortening
+         * each of them by a fixed amount at each end. */
+    pixm = NULL;
+    if (npixels > 0) {
+        l_int32  i, nbox, x, y, w, h;
+        BOX   *box;
+        BOXA  *boxa;
+        pix1 = pixMorphSequence(pix0, "o10.1", 0);
+        boxa = pixConnComp(pix1, NULL, 8);
+        pixm = pixCreateTemplate(pix1);
+        pixDestroy(&pix1);
+        nbox = boxaGetCount(boxa);
+        for (i = 0; i < nbox; i++) {
+            box = boxaGetBox(boxa, i, L_CLONE);
+            boxGetGeometry(box, &x, &y, &w, &h);
+                /* Boxes no wider than 2 * npixels are dropped entirely.
+                 * The others are trimmed by npixels at each end and
+                 * grown vertically by 6 rows above and 7 rows below;
+                 * presumably so that HMT matches lying just outside
+                 * the word box are retained -- TODO confirm. */
+            if (w > 2 * npixels)
+                pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
+                            PIX_SET, NULL, 0, 0);
+            boxDestroy(&box);
+        }
+        boxaDestroy(&boxa);
+    }
+
+        /* Find the ascenders and optionally filter with pixm.
+         * For an explanation of the procedure used for counting the result
+         * of the HMT, see note (4) in the header of this function. */
+    pix1 = pixHMT(NULL, pix0, sel1);
+    pix2 = pixHMT(NULL, pix0, sel2);
+    pixOr(pix1, pix1, pix2);
+    if (pixm)
+        pixAnd(pix1, pix1, pixm);
+    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
+    pixCountPixels(pix3, &countup, NULL);
+    pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
+    pixDestroy(&pix1);
+    pixDestroy(&pix2);
+    pixDestroy(&pix3);
+
+        /* Find the descenders (matches for the downward-pointing
+         * sels) and optionally filter with pixm. */
+    pix1 = pixHMT(NULL, pix0, sel3);
+    pix2 = pixHMT(NULL, pix0, sel4);
+    pixOr(pix1, pix1, pix2);
+    if (pixm)
+        pixAnd(pix1, pix1, pixm);
+    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
+    pixCountPixels(pix3, &countdown, NULL);
+    pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
+    pixDestroy(&pix1);
+    pixDestroy(&pix2);
+    pixDestroy(&pix3);
+
+        /* Evaluate statistically, generating a confidence that is
+         * related to the probability with a gaussian distribution.
+         * Requiring nmax > 0 keeps a negative mincount from producing
+         * 0/0 (NaN) when there are no hits at all; for mincount >= 0
+         * the extra condition is implied by nmax > mincount. */
+    nup = (l_float32)(countup);
+    ndown = (l_float32)(countdown);
+    nmax = L_MAX(countup, countdown);
+    if (nmax > mincount && nmax > 0)
+        *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
+
+    if (debug) {
+        if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
+        lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
+                nup, ndown, *pconf);
+        if (*pconf > DefaultMinUpDownConf)
+            lept_stderr("Text is rightside-up\n");
+        if (*pconf < -DefaultMinUpDownConf)
+            lept_stderr("Text is upside-down\n");
+    }
+
+    pixDestroy(&pix0);
+    pixDestroy(&pixm);
+    selDestroy(&sel1);
+    selDestroy(&sel2);
+    selDestroy(&sel3);
+    selDestroy(&sel4);
+    return 0;
+}
+
+
+/*----------------------------------------------------------------*
+ *                     Left-right mirror detection                *
+ *----------------------------------------------------------------*/
+/*!
+ * \brief   pixMirrorDetect()
+ *
+ * \param[in]    pixs       1 bpp, deskewed, Roman text, 150 - 300 ppi
+ * \param[out]   pconf      confidence that text is not LR mirror reversed
+ * \param[in]    mincount   threshold that the larger of the left and right
+ *                          counts must exceed; use 0 for default
+ * \param[in]    debug      1 for debug output; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) For this test, it is necessary that the text is horizontally
+ *          oriented, with ascenders going up.
+ *      (2) conf is the normalized difference between the number of
+ *          right and left facing characters with ascenders.
+ *          Left-facing are {d}; right-facing are {b, h, k}.
+ *          At least that was the expectation.  In practice, we can
+ *          really just say that it is the normalized difference in
+ *          hits using two specific hit-miss filters, textsel1 and textsel2,
+ *          after the image has been suitably pre-filtered so that
+ *          these filters are effective.  See (4) for what's really happening.
+ *      (3) A large positive conf value indicates normal text, whereas
+ *          a large negative conf value means the page is mirror reversed.
+ *      (4) The implementation is a bit tricky.  The general idea is
+ *          to fill the x-height part of characters, but not the space
+ *          between them, before doing the HMT.  This is done by
+ *          finding pixels added using two different operations -- a
+ *          horizontal close and a vertical dilation -- and adding
+ *          the intersection of these sets to the original.  It turns
+ *          out that the original intuition about the signal was largely
+ *          in error: much of the signal for right-facing characters
+ *          comes from the lower part of common x-height characters, like
+ *          the e and c, that remain open after these operations.
+ *          So it's important that the operations to close the x-height
+ *          parts of the characters are purposely weakened sufficiently
+ *          to allow these characters to remain open.  The wonders
+ *          of morphology!
+ *      (5) Use on text images with a resolution between 150 and 300 ppi.
+ *      (6) %conf is left at 0.0 unless the larger of the two counts
+ *          exceeds %mincount and there is at least one hit.  The second
+ *          condition only matters for a negative %mincount, where it
+ *          prevents a 0/0 division.
+ *      (7) With %debug on, images are written under /tmp/lept/orient/.
+ * </pre>
+ */
+l_ok
+pixMirrorDetect(PIX        *pixs,
+                l_float32  *pconf,
+                l_int32     mincount,
+                l_int32     debug)
+{
+l_int32    count1, count2, nmax;
+l_float32  nleft, nright;
+PIX       *pix0, *pix1, *pix2, *pix3;
+SEL       *sel1, *sel2;
+
+    if (!pconf)
+        return ERROR_INT("&conf not defined", __func__, 1);
+    *pconf = 0.0;
+    if (!pixs || pixGetDepth(pixs) != 1)
+        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
+    if (mincount == 0)
+        mincount = DefaultMinMirrorFlipCount;
+
+    if (debug) {
+        lept_mkdir("lept/orient");
+    }
+
+    sel1 = selCreateFromString(textsel1, 5, 6, NULL);
+    sel2 = selCreateFromString(textsel2, 5, 6, NULL);
+
+        /* Fill x-height characters but not space between them, sort of.
+         *   pix3: pixels added to pixs by the "d1.30" sequence
+         *   pix0: pixels added to pixs by the "c15.1" sequence
+         * Only pixels added by both are kept, and these are then
+         * OR'd back into the original image. */
+    pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
+    pixXor(pix3, pix3, pixs);
+    pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
+    pixXor(pix0, pix0, pixs);
+    pixAnd(pix0, pix0, pix3);
+    pixOr(pix0, pix0, pixs);
+    pixDestroy(&pix3);
+
+        /* Filter the right-facing characters. */
+    pix1 = pixHMT(NULL, pix0, sel1);
+    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
+    pixCountPixels(pix3, &count1, NULL);
+    pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
+    pixDestroy(&pix1);
+    pixDestroy(&pix3);
+
+        /* Filter the left-facing characters. */
+    pix2 = pixHMT(NULL, pix0, sel2);
+    pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
+    pixCountPixels(pix3, &count2, NULL);
+    pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
+    pixDestroy(&pix2);
+    pixDestroy(&pix3);
+
+    nright = (l_float32)count1;
+    nleft = (l_float32)count2;
+    nmax = L_MAX(count1, count2);
+    pixDestroy(&pix0);
+    selDestroy(&sel1);
+    selDestroy(&sel2);
+
+        /* Requiring nmax > 0 keeps a negative mincount from producing
+         * 0/0 (NaN) when there are no hits at all; for mincount >= 0
+         * the extra condition is implied by nmax > mincount. */
+    if (nmax > mincount && nmax > 0)
+        *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
+
+    if (debug) {
+        lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
+        if (*pconf > DefaultMinMirrorFlipConf)
+            lept_stderr("Text is not mirror reversed\n");
+        if (*pconf < -DefaultMinMirrorFlipConf)
+            lept_stderr("Text is mirror reversed\n");
+    }
+
+    return 0;
+}
+
+
+/*----------------------------------------------------------------*
+ *                        Static debug helper                     *
+ *----------------------------------------------------------------*/
+/*
+ * \brief   pixDebugFlipDetect()
+ *
+ * \param[in]    filename   path of the debug image to be written
+ * \param[in]    pixs       image that was given to pix*Detect
+ * \param[in]    pixhm      hit-miss transform result to be visualized
+ * \param[in]    enable     nonzero to write the image; 0 makes this a no-op
+ * \return   void
+ *
+ * Writes a colormapped rendering of pixs in which the locations of
+ * the hit-miss matches, dilated by "d5.5" so that they are visible,
+ * are painted with the color (255, 0, 0).
+ */
+static void
+pixDebugFlipDetect(const char *filename,
+                   PIX        *pixs,
+                   PIX        *pixhm,
+                   l_int32     enable)
+{
+PIX  *pixcolor, *pixdots;
+
+    if (enable) {
+            /* Display with red dot at counted locations */
+        pixcolor = pixConvert1To4Cmap(pixs);
+        pixdots = pixMorphSequence(pixhm, "d5.5", 0);
+        pixSetMaskedCmap(pixcolor, pixdots, 0, 0, 255, 0, 0);
+        pixWriteDebug(filename, pixcolor, IFF_PNG);
+
+        pixDestroy(&pixdots);
+        pixDestroy(&pixcolor);
+    }
+}
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children