view mupdf-source/thirdparty/leptonica/src/baseline.c @ 7:5ab937c03c27

Apply full RELRO to all generated binaries. Also strip the generated binaries.
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 16 Sep 2025 12:37:32 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

/*!
 * \file baseline.c
 * <pre>
 *
 *      Locate text baselines in an image
 *           NUMA     *pixFindBaselines()
 *
 *      Projective transform to remove local skew
 *           PIX      *pixDeskewLocal()
 *
 *      Determine local skew
 *           l_int32   pixGetLocalSkewTransform()
 *           NUMA     *pixGetLocalSkewAngles()
 *
 *  We have two apparently different functions here:
 *    ~ finding baselines
 *    ~ finding a projective transform to remove keystone warping
 *  The function pixGetLocalSkewAngles() returns an array of angles,
 *  one for each raster line, and the baselines of the text lines
 *  should intersect the left edge of the image with that angle.
 * </pre>
 */

#ifdef HAVE_CONFIG_H
#include <config_auto.h>
#endif  /* HAVE_CONFIG_H */

#include <math.h>
#include "allheaders.h"

    /* Minimum distance to travel after finding max before abandoning peak */
static const l_int32  MinDistInPeak = 35;

    /* Thresholds for peaks and zeros, relative to the max peak */
static const l_int32  PeakThresholdRatio = 20;
static const l_int32  ZeroThresholdRatio = 100;

    /* Default values for determining local skew */
static const l_int32  DefaultSlices = 10;
static const l_int32  DefaultSweepReduction = 2;
static const l_int32  DefaultBsReduction = 1;
static const l_float32  DefaultSweepRange = 5.;     /* degrees */
static const l_float32  DefaultSweepDelta = 1.;     /* degrees */
static const l_float32  DefaultMinbsDelta = 0.01f;  /* degrees */

    /* Overlap slice fraction added to top and bottom of each slice */
static const l_float32  OverlapFraction = 0.5;

    /* Minimum allowed confidence (ratio) for accepting a value */
static const l_float32  MinAllowedConfidence = 3.0;


/*---------------------------------------------------------------------*
 *                    Locate text baselines in an image                *
 *---------------------------------------------------------------------*/
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);

        /* Close up the text characters, removing noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* Estimate the resolution */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", __func__, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PeakThresholdRatio;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZeroThresholdRatio;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MinDistInPeak; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MinDistInPeak;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no components after filtering\n", __func__);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
      nloc = numaGetCount(naloc);
      nbox = boxaGetCount(boxa3);
      for (i = 0; i < nbox; i++) {
          boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
          for (j = 0; j < nloc; j++) {
              numaGetIValue(naloc, j, &locval);
              if (L_ABS(locval - (by + bh)) > 25)
                  continue;
              ptaAddPt(pta, bx, locval);
              ptaAddPt(pta, bx + bw, locval);
              break;
          }
      }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}


/*---------------------------------------------------------------------*
 *               Projective transform to remove local skew             *
 *---------------------------------------------------------------------*/
/*!
 * \brief   pixDeskewLocal()
 *
 * \param[in]    pixs        1 bpp
 * \param[in]    nslices     the number of horizontal overlapping slices;
 *                           must be larger than 1 and not exceed 20;
 *                           use 0 for default
 * \param[in]    redsweep    sweep reduction factor: 1, 2, 4 or 8;
 *                           use 0 for default value
 * \param[in]    redsearch   search reduction factor: 1, 2, 4 or 8, and
 *                           not larger than redsweep; use 0 for default value
 * \param[in]    sweeprange  half the full range, assumed about 0; in degrees;
 *                           use 0.0 for default value
 * \param[in]    sweepdelta  angle increment of sweep; in degrees;
 *                           use 0.0 for default value
 * \param[in]    minbsdelta  min binary search increment angle; in degrees;
 *                           use 0.0 for default value
 * \return  pixd, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This function allows deskew of a page whose skew changes
 *          approximately linearly with vertical position.  It uses
 *          a projective transform that in effect does a differential
 *          shear about the LHS of the page, and makes all text lines
 *          horizontal.
 *      (2) The origin of the keystoning can be either a cheap document
 *          feeder that rotates the page as it is passed through, or a
 *          camera image taken from either the left or right side
 *          of the vertical.
 *      (3) The image transformation is a projective warping,
 *          not a rotation.  Apart from this function, the text lines
 *          must be properly aligned vertically with respect to each
 *          other.  This can be done by pre-processing the page; e.g.,
 *          by rotating or horizontally shearing it.
 *          Typically, this can be achieved by vertically aligning
 *          the page edge.
 * </pre>
 */
PIX *
pixDeskewLocal(PIX       *pixs,
               l_int32    nslices,
               l_int32    redsweep,
               l_int32    redsearch,
               l_float32  sweeprange,
               l_float32  sweepdelta,
               l_float32  minbsdelta)
{
l_int32    ret;
PIX       *pixd;
PTA       *ptas, *ptad;

    if (!pixs || pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);

        /* Skew array gives skew angle (deg) as fctn of raster line
         * where it intersects the LHS of the image */
    ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch,
                                   sweeprange, sweepdelta, minbsdelta,
                                   &ptas, &ptad);
    if (ret != 0)
        return (PIX *)ERROR_PTR("transform pts not found", __func__, NULL);

        /* Use a projective transform */
    pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE);

    ptaDestroy(&ptas);
    ptaDestroy(&ptad);
    return pixd;
}


/*---------------------------------------------------------------------*
 *                       Determine the local skew                      *
 *---------------------------------------------------------------------*/
/*!
 * \brief   pixGetLocalSkewTransform()
 *
 * \param[in]    pixs
 * \param[in]    nslices      the number of horizontal overlapping slices;
 *                            must be larger than 1 and not exceed 20;
 *                            use 0 for default
 * \param[in]    redsweep     sweep reduction factor: 1, 2, 4 or 8;
 *                            use 0 for default value
 * \param[in]    redsearch    search reduction factor: 1, 2, 4 or 8, and not
 *                            larger than redsweep; use 0 for default value
 * \param[in]    sweeprange   half the full range, assumed about 0;
 *                            in degrees; use 0.0 for default value
 * \param[in]    sweepdelta   angle increment of sweep; in degrees;
 *                            use 0.0 for default value
 * \param[in]    minbsdelta   min binary search increment angle; in degrees;
 *                            use 0.0 for default value
 * \param[out]   pptas        4 points in the source
 * \param[out]   pptad        the corresponding 4 pts in the dest
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) This generates two pairs of points in the src, each pair
 *          corresponding to a pair of points that would lie along
 *          the same raster line in a transformed (dewarped) image.
 *      (2) The sets of 4 src and 4 dest points returned by this function
 *          can then be used, in a projective or bilinear transform,
 *          to remove keystoning in the src.
 * </pre>
 */
l_ok
pixGetLocalSkewTransform(PIX       *pixs,
                         l_int32    nslices,
                         l_int32    redsweep,
                         l_int32    redsearch,
                         l_float32  sweeprange,
                         l_float32  sweepdelta,
                         l_float32  minbsdelta,
                         PTA      **pptas,
                         PTA      **pptad)
{
l_int32    w, h, i;
l_float32  deg2rad, angr, angd, dely;
NUMA      *naskew;
PTA       *ptas, *ptad;

    if (!pptas || !pptad)
        return ERROR_INT("&ptas and &ptad not defined", __func__, 1);
    *pptas = *pptad = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
    if (nslices < 2 || nslices > 20)
        nslices = DefaultSlices;
    if (redsweep < 1 || redsweep > 8)
        redsweep = DefaultSweepReduction;
    if (redsearch < 1 || redsearch > redsweep)
        redsearch = DefaultBsReduction;
    if (sweeprange == 0.0)
        sweeprange = DefaultSweepRange;
    if (sweepdelta == 0.0)
        sweepdelta = DefaultSweepDelta;
    if (minbsdelta == 0.0)
        minbsdelta = DefaultMinbsDelta;

    naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch,
                                   sweeprange, sweepdelta, minbsdelta,
                                   NULL, NULL, 0);
    if (!naskew)
        return ERROR_INT("naskew not made", __func__, 1);

    deg2rad = 3.14159265f / 180.f;
    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    ptas = ptaCreate(4);
    ptad = ptaCreate(4);
    *pptas = ptas;
    *pptad = ptad;

        /* Find i for skew line that intersects LHS at i and RHS at h / 20 */
    for (i = 0; i < h; i++) {
        numaGetFValue(naskew, i, &angd);
        angr = angd * deg2rad;
        dely = w * tan(angr);
        if (i - dely > 0.05 * h)
            break;
    }
    ptaAddPt(ptas, 0, i);
    ptaAddPt(ptas, w - 1, i - dely);
    ptaAddPt(ptad, 0, i);
    ptaAddPt(ptad, w - 1, i);

        /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */
    for (i = h - 1; i > 0; i--) {
        numaGetFValue(naskew, i, &angd);
        angr = angd * deg2rad;
        dely = w * tan(angr);
        if (i - dely < 0.95 * h)
            break;
    }
    ptaAddPt(ptas, 0, i);
    ptaAddPt(ptas, w - 1, i - dely);
    ptaAddPt(ptad, 0, i);
    ptaAddPt(ptad, w - 1, i);

    numaDestroy(&naskew);
    return 0;
}


/*!
 * \brief   pixGetLocalSkewAngles()
 *
 * \param[in]    pixs          1 bpp
 * \param[in]    nslices       the number of horizontal overlapping slices;
 *                             must be larger than 1 and not exceed 20;
 *                             use 0 for default
 * \param[in]    redsweep      sweep reduction factor: 1, 2, 4 or 8;
 *                             use 0 for default value
 * \param[in]    redsearch     search reduction factor: 1, 2, 4 or 8, and not
 *                             larger than redsweep; use 0 for default value
 * \param[in]    sweeprange    half the full range, assumed about 0;
 *                             in degrees; use 0.0 for default value
 * \param[in]    sweepdelta    angle increment of sweep; in degrees;
 *                             use 0.0 for default value
 * \param[in]    minbsdelta    min binary search increment angle; in degrees;
 *                             use 0.0 for default value
 * \param[out]   pa            [optional] slope of skew as fctn of y
 * \param[out]   pb            [optional] intercept at y = 0 of skew,
 8                             as a function of y
 * \param[in]    debug         1 for generating plot of skew angle vs. y;
 *                             0 otherwise
 * \return  naskew, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The local skew is measured in a set of overlapping strips.
 *          We then do a least square linear fit parameters to get
 *          the slope and intercept parameters a and b in
 *              skew-angle = a * y + b  (degrees)
 *          for the local skew as a function of raster line y.
 *          This is then used to make naskew, which can be interpreted
 *          as the computed skew angle (in degrees) at the left edge
 *          of each raster line.
 *      (2) naskew can then be used to find the baselines of text, because
 *          each text line has a baseline that should intersect
 *          the left edge of the image with the angle given by this
 *          array, evaluated at the raster line of intersection.
 * </pre>
 */
NUMA *
pixGetLocalSkewAngles(PIX        *pixs,
                      l_int32     nslices,
                      l_int32     redsweep,
                      l_int32     redsearch,
                      l_float32   sweeprange,
                      l_float32   sweepdelta,
                      l_float32   minbsdelta,
                      l_float32  *pa,
                      l_float32  *pb,
                      l_int32     debug)
{
l_int32    w, h, hs, i, ystart, yend, ovlap, npts;
l_float32  angle, conf, ycenter, a, b;
BOX       *box;
GPLOT     *gplot;
NUMA      *naskew, *nax, *nay;
PIX       *pix;
PTA       *pta;

    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
    if (nslices < 2 || nslices > 20)
        nslices = DefaultSlices;
    if (redsweep < 1 || redsweep > 8)
        redsweep = DefaultSweepReduction;
    if (redsearch < 1 || redsearch > redsweep)
        redsearch = DefaultBsReduction;
    if (sweeprange == 0.0)
        sweeprange = DefaultSweepRange;
    if (sweepdelta == 0.0)
        sweepdelta = DefaultSweepDelta;
    if (minbsdelta == 0.0)
        minbsdelta = DefaultMinbsDelta;

    pixGetDimensions(pixs, &w, &h, NULL);
    hs = h / nslices;
    ovlap = (l_int32)(OverlapFraction * hs);
    pta = ptaCreate(nslices);
    for (i = 0; i < nslices; i++) {
        ystart = L_MAX(0, hs * i - ovlap);
        yend = L_MIN(h - 1, hs * (i + 1) + ovlap);
        ycenter = (l_float32)(ystart + yend) / 2;
        box = boxCreate(0, ystart, w, yend - ystart + 1);
        pix = pixClipRectangle(pixs, box, NULL);
        pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch,
                                  sweeprange, sweepdelta, minbsdelta);
        if (conf > MinAllowedConfidence)
            ptaAddPt(pta, ycenter, angle);
        pixDestroy(&pix);
        boxDestroy(&box);
    }

        /* Do linear least squares fit */
    if ((npts = ptaGetCount(pta)) < 2) {
        ptaDestroy(&pta);
        return (NUMA *)ERROR_PTR("can't fit skew", __func__, NULL);
    }
    ptaGetLinearLSF(pta, &a, &b, NULL);
    if (pa) *pa = a;
    if (pb) *pb = b;

        /* Make skew angle array as function of raster line */
    naskew = numaCreate(h);
    for (i = 0; i < h; i++) {
        angle = a * i + b;
        numaAddNumber(naskew, angle);
    }

    if (debug) {
        lept_mkdir("lept/baseline");
        ptaGetArrays(pta, &nax, &nay);
        gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG,
                            "skew as fctn of y", "y (in raster lines from top)",
                            "angle (in degrees)");
        gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf");
        gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        numaDestroy(&nax);
        numaDestroy(&nay);
    }

    ptaDestroy(&pta);
    return naskew;
}