Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/skew.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/skew.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1226 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/*! + * \file skew.c + * <pre> + * + * Top-level deskew interfaces + * PIX *pixDeskewBoth() + * PIX *pixDeskew() + * PIX *pixFindSkewAndDeskew() + * PIX *pixDeskewGeneral() + * + * Top-level angle-finding interface + * l_int32 pixFindSkew() + * + * Basic angle-finding functions + * l_int32 pixFindSkewSweep() + * l_int32 pixFindSkewSweepAndSearch() + * l_int32 pixFindSkewSweepAndSearchScore() + * l_int32 pixFindSkewSweepAndSearchScorePivot() + * + * Search over arbitrary range of angles in orthogonal directions + * l_int32 pixFindSkewOrthogonalRange() + * + * Differential square sum function for scoring + * l_int32 pixFindDifferentialSquareSum() + * + * Measures of variance of row sums + * l_int32 pixFindNormalizedSquareSum() + * + * + * ============================================================== + * Page skew detection + * + * Skew is determined by pixel profiles, which are computed + * as pixel sums along the raster line for each line in the + * image. By vertically shearing the image by a given angle, + * the sums can be computed quickly along the raster lines + * rather than along lines at that angle. The score is + * computed from these line sums by taking the square of + * the DIFFERENCE between adjacent line sums, summed over + * all lines. The skew angle is then found as the angle + * that maximizes the score. The actual computation for + * any sheared image is done in the function + * pixFindDifferentialSquareSum(). + * + * The search for the angle that maximizes this score is + * most efficiently performed by first sweeping coarsely + * over angles, using a significantly reduced image (say, 4x + * reduction), to find the approximate maximum within a half + * degree or so, and then doing an interval-halving binary + * search at higher resolution to get the skew angle to + * within 1/20 degree or better. + * + * The differential signal is used (rather than just using + * that variance of line sums) because it rejects the + * background noise due to total number of black pixels, + * and has maximum contributions from the baselines and + * x-height lines of text when the textlines are aligned + * with the raster lines. It also works well in multicolumn + * pages where the textlines do not line up across columns. + * + * The method is fast, accurate to within an angle (in radians) + * of approximately the inverse width in pixels of the image, + * and will work on a surprisingly small amount of text data + * (just a couple of text lines). Consequently, it can + * also be used to find local skew if the skew were to vary + * significantly over the page. Local skew determination + * is not very important except for locating lines of + * handwritten text that may be mixed with printed text. + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include <math.h> +#include "allheaders.h" + + /* Default sweep angle parameters for pixFindSkew() */ +static const l_float32 DefaultSweepRange = 7.0; /* degrees */ +static const l_float32 DefaultSweepDelta = 1.0; /* degrees */ + + /* Default final angle difference parameter for binary + * search in pixFindSkew(). The expected accuracy is + * not better than the inverse image width in pixels, + * say, 1/2000 radians, or about 0.03 degrees. */ +static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ + + /* Default scale factors for pixFindSkew() */ +static const l_int32 DefaultSweepReduction = 4; /* sweep part; 4 is good */ +static const l_int32 DefaultBsReduction = 2; /* binary search part */ + + /* Minimum angle for deskewing in pixDeskew() */ +static const l_float32 MinDeskewAngle = 0.1f; /* degree */ + + /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ +static const l_float32 MinAllowedConfidence = 3.0; + + /* Minimum allowed maxscore to give nonzero confidence */ +static const l_int32 MinValidMaxscore = 10000; + + /* Constant setting threshold for minimum allowed minscore + * to give nonzero confidence; multiply this constant by + * (height * width^2) */ +static const l_float32 MinscoreThreshFactor = 0.000002f; + + /* Default binarization threshold value */ +static const l_int32 DefaultBinaryThreshold = 130; + +#ifndef NO_CONSOLE_IO +#define DEBUG_PRINT_SCORES 0 +#define DEBUG_PRINT_SWEEP 0 +#define DEBUG_PRINT_BINARY 0 +#define DEBUG_PRINT_ORTH 0 +#define DEBUG_THRESHOLD 0 +#define DEBUG_PLOT_SCORES 0 /* requires the gnuplot executable */ +#endif /* ~NO_CONSOLE_IO */ + + + +/*-----------------------------------------------------------------------* + * Top-level deskew interfaces * + *-----------------------------------------------------------------------*/ +/*! + * \brief pixDeskewBoth() + * + * \param[in] pixs any depth + * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; + * use 0 for default + * \return pixd deskewed pix, or NULL on error + * + * <pre> + * Notes: + * (1) This binarizes if necessary and does both horizontal + * and vertical deskewing, using the default parameters in + * the underlying pixDeskew(). See usage there. + * </pre> + */ +PIX * +pixDeskewBoth(PIX *pixs, + l_int32 redsearch) +{ +PIX *pix1, *pix2, *pix3, *pix4; + + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + if (redsearch == 0) + redsearch = DefaultBsReduction; + else if (redsearch != 1 && redsearch != 2 && redsearch != 4) + return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); + + pix1 = pixDeskew(pixs, redsearch); + pix2 = pixRotate90(pix1, 1); + pix3 = pixDeskew(pix2, redsearch); + pix4 = pixRotate90(pix3, -1); + pixDestroy(&pix1); + pixDestroy(&pix2); + pixDestroy(&pix3); + return pix4; +} + + +/*! + * \brief pixDeskew() + * + * \param[in] pixs any depth + * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; + * use 0 for default + * \return pixd deskewed pix, or NULL on error + * + * <pre> + * Notes: + * (1) This binarizes if necessary and finds the skew angle. If the + * angle is large enough and there is sufficient confidence, + * it returns a deskewed image; otherwise, it returns a clone. + * (2) Typical values at 300 ppi for %redsearch are 2 and 4. + * At 75 ppi, one should use %redsearch = 1. + * </pre> + */ +PIX * +pixDeskew(PIX *pixs, + l_int32 redsearch) +{ + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + if (redsearch == 0) + redsearch = DefaultBsReduction; + else if (redsearch != 1 && redsearch != 2 && redsearch != 4) + return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); + + return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); +} + + +/*! + * \brief pixFindSkewAndDeskew() + * + * \param[in] pixs any depth + * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; + * use 0 for default + * \param[out] pangle [optional] angle required to deskew, + * in degrees; use NULL to skip + * \param[out] pconf [optional] conf value is ratio + * of max/min scores; use NULL to skip + * \return pixd deskewed pix, or NULL on error + * + * <pre> + * Notes: + * (1) This binarizes if necessary and finds the skew angle. If the + * angle is large enough and there is sufficient confidence, + * it returns a deskewed image; otherwise, it returns a clone. + * </pre> + */ +PIX * +pixFindSkewAndDeskew(PIX *pixs, + l_int32 redsearch, + l_float32 *pangle, + l_float32 *pconf) +{ + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + if (redsearch == 0) + redsearch = DefaultBsReduction; + else if (redsearch != 1 && redsearch != 2 && redsearch != 4) + return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); + + return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); +} + + +/*! + * \brief pixDeskewGeneral() + * + * \param[in] pixs any depth + * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; + * use 0 for default + * \param[in] sweeprange in degrees in each direction from 0; + * use 0.0 for default + * \param[in] sweepdelta in degrees; use 0.0 for default + * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; + * use 0 for default; + * \param[in] thresh for binarizing the image; use 0 for default + * \param[out] pangle [optional] angle required to deskew, + * in degrees; use NULL to skip + * \param[out] pconf [optional] conf value is ratio + * of max/min scores; use NULL to skip + * \return pixd deskewed pix, or NULL on error + * + * <pre> + * Notes: + * (1) This binarizes if necessary and finds the skew angle. If the + * angle is large enough and there is sufficient confidence, + * it returns a deskewed image; otherwise, it returns a clone. + * </pre> + */ +PIX * +pixDeskewGeneral(PIX *pixs, + l_int32 redsweep, + l_float32 sweeprange, + l_float32 sweepdelta, + l_int32 redsearch, + l_int32 thresh, + l_float32 *pangle, + l_float32 *pconf) +{ +l_int32 ret, depth; +l_float32 angle, conf, deg2rad; +PIX *pixb, *pixd; + + if (pangle) *pangle = 0.0; + if (pconf) *pconf = 0.0; + if (!pixs) + return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); + if (redsweep == 0) + redsweep = DefaultSweepReduction; + else if (redsweep != 1 && redsweep != 2 && redsweep != 4) + return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", __func__, NULL); + if (sweeprange == 0.0) + sweeprange = DefaultSweepRange; + if (sweepdelta == 0.0) + sweepdelta = DefaultSweepDelta; + if (redsearch == 0) + redsearch = DefaultBsReduction; + else if (redsearch != 1 && redsearch != 2 && redsearch != 4) + return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); + if (thresh == 0) + thresh = DefaultBinaryThreshold; + + deg2rad = 3.1415926535f / 180.f; + + /* Binarize if necessary */ + depth = pixGetDepth(pixs); + if (depth == 1) + pixb = pixClone(pixs); + else + pixb = pixConvertTo1(pixs, thresh); + + /* Use the 1 bpp image to find the skew */ + ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, + sweeprange, sweepdelta, + DefaultMinbsDelta); + pixDestroy(&pixb); + if (pangle) *pangle = angle; + if (pconf) *pconf = conf; + if (ret) + return pixClone(pixs); + + if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) + return pixClone(pixs); + + if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, + L_BRING_IN_WHITE, 0, 0)) == NULL) + return pixClone(pixs); + else + return pixd; +} + + +/*-----------------------------------------------------------------------* + * Simple top-level angle-finding interface * + *-----------------------------------------------------------------------*/ +/*! + * \brief pixFindSkew() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew, in degrees + * \param[out] pconf confidence value is ratio max/min scores + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) This is a simple high-level interface, that uses default + * values of the parameters for reasonable speed and accuracy. + * (2) The angle returned is the negative of the skew angle of + * the image. It is the angle required for deskew. + * Clockwise rotations are positive angles. + * </pre> + */ +l_ok +pixFindSkew(PIX *pixs, + l_float32 *pangle, + l_float32 *pconf) +{ + if (pangle) *pangle = 0.0; + if (pconf) *pconf = 0.0; + if (!pangle || !pconf) + return ERROR_INT("&angle and/or &conf not defined", __func__, 1); + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + if (pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not 1 bpp", __func__, 1); + + return pixFindSkewSweepAndSearch(pixs, pangle, pconf, + DefaultSweepReduction, + DefaultBsReduction, + DefaultSweepRange, + DefaultSweepDelta, + DefaultMinbsDelta); +} + + +/*-----------------------------------------------------------------------* + * Basic angle-finding functions * + *-----------------------------------------------------------------------*/ +/*! + * \brief pixFindSkewSweep() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew, in degrees + * \param[in] reduction factor = 1, 2, 4 or 8 + * \param[in] sweeprange half the full range; assumed about 0; in degrees + * \param[in] sweepdelta angle increment of sweep; in degrees + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) This examines the 'score' for skew angles with equal intervals. + * (2) Caller must check the return value for validity of the result. + * </pre> + */ +l_ok +pixFindSkewSweep(PIX *pixs, + l_float32 *pangle, + l_int32 reduction, + l_float32 sweeprange, + l_float32 sweepdelta) +{ +l_int32 ret, bzero, i, nangles; +l_float32 deg2rad, theta; +l_float32 sum, maxscore, maxangle; +NUMA *natheta, *nascore; +PIX *pix, *pixt; + + if (!pangle) + return ERROR_INT("&angle not defined", __func__, 1); + *pangle = 0.0; + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + if (pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not 1 bpp", __func__, 1); + if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) + return ERROR_INT("reduction must be in {1,2,4,8}", __func__, 1); + + deg2rad = 3.1415926535f / 180.f; + ret = 0; + + /* Generate reduced image, if requested */ + if (reduction == 1) + pix = pixClone(pixs); + else if (reduction == 2) + pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); + else if (reduction == 4) + pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); + else /* reduction == 8 */ + pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); + + pixZero(pix, &bzero); + if (bzero) { + pixDestroy(&pix); + return 1; + } + + nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); + natheta = numaCreate(nangles); + nascore = numaCreate(nangles); + pixt = pixCreateTemplate(pix); + + if (!pix || !pixt) { + ret = ERROR_INT("pix and pixt not both made", __func__, 1); + goto cleanup; + } + if (!natheta || !nascore) { + ret = ERROR_INT("natheta and nascore not both made", __func__, 1); + goto cleanup; + } + + for (i = 0; i < nangles; i++) { + theta = -sweeprange + i * sweepdelta; /* degrees */ + + /* Shear pix about the UL corner and put the result in pixt */ + pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); + + /* Get score */ + pixFindDifferentialSquareSum(pixt, &sum); + +#if DEBUG_PRINT_SCORES + L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); +#endif /* DEBUG_PRINT_SCORES */ + + /* Save the result in the output arrays */ + numaAddNumber(nascore, sum); + numaAddNumber(natheta, theta); + } + + /* Find the location of the maximum (i.e., the skew angle) + * by fitting the largest data point and its two neighbors + * to a quadratic, using lagrangian interpolation. */ + numaFitMax(nascore, &maxscore, natheta, &maxangle); + *pangle = maxangle; + +#if DEBUG_PRINT_SWEEP + L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, + maxangle, maxscore); +#endif /* DEBUG_PRINT_SWEEP */ + +#if DEBUG_PLOT_SCORES + /* Plot the result -- the scores versus rotation angle -- + * using gnuplot with GPLOT_LINES (lines connecting data points). + * The GPLOT data structure is first created, with the + * appropriate data incorporated from the two input NUMAs, + * and then the function gplotMakeOutput() uses gnuplot to + * generate the output plot. This can be either a .png file + * or a .ps file, depending on whether you use GPLOT_PNG + * or GPLOT_PS. */ + {GPLOT *gplot; + gplot = gplotCreate("sweep_output", GPLOT_PNG, + "Sweep. Variance of difference of ON pixels vs. angle", + "angle (deg)", "score"); + gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); + gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + } +#endif /* DEBUG_PLOT_SCORES */ + +cleanup: + pixDestroy(&pix); + pixDestroy(&pixt); + numaDestroy(&nascore); + numaDestroy(&natheta); + return ret; +} + + +/*! + * \brief pixFindSkewSweepAndSearch() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew; in degrees + * \param[out] pconf confidence given by ratio of max/min score + * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 + * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; + * and must not exceed redsweep + * \param[in] sweeprange half the full range, assumed about 0; in degrees + * \param[in] sweepdelta angle increment of sweep; in degrees + * \param[in] minbsdelta min binary search increment angle; in degrees + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) This finds the skew angle, doing first a sweep through a set + * of equal angles, and then doing a binary search until + * convergence. + * (2) Caller must check the return value for validity of the result. + * (3) In computing the differential line sum variance score, we sum + * the result over scanlines, but we always skip: + * ~ at least one scanline + * ~ not more than 10% of the image height + * ~ not more than 5% of the image width + * (4) See also notes in pixFindSkewSweepAndSearchScore() + * </pre> + */ +l_ok +pixFindSkewSweepAndSearch(PIX *pixs, + l_float32 *pangle, + l_float32 *pconf, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta) +{ + return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, + redsweep, redsearch, 0.0, sweeprange, + sweepdelta, minbsdelta); +} + + +/*! + * \brief pixFindSkewSweepAndSearchScore() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew; in degrees + * \param[out] pconf confidence given by ratio of max/min score + * \param[out] pendscore [optional] max score; use NULL to ignore + * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 + * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; + * and must not exceed redsweep + * \param[in] sweepcenter angle about which sweep is performed; in degrees + * \param[in] sweeprange half the full range, taken about sweepcenter; + * in degrees + * \param[in] sweepdelta angle increment of sweep; in degrees + * \param[in] minbsdelta min binary search increment angle; in degrees + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) This finds the skew angle, doing first a sweep through a set + * of equal angles, and then doing a binary search until convergence. + * (2) There are two built-in constants that determine if the + * returned confidence is nonzero: + * ~ MinValidMaxscore (minimum allowed maxscore) + * ~ MinscoreThreshFactor (determines minimum allowed + * minscore, by multiplying by (height * width^2) + * If either of these conditions is not satisfied, the returned + * confidence value will be zero. The maxscore is optionally + * returned in this function to allow evaluation of the + * resulting angle by a method that is independent of the + * returned confidence value. + * (3) The larger the confidence value, the greater the probability + * that the proper alignment is given by the angle that maximizes + * variance. It should be compared to a threshold, which depends + * on the application. Values between 3.0 and 6.0 are common. + * (4) By default, the shear is about the UL corner. + * </pre> + */ +l_ok +pixFindSkewSweepAndSearchScore(PIX *pixs, + l_float32 *pangle, + l_float32 *pconf, + l_float32 *pendscore, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweepcenter, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta) +{ + return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, + redsweep, redsearch, 0.0, + sweeprange, sweepdelta, + minbsdelta, + L_SHEAR_ABOUT_CORNER); +} + + +/*! + * \brief pixFindSkewSweepAndSearchScorePivot() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew; in degrees + * \param[out] pconf confidence given by ratio of max/min score + * \param[out] pendscore [optional] max score; use NULL to ignore + * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 + * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; + * and must not exceed redsweep + * \param[in] sweepcenter angle about which sweep is performed; in degrees + * \param[in] sweeprange half the full range, taken about sweepcenter; + * in degrees + * \param[in] sweepdelta angle increment of sweep; in degrees + * \param[in] minbsdelta min binary search increment angle; in degrees + * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) See notes in pixFindSkewSweepAndSearchScore(). + * (2) This allows choice of shear pivoting from either the UL corner + * or the center. For small angles, the ability to discriminate + * angles is better with shearing from the UL corner. However, + * for large angles (say, greater than 20 degrees), it is better + * to shear about the center because a shear from the UL corner + * loses too much of the image. + * </pre> + */ +l_ok +pixFindSkewSweepAndSearchScorePivot(PIX *pixs, + l_float32 *pangle, + l_float32 *pconf, + l_float32 *pendscore, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweepcenter, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta, + l_int32 pivot) +{ +l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; +l_int32 width, height; +l_float32 deg2rad, theta, delta; +l_float32 sum, maxscore, maxangle; +l_float32 centerangle, leftcenterangle, rightcenterangle; +l_float32 lefttemp, righttemp; +l_float32 bsearchscore[5]; +l_float32 minscore, minthresh; +l_float32 rangeleft; +NUMA *natheta, *nascore; +PIX *pixsw, *pixsch, *pixt1, *pixt2; + + if (pendscore) *pendscore = 0.0; + if (pangle) *pangle = 0.0; + if (pconf) *pconf = 0.0; + if (!pangle || !pconf) + return ERROR_INT("&angle and/or &conf not defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) + return ERROR_INT("redsweep must be in {1,2,4,8}", __func__, 1); + if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) + return ERROR_INT("redsearch must be in {1,2,4,8}", __func__, 1); + if (redsearch > redsweep) + return ERROR_INT("redsearch must not exceed redsweep", __func__, 1); + if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) + return ERROR_INT("invalid pivot", __func__, 1); + + deg2rad = 3.1415926535f / 180.f; + ret = 0; + + /* Generate reduced image for binary search, if requested */ + if (redsearch == 1) + pixsch = pixClone(pixs); + else if (redsearch == 2) + pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); + else if (redsearch == 4) + pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); + else /* redsearch == 8 */ + pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); + + pixZero(pixsch, &bzero); + if (bzero) { + pixDestroy(&pixsch); + return 1; + } + + /* Generate reduced image for sweep, if requested */ + ratio = redsweep / redsearch; + if (ratio == 1) { + pixsw = pixClone(pixsch); + } else { /* ratio > 1 */ + if (ratio == 2) + pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); + else if (ratio == 4) + pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); + else /* ratio == 8 */ + pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); + } + + pixt1 = pixCreateTemplate(pixsw); + if (ratio == 1) + pixt2 = pixClone(pixt1); + else + pixt2 = pixCreateTemplate(pixsch); + + nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); + natheta = numaCreate(nangles); + nascore = numaCreate(nangles); + + if (!pixsch || !pixsw) { + ret = ERROR_INT("pixsch and pixsw not both made", __func__, 1); + goto cleanup; + } + if (!pixt1 || !pixt2) { + ret = ERROR_INT("pixt1 and pixt2 not both made", __func__, 1); + goto cleanup; + } + if (!natheta || !nascore) { + ret = ERROR_INT("natheta and nascore not both made", __func__, 1); + goto cleanup; + } + + /* Do sweep */ + rangeleft = sweepcenter - sweeprange; + for (i = 0; i < nangles; i++) { + theta = rangeleft + i * sweepdelta; /* degrees */ + + /* Shear pix and put the result in pixt1 */ + if (pivot == L_SHEAR_ABOUT_CORNER) + pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); + else + pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); + + /* Get score */ + pixFindDifferentialSquareSum(pixt1, &sum); + +#if DEBUG_PRINT_SCORES + L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); +#endif /* DEBUG_PRINT_SCORES */ + + /* Save the result in the output arrays */ + numaAddNumber(nascore, sum); + numaAddNumber(natheta, theta); + } + + /* Find the largest of the set (maxscore at maxangle) */ + numaGetMax(nascore, &maxscore, &maxindex); + numaGetFValue(natheta, maxindex, &maxangle); + +#if DEBUG_PRINT_SWEEP + L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, + maxangle, maxscore); +#endif /* DEBUG_PRINT_SWEEP */ + +#if DEBUG_PLOT_SCORES + /* Plot the sweep result -- the scores versus rotation angle -- + * using gnuplot with GPLOT_LINES (lines connecting data points). */ + {GPLOT *gplot; + gplot = gplotCreate("sweep_output", GPLOT_PNG, + "Sweep. Variance of difference of ON pixels vs. angle", + "angle (deg)", "score"); + gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); + gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + } +#endif /* DEBUG_PLOT_SCORES */ + + /* Check if the max is at the end of the sweep. */ + n = numaGetCount(natheta); + if (maxindex == 0 || maxindex == n - 1) { + L_WARNING("max found at sweep edge\n", __func__); + goto cleanup; + } + + /* Empty the numas for re-use */ + numaEmpty(nascore); + numaEmpty(natheta); + + /* Do binary search to find skew angle. + * First, set up initial three points. */ + centerangle = maxangle; + if (pivot == L_SHEAR_ABOUT_CORNER) { + pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); + pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); + pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); + } else { + pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); + pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); + pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); + } + + numaAddNumber(nascore, bsearchscore[2]); + numaAddNumber(natheta, centerangle); + numaAddNumber(nascore, bsearchscore[0]); + numaAddNumber(natheta, centerangle - sweepdelta); + numaAddNumber(nascore, bsearchscore[4]); + numaAddNumber(natheta, centerangle + sweepdelta); + + /* Start the search */ + delta = 0.5f * sweepdelta; + while (delta >= minbsdelta) + { + /* Get the left intermediate score */ + leftcenterangle = centerangle - delta; + if (pivot == L_SHEAR_ABOUT_CORNER) + pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, + L_BRING_IN_WHITE); + else + pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); + numaAddNumber(nascore, bsearchscore[1]); + numaAddNumber(natheta, leftcenterangle); + + /* Get the right intermediate score */ + rightcenterangle = centerangle + delta; + if (pivot == L_SHEAR_ABOUT_CORNER) + pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, + L_BRING_IN_WHITE); + else + pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, + L_BRING_IN_WHITE); + pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); + numaAddNumber(nascore, bsearchscore[3]); + numaAddNumber(natheta, rightcenterangle); + + /* Find the maximum of the five scores and its location. + * Note that the maximum must be in the center + * three values, not in the end two. */ + maxscore = bsearchscore[1]; + maxindex = 1; + for (i = 2; i < 4; i++) { + if (bsearchscore[i] > maxscore) { + maxscore = bsearchscore[i]; + maxindex = i; + } + } + + /* Set up score array to interpolate for the next iteration */ + lefttemp = bsearchscore[maxindex - 1]; + righttemp = bsearchscore[maxindex + 1]; + bsearchscore[2] = maxscore; + bsearchscore[0] = lefttemp; + bsearchscore[4] = righttemp; + + /* Get new center angle and delta for next iteration */ + centerangle = centerangle + delta * (maxindex - 2); + delta = 0.5f * delta; + } + *pangle = centerangle; + +#if DEBUG_PRINT_SCORES + L_INFO(" Binary search score = %7.3f\n", __func__, bsearchscore[2]); +#endif /* DEBUG_PRINT_SCORES */ + + if (pendscore) /* save if requested */ + *pendscore = bsearchscore[2]; + + /* Return the ratio of Max score over Min score + * as a confidence value. Don't trust if the Min score + * is too small, which can happen if the image is all black + * with only a few white pixels interspersed. In that case, + * we get a contribution from the top and bottom edges when + * vertically sheared, but this contribution becomes zero when + * the shear angle is zero. For zero shear angle, the only + * contribution will be from the white pixels. We expect that + * the signal goes as the product of the (height * width^2), + * so we compute a (hopefully) normalized minimum threshold as + * a function of these dimensions. */ + numaGetMin(nascore, &minscore, &minloc); + width = pixGetWidth(pixsch); + height = pixGetHeight(pixsch); + minthresh = MinscoreThreshFactor * width * width * height; + +#if DEBUG_THRESHOLD + L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", __func__, + minthresh, minscore); + L_INFO(" maxscore = %10.2f\n", __func__, maxscore); +#endif /* DEBUG_THRESHOLD */ + + if (minscore > minthresh) + *pconf = maxscore / minscore; + else + *pconf = 0.0; + + /* Don't trust it if too close to the edge of the sweep + * range or if maxscore is small */ + if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || + (centerangle < rangeleft + sweepdelta) || + (maxscore < MinValidMaxscore)) + *pconf = 0.0; + +#if DEBUG_PRINT_BINARY + lept_stderr("Binary search: angle = %7.3f, score ratio = %6.2f\n", + *pangle, *pconf); + lept_stderr(" max score = %8.0f\n", maxscore); +#endif /* DEBUG_PRINT_BINARY */ + +#if DEBUG_PLOT_SCORES + /* Plot the result -- the scores versus rotation angle -- + * using gnuplot with GPLOT_POINTS. Because the data + * points are not ordered by theta (increasing or decreasing), + * using GPLOT_LINES would be confusing! */ + {GPLOT *gplot; + gplot = gplotCreate("search_output", GPLOT_PNG, + "Binary search. Variance of difference of ON pixels vs. angle", + "angle (deg)", "score"); + gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); + gplotMakeOutput(gplot); + gplotDestroy(&gplot); + } +#endif /* DEBUG_PLOT_SCORES */ + +cleanup: + pixDestroy(&pixsw); + pixDestroy(&pixsch); + pixDestroy(&pixt1); + pixDestroy(&pixt2); + numaDestroy(&nascore); + numaDestroy(&natheta); + return ret; +} + + +/*---------------------------------------------------------------------* + * Search over arbitrary range of angles in orthogonal directions * + *---------------------------------------------------------------------*/ +/* + * \brief pixFindSkewOrthogonalRange() + * + * \param[in] pixs 1 bpp + * \param[out] pangle angle required to deskew; in degrees cw + * \param[out] pconf confidence given by ratio of max/min score + * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 + * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; + * and must not exceed redsweep + * \param[in] sweeprange half the full range in each orthogonal + * direction, taken about 0, in degrees + * \param[in] sweepdelta angle increment of sweep; in degrees + * \param[in] minbsdelta min binary search increment angle; in degrees + * \param[in] confprior amount by which confidence of 90 degree rotated + * result is reduced when comparing with unrotated + * confidence value + * \return 0 if OK, 1 on error or if angle measurement not valid + * + * <pre> + * Notes: + * (1) This searches for the skew angle, first in the range + * [-sweeprange, sweeprange], and then in + * [90 - sweeprange, 90 + sweeprange], with angles measured + * clockwise. For exploring the full range of possibilities, + * suggest using sweeprange = 47.0 degrees, giving some overlap + * at 45 and 135 degrees. From these results, and discounting + * the the second confidence by %confprior, it selects the + * angle for maximal differential variance. If the angle + * is larger than pi/4, the angle found after 90 degree rotation + * is selected. + * (2) The larger the confidence value, the greater the probability + * that the proper alignment is given by the angle that maximizes + * variance. It should be compared to a threshold, which depends + * on the application. Values between 3.0 and 6.0 are common. + * (3) Allowing for both portrait and landscape searches is more + * difficult, because if the signal from the text lines is weak, + * a signal from vertical rules can be larger! + * The most difficult documents to deskew have some or all of: + * (a) Multiple columns, not aligned + * (b) Black lines along the vertical edges + * (c) Text from two pages, and at different angles + * Rule of thumb for resolution: + * (a) If the margins are clean, you can work at 75 ppi, + * although 100 ppi is safer. + * (b) If there are vertical lines in the margins, do not + * work below 150 ppi. The signal from the text lines must + * exceed that from the margin lines. + * (4) Choosing the %confprior parameter depends on knowing something + * about the source of image. However, we're not using + * real probabilities here, so its use is qualitative. + * If landscape and portrait are equally likely, use + * %confprior = 0.0. If the likelihood of portrait (non-rotated) + * is 100 times higher than that of landscape, we want to reduce + * the chance that we rotate to landscape in a situation where + * the landscape signal is accidentally larger than the + * portrait signal. To do this use a positive value of + * %confprior; say 1.5. + * </pre> + */ +l_int32 +pixFindSkewOrthogonalRange(PIX *pixs, + l_float32 *pangle, + l_float32 *pconf, + l_int32 redsweep, + l_int32 redsearch, + l_float32 sweeprange, + l_float32 sweepdelta, + l_float32 minbsdelta, + l_float32 confprior) +{ +l_float32 angle1, conf1, score1, angle2, conf2, score2; +PIX *pixr; + + if (pangle) *pangle = 0.0; + if (pconf) *pconf = 0.0; + if (!pangle || !pconf) + return ERROR_INT("&angle and/or &conf not defined", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + + pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, + redsweep, redsearch, 0.0, + sweeprange, sweepdelta, minbsdelta, + L_SHEAR_ABOUT_CORNER); + pixr = pixRotateOrth(pixs, 1); + pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, + redsweep, redsearch, 0.0, + sweeprange, sweepdelta, minbsdelta, + L_SHEAR_ABOUT_CORNER); + pixDestroy(&pixr); + + if (conf1 > conf2 - confprior) { + *pangle = angle1; + *pconf = conf1; + } else { + *pangle = -90.0f + angle2; + *pconf = conf2; + } + +#if DEBUG_PRINT_ORTH + lept_stderr(" About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", + angle1, conf1, score1); + lept_stderr(" About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", + angle2, conf2, score2); + lept_stderr(" Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); +#endif /* DEBUG_PRINT_ORTH */ + + return 0; +} + + + +/*----------------------------------------------------------------* + * Differential square sum function * + *----------------------------------------------------------------*/ +/*! + * \brief pixFindDifferentialSquareSum() + * + * \param[in] pixs + * \param[out] psum result + * \return 0 if OK, 1 on error + * + * <pre> + * Notes: + * (1) At the top and bottom, we skip: + * ~ at least one scanline + * ~ not more than 10% of the image height + * ~ not more than 5% of the image width + * </pre> + */ +l_ok +pixFindDifferentialSquareSum(PIX *pixs, + l_float32 *psum) +{ +l_int32 i, n; +l_int32 w, h, skiph, skip, nskip; +l_float32 val1, val2, diff, sum; +NUMA *na; + + if (!psum) + return ERROR_INT("&sum not defined", __func__, 1); + *psum = 0.0; + if (!pixs) + return ERROR_INT("pixs not defined", __func__, 1); + + /* Generate a number array consisting of the sum + * of pixels in each row of pixs */ + if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) + return ERROR_INT("na not made", __func__, 1); + + /* Compute the number of rows at top and bottom to omit. + * We omit these to avoid getting a spurious signal from + * the top and bottom of a (nearly) all black image. */ + w = pixGetWidth(pixs); + h = pixGetHeight(pixs); + skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ + skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ + nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ + + /* Sum the squares of differential row sums, on the + * allowed rows. Note that nskip must be >= 1. */ + n = numaGetCount(na); + sum = 0.0; + for (i = nskip; i < n - nskip; i++) { + numaGetFValue(na, i - 1, &val1); + numaGetFValue(na, i, &val2); + diff = val2 - val1; + sum += diff * diff; + } + numaDestroy(&na); + *psum = sum; + return 0; +} + + +/*----------------------------------------------------------------* + * Normalized square sum * + *----------------------------------------------------------------*/ +/*! + * \brief pixFindNormalizedSquareSum() + * + * \param[in] pixs + * \param[out] phratio [optional] ratio of normalized horiz square sum + * to result if the pixel distribution were uniform + * \param[out] pvratio [optional] ratio of normalized vert square sum + * to result if the pixel distribution were uniform + * \param[out] pfract [optional] ratio of fg pixels to total pixels + * \return 0 if OK, 1 on error or if there are no fg pixels + * + * <pre> + * Notes: + * (1) Let the image have h scanlines and N fg pixels. + * If the pixels were uniformly distributed on scanlines, + * the sum of squares of fg pixels on each scanline would be + * h * (N / h)^2. However, if the pixels are not uniformly + * distributed (e.g., for text), the sum of squares of fg + * pixels will be larger. We return in hratio and vratio the + * ratio of these two values. + * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. + * </pre> + */ +l_ok +pixFindNormalizedSquareSum(PIX *pixs, + l_float32 *phratio, + l_float32 *pvratio, + l_float32 *pfract) +{ +l_int32 i, w, h, empty; +l_float32 sum, sumsq, uniform, val; +NUMA *na; +PIX *pixt; + + if (phratio) *phratio = 0.0; + if (pvratio) *pvratio = 0.0; + if (pfract) *pfract = 0.0; + if (!phratio && !pvratio) + return ERROR_INT("nothing to do", __func__, 1); + if (!pixs || pixGetDepth(pixs) != 1) + return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); + pixGetDimensions(pixs, &w, &h, NULL); + + empty = 0; + if (phratio) { + na = pixCountPixelsByRow(pixs, NULL); + numaGetSum(na, &sum); /* fg pixels */ + if (pfract) *pfract = sum / (l_float32)(w * h); + if (sum != 0.0) { + uniform = sum * sum / h; /* h*(sum / h)^2 */ + sumsq = 0.0; + for (i = 0; i < h; i++) { + numaGetFValue(na, i, &val); + sumsq += val * val; + } + *phratio = sumsq / uniform; + } else { + empty = 1; + } + numaDestroy(&na); + } + + if (pvratio) { + if (empty == 1) return 1; + pixt = pixRotateOrth(pixs, 1); + na = pixCountPixelsByRow(pixt, NULL); + numaGetSum(na, &sum); + if (pfract) *pfract = sum / (l_float32)(w * h); + if (sum != 0.0) { + uniform = sum * sum / w; + sumsq = 0.0; + for (i = 0; i < w; i++) { + numaGetFValue(na, i, &val); + sumsq += val * val; + } + *pvratio = sumsq / uniform; + } else { + empty = 1; + } + pixDestroy(&pixt); + numaDestroy(&na); + } + + return empty; +}
