Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/skew.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file skew.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Top-level deskew interfaces | |
| 32 * PIX *pixDeskewBoth() | |
| 33 * PIX *pixDeskew() | |
| 34 * PIX *pixFindSkewAndDeskew() | |
| 35 * PIX *pixDeskewGeneral() | |
| 36 * | |
| 37 * Top-level angle-finding interface | |
| 38 * l_int32 pixFindSkew() | |
| 39 * | |
| 40 * Basic angle-finding functions | |
| 41 * l_int32 pixFindSkewSweep() | |
| 42 * l_int32 pixFindSkewSweepAndSearch() | |
| 43 * l_int32 pixFindSkewSweepAndSearchScore() | |
| 44 * l_int32 pixFindSkewSweepAndSearchScorePivot() | |
| 45 * | |
| 46 * Search over arbitrary range of angles in orthogonal directions | |
| 47 * l_int32 pixFindSkewOrthogonalRange() | |
| 48 * | |
| 49 * Differential square sum function for scoring | |
| 50 * l_int32 pixFindDifferentialSquareSum() | |
| 51 * | |
| 52 * Measures of variance of row sums | |
| 53 * l_int32 pixFindNormalizedSquareSum() | |
| 54 * | |
| 55 * | |
| 56 * ============================================================== | |
| 57 * Page skew detection | |
| 58 * | |
| 59 * Skew is determined by pixel profiles, which are computed | |
| 60 * as pixel sums along the raster line for each line in the | |
| 61 * image. By vertically shearing the image by a given angle, | |
| 62 * the sums can be computed quickly along the raster lines | |
| 63 * rather than along lines at that angle. The score is | |
| 64 * computed from these line sums by taking the square of | |
| 65 * the DIFFERENCE between adjacent line sums, summed over | |
| 66 * all lines. The skew angle is then found as the angle | |
| 67 * that maximizes the score. The actual computation for | |
| 68 * any sheared image is done in the function | |
| 69 * pixFindDifferentialSquareSum(). | |
| 70 * | |
| 71 * The search for the angle that maximizes this score is | |
| 72 * most efficiently performed by first sweeping coarsely | |
| 73 * over angles, using a significantly reduced image (say, 4x | |
| 74 * reduction), to find the approximate maximum within a half | |
| 75 * degree or so, and then doing an interval-halving binary | |
| 76 * search at higher resolution to get the skew angle to | |
| 77 * within 1/20 degree or better. | |
| 78 * | |
| 79 * The differential signal is used (rather than just using | |
| 80 * that variance of line sums) because it rejects the | |
| 81 * background noise due to total number of black pixels, | |
| 82 * and has maximum contributions from the baselines and | |
| 83 * x-height lines of text when the textlines are aligned | |
| 84 * with the raster lines. It also works well in multicolumn | |
| 85 * pages where the textlines do not line up across columns. | |
| 86 * | |
| 87 * The method is fast, accurate to within an angle (in radians) | |
| 88 * of approximately the inverse width in pixels of the image, | |
| 89 * and will work on a surprisingly small amount of text data | |
| 90 * (just a couple of text lines). Consequently, it can | |
| 91 * also be used to find local skew if the skew were to vary | |
| 92 * significantly over the page. Local skew determination | |
| 93 * is not very important except for locating lines of | |
| 94 * handwritten text that may be mixed with printed text. | |
| 95 * </pre> | |
| 96 */ | |
| 97 | |
| 98 #ifdef HAVE_CONFIG_H | |
| 99 #include <config_auto.h> | |
| 100 #endif /* HAVE_CONFIG_H */ | |
| 101 | |
| 102 #include <math.h> | |
| 103 #include "allheaders.h" | |
| 104 | |
| 105 /* Default sweep angle parameters for pixFindSkew() */ | |
| 106 static const l_float32 DefaultSweepRange = 7.0; /* degrees */ | |
| 107 static const l_float32 DefaultSweepDelta = 1.0; /* degrees */ | |
| 108 | |
| 109 /* Default final angle difference parameter for binary | |
| 110 * search in pixFindSkew(). The expected accuracy is | |
| 111 * not better than the inverse image width in pixels, | |
| 112 * say, 1/2000 radians, or about 0.03 degrees. */ | |
| 113 static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ | |
| 114 | |
| 115 /* Default scale factors for pixFindSkew() */ | |
| 116 static const l_int32 DefaultSweepReduction = 4; /* sweep part; 4 is good */ | |
| 117 static const l_int32 DefaultBsReduction = 2; /* binary search part */ | |
| 118 | |
| 119 /* Minimum angle for deskewing in pixDeskew() */ | |
| 120 static const l_float32 MinDeskewAngle = 0.1f; /* degree */ | |
| 121 | |
| 122 /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */ | |
| 123 static const l_float32 MinAllowedConfidence = 3.0; | |
| 124 | |
| 125 /* Minimum allowed maxscore to give nonzero confidence */ | |
| 126 static const l_int32 MinValidMaxscore = 10000; | |
| 127 | |
| 128 /* Constant setting threshold for minimum allowed minscore | |
| 129 * to give nonzero confidence; multiply this constant by | |
| 130 * (height * width^2) */ | |
| 131 static const l_float32 MinscoreThreshFactor = 0.000002f; | |
| 132 | |
| 133 /* Default binarization threshold value */ | |
| 134 static const l_int32 DefaultBinaryThreshold = 130; | |
| 135 | |
| 136 #ifndef NO_CONSOLE_IO | |
| 137 #define DEBUG_PRINT_SCORES 0 | |
| 138 #define DEBUG_PRINT_SWEEP 0 | |
| 139 #define DEBUG_PRINT_BINARY 0 | |
| 140 #define DEBUG_PRINT_ORTH 0 | |
| 141 #define DEBUG_THRESHOLD 0 | |
| 142 #define DEBUG_PLOT_SCORES 0 /* requires the gnuplot executable */ | |
| 143 #endif /* ~NO_CONSOLE_IO */ | |
| 144 | |
| 145 | |
| 146 | |
| 147 /*-----------------------------------------------------------------------* | |
| 148 * Top-level deskew interfaces * | |
| 149 *-----------------------------------------------------------------------*/ | |
| 150 /*! | |
| 151 * \brief pixDeskewBoth() | |
| 152 * | |
| 153 * \param[in] pixs any depth | |
| 154 * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; | |
| 155 * use 0 for default | |
| 156 * \return pixd deskewed pix, or NULL on error | |
| 157 * | |
| 158 * <pre> | |
| 159 * Notes: | |
| 160 * (1) This binarizes if necessary and does both horizontal | |
| 161 * and vertical deskewing, using the default parameters in | |
| 162 * the underlying pixDeskew(). See usage there. | |
| 163 * </pre> | |
| 164 */ | |
| 165 PIX * | |
| 166 pixDeskewBoth(PIX *pixs, | |
| 167 l_int32 redsearch) | |
| 168 { | |
| 169 PIX *pix1, *pix2, *pix3, *pix4; | |
| 170 | |
| 171 if (!pixs) | |
| 172 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 173 if (redsearch == 0) | |
| 174 redsearch = DefaultBsReduction; | |
| 175 else if (redsearch != 1 && redsearch != 2 && redsearch != 4) | |
| 176 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); | |
| 177 | |
| 178 pix1 = pixDeskew(pixs, redsearch); | |
| 179 pix2 = pixRotate90(pix1, 1); | |
| 180 pix3 = pixDeskew(pix2, redsearch); | |
| 181 pix4 = pixRotate90(pix3, -1); | |
| 182 pixDestroy(&pix1); | |
| 183 pixDestroy(&pix2); | |
| 184 pixDestroy(&pix3); | |
| 185 return pix4; | |
| 186 } | |
| 187 | |
| 188 | |
| 189 /*! | |
| 190 * \brief pixDeskew() | |
| 191 * | |
| 192 * \param[in] pixs any depth | |
| 193 * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; | |
| 194 * use 0 for default | |
| 195 * \return pixd deskewed pix, or NULL on error | |
| 196 * | |
| 197 * <pre> | |
| 198 * Notes: | |
| 199 * (1) This binarizes if necessary and finds the skew angle. If the | |
| 200 * angle is large enough and there is sufficient confidence, | |
| 201 * it returns a deskewed image; otherwise, it returns a clone. | |
| 202 * (2) Typical values at 300 ppi for %redsearch are 2 and 4. | |
| 203 * At 75 ppi, one should use %redsearch = 1. | |
| 204 * </pre> | |
| 205 */ | |
| 206 PIX * | |
| 207 pixDeskew(PIX *pixs, | |
| 208 l_int32 redsearch) | |
| 209 { | |
| 210 if (!pixs) | |
| 211 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 212 if (redsearch == 0) | |
| 213 redsearch = DefaultBsReduction; | |
| 214 else if (redsearch != 1 && redsearch != 2 && redsearch != 4) | |
| 215 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); | |
| 216 | |
| 217 return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); | |
| 218 } | |
| 219 | |
| 220 | |
| 221 /*! | |
| 222 * \brief pixFindSkewAndDeskew() | |
| 223 * | |
| 224 * \param[in] pixs any depth | |
| 225 * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; | |
| 226 * use 0 for default | |
| 227 * \param[out] pangle [optional] angle required to deskew, | |
| 228 * in degrees; use NULL to skip | |
| 229 * \param[out] pconf [optional] conf value is ratio | |
| 230 * of max/min scores; use NULL to skip | |
| 231 * \return pixd deskewed pix, or NULL on error | |
| 232 * | |
| 233 * <pre> | |
| 234 * Notes: | |
| 235 * (1) This binarizes if necessary and finds the skew angle. If the | |
| 236 * angle is large enough and there is sufficient confidence, | |
| 237 * it returns a deskewed image; otherwise, it returns a clone. | |
| 238 * </pre> | |
| 239 */ | |
| 240 PIX * | |
| 241 pixFindSkewAndDeskew(PIX *pixs, | |
| 242 l_int32 redsearch, | |
| 243 l_float32 *pangle, | |
| 244 l_float32 *pconf) | |
| 245 { | |
| 246 if (!pixs) | |
| 247 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 248 if (redsearch == 0) | |
| 249 redsearch = DefaultBsReduction; | |
| 250 else if (redsearch != 1 && redsearch != 2 && redsearch != 4) | |
| 251 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); | |
| 252 | |
| 253 return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); | |
| 254 } | |
| 255 | |
| 256 | |
| 257 /*! | |
| 258 * \brief pixDeskewGeneral() | |
| 259 * | |
| 260 * \param[in] pixs any depth | |
| 261 * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; | |
| 262 * use 0 for default | |
| 263 * \param[in] sweeprange in degrees in each direction from 0; | |
| 264 * use 0.0 for default | |
| 265 * \param[in] sweepdelta in degrees; use 0.0 for default | |
| 266 * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; | |
| 267 * use 0 for default; | |
| 268 * \param[in] thresh for binarizing the image; use 0 for default | |
| 269 * \param[out] pangle [optional] angle required to deskew, | |
| 270 * in degrees; use NULL to skip | |
| 271 * \param[out] pconf [optional] conf value is ratio | |
| 272 * of max/min scores; use NULL to skip | |
| 273 * \return pixd deskewed pix, or NULL on error | |
| 274 * | |
| 275 * <pre> | |
| 276 * Notes: | |
| 277 * (1) This binarizes if necessary and finds the skew angle. If the | |
| 278 * angle is large enough and there is sufficient confidence, | |
| 279 * it returns a deskewed image; otherwise, it returns a clone. | |
| 280 * </pre> | |
| 281 */ | |
| 282 PIX * | |
| 283 pixDeskewGeneral(PIX *pixs, | |
| 284 l_int32 redsweep, | |
| 285 l_float32 sweeprange, | |
| 286 l_float32 sweepdelta, | |
| 287 l_int32 redsearch, | |
| 288 l_int32 thresh, | |
| 289 l_float32 *pangle, | |
| 290 l_float32 *pconf) | |
| 291 { | |
| 292 l_int32 ret, depth; | |
| 293 l_float32 angle, conf, deg2rad; | |
| 294 PIX *pixb, *pixd; | |
| 295 | |
| 296 if (pangle) *pangle = 0.0; | |
| 297 if (pconf) *pconf = 0.0; | |
| 298 if (!pixs) | |
| 299 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL); | |
| 300 if (redsweep == 0) | |
| 301 redsweep = DefaultSweepReduction; | |
| 302 else if (redsweep != 1 && redsweep != 2 && redsweep != 4) | |
| 303 return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", __func__, NULL); | |
| 304 if (sweeprange == 0.0) | |
| 305 sweeprange = DefaultSweepRange; | |
| 306 if (sweepdelta == 0.0) | |
| 307 sweepdelta = DefaultSweepDelta; | |
| 308 if (redsearch == 0) | |
| 309 redsearch = DefaultBsReduction; | |
| 310 else if (redsearch != 1 && redsearch != 2 && redsearch != 4) | |
| 311 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", __func__, NULL); | |
| 312 if (thresh == 0) | |
| 313 thresh = DefaultBinaryThreshold; | |
| 314 | |
| 315 deg2rad = 3.1415926535f / 180.f; | |
| 316 | |
| 317 /* Binarize if necessary */ | |
| 318 depth = pixGetDepth(pixs); | |
| 319 if (depth == 1) | |
| 320 pixb = pixClone(pixs); | |
| 321 else | |
| 322 pixb = pixConvertTo1(pixs, thresh); | |
| 323 | |
| 324 /* Use the 1 bpp image to find the skew */ | |
| 325 ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, | |
| 326 sweeprange, sweepdelta, | |
| 327 DefaultMinbsDelta); | |
| 328 pixDestroy(&pixb); | |
| 329 if (pangle) *pangle = angle; | |
| 330 if (pconf) *pconf = conf; | |
| 331 if (ret) | |
| 332 return pixClone(pixs); | |
| 333 | |
| 334 if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) | |
| 335 return pixClone(pixs); | |
| 336 | |
| 337 if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, | |
| 338 L_BRING_IN_WHITE, 0, 0)) == NULL) | |
| 339 return pixClone(pixs); | |
| 340 else | |
| 341 return pixd; | |
| 342 } | |
| 343 | |
| 344 | |
| 345 /*-----------------------------------------------------------------------* | |
| 346 * Simple top-level angle-finding interface * | |
| 347 *-----------------------------------------------------------------------*/ | |
| 348 /*! | |
| 349 * \brief pixFindSkew() | |
| 350 * | |
| 351 * \param[in] pixs 1 bpp | |
| 352 * \param[out] pangle angle required to deskew, in degrees | |
| 353 * \param[out] pconf confidence value is ratio max/min scores | |
| 354 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 355 * | |
| 356 * <pre> | |
| 357 * Notes: | |
| 358 * (1) This is a simple high-level interface, that uses default | |
| 359 * values of the parameters for reasonable speed and accuracy. | |
| 360 * (2) The angle returned is the negative of the skew angle of | |
| 361 * the image. It is the angle required for deskew. | |
| 362 * Clockwise rotations are positive angles. | |
| 363 * </pre> | |
| 364 */ | |
| 365 l_ok | |
| 366 pixFindSkew(PIX *pixs, | |
| 367 l_float32 *pangle, | |
| 368 l_float32 *pconf) | |
| 369 { | |
| 370 if (pangle) *pangle = 0.0; | |
| 371 if (pconf) *pconf = 0.0; | |
| 372 if (!pangle || !pconf) | |
| 373 return ERROR_INT("&angle and/or &conf not defined", __func__, 1); | |
| 374 if (!pixs) | |
| 375 return ERROR_INT("pixs not defined", __func__, 1); | |
| 376 if (pixGetDepth(pixs) != 1) | |
| 377 return ERROR_INT("pixs not 1 bpp", __func__, 1); | |
| 378 | |
| 379 return pixFindSkewSweepAndSearch(pixs, pangle, pconf, | |
| 380 DefaultSweepReduction, | |
| 381 DefaultBsReduction, | |
| 382 DefaultSweepRange, | |
| 383 DefaultSweepDelta, | |
| 384 DefaultMinbsDelta); | |
| 385 } | |
| 386 | |
| 387 | |
| 388 /*-----------------------------------------------------------------------* | |
| 389 * Basic angle-finding functions * | |
| 390 *-----------------------------------------------------------------------*/ | |
| 391 /*! | |
| 392 * \brief pixFindSkewSweep() | |
| 393 * | |
| 394 * \param[in] pixs 1 bpp | |
| 395 * \param[out] pangle angle required to deskew, in degrees | |
| 396 * \param[in] reduction factor = 1, 2, 4 or 8 | |
| 397 * \param[in] sweeprange half the full range; assumed about 0; in degrees | |
| 398 * \param[in] sweepdelta angle increment of sweep; in degrees | |
| 399 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 400 * | |
| 401 * <pre> | |
| 402 * Notes: | |
| 403 * (1) This examines the 'score' for skew angles with equal intervals. | |
| 404 * (2) Caller must check the return value for validity of the result. | |
| 405 * </pre> | |
| 406 */ | |
| 407 l_ok | |
| 408 pixFindSkewSweep(PIX *pixs, | |
| 409 l_float32 *pangle, | |
| 410 l_int32 reduction, | |
| 411 l_float32 sweeprange, | |
| 412 l_float32 sweepdelta) | |
| 413 { | |
| 414 l_int32 ret, bzero, i, nangles; | |
| 415 l_float32 deg2rad, theta; | |
| 416 l_float32 sum, maxscore, maxangle; | |
| 417 NUMA *natheta, *nascore; | |
| 418 PIX *pix, *pixt; | |
| 419 | |
| 420 if (!pangle) | |
| 421 return ERROR_INT("&angle not defined", __func__, 1); | |
| 422 *pangle = 0.0; | |
| 423 if (!pixs) | |
| 424 return ERROR_INT("pixs not defined", __func__, 1); | |
| 425 if (pixGetDepth(pixs) != 1) | |
| 426 return ERROR_INT("pixs not 1 bpp", __func__, 1); | |
| 427 if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) | |
| 428 return ERROR_INT("reduction must be in {1,2,4,8}", __func__, 1); | |
| 429 | |
| 430 deg2rad = 3.1415926535f / 180.f; | |
| 431 ret = 0; | |
| 432 | |
| 433 /* Generate reduced image, if requested */ | |
| 434 if (reduction == 1) | |
| 435 pix = pixClone(pixs); | |
| 436 else if (reduction == 2) | |
| 437 pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); | |
| 438 else if (reduction == 4) | |
| 439 pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); | |
| 440 else /* reduction == 8 */ | |
| 441 pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); | |
| 442 | |
| 443 pixZero(pix, &bzero); | |
| 444 if (bzero) { | |
| 445 pixDestroy(&pix); | |
| 446 return 1; | |
| 447 } | |
| 448 | |
| 449 nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); | |
| 450 natheta = numaCreate(nangles); | |
| 451 nascore = numaCreate(nangles); | |
| 452 pixt = pixCreateTemplate(pix); | |
| 453 | |
| 454 if (!pix || !pixt) { | |
| 455 ret = ERROR_INT("pix and pixt not both made", __func__, 1); | |
| 456 goto cleanup; | |
| 457 } | |
| 458 if (!natheta || !nascore) { | |
| 459 ret = ERROR_INT("natheta and nascore not both made", __func__, 1); | |
| 460 goto cleanup; | |
| 461 } | |
| 462 | |
| 463 for (i = 0; i < nangles; i++) { | |
| 464 theta = -sweeprange + i * sweepdelta; /* degrees */ | |
| 465 | |
| 466 /* Shear pix about the UL corner and put the result in pixt */ | |
| 467 pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); | |
| 468 | |
| 469 /* Get score */ | |
| 470 pixFindDifferentialSquareSum(pixt, &sum); | |
| 471 | |
| 472 #if DEBUG_PRINT_SCORES | |
| 473 L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); | |
| 474 #endif /* DEBUG_PRINT_SCORES */ | |
| 475 | |
| 476 /* Save the result in the output arrays */ | |
| 477 numaAddNumber(nascore, sum); | |
| 478 numaAddNumber(natheta, theta); | |
| 479 } | |
| 480 | |
| 481 /* Find the location of the maximum (i.e., the skew angle) | |
| 482 * by fitting the largest data point and its two neighbors | |
| 483 * to a quadratic, using lagrangian interpolation. */ | |
| 484 numaFitMax(nascore, &maxscore, natheta, &maxangle); | |
| 485 *pangle = maxangle; | |
| 486 | |
| 487 #if DEBUG_PRINT_SWEEP | |
| 488 L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, | |
| 489 maxangle, maxscore); | |
| 490 #endif /* DEBUG_PRINT_SWEEP */ | |
| 491 | |
| 492 #if DEBUG_PLOT_SCORES | |
| 493 /* Plot the result -- the scores versus rotation angle -- | |
| 494 * using gnuplot with GPLOT_LINES (lines connecting data points). | |
| 495 * The GPLOT data structure is first created, with the | |
| 496 * appropriate data incorporated from the two input NUMAs, | |
| 497 * and then the function gplotMakeOutput() uses gnuplot to | |
| 498 * generate the output plot. This can be either a .png file | |
| 499 * or a .ps file, depending on whether you use GPLOT_PNG | |
| 500 * or GPLOT_PS. */ | |
| 501 {GPLOT *gplot; | |
| 502 gplot = gplotCreate("sweep_output", GPLOT_PNG, | |
| 503 "Sweep. Variance of difference of ON pixels vs. angle", | |
| 504 "angle (deg)", "score"); | |
| 505 gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); | |
| 506 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); | |
| 507 gplotMakeOutput(gplot); | |
| 508 gplotDestroy(&gplot); | |
| 509 } | |
| 510 #endif /* DEBUG_PLOT_SCORES */ | |
| 511 | |
| 512 cleanup: | |
| 513 pixDestroy(&pix); | |
| 514 pixDestroy(&pixt); | |
| 515 numaDestroy(&nascore); | |
| 516 numaDestroy(&natheta); | |
| 517 return ret; | |
| 518 } | |
| 519 | |
| 520 | |
| 521 /*! | |
| 522 * \brief pixFindSkewSweepAndSearch() | |
| 523 * | |
| 524 * \param[in] pixs 1 bpp | |
| 525 * \param[out] pangle angle required to deskew; in degrees | |
| 526 * \param[out] pconf confidence given by ratio of max/min score | |
| 527 * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 | |
| 528 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; | |
| 529 * and must not exceed redsweep | |
| 530 * \param[in] sweeprange half the full range, assumed about 0; in degrees | |
| 531 * \param[in] sweepdelta angle increment of sweep; in degrees | |
| 532 * \param[in] minbsdelta min binary search increment angle; in degrees | |
| 533 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 534 * | |
| 535 * <pre> | |
| 536 * Notes: | |
| 537 * (1) This finds the skew angle, doing first a sweep through a set | |
| 538 * of equal angles, and then doing a binary search until | |
| 539 * convergence. | |
| 540 * (2) Caller must check the return value for validity of the result. | |
| 541 * (3) In computing the differential line sum variance score, we sum | |
| 542 * the result over scanlines, but we always skip: | |
| 543 * ~ at least one scanline | |
| 544 * ~ not more than 10% of the image height | |
| 545 * ~ not more than 5% of the image width | |
| 546 * (4) See also notes in pixFindSkewSweepAndSearchScore() | |
| 547 * </pre> | |
| 548 */ | |
| 549 l_ok | |
| 550 pixFindSkewSweepAndSearch(PIX *pixs, | |
| 551 l_float32 *pangle, | |
| 552 l_float32 *pconf, | |
| 553 l_int32 redsweep, | |
| 554 l_int32 redsearch, | |
| 555 l_float32 sweeprange, | |
| 556 l_float32 sweepdelta, | |
| 557 l_float32 minbsdelta) | |
| 558 { | |
| 559 return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, | |
| 560 redsweep, redsearch, 0.0, sweeprange, | |
| 561 sweepdelta, minbsdelta); | |
| 562 } | |
| 563 | |
| 564 | |
| 565 /*! | |
| 566 * \brief pixFindSkewSweepAndSearchScore() | |
| 567 * | |
| 568 * \param[in] pixs 1 bpp | |
| 569 * \param[out] pangle angle required to deskew; in degrees | |
| 570 * \param[out] pconf confidence given by ratio of max/min score | |
| 571 * \param[out] pendscore [optional] max score; use NULL to ignore | |
| 572 * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 | |
| 573 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; | |
| 574 * and must not exceed redsweep | |
| 575 * \param[in] sweepcenter angle about which sweep is performed; in degrees | |
| 576 * \param[in] sweeprange half the full range, taken about sweepcenter; | |
| 577 * in degrees | |
| 578 * \param[in] sweepdelta angle increment of sweep; in degrees | |
| 579 * \param[in] minbsdelta min binary search increment angle; in degrees | |
| 580 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 581 * | |
| 582 * <pre> | |
| 583 * Notes: | |
| 584 * (1) This finds the skew angle, doing first a sweep through a set | |
| 585 * of equal angles, and then doing a binary search until convergence. | |
| 586 * (2) There are two built-in constants that determine if the | |
| 587 * returned confidence is nonzero: | |
| 588 * ~ MinValidMaxscore (minimum allowed maxscore) | |
| 589 * ~ MinscoreThreshFactor (determines minimum allowed | |
| 590 * minscore, by multiplying by (height * width^2) | |
| 591 * If either of these conditions is not satisfied, the returned | |
| 592 * confidence value will be zero. The maxscore is optionally | |
| 593 * returned in this function to allow evaluation of the | |
| 594 * resulting angle by a method that is independent of the | |
| 595 * returned confidence value. | |
| 596 * (3) The larger the confidence value, the greater the probability | |
| 597 * that the proper alignment is given by the angle that maximizes | |
| 598 * variance. It should be compared to a threshold, which depends | |
| 599 * on the application. Values between 3.0 and 6.0 are common. | |
| 600 * (4) By default, the shear is about the UL corner. | |
| 601 * </pre> | |
| 602 */ | |
| 603 l_ok | |
| 604 pixFindSkewSweepAndSearchScore(PIX *pixs, | |
| 605 l_float32 *pangle, | |
| 606 l_float32 *pconf, | |
| 607 l_float32 *pendscore, | |
| 608 l_int32 redsweep, | |
| 609 l_int32 redsearch, | |
| 610 l_float32 sweepcenter, | |
| 611 l_float32 sweeprange, | |
| 612 l_float32 sweepdelta, | |
| 613 l_float32 minbsdelta) | |
| 614 { | |
| 615 return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, | |
| 616 redsweep, redsearch, 0.0, | |
| 617 sweeprange, sweepdelta, | |
| 618 minbsdelta, | |
| 619 L_SHEAR_ABOUT_CORNER); | |
| 620 } | |
| 621 | |
| 622 | |
| 623 /*! | |
| 624 * \brief pixFindSkewSweepAndSearchScorePivot() | |
| 625 * | |
| 626 * \param[in] pixs 1 bpp | |
| 627 * \param[out] pangle angle required to deskew; in degrees | |
| 628 * \param[out] pconf confidence given by ratio of max/min score | |
| 629 * \param[out] pendscore [optional] max score; use NULL to ignore | |
| 630 * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 | |
| 631 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; | |
| 632 * and must not exceed redsweep | |
| 633 * \param[in] sweepcenter angle about which sweep is performed; in degrees | |
| 634 * \param[in] sweeprange half the full range, taken about sweepcenter; | |
| 635 * in degrees | |
| 636 * \param[in] sweepdelta angle increment of sweep; in degrees | |
| 637 * \param[in] minbsdelta min binary search increment angle; in degrees | |
| 638 * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER | |
| 639 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 640 * | |
| 641 * <pre> | |
| 642 * Notes: | |
| 643 * (1) See notes in pixFindSkewSweepAndSearchScore(). | |
| 644 * (2) This allows choice of shear pivoting from either the UL corner | |
| 645 * or the center. For small angles, the ability to discriminate | |
| 646 * angles is better with shearing from the UL corner. However, | |
| 647 * for large angles (say, greater than 20 degrees), it is better | |
| 648 * to shear about the center because a shear from the UL corner | |
| 649 * loses too much of the image. | |
| 650 * </pre> | |
| 651 */ | |
| 652 l_ok | |
| 653 pixFindSkewSweepAndSearchScorePivot(PIX *pixs, | |
| 654 l_float32 *pangle, | |
| 655 l_float32 *pconf, | |
| 656 l_float32 *pendscore, | |
| 657 l_int32 redsweep, | |
| 658 l_int32 redsearch, | |
| 659 l_float32 sweepcenter, | |
| 660 l_float32 sweeprange, | |
| 661 l_float32 sweepdelta, | |
| 662 l_float32 minbsdelta, | |
| 663 l_int32 pivot) | |
| 664 { | |
| 665 l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; | |
| 666 l_int32 width, height; | |
| 667 l_float32 deg2rad, theta, delta; | |
| 668 l_float32 sum, maxscore, maxangle; | |
| 669 l_float32 centerangle, leftcenterangle, rightcenterangle; | |
| 670 l_float32 lefttemp, righttemp; | |
| 671 l_float32 bsearchscore[5]; | |
| 672 l_float32 minscore, minthresh; | |
| 673 l_float32 rangeleft; | |
| 674 NUMA *natheta, *nascore; | |
| 675 PIX *pixsw, *pixsch, *pixt1, *pixt2; | |
| 676 | |
| 677 if (pendscore) *pendscore = 0.0; | |
| 678 if (pangle) *pangle = 0.0; | |
| 679 if (pconf) *pconf = 0.0; | |
| 680 if (!pangle || !pconf) | |
| 681 return ERROR_INT("&angle and/or &conf not defined", __func__, 1); | |
| 682 if (!pixs || pixGetDepth(pixs) != 1) | |
| 683 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 684 if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) | |
| 685 return ERROR_INT("redsweep must be in {1,2,4,8}", __func__, 1); | |
| 686 if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) | |
| 687 return ERROR_INT("redsearch must be in {1,2,4,8}", __func__, 1); | |
| 688 if (redsearch > redsweep) | |
| 689 return ERROR_INT("redsearch must not exceed redsweep", __func__, 1); | |
| 690 if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) | |
| 691 return ERROR_INT("invalid pivot", __func__, 1); | |
| 692 | |
| 693 deg2rad = 3.1415926535f / 180.f; | |
| 694 ret = 0; | |
| 695 | |
| 696 /* Generate reduced image for binary search, if requested */ | |
| 697 if (redsearch == 1) | |
| 698 pixsch = pixClone(pixs); | |
| 699 else if (redsearch == 2) | |
| 700 pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); | |
| 701 else if (redsearch == 4) | |
| 702 pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); | |
| 703 else /* redsearch == 8 */ | |
| 704 pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); | |
| 705 | |
| 706 pixZero(pixsch, &bzero); | |
| 707 if (bzero) { | |
| 708 pixDestroy(&pixsch); | |
| 709 return 1; | |
| 710 } | |
| 711 | |
| 712 /* Generate reduced image for sweep, if requested */ | |
| 713 ratio = redsweep / redsearch; | |
| 714 if (ratio == 1) { | |
| 715 pixsw = pixClone(pixsch); | |
| 716 } else { /* ratio > 1 */ | |
| 717 if (ratio == 2) | |
| 718 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); | |
| 719 else if (ratio == 4) | |
| 720 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); | |
| 721 else /* ratio == 8 */ | |
| 722 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); | |
| 723 } | |
| 724 | |
| 725 pixt1 = pixCreateTemplate(pixsw); | |
| 726 if (ratio == 1) | |
| 727 pixt2 = pixClone(pixt1); | |
| 728 else | |
| 729 pixt2 = pixCreateTemplate(pixsch); | |
| 730 | |
| 731 nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); | |
| 732 natheta = numaCreate(nangles); | |
| 733 nascore = numaCreate(nangles); | |
| 734 | |
| 735 if (!pixsch || !pixsw) { | |
| 736 ret = ERROR_INT("pixsch and pixsw not both made", __func__, 1); | |
| 737 goto cleanup; | |
| 738 } | |
| 739 if (!pixt1 || !pixt2) { | |
| 740 ret = ERROR_INT("pixt1 and pixt2 not both made", __func__, 1); | |
| 741 goto cleanup; | |
| 742 } | |
| 743 if (!natheta || !nascore) { | |
| 744 ret = ERROR_INT("natheta and nascore not both made", __func__, 1); | |
| 745 goto cleanup; | |
| 746 } | |
| 747 | |
| 748 /* Do sweep */ | |
| 749 rangeleft = sweepcenter - sweeprange; | |
| 750 for (i = 0; i < nangles; i++) { | |
| 751 theta = rangeleft + i * sweepdelta; /* degrees */ | |
| 752 | |
| 753 /* Shear pix and put the result in pixt1 */ | |
| 754 if (pivot == L_SHEAR_ABOUT_CORNER) | |
| 755 pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); | |
| 756 else | |
| 757 pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); | |
| 758 | |
| 759 /* Get score */ | |
| 760 pixFindDifferentialSquareSum(pixt1, &sum); | |
| 761 | |
| 762 #if DEBUG_PRINT_SCORES | |
| 763 L_INFO("sum(%7.2f) = %7.0f\n", __func__, theta, sum); | |
| 764 #endif /* DEBUG_PRINT_SCORES */ | |
| 765 | |
| 766 /* Save the result in the output arrays */ | |
| 767 numaAddNumber(nascore, sum); | |
| 768 numaAddNumber(natheta, theta); | |
| 769 } | |
| 770 | |
| 771 /* Find the largest of the set (maxscore at maxangle) */ | |
| 772 numaGetMax(nascore, &maxscore, &maxindex); | |
| 773 numaGetFValue(natheta, maxindex, &maxangle); | |
| 774 | |
| 775 #if DEBUG_PRINT_SWEEP | |
| 776 L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", __func__, | |
| 777 maxangle, maxscore); | |
| 778 #endif /* DEBUG_PRINT_SWEEP */ | |
| 779 | |
| 780 #if DEBUG_PLOT_SCORES | |
| 781 /* Plot the sweep result -- the scores versus rotation angle -- | |
| 782 * using gnuplot with GPLOT_LINES (lines connecting data points). */ | |
| 783 {GPLOT *gplot; | |
| 784 gplot = gplotCreate("sweep_output", GPLOT_PNG, | |
| 785 "Sweep. Variance of difference of ON pixels vs. angle", | |
| 786 "angle (deg)", "score"); | |
| 787 gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); | |
| 788 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); | |
| 789 gplotMakeOutput(gplot); | |
| 790 gplotDestroy(&gplot); | |
| 791 } | |
| 792 #endif /* DEBUG_PLOT_SCORES */ | |
| 793 | |
| 794 /* Check if the max is at the end of the sweep. */ | |
| 795 n = numaGetCount(natheta); | |
| 796 if (maxindex == 0 || maxindex == n - 1) { | |
| 797 L_WARNING("max found at sweep edge\n", __func__); | |
| 798 goto cleanup; | |
| 799 } | |
| 800 | |
| 801 /* Empty the numas for re-use */ | |
| 802 numaEmpty(nascore); | |
| 803 numaEmpty(natheta); | |
| 804 | |
| 805 /* Do binary search to find skew angle. | |
| 806 * First, set up initial three points. */ | |
| 807 centerangle = maxangle; | |
| 808 if (pivot == L_SHEAR_ABOUT_CORNER) { | |
| 809 pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); | |
| 810 pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); | |
| 811 pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), | |
| 812 L_BRING_IN_WHITE); | |
| 813 pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); | |
| 814 pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), | |
| 815 L_BRING_IN_WHITE); | |
| 816 pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); | |
| 817 } else { | |
| 818 pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); | |
| 819 pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); | |
| 820 pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), | |
| 821 L_BRING_IN_WHITE); | |
| 822 pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); | |
| 823 pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), | |
| 824 L_BRING_IN_WHITE); | |
| 825 pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); | |
| 826 } | |
| 827 | |
| 828 numaAddNumber(nascore, bsearchscore[2]); | |
| 829 numaAddNumber(natheta, centerangle); | |
| 830 numaAddNumber(nascore, bsearchscore[0]); | |
| 831 numaAddNumber(natheta, centerangle - sweepdelta); | |
| 832 numaAddNumber(nascore, bsearchscore[4]); | |
| 833 numaAddNumber(natheta, centerangle + sweepdelta); | |
| 834 | |
| 835 /* Start the search */ | |
| 836 delta = 0.5f * sweepdelta; | |
| 837 while (delta >= minbsdelta) | |
| 838 { | |
| 839 /* Get the left intermediate score */ | |
| 840 leftcenterangle = centerangle - delta; | |
| 841 if (pivot == L_SHEAR_ABOUT_CORNER) | |
| 842 pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, | |
| 843 L_BRING_IN_WHITE); | |
| 844 else | |
| 845 pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, | |
| 846 L_BRING_IN_WHITE); | |
| 847 pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); | |
| 848 numaAddNumber(nascore, bsearchscore[1]); | |
| 849 numaAddNumber(natheta, leftcenterangle); | |
| 850 | |
| 851 /* Get the right intermediate score */ | |
| 852 rightcenterangle = centerangle + delta; | |
| 853 if (pivot == L_SHEAR_ABOUT_CORNER) | |
| 854 pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, | |
| 855 L_BRING_IN_WHITE); | |
| 856 else | |
| 857 pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, | |
| 858 L_BRING_IN_WHITE); | |
| 859 pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); | |
| 860 numaAddNumber(nascore, bsearchscore[3]); | |
| 861 numaAddNumber(natheta, rightcenterangle); | |
| 862 | |
| 863 /* Find the maximum of the five scores and its location. | |
| 864 * Note that the maximum must be in the center | |
| 865 * three values, not in the end two. */ | |
| 866 maxscore = bsearchscore[1]; | |
| 867 maxindex = 1; | |
| 868 for (i = 2; i < 4; i++) { | |
| 869 if (bsearchscore[i] > maxscore) { | |
| 870 maxscore = bsearchscore[i]; | |
| 871 maxindex = i; | |
| 872 } | |
| 873 } | |
| 874 | |
| 875 /* Set up score array to interpolate for the next iteration */ | |
| 876 lefttemp = bsearchscore[maxindex - 1]; | |
| 877 righttemp = bsearchscore[maxindex + 1]; | |
| 878 bsearchscore[2] = maxscore; | |
| 879 bsearchscore[0] = lefttemp; | |
| 880 bsearchscore[4] = righttemp; | |
| 881 | |
| 882 /* Get new center angle and delta for next iteration */ | |
| 883 centerangle = centerangle + delta * (maxindex - 2); | |
| 884 delta = 0.5f * delta; | |
| 885 } | |
| 886 *pangle = centerangle; | |
| 887 | |
| 888 #if DEBUG_PRINT_SCORES | |
| 889 L_INFO(" Binary search score = %7.3f\n", __func__, bsearchscore[2]); | |
| 890 #endif /* DEBUG_PRINT_SCORES */ | |
| 891 | |
| 892 if (pendscore) /* save if requested */ | |
| 893 *pendscore = bsearchscore[2]; | |
| 894 | |
| 895 /* Return the ratio of Max score over Min score | |
| 896 * as a confidence value. Don't trust if the Min score | |
| 897 * is too small, which can happen if the image is all black | |
| 898 * with only a few white pixels interspersed. In that case, | |
| 899 * we get a contribution from the top and bottom edges when | |
| 900 * vertically sheared, but this contribution becomes zero when | |
| 901 * the shear angle is zero. For zero shear angle, the only | |
| 902 * contribution will be from the white pixels. We expect that | |
| 903 * the signal goes as the product of the (height * width^2), | |
| 904 * so we compute a (hopefully) normalized minimum threshold as | |
| 905 * a function of these dimensions. */ | |
| 906 numaGetMin(nascore, &minscore, &minloc); | |
| 907 width = pixGetWidth(pixsch); | |
| 908 height = pixGetHeight(pixsch); | |
| 909 minthresh = MinscoreThreshFactor * width * width * height; | |
| 910 | |
| 911 #if DEBUG_THRESHOLD | |
| 912 L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", __func__, | |
| 913 minthresh, minscore); | |
| 914 L_INFO(" maxscore = %10.2f\n", __func__, maxscore); | |
| 915 #endif /* DEBUG_THRESHOLD */ | |
| 916 | |
| 917 if (minscore > minthresh) | |
| 918 *pconf = maxscore / minscore; | |
| 919 else | |
| 920 *pconf = 0.0; | |
| 921 | |
| 922 /* Don't trust it if too close to the edge of the sweep | |
| 923 * range or if maxscore is small */ | |
| 924 if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || | |
| 925 (centerangle < rangeleft + sweepdelta) || | |
| 926 (maxscore < MinValidMaxscore)) | |
| 927 *pconf = 0.0; | |
| 928 | |
| 929 #if DEBUG_PRINT_BINARY | |
| 930 lept_stderr("Binary search: angle = %7.3f, score ratio = %6.2f\n", | |
| 931 *pangle, *pconf); | |
| 932 lept_stderr(" max score = %8.0f\n", maxscore); | |
| 933 #endif /* DEBUG_PRINT_BINARY */ | |
| 934 | |
| 935 #if DEBUG_PLOT_SCORES | |
| 936 /* Plot the result -- the scores versus rotation angle -- | |
| 937 * using gnuplot with GPLOT_POINTS. Because the data | |
| 938 * points are not ordered by theta (increasing or decreasing), | |
| 939 * using GPLOT_LINES would be confusing! */ | |
| 940 {GPLOT *gplot; | |
| 941 gplot = gplotCreate("search_output", GPLOT_PNG, | |
| 942 "Binary search. Variance of difference of ON pixels vs. angle", | |
| 943 "angle (deg)", "score"); | |
| 944 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); | |
| 945 gplotMakeOutput(gplot); | |
| 946 gplotDestroy(&gplot); | |
| 947 } | |
| 948 #endif /* DEBUG_PLOT_SCORES */ | |
| 949 | |
| 950 cleanup: | |
| 951 pixDestroy(&pixsw); | |
| 952 pixDestroy(&pixsch); | |
| 953 pixDestroy(&pixt1); | |
| 954 pixDestroy(&pixt2); | |
| 955 numaDestroy(&nascore); | |
| 956 numaDestroy(&natheta); | |
| 957 return ret; | |
| 958 } | |
| 959 | |
| 960 | |
| 961 /*---------------------------------------------------------------------* | |
| 962 * Search over arbitrary range of angles in orthogonal directions * | |
| 963 *---------------------------------------------------------------------*/ | |
| 964 /* | |
| 965 * \brief pixFindSkewOrthogonalRange() | |
| 966 * | |
| 967 * \param[in] pixs 1 bpp | |
| 968 * \param[out] pangle angle required to deskew; in degrees cw | |
| 969 * \param[out] pconf confidence given by ratio of max/min score | |
| 970 * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 | |
| 971 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; | |
| 972 * and must not exceed redsweep | |
| 973 * \param[in] sweeprange half the full range in each orthogonal | |
| 974 * direction, taken about 0, in degrees | |
| 975 * \param[in] sweepdelta angle increment of sweep; in degrees | |
| 976 * \param[in] minbsdelta min binary search increment angle; in degrees | |
| 977 * \param[in] confprior amount by which confidence of 90 degree rotated | |
| 978 * result is reduced when comparing with unrotated | |
| 979 * confidence value | |
| 980 * \return 0 if OK, 1 on error or if angle measurement not valid | |
| 981 * | |
| 982 * <pre> | |
| 983 * Notes: | |
| 984 * (1) This searches for the skew angle, first in the range | |
| 985 * [-sweeprange, sweeprange], and then in | |
| 986 * [90 - sweeprange, 90 + sweeprange], with angles measured | |
| 987 * clockwise. For exploring the full range of possibilities, | |
| 988 * suggest using sweeprange = 47.0 degrees, giving some overlap | |
| 989 * at 45 and 135 degrees. From these results, and discounting | |
| 990 * the the second confidence by %confprior, it selects the | |
| 991 * angle for maximal differential variance. If the angle | |
| 992 * is larger than pi/4, the angle found after 90 degree rotation | |
| 993 * is selected. | |
| 994 * (2) The larger the confidence value, the greater the probability | |
| 995 * that the proper alignment is given by the angle that maximizes | |
| 996 * variance. It should be compared to a threshold, which depends | |
| 997 * on the application. Values between 3.0 and 6.0 are common. | |
| 998 * (3) Allowing for both portrait and landscape searches is more | |
| 999 * difficult, because if the signal from the text lines is weak, | |
| 1000 * a signal from vertical rules can be larger! | |
| 1001 * The most difficult documents to deskew have some or all of: | |
| 1002 * (a) Multiple columns, not aligned | |
| 1003 * (b) Black lines along the vertical edges | |
| 1004 * (c) Text from two pages, and at different angles | |
| 1005 * Rule of thumb for resolution: | |
| 1006 * (a) If the margins are clean, you can work at 75 ppi, | |
| 1007 * although 100 ppi is safer. | |
| 1008 * (b) If there are vertical lines in the margins, do not | |
| 1009 * work below 150 ppi. The signal from the text lines must | |
| 1010 * exceed that from the margin lines. | |
| 1011 * (4) Choosing the %confprior parameter depends on knowing something | |
| 1012 * about the source of image. However, we're not using | |
| 1013 * real probabilities here, so its use is qualitative. | |
| 1014 * If landscape and portrait are equally likely, use | |
| 1015 * %confprior = 0.0. If the likelihood of portrait (non-rotated) | |
| 1016 * is 100 times higher than that of landscape, we want to reduce | |
| 1017 * the chance that we rotate to landscape in a situation where | |
| 1018 * the landscape signal is accidentally larger than the | |
| 1019 * portrait signal. To do this use a positive value of | |
| 1020 * %confprior; say 1.5. | |
| 1021 * </pre> | |
| 1022 */ | |
| 1023 l_int32 | |
| 1024 pixFindSkewOrthogonalRange(PIX *pixs, | |
| 1025 l_float32 *pangle, | |
| 1026 l_float32 *pconf, | |
| 1027 l_int32 redsweep, | |
| 1028 l_int32 redsearch, | |
| 1029 l_float32 sweeprange, | |
| 1030 l_float32 sweepdelta, | |
| 1031 l_float32 minbsdelta, | |
| 1032 l_float32 confprior) | |
| 1033 { | |
| 1034 l_float32 angle1, conf1, score1, angle2, conf2, score2; | |
| 1035 PIX *pixr; | |
| 1036 | |
| 1037 if (pangle) *pangle = 0.0; | |
| 1038 if (pconf) *pconf = 0.0; | |
| 1039 if (!pangle || !pconf) | |
| 1040 return ERROR_INT("&angle and/or &conf not defined", __func__, 1); | |
| 1041 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1042 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 1043 | |
| 1044 pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, | |
| 1045 redsweep, redsearch, 0.0, | |
| 1046 sweeprange, sweepdelta, minbsdelta, | |
| 1047 L_SHEAR_ABOUT_CORNER); | |
| 1048 pixr = pixRotateOrth(pixs, 1); | |
| 1049 pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, | |
| 1050 redsweep, redsearch, 0.0, | |
| 1051 sweeprange, sweepdelta, minbsdelta, | |
| 1052 L_SHEAR_ABOUT_CORNER); | |
| 1053 pixDestroy(&pixr); | |
| 1054 | |
| 1055 if (conf1 > conf2 - confprior) { | |
| 1056 *pangle = angle1; | |
| 1057 *pconf = conf1; | |
| 1058 } else { | |
| 1059 *pangle = -90.0f + angle2; | |
| 1060 *pconf = conf2; | |
| 1061 } | |
| 1062 | |
| 1063 #if DEBUG_PRINT_ORTH | |
| 1064 lept_stderr(" About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", | |
| 1065 angle1, conf1, score1); | |
| 1066 lept_stderr(" About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", | |
| 1067 angle2, conf2, score2); | |
| 1068 lept_stderr(" Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); | |
| 1069 #endif /* DEBUG_PRINT_ORTH */ | |
| 1070 | |
| 1071 return 0; | |
| 1072 } | |
| 1073 | |
| 1074 | |
| 1075 | |
| 1076 /*----------------------------------------------------------------* | |
| 1077 * Differential square sum function * | |
| 1078 *----------------------------------------------------------------*/ | |
| 1079 /*! | |
| 1080 * \brief pixFindDifferentialSquareSum() | |
| 1081 * | |
| 1082 * \param[in] pixs | |
| 1083 * \param[out] psum result | |
| 1084 * \return 0 if OK, 1 on error | |
| 1085 * | |
| 1086 * <pre> | |
| 1087 * Notes: | |
| 1088 * (1) At the top and bottom, we skip: | |
| 1089 * ~ at least one scanline | |
| 1090 * ~ not more than 10% of the image height | |
| 1091 * ~ not more than 5% of the image width | |
| 1092 * </pre> | |
| 1093 */ | |
| 1094 l_ok | |
| 1095 pixFindDifferentialSquareSum(PIX *pixs, | |
| 1096 l_float32 *psum) | |
| 1097 { | |
| 1098 l_int32 i, n; | |
| 1099 l_int32 w, h, skiph, skip, nskip; | |
| 1100 l_float32 val1, val2, diff, sum; | |
| 1101 NUMA *na; | |
| 1102 | |
| 1103 if (!psum) | |
| 1104 return ERROR_INT("&sum not defined", __func__, 1); | |
| 1105 *psum = 0.0; | |
| 1106 if (!pixs) | |
| 1107 return ERROR_INT("pixs not defined", __func__, 1); | |
| 1108 | |
| 1109 /* Generate a number array consisting of the sum | |
| 1110 * of pixels in each row of pixs */ | |
| 1111 if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) | |
| 1112 return ERROR_INT("na not made", __func__, 1); | |
| 1113 | |
| 1114 /* Compute the number of rows at top and bottom to omit. | |
| 1115 * We omit these to avoid getting a spurious signal from | |
| 1116 * the top and bottom of a (nearly) all black image. */ | |
| 1117 w = pixGetWidth(pixs); | |
| 1118 h = pixGetHeight(pixs); | |
| 1119 skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ | |
| 1120 skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ | |
| 1121 nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ | |
| 1122 | |
| 1123 /* Sum the squares of differential row sums, on the | |
| 1124 * allowed rows. Note that nskip must be >= 1. */ | |
| 1125 n = numaGetCount(na); | |
| 1126 sum = 0.0; | |
| 1127 for (i = nskip; i < n - nskip; i++) { | |
| 1128 numaGetFValue(na, i - 1, &val1); | |
| 1129 numaGetFValue(na, i, &val2); | |
| 1130 diff = val2 - val1; | |
| 1131 sum += diff * diff; | |
| 1132 } | |
| 1133 numaDestroy(&na); | |
| 1134 *psum = sum; | |
| 1135 return 0; | |
| 1136 } | |
| 1137 | |
| 1138 | |
| 1139 /*----------------------------------------------------------------* | |
| 1140 * Normalized square sum * | |
| 1141 *----------------------------------------------------------------*/ | |
| 1142 /*! | |
| 1143 * \brief pixFindNormalizedSquareSum() | |
| 1144 * | |
| 1145 * \param[in] pixs | |
| 1146 * \param[out] phratio [optional] ratio of normalized horiz square sum | |
| 1147 * to result if the pixel distribution were uniform | |
| 1148 * \param[out] pvratio [optional] ratio of normalized vert square sum | |
| 1149 * to result if the pixel distribution were uniform | |
| 1150 * \param[out] pfract [optional] ratio of fg pixels to total pixels | |
| 1151 * \return 0 if OK, 1 on error or if there are no fg pixels | |
| 1152 * | |
| 1153 * <pre> | |
| 1154 * Notes: | |
| 1155 * (1) Let the image have h scanlines and N fg pixels. | |
| 1156 * If the pixels were uniformly distributed on scanlines, | |
| 1157 * the sum of squares of fg pixels on each scanline would be | |
| 1158 * h * (N / h)^2. However, if the pixels are not uniformly | |
| 1159 * distributed (e.g., for text), the sum of squares of fg | |
| 1160 * pixels will be larger. We return in hratio and vratio the | |
| 1161 * ratio of these two values. | |
| 1162 * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. | |
| 1163 * </pre> | |
| 1164 */ | |
| 1165 l_ok | |
| 1166 pixFindNormalizedSquareSum(PIX *pixs, | |
| 1167 l_float32 *phratio, | |
| 1168 l_float32 *pvratio, | |
| 1169 l_float32 *pfract) | |
| 1170 { | |
| 1171 l_int32 i, w, h, empty; | |
| 1172 l_float32 sum, sumsq, uniform, val; | |
| 1173 NUMA *na; | |
| 1174 PIX *pixt; | |
| 1175 | |
| 1176 if (phratio) *phratio = 0.0; | |
| 1177 if (pvratio) *pvratio = 0.0; | |
| 1178 if (pfract) *pfract = 0.0; | |
| 1179 if (!phratio && !pvratio) | |
| 1180 return ERROR_INT("nothing to do", __func__, 1); | |
| 1181 if (!pixs || pixGetDepth(pixs) != 1) | |
| 1182 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 1183 pixGetDimensions(pixs, &w, &h, NULL); | |
| 1184 | |
| 1185 empty = 0; | |
| 1186 if (phratio) { | |
| 1187 na = pixCountPixelsByRow(pixs, NULL); | |
| 1188 numaGetSum(na, &sum); /* fg pixels */ | |
| 1189 if (pfract) *pfract = sum / (l_float32)(w * h); | |
| 1190 if (sum != 0.0) { | |
| 1191 uniform = sum * sum / h; /* h*(sum / h)^2 */ | |
| 1192 sumsq = 0.0; | |
| 1193 for (i = 0; i < h; i++) { | |
| 1194 numaGetFValue(na, i, &val); | |
| 1195 sumsq += val * val; | |
| 1196 } | |
| 1197 *phratio = sumsq / uniform; | |
| 1198 } else { | |
| 1199 empty = 1; | |
| 1200 } | |
| 1201 numaDestroy(&na); | |
| 1202 } | |
| 1203 | |
| 1204 if (pvratio) { | |
| 1205 if (empty == 1) return 1; | |
| 1206 pixt = pixRotateOrth(pixs, 1); | |
| 1207 na = pixCountPixelsByRow(pixt, NULL); | |
| 1208 numaGetSum(na, &sum); | |
| 1209 if (pfract) *pfract = sum / (l_float32)(w * h); | |
| 1210 if (sum != 0.0) { | |
| 1211 uniform = sum * sum / w; | |
| 1212 sumsq = 0.0; | |
| 1213 for (i = 0; i < w; i++) { | |
| 1214 numaGetFValue(na, i, &val); | |
| 1215 sumsq += val * val; | |
| 1216 } | |
| 1217 *pvratio = sumsq / uniform; | |
| 1218 } else { | |
| 1219 empty = 1; | |
| 1220 } | |
| 1221 pixDestroy(&pixt); | |
| 1222 numaDestroy(&na); | |
| 1223 } | |
| 1224 | |
| 1225 return empty; | |
| 1226 } |
