Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/baseline.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file baseline.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Locate text baselines in an image | |
| 32 * NUMA *pixFindBaselines() | |
| 33 * | |
| 34 * Projective transform to remove local skew | |
| 35 * PIX *pixDeskewLocal() | |
| 36 * | |
| 37 * Determine local skew | |
| 38 * l_int32 pixGetLocalSkewTransform() | |
| 39 * NUMA *pixGetLocalSkewAngles() | |
| 40 * | |
| 41 * We have two apparently different functions here: | |
| 42 * ~ finding baselines | |
| 43 * ~ finding a projective transform to remove keystone warping | |
| 44 * The function pixGetLocalSkewAngles() returns an array of angles, | |
| 45 * one for each raster line, and the baselines of the text lines | |
| 46 * should intersect the left edge of the image with that angle. | |
| 47 * </pre> | |
| 48 */ | |
| 49 | |
| 50 #ifdef HAVE_CONFIG_H | |
| 51 #include <config_auto.h> | |
| 52 #endif /* HAVE_CONFIG_H */ | |
| 53 | |
| 54 #include <math.h> | |
| 55 #include "allheaders.h" | |
| 56 | |
| 57 /* Minimum distance to travel after finding max before abandoning peak */ | |
| 58 static const l_int32 MinDistInPeak = 35; | |
| 59 | |
| 60 /* Thresholds for peaks and zeros, relative to the max peak */ | |
| 61 static const l_int32 PeakThresholdRatio = 20; | |
| 62 static const l_int32 ZeroThresholdRatio = 100; | |
| 63 | |
| 64 /* Default values for determining local skew */ | |
| 65 static const l_int32 DefaultSlices = 10; | |
| 66 static const l_int32 DefaultSweepReduction = 2; | |
| 67 static const l_int32 DefaultBsReduction = 1; | |
| 68 static const l_float32 DefaultSweepRange = 5.; /* degrees */ | |
| 69 static const l_float32 DefaultSweepDelta = 1.; /* degrees */ | |
| 70 static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ | |
| 71 | |
| 72 /* Overlap slice fraction added to top and bottom of each slice */ | |
| 73 static const l_float32 OverlapFraction = 0.5; | |
| 74 | |
| 75 /* Minimum allowed confidence (ratio) for accepting a value */ | |
| 76 static const l_float32 MinAllowedConfidence = 3.0; | |
| 77 | |
| 78 | |
| 79 /*---------------------------------------------------------------------* | |
| 80 * Locate text baselines in an image * | |
| 81 *---------------------------------------------------------------------*/ | |
| 82 /*! | |
| 83 * \brief pixFindBaselines() | |
| 84 * | |
| 85 * \param[in] pixs 1 bpp, 300 ppi | |
| 86 * \param[out] ppta [optional] pairs of pts corresponding to | |
| 87 * approx. ends of each text line | |
| 88 * \param[in] pixadb for debug output; use NULL to skip | |
| 89 * \return na of baseline y values, or NULL on error | |
| 90 * | |
| 91 * <pre> | |
| 92 * Notes: | |
| 93 * (1) Input binary image must have text lines already aligned | |
| 94 * horizontally. This can be done by either rotating the | |
| 95 * image with pixDeskew(), or, if a projective transform | |
| 96 * is required, by doing pixDeskewLocal() first. | |
| 97 * (2) Input null for &pta if you don't want this returned. | |
| 98 * The pta will come in pairs of points (left and right end | |
| 99 * of each baseline). | |
| 100 * (3) Caution: this will not work properly on text with multiple | |
| 101 * columns, where the lines are not aligned between columns. | |
| 102 * If there are multiple columns, they should be extracted | |
| 103 * separately before finding the baselines. | |
| 104 * (4) This function constructs different types of output | |
| 105 * for baselines; namely, a set of raster line values and | |
| 106 * a set of end points of each baseline. | |
| 107 * (5) This function was designed to handle short and long text lines | |
| 108 * without using dangerous thresholds on the peak heights. It does | |
| 109 * this by combining the differential signal with a morphological | |
| 110 * analysis of the locations of the text lines. One can also | |
| 111 * combine this data to normalize the peak heights, by weighting | |
| 112 * the differential signal in the region of each baseline | |
| 113 * by the inverse of the width of the text line found there. | |
| 114 * </pre> | |
| 115 */ | |
| 116 NUMA * | |
| 117 pixFindBaselines(PIX *pixs, | |
| 118 PTA **ppta, | |
| 119 PIXA *pixadb) | |
| 120 { | |
| 121 l_int32 h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; | |
| 122 l_int32 imaxloc, peakthresh, zerothresh, inpeak; | |
| 123 l_int32 mintosearch, max, maxloc, nloc, locval; | |
| 124 l_int32 *array; | |
| 125 l_float32 maxval; | |
| 126 BOXA *boxa1, *boxa2, *boxa3; | |
| 127 GPLOT *gplot; | |
| 128 NUMA *nasum, *nadiff, *naloc, *naval; | |
| 129 PIX *pix1, *pix2; | |
| 130 PTA *pta; | |
| 131 | |
| 132 if (ppta) *ppta = NULL; | |
| 133 if (!pixs || pixGetDepth(pixs) != 1) | |
| 134 return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 135 | |
| 136 /* Close up the text characters, removing noise */ | |
| 137 pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0); | |
| 138 | |
| 139 /* Estimate the resolution */ | |
| 140 if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); | |
| 141 | |
| 142 /* Save the difference of adjacent row sums. | |
| 143 * The high positive-going peaks are the baselines */ | |
| 144 if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) { | |
| 145 pixDestroy(&pix1); | |
| 146 return (NUMA *)ERROR_PTR("nasum not made", __func__, NULL); | |
| 147 } | |
| 148 h = pixGetHeight(pixs); | |
| 149 nadiff = numaCreate(h); | |
| 150 numaGetIValue(nasum, 0, &val2); | |
| 151 for (i = 0; i < h - 1; i++) { | |
| 152 val1 = val2; | |
| 153 numaGetIValue(nasum, i + 1, &val2); | |
| 154 numaAddNumber(nadiff, val1 - val2); | |
| 155 } | |
| 156 numaDestroy(&nasum); | |
| 157 | |
| 158 if (pixadb) { /* show the difference signal */ | |
| 159 lept_mkdir("lept/baseline"); | |
| 160 gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig"); | |
| 161 pix2 = pixRead("/tmp/lept/baseline/diff.png"); | |
| 162 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 163 } | |
| 164 | |
| 165 /* Use the zeroes of the profile to locate each baseline. */ | |
| 166 array = numaGetIArray(nadiff); | |
| 167 ndiff = numaGetCount(nadiff); | |
| 168 numaGetMax(nadiff, &maxval, &imaxloc); | |
| 169 numaDestroy(&nadiff); | |
| 170 | |
| 171 /* Use this to begin locating a new peak: */ | |
| 172 peakthresh = (l_int32)maxval / PeakThresholdRatio; | |
| 173 /* Use this to begin a region between peaks: */ | |
| 174 zerothresh = (l_int32)maxval / ZeroThresholdRatio; | |
| 175 | |
| 176 naloc = numaCreate(0); | |
| 177 naval = numaCreate(0); | |
| 178 inpeak = FALSE; | |
| 179 for (i = 0; i < ndiff; i++) { | |
| 180 if (inpeak == FALSE) { | |
| 181 if (array[i] > peakthresh) { /* transition to in-peak */ | |
| 182 inpeak = TRUE; | |
| 183 mintosearch = i + MinDistInPeak; /* accept no zeros | |
| 184 * between i and mintosearch */ | |
| 185 max = array[i]; | |
| 186 maxloc = i; | |
| 187 } | |
| 188 } else { /* inpeak == TRUE; look for max */ | |
| 189 if (array[i] > max) { | |
| 190 max = array[i]; | |
| 191 maxloc = i; | |
| 192 mintosearch = i + MinDistInPeak; | |
| 193 } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ | |
| 194 inpeak = FALSE; | |
| 195 numaAddNumber(naval, max); | |
| 196 numaAddNumber(naloc, maxloc); | |
| 197 } | |
| 198 } | |
| 199 } | |
| 200 LEPT_FREE(array); | |
| 201 | |
| 202 /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ | |
| 203 if (inpeak) { | |
| 204 numaAddNumber(naval, max); | |
| 205 numaAddNumber(naloc, maxloc); | |
| 206 } | |
| 207 | |
| 208 if (pixadb) { /* show the raster locations for the peaks */ | |
| 209 gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs", | |
| 210 "rasterline", "height"); | |
| 211 gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); | |
| 212 gplotMakeOutput(gplot); | |
| 213 gplotDestroy(&gplot); | |
| 214 pix2 = pixRead("/tmp/lept/baseline/loc.png"); | |
| 215 pixaAddPix(pixadb, pix2, L_INSERT); | |
| 216 } | |
| 217 numaDestroy(&naval); | |
| 218 | |
| 219 /* Generate an approximate profile of text line width. | |
| 220 * First, filter the boxes of text, where there may be | |
| 221 * more than one box for a given textline. */ | |
| 222 pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0); | |
| 223 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); | |
| 224 boxa1 = pixConnComp(pix2, NULL, 4); | |
| 225 pixDestroy(&pix1); | |
| 226 pixDestroy(&pix2); | |
| 227 if (boxaGetCount(boxa1) == 0) { | |
| 228 numaDestroy(&naloc); | |
| 229 boxaDestroy(&boxa1); | |
| 230 L_INFO("no components after filtering\n", __func__); | |
| 231 return NULL; | |
| 232 } | |
| 233 boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); | |
| 234 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); | |
| 235 boxaDestroy(&boxa1); | |
| 236 boxaDestroy(&boxa2); | |
| 237 | |
| 238 /* Optionally, find the baseline segments */ | |
| 239 pta = NULL; | |
| 240 if (ppta) { | |
| 241 pta = ptaCreate(0); | |
| 242 *ppta = pta; | |
| 243 } | |
| 244 if (pta) { | |
| 245 nloc = numaGetCount(naloc); | |
| 246 nbox = boxaGetCount(boxa3); | |
| 247 for (i = 0; i < nbox; i++) { | |
| 248 boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); | |
| 249 for (j = 0; j < nloc; j++) { | |
| 250 numaGetIValue(naloc, j, &locval); | |
| 251 if (L_ABS(locval - (by + bh)) > 25) | |
| 252 continue; | |
| 253 ptaAddPt(pta, bx, locval); | |
| 254 ptaAddPt(pta, bx + bw, locval); | |
| 255 break; | |
| 256 } | |
| 257 } | |
| 258 } | |
| 259 boxaDestroy(&boxa3); | |
| 260 | |
| 261 if (pixadb && pta) { /* display baselines */ | |
| 262 l_int32 npts, x1, y1, x2, y2; | |
| 263 pix1 = pixConvertTo32(pixs); | |
| 264 npts = ptaGetCount(pta); | |
| 265 for (i = 0; i < npts; i += 2) { | |
| 266 ptaGetIPt(pta, i, &x1, &y1); | |
| 267 ptaGetIPt(pta, i + 1, &x2, &y2); | |
| 268 pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0); | |
| 269 } | |
| 270 pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG); | |
| 271 pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); | |
| 272 pixDestroy(&pix1); | |
| 273 } | |
| 274 | |
| 275 return naloc; | |
| 276 } | |
| 277 | |
| 278 | |
| 279 /*---------------------------------------------------------------------* | |
| 280 * Projective transform to remove local skew * | |
| 281 *---------------------------------------------------------------------*/ | |
| 282 /*! | |
| 283 * \brief pixDeskewLocal() | |
| 284 * | |
| 285 * \param[in] pixs 1 bpp | |
| 286 * \param[in] nslices the number of horizontal overlapping slices; | |
| 287 * must be larger than 1 and not exceed 20; | |
| 288 * use 0 for default | |
| 289 * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; | |
| 290 * use 0 for default value | |
| 291 * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and | |
| 292 * not larger than redsweep; use 0 for default value | |
| 293 * \param[in] sweeprange half the full range, assumed about 0; in degrees; | |
| 294 * use 0.0 for default value | |
| 295 * \param[in] sweepdelta angle increment of sweep; in degrees; | |
| 296 * use 0.0 for default value | |
| 297 * \param[in] minbsdelta min binary search increment angle; in degrees; | |
| 298 * use 0.0 for default value | |
| 299 * \return pixd, or NULL on error | |
| 300 * | |
| 301 * <pre> | |
| 302 * Notes: | |
| 303 * (1) This function allows deskew of a page whose skew changes | |
| 304 * approximately linearly with vertical position. It uses | |
| 305 * a projective transform that in effect does a differential | |
| 306 * shear about the LHS of the page, and makes all text lines | |
| 307 * horizontal. | |
| 308 * (2) The origin of the keystoning can be either a cheap document | |
| 309 * feeder that rotates the page as it is passed through, or a | |
| 310 * camera image taken from either the left or right side | |
| 311 * of the vertical. | |
| 312 * (3) The image transformation is a projective warping, | |
| 313 * not a rotation. Apart from this function, the text lines | |
| 314 * must be properly aligned vertically with respect to each | |
| 315 * other. This can be done by pre-processing the page; e.g., | |
| 316 * by rotating or horizontally shearing it. | |
| 317 * Typically, this can be achieved by vertically aligning | |
| 318 * the page edge. | |
| 319 * </pre> | |
| 320 */ | |
| 321 PIX * | |
| 322 pixDeskewLocal(PIX *pixs, | |
| 323 l_int32 nslices, | |
| 324 l_int32 redsweep, | |
| 325 l_int32 redsearch, | |
| 326 l_float32 sweeprange, | |
| 327 l_float32 sweepdelta, | |
| 328 l_float32 minbsdelta) | |
| 329 { | |
| 330 l_int32 ret; | |
| 331 PIX *pixd; | |
| 332 PTA *ptas, *ptad; | |
| 333 | |
| 334 if (!pixs || pixGetDepth(pixs) != 1) | |
| 335 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 336 | |
| 337 /* Skew array gives skew angle (deg) as fctn of raster line | |
| 338 * where it intersects the LHS of the image */ | |
| 339 ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, | |
| 340 sweeprange, sweepdelta, minbsdelta, | |
| 341 &ptas, &ptad); | |
| 342 if (ret != 0) | |
| 343 return (PIX *)ERROR_PTR("transform pts not found", __func__, NULL); | |
| 344 | |
| 345 /* Use a projective transform */ | |
| 346 pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); | |
| 347 | |
| 348 ptaDestroy(&ptas); | |
| 349 ptaDestroy(&ptad); | |
| 350 return pixd; | |
| 351 } | |
| 352 | |
| 353 | |
| 354 /*---------------------------------------------------------------------* | |
| 355 * Determine the local skew * | |
| 356 *---------------------------------------------------------------------*/ | |
| 357 /*! | |
| 358 * \brief pixGetLocalSkewTransform() | |
| 359 * | |
| 360 * \param[in] pixs | |
| 361 * \param[in] nslices the number of horizontal overlapping slices; | |
| 362 * must be larger than 1 and not exceed 20; | |
| 363 * use 0 for default | |
| 364 * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; | |
| 365 * use 0 for default value | |
| 366 * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not | |
| 367 * larger than redsweep; use 0 for default value | |
| 368 * \param[in] sweeprange half the full range, assumed about 0; | |
| 369 * in degrees; use 0.0 for default value | |
| 370 * \param[in] sweepdelta angle increment of sweep; in degrees; | |
| 371 * use 0.0 for default value | |
| 372 * \param[in] minbsdelta min binary search increment angle; in degrees; | |
| 373 * use 0.0 for default value | |
| 374 * \param[out] pptas 4 points in the source | |
| 375 * \param[out] pptad the corresponding 4 pts in the dest | |
| 376 * \return 0 if OK, 1 on error | |
| 377 * | |
| 378 * <pre> | |
| 379 * Notes: | |
| 380 * (1) This generates two pairs of points in the src, each pair | |
| 381 * corresponding to a pair of points that would lie along | |
| 382 * the same raster line in a transformed (dewarped) image. | |
| 383 * (2) The sets of 4 src and 4 dest points returned by this function | |
| 384 * can then be used, in a projective or bilinear transform, | |
| 385 * to remove keystoning in the src. | |
| 386 * </pre> | |
| 387 */ | |
| 388 l_ok | |
| 389 pixGetLocalSkewTransform(PIX *pixs, | |
| 390 l_int32 nslices, | |
| 391 l_int32 redsweep, | |
| 392 l_int32 redsearch, | |
| 393 l_float32 sweeprange, | |
| 394 l_float32 sweepdelta, | |
| 395 l_float32 minbsdelta, | |
| 396 PTA **pptas, | |
| 397 PTA **pptad) | |
| 398 { | |
| 399 l_int32 w, h, i; | |
| 400 l_float32 deg2rad, angr, angd, dely; | |
| 401 NUMA *naskew; | |
| 402 PTA *ptas, *ptad; | |
| 403 | |
| 404 if (!pptas || !pptad) | |
| 405 return ERROR_INT("&ptas and &ptad not defined", __func__, 1); | |
| 406 *pptas = *pptad = NULL; | |
| 407 if (!pixs || pixGetDepth(pixs) != 1) | |
| 408 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1); | |
| 409 if (nslices < 2 || nslices > 20) | |
| 410 nslices = DefaultSlices; | |
| 411 if (redsweep < 1 || redsweep > 8) | |
| 412 redsweep = DefaultSweepReduction; | |
| 413 if (redsearch < 1 || redsearch > redsweep) | |
| 414 redsearch = DefaultBsReduction; | |
| 415 if (sweeprange == 0.0) | |
| 416 sweeprange = DefaultSweepRange; | |
| 417 if (sweepdelta == 0.0) | |
| 418 sweepdelta = DefaultSweepDelta; | |
| 419 if (minbsdelta == 0.0) | |
| 420 minbsdelta = DefaultMinbsDelta; | |
| 421 | |
| 422 naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, | |
| 423 sweeprange, sweepdelta, minbsdelta, | |
| 424 NULL, NULL, 0); | |
| 425 if (!naskew) | |
| 426 return ERROR_INT("naskew not made", __func__, 1); | |
| 427 | |
| 428 deg2rad = 3.14159265f / 180.f; | |
| 429 w = pixGetWidth(pixs); | |
| 430 h = pixGetHeight(pixs); | |
| 431 ptas = ptaCreate(4); | |
| 432 ptad = ptaCreate(4); | |
| 433 *pptas = ptas; | |
| 434 *pptad = ptad; | |
| 435 | |
| 436 /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ | |
| 437 for (i = 0; i < h; i++) { | |
| 438 numaGetFValue(naskew, i, &angd); | |
| 439 angr = angd * deg2rad; | |
| 440 dely = w * tan(angr); | |
| 441 if (i - dely > 0.05 * h) | |
| 442 break; | |
| 443 } | |
| 444 ptaAddPt(ptas, 0, i); | |
| 445 ptaAddPt(ptas, w - 1, i - dely); | |
| 446 ptaAddPt(ptad, 0, i); | |
| 447 ptaAddPt(ptad, w - 1, i); | |
| 448 | |
| 449 /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ | |
| 450 for (i = h - 1; i > 0; i--) { | |
| 451 numaGetFValue(naskew, i, &angd); | |
| 452 angr = angd * deg2rad; | |
| 453 dely = w * tan(angr); | |
| 454 if (i - dely < 0.95 * h) | |
| 455 break; | |
| 456 } | |
| 457 ptaAddPt(ptas, 0, i); | |
| 458 ptaAddPt(ptas, w - 1, i - dely); | |
| 459 ptaAddPt(ptad, 0, i); | |
| 460 ptaAddPt(ptad, w - 1, i); | |
| 461 | |
| 462 numaDestroy(&naskew); | |
| 463 return 0; | |
| 464 } | |
| 465 | |
| 466 | |
| 467 /*! | |
| 468 * \brief pixGetLocalSkewAngles() | |
| 469 * | |
| 470 * \param[in] pixs 1 bpp | |
| 471 * \param[in] nslices the number of horizontal overlapping slices; | |
| 472 * must be larger than 1 and not exceed 20; | |
| 473 * use 0 for default | |
| 474 * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; | |
| 475 * use 0 for default value | |
| 476 * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not | |
| 477 * larger than redsweep; use 0 for default value | |
| 478 * \param[in] sweeprange half the full range, assumed about 0; | |
| 479 * in degrees; use 0.0 for default value | |
| 480 * \param[in] sweepdelta angle increment of sweep; in degrees; | |
| 481 * use 0.0 for default value | |
| 482 * \param[in] minbsdelta min binary search increment angle; in degrees; | |
| 483 * use 0.0 for default value | |
| 484 * \param[out] pa [optional] slope of skew as fctn of y | |
| 485 * \param[out] pb [optional] intercept at y = 0 of skew, | |
| 486 8 as a function of y | |
| 487 * \param[in] debug 1 for generating plot of skew angle vs. y; | |
| 488 * 0 otherwise | |
| 489 * \return naskew, or NULL on error | |
| 490 * | |
| 491 * <pre> | |
| 492 * Notes: | |
| 493 * (1) The local skew is measured in a set of overlapping strips. | |
| 494 * We then do a least square linear fit parameters to get | |
| 495 * the slope and intercept parameters a and b in | |
| 496 * skew-angle = a * y + b (degrees) | |
| 497 * for the local skew as a function of raster line y. | |
| 498 * This is then used to make naskew, which can be interpreted | |
| 499 * as the computed skew angle (in degrees) at the left edge | |
| 500 * of each raster line. | |
| 501 * (2) naskew can then be used to find the baselines of text, because | |
| 502 * each text line has a baseline that should intersect | |
| 503 * the left edge of the image with the angle given by this | |
| 504 * array, evaluated at the raster line of intersection. | |
| 505 * </pre> | |
| 506 */ | |
| 507 NUMA * | |
| 508 pixGetLocalSkewAngles(PIX *pixs, | |
| 509 l_int32 nslices, | |
| 510 l_int32 redsweep, | |
| 511 l_int32 redsearch, | |
| 512 l_float32 sweeprange, | |
| 513 l_float32 sweepdelta, | |
| 514 l_float32 minbsdelta, | |
| 515 l_float32 *pa, | |
| 516 l_float32 *pb, | |
| 517 l_int32 debug) | |
| 518 { | |
| 519 l_int32 w, h, hs, i, ystart, yend, ovlap, npts; | |
| 520 l_float32 angle, conf, ycenter, a, b; | |
| 521 BOX *box; | |
| 522 GPLOT *gplot; | |
| 523 NUMA *naskew, *nax, *nay; | |
| 524 PIX *pix; | |
| 525 PTA *pta; | |
| 526 | |
| 527 if (!pixs || pixGetDepth(pixs) != 1) | |
| 528 return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL); | |
| 529 if (nslices < 2 || nslices > 20) | |
| 530 nslices = DefaultSlices; | |
| 531 if (redsweep < 1 || redsweep > 8) | |
| 532 redsweep = DefaultSweepReduction; | |
| 533 if (redsearch < 1 || redsearch > redsweep) | |
| 534 redsearch = DefaultBsReduction; | |
| 535 if (sweeprange == 0.0) | |
| 536 sweeprange = DefaultSweepRange; | |
| 537 if (sweepdelta == 0.0) | |
| 538 sweepdelta = DefaultSweepDelta; | |
| 539 if (minbsdelta == 0.0) | |
| 540 minbsdelta = DefaultMinbsDelta; | |
| 541 | |
| 542 pixGetDimensions(pixs, &w, &h, NULL); | |
| 543 hs = h / nslices; | |
| 544 ovlap = (l_int32)(OverlapFraction * hs); | |
| 545 pta = ptaCreate(nslices); | |
| 546 for (i = 0; i < nslices; i++) { | |
| 547 ystart = L_MAX(0, hs * i - ovlap); | |
| 548 yend = L_MIN(h - 1, hs * (i + 1) + ovlap); | |
| 549 ycenter = (l_float32)(ystart + yend) / 2; | |
| 550 box = boxCreate(0, ystart, w, yend - ystart + 1); | |
| 551 pix = pixClipRectangle(pixs, box, NULL); | |
| 552 pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, | |
| 553 sweeprange, sweepdelta, minbsdelta); | |
| 554 if (conf > MinAllowedConfidence) | |
| 555 ptaAddPt(pta, ycenter, angle); | |
| 556 pixDestroy(&pix); | |
| 557 boxDestroy(&box); | |
| 558 } | |
| 559 | |
| 560 /* Do linear least squares fit */ | |
| 561 if ((npts = ptaGetCount(pta)) < 2) { | |
| 562 ptaDestroy(&pta); | |
| 563 return (NUMA *)ERROR_PTR("can't fit skew", __func__, NULL); | |
| 564 } | |
| 565 ptaGetLinearLSF(pta, &a, &b, NULL); | |
| 566 if (pa) *pa = a; | |
| 567 if (pb) *pb = b; | |
| 568 | |
| 569 /* Make skew angle array as function of raster line */ | |
| 570 naskew = numaCreate(h); | |
| 571 for (i = 0; i < h; i++) { | |
| 572 angle = a * i + b; | |
| 573 numaAddNumber(naskew, angle); | |
| 574 } | |
| 575 | |
| 576 if (debug) { | |
| 577 lept_mkdir("lept/baseline"); | |
| 578 ptaGetArrays(pta, &nax, &nay); | |
| 579 gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG, | |
| 580 "skew as fctn of y", "y (in raster lines from top)", | |
| 581 "angle (in degrees)"); | |
| 582 gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); | |
| 583 gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); | |
| 584 gplotMakeOutput(gplot); | |
| 585 gplotDestroy(&gplot); | |
| 586 numaDestroy(&nax); | |
| 587 numaDestroy(&nay); | |
| 588 } | |
| 589 | |
| 590 ptaDestroy(&pta); | |
| 591 return naskew; | |
| 592 } |
