Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/training/degradeimage.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: degradeimage.cpp | |
| 3 * Description: Function to degrade an image (usually of text) as if it | |
| 4 * has been printed and then scanned. | |
| 5 * Authors: Ray Smith | |
| 6 * | |
| 7 * (C) Copyright 2013, Google Inc. | |
| 8 * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 * you may not use this file except in compliance with the License. | |
| 10 * You may obtain a copy of the License at | |
| 11 * http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 * Unless required by applicable law or agreed to in writing, software | |
| 13 * distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 * See the License for the specific language governing permissions and | |
| 16 * limitations under the License. | |
| 17 * | |
| 18 **********************************************************************/ | |
| 19 | |
| 20 #include "degradeimage.h" | |
| 21 | |
| 22 #include <allheaders.h> // from leptonica | |
| 23 #include <cstdlib> | |
| 24 #include "helpers.h" // For TRand. | |
| 25 #include "rect.h" | |
| 26 | |
| 27 namespace tesseract { | |
| 28 | |
// Synthetic input can be given a randomized perspective distortion.
// Leptonica performs the distortion and describes it with 2 sets of 4
// corners. Each of the coordinates x0..x3 and y0..y3 gets its own random
// value, with the exception of x2 and x3, which are both derived from a
// single shear value. Removing those degrees of freedom makes the result
// more realistic than letting all 8 coordinates move independently.
// A further factor selects the color of the pixels that have no counterpart
// in the source image.
// Index of each randomizing factor; FN_NUM_FACTORS is the number of factors.
enum FactorNames {
  FN_INCOLOR = 0,
  FN_Y0 = 1,
  FN_Y1 = 2,
  FN_Y2 = 3,
  FN_Y3 = 4,
  FN_X0 = 5,
  FN_X1 = 6,
  FN_SHEAR = 7,
  // x2 = x1 - shear
  // x3 = x0 + shear
  FN_NUM_FACTORS = 8
};
| 52 | |
// Random rotation is drawn from +/- kRotationRange radians.
constexpr float kRotationRange = 0.02f;
// Grey levels to shift by per exposure step.
constexpr int kExposureFactor = 16;
// Salt and pepper noise amplitude: each pixel moves by +/- kSaltnPepper.
constexpr int kSaltnPepper = 5;
// The greyscale ramp is only applied when width + height exceeds this.
constexpr int kMinRampSize = 1000;
| 61 | |
| 62 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0 | |
| 63 // corresponding to darkening on the copier and <0 lighter and 0 not copied. | |
| 64 // Exposures in [-2,2] are most useful, with -3 and 3 being extreme. | |
| 65 // If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the | |
| 66 // pix is rotated by *rotation else it is randomly rotated and *rotation is | |
| 67 // modified. | |
| 68 // | |
| 69 // HOW IT WORKS: | |
| 70 // Most of the process is really dictated by the fact that the minimum | |
| 71 // available convolution is 3X3, which is too big really to simulate a | |
| 72 // good quality print/scan process. (2X2 would be better.) | |
| 73 // 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the | |
| 74 // images generally biased to being too light, so most of the work is to make | |
| 75 // them darker. 3 levels of thickening/darkening are achieved with 2 dilations, | |
| 76 // (using a greyscale erosion) one heavy (by being before convolution) and one | |
| 77 // light (after convolution). | |
| 78 // With no dilation, after covolution, the images are so light that a heavy | |
| 79 // constant offset is required to make the 0 image look reasonable. A simple | |
| 80 // constant offset multiple of exposure to undo this value is enough to achieve | |
| 81 // all the required lighting. This gives the advantage that exposure level 1 | |
| 82 // with a single dilation gives a good impression of the broken-yet-too-dark | |
| 83 // problem that is often seen in scans. | |
| 84 // A small random rotation gives some varying greyscale values on the edges, | |
| 85 // and some random salt and pepper noise on top helps to realistically jaggy-up | |
| 86 // the edges. | |
| 87 // Finally a greyscale ramp provides a continuum of effects between exposure | |
| 88 // levels. | |
| 89 Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) { | |
| 90 Image pix = pixConvertTo8(input, false); | |
| 91 input.destroy(); | |
| 92 input = pix; | |
| 93 int width = pixGetWidth(input); | |
| 94 int height = pixGetHeight(input); | |
| 95 | |
| 96 if (exposure >= 2) { | |
| 97 // An erosion simulates the spreading darkening of a dark copy. | |
| 98 // This is backwards to binary morphology, | |
| 99 // see http://www.leptonica.com/grayscale-morphology.html | |
| 100 pix = input; | |
| 101 input = pixErodeGray(pix, 3, 3); | |
| 102 pix.destroy(); | |
| 103 } | |
| 104 // A convolution is essential to any mode as no scanner produces an | |
| 105 // image as sharp as the electronic image. | |
| 106 pix = pixBlockconv(input, 1, 1); | |
| 107 input.destroy(); | |
| 108 // A small random rotation helps to make the edges jaggy in a realistic way. | |
| 109 if (rotation != nullptr) { | |
| 110 float radians_clockwise = 0.0f; | |
| 111 if (*rotation) { | |
| 112 radians_clockwise = *rotation; | |
| 113 } else if (randomizer != nullptr) { | |
| 114 radians_clockwise = randomizer->SignedRand(kRotationRange); | |
| 115 } | |
| 116 | |
| 117 input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0); | |
| 118 // Rotate the boxes to match. | |
| 119 *rotation = radians_clockwise; | |
| 120 pix.destroy(); | |
| 121 } else { | |
| 122 input = pix; | |
| 123 } | |
| 124 | |
| 125 if (exposure >= 3 || exposure == 1) { | |
| 126 // Erosion after the convolution is not as heavy as before, so it is | |
| 127 // good for level 1 and in addition as a level 3. | |
| 128 // This is backwards to binary morphology, | |
| 129 // see http://www.leptonica.com/grayscale-morphology.html | |
| 130 pix = input; | |
| 131 input = pixErodeGray(pix, 3, 3); | |
| 132 pix.destroy(); | |
| 133 } | |
| 134 // The convolution really needed to be 2x2 to be realistic enough, but | |
| 135 // we only have 3x3, so we have to bias the image darker or lose thin | |
| 136 // strokes. | |
| 137 int erosion_offset = 0; | |
| 138 // For light and 0 exposure, there is no dilation, so compensate for the | |
| 139 // convolution with a big darkening bias which is undone for lighter | |
| 140 // exposures. | |
| 141 if (exposure <= 0) { | |
| 142 erosion_offset = -3 * kExposureFactor; | |
| 143 } | |
| 144 // Add in a general offset of the greyscales for the exposure level so | |
| 145 // a threshold of 128 gives a reasonable binary result. | |
| 146 erosion_offset -= exposure * kExposureFactor; | |
| 147 // Add a gradual fade over the page and a small amount of salt and pepper | |
| 148 // noise to simulate noise in the sensor/paper fibres and varying | |
| 149 // illumination. | |
| 150 l_uint32 *data = pixGetData(input); | |
| 151 for (int y = 0; y < height; ++y) { | |
| 152 for (int x = 0; x < width; ++x) { | |
| 153 int pixel = GET_DATA_BYTE(data, x); | |
| 154 if (randomizer != nullptr) { | |
| 155 pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper; | |
| 156 } | |
| 157 if (height + width > kMinRampSize) { | |
| 158 pixel -= (2 * x + y) * 32 / (height + width); | |
| 159 } | |
| 160 pixel += erosion_offset; | |
| 161 if (pixel < 0) { | |
| 162 pixel = 0; | |
| 163 } | |
| 164 if (pixel > 255) { | |
| 165 pixel = 255; | |
| 166 } | |
| 167 SET_DATA_BYTE(data, x, pixel); | |
| 168 } | |
| 169 data += pixGetWpl(input); | |
| 170 } | |
| 171 return input; | |
| 172 } | |
| 173 | |
| 174 // Creates and returns a Pix distorted by various means according to the bool | |
| 175 // flags. If boxes is not nullptr, the boxes are resized/positioned according to | |
| 176 // any spatial distortion and also by the integer reduction factor box_scale | |
| 177 // so they will match what the network will output. | |
| 178 // Returns nullptr on error. The returned Pix must be pixDestroyed. | |
| 179 Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise, | |
| 180 bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, | |
| 181 std::vector<TBOX> *boxes) { | |
| 182 Image distorted = pix.copy(); | |
| 183 // Things to do to synthetic training data. | |
| 184 if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { | |
| 185 // TODO(rays) Cook noise in a more thread-safe manner than rand(). | |
| 186 // Attempt to make the sequences reproducible. | |
| 187 srand(randomizer->IntRand()); | |
| 188 Image pixn = pixAddGaussianNoise(distorted, 8.0); | |
| 189 distorted.destroy(); | |
| 190 if (smooth_noise) { | |
| 191 distorted = pixBlockconv(pixn, 1, 1); | |
| 192 pixn.destroy(); | |
| 193 } else { | |
| 194 distorted = pixn; | |
| 195 } | |
| 196 } | |
| 197 if (blur && randomizer->SignedRand(1.0) > 0.0) { | |
| 198 Image blurred = pixBlockconv(distorted, 1, 1); | |
| 199 distorted.destroy(); | |
| 200 distorted = blurred; | |
| 201 } | |
| 202 if (perspective) { | |
| 203 GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes); | |
| 204 } | |
| 205 if (boxes != nullptr) { | |
| 206 for (auto &b : *boxes) { | |
| 207 b.scale(1.0f / box_reduction); | |
| 208 if (b.width() <= 0) { | |
| 209 b.set_right(b.left() + 1); | |
| 210 } | |
| 211 } | |
| 212 } | |
| 213 if (invert && randomizer->SignedRand(1.0) < -0) { | |
| 214 pixInvert(distorted, distorted); | |
| 215 } | |
| 216 return distorted; | |
| 217 } | |
| 218 | |
| 219 // Distorts anything that has a non-null pointer with the same pseudo-random | |
| 220 // perspective distortion. Width and height only need to be set if there | |
| 221 // is no pix. If there is a pix, then they will be taken from there. | |
| 222 void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, | |
| 223 std::vector<TBOX> *boxes) { | |
| 224 if (pix != nullptr && *pix != nullptr) { | |
| 225 width = pixGetWidth(*pix); | |
| 226 height = pixGetHeight(*pix); | |
| 227 } | |
| 228 float *im_coeffs = nullptr; | |
| 229 float *box_coeffs = nullptr; | |
| 230 l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs); | |
| 231 if (pix != nullptr && *pix != nullptr) { | |
| 232 // Transform the image. | |
| 233 Image transformed = pixProjective(*pix, im_coeffs, incolor); | |
| 234 if (transformed == nullptr) { | |
| 235 tprintf("Projective transformation failed!!\n"); | |
| 236 return; | |
| 237 } | |
| 238 pix->destroy(); | |
| 239 *pix = transformed; | |
| 240 } | |
| 241 if (boxes != nullptr) { | |
| 242 // Transform the boxes. | |
| 243 for (auto &b : *boxes) { | |
| 244 int x1, y1, x2, y2; | |
| 245 const TBOX &box = b; | |
| 246 projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1); | |
| 247 projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2); | |
| 248 TBOX new_box1(x1, height - y2, x2, height - y1); | |
| 249 projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1); | |
| 250 projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2); | |
| 251 TBOX new_box2(x1, height - y1, x2, height - y2); | |
| 252 b = new_box1.bounding_union(new_box2); | |
| 253 } | |
| 254 } | |
| 255 lept_free(im_coeffs); | |
| 256 lept_free(box_coeffs); | |
| 257 } | |
| 258 | |
| 259 // Computes the coefficients of a randomized projective transformation. | |
| 260 // The image transform requires backward transformation coefficient, and the | |
| 261 // box transform the forward coefficients. | |
| 262 // Returns the incolor arg to pixProjective. | |
| 263 int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, | |
| 264 float **box_coeffs) { | |
| 265 // Setup "from" points. | |
| 266 Pta *src_pts = ptaCreate(4); | |
| 267 ptaAddPt(src_pts, 0.0f, 0.0f); | |
| 268 ptaAddPt(src_pts, width, 0.0f); | |
| 269 ptaAddPt(src_pts, width, height); | |
| 270 ptaAddPt(src_pts, 0.0f, height); | |
| 271 // Extract factors from pseudo-random sequence. | |
| 272 float factors[FN_NUM_FACTORS]; | |
| 273 float shear = 0.0f; // Shear is signed. | |
| 274 for (int i = 0; i < FN_NUM_FACTORS; ++i) { | |
| 275 // Everything is squared to make wild values rarer. | |
| 276 if (i == FN_SHEAR) { | |
| 277 // Shear is signed. | |
| 278 shear = randomizer->SignedRand(0.5 / 3.0); | |
| 279 shear = shear >= 0.0 ? shear * shear : -shear * shear; | |
| 280 // Keep the sheared points within the original rectangle. | |
| 281 if (shear < -factors[FN_X0]) { | |
| 282 shear = -factors[FN_X0]; | |
| 283 } | |
| 284 if (shear > factors[FN_X1]) { | |
| 285 shear = factors[FN_X1]; | |
| 286 } | |
| 287 factors[i] = shear; | |
| 288 } else if (i != FN_INCOLOR) { | |
| 289 factors[i] = fabs(randomizer->SignedRand(1.0)); | |
| 290 if (i <= FN_Y3) { | |
| 291 factors[i] *= 5.0 / 8.0; | |
| 292 } else { | |
| 293 factors[i] *= 0.5; | |
| 294 } | |
| 295 factors[i] *= factors[i]; | |
| 296 } | |
| 297 } | |
| 298 // Setup "to" points. | |
| 299 Pta *dest_pts = ptaCreate(4); | |
| 300 ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height); | |
| 301 ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height); | |
| 302 ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height); | |
| 303 ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height); | |
| 304 getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs); | |
| 305 getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs); | |
| 306 ptaDestroy(&src_pts); | |
| 307 ptaDestroy(&dest_pts); | |
| 308 return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK; | |
| 309 } | |
| 310 | |
| 311 } // namespace tesseract |
