Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/training/degradeimage.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/training/degradeimage.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,311 @@ +/********************************************************************** + * File: degradeimage.cpp + * Description: Function to degrade an image (usually of text) as if it + * has been printed and then scanned. + * Authors: Ray Smith + * + * (C) Copyright 2013, Google Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **********************************************************************/ + +#include "degradeimage.h" + +#include <allheaders.h> // from leptonica +#include <cstdlib> +#include "helpers.h" // For TRand. +#include "rect.h" + +namespace tesseract { + +// A randomized perspective distortion can be applied to synthetic input. +// The perspective distortion comes from leptonica, which uses 2 sets of 4 +// corners to determine the distortion. There are random values for each of +// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead +// defined in terms of a single shear value. This reduces the degrees of +// freedom enough to make the distortion more realistic than it would otherwise +// be if all 8 coordinates could move independently. +// One additional factor is used for the color of the pixels that don't exist +// in the source image. +// Name for each of the randomizing factors. +enum FactorNames { + FN_INCOLOR, + FN_Y0, + FN_Y1, + FN_Y2, + FN_Y3, + FN_X0, + FN_X1, + FN_SHEAR, + // x2 = x1 - shear + // x3 = x0 + shear + FN_NUM_FACTORS +}; + +// Rotation is +/- kRotationRange radians. +const float kRotationRange = 0.02f; +// Number of grey levels to shift by for each exposure step. +const int kExposureFactor = 16; +// Salt and pepper noise is +/- kSaltnPepper. +const int kSaltnPepper = 5; +// Min sum of width + height on which to operate the ramp. +const int kMinRampSize = 1000; + +// Degrade the pix as if by a print/copy/scan cycle with exposure > 0 +// corresponding to darkening on the copier and <0 lighter and 0 not copied. +// Exposures in [-2,2] are most useful, with -3 and 3 being extreme. +// If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the +// pix is rotated by *rotation else it is randomly rotated and *rotation is +// modified. +// +// HOW IT WORKS: +// Most of the process is really dictated by the fact that the minimum +// available convolution is 3X3, which is too big really to simulate a +// good quality print/scan process. (2X2 would be better.) +// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the +// images generally biased to being too light, so most of the work is to make +// them darker. 3 levels of thickening/darkening are achieved with 2 dilations, +// (using a greyscale erosion) one heavy (by being before convolution) and one +// light (after convolution). +// With no dilation, after covolution, the images are so light that a heavy +// constant offset is required to make the 0 image look reasonable. A simple +// constant offset multiple of exposure to undo this value is enough to achieve +// all the required lighting. This gives the advantage that exposure level 1 +// with a single dilation gives a good impression of the broken-yet-too-dark +// problem that is often seen in scans. +// A small random rotation gives some varying greyscale values on the edges, +// and some random salt and pepper noise on top helps to realistically jaggy-up +// the edges. +// Finally a greyscale ramp provides a continuum of effects between exposure +// levels. +Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) { + Image pix = pixConvertTo8(input, false); + input.destroy(); + input = pix; + int width = pixGetWidth(input); + int height = pixGetHeight(input); + + if (exposure >= 2) { + // An erosion simulates the spreading darkening of a dark copy. + // This is backwards to binary morphology, + // see http://www.leptonica.com/grayscale-morphology.html + pix = input; + input = pixErodeGray(pix, 3, 3); + pix.destroy(); + } + // A convolution is essential to any mode as no scanner produces an + // image as sharp as the electronic image. + pix = pixBlockconv(input, 1, 1); + input.destroy(); + // A small random rotation helps to make the edges jaggy in a realistic way. + if (rotation != nullptr) { + float radians_clockwise = 0.0f; + if (*rotation) { + radians_clockwise = *rotation; + } else if (randomizer != nullptr) { + radians_clockwise = randomizer->SignedRand(kRotationRange); + } + + input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0); + // Rotate the boxes to match. + *rotation = radians_clockwise; + pix.destroy(); + } else { + input = pix; + } + + if (exposure >= 3 || exposure == 1) { + // Erosion after the convolution is not as heavy as before, so it is + // good for level 1 and in addition as a level 3. + // This is backwards to binary morphology, + // see http://www.leptonica.com/grayscale-morphology.html + pix = input; + input = pixErodeGray(pix, 3, 3); + pix.destroy(); + } + // The convolution really needed to be 2x2 to be realistic enough, but + // we only have 3x3, so we have to bias the image darker or lose thin + // strokes. + int erosion_offset = 0; + // For light and 0 exposure, there is no dilation, so compensate for the + // convolution with a big darkening bias which is undone for lighter + // exposures. + if (exposure <= 0) { + erosion_offset = -3 * kExposureFactor; + } + // Add in a general offset of the greyscales for the exposure level so + // a threshold of 128 gives a reasonable binary result. + erosion_offset -= exposure * kExposureFactor; + // Add a gradual fade over the page and a small amount of salt and pepper + // noise to simulate noise in the sensor/paper fibres and varying + // illumination. + l_uint32 *data = pixGetData(input); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + int pixel = GET_DATA_BYTE(data, x); + if (randomizer != nullptr) { + pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper; + } + if (height + width > kMinRampSize) { + pixel -= (2 * x + y) * 32 / (height + width); + } + pixel += erosion_offset; + if (pixel < 0) { + pixel = 0; + } + if (pixel > 255) { + pixel = 255; + } + SET_DATA_BYTE(data, x, pixel); + } + data += pixGetWpl(input); + } + return input; +} + +// Creates and returns a Pix distorted by various means according to the bool +// flags. If boxes is not nullptr, the boxes are resized/positioned according to +// any spatial distortion and also by the integer reduction factor box_scale +// so they will match what the network will output. +// Returns nullptr on error. The returned Pix must be pixDestroyed. +Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise, + bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, + std::vector<TBOX> *boxes) { + Image distorted = pix.copy(); + // Things to do to synthetic training data. + if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { + // TODO(rays) Cook noise in a more thread-safe manner than rand(). + // Attempt to make the sequences reproducible. + srand(randomizer->IntRand()); + Image pixn = pixAddGaussianNoise(distorted, 8.0); + distorted.destroy(); + if (smooth_noise) { + distorted = pixBlockconv(pixn, 1, 1); + pixn.destroy(); + } else { + distorted = pixn; + } + } + if (blur && randomizer->SignedRand(1.0) > 0.0) { + Image blurred = pixBlockconv(distorted, 1, 1); + distorted.destroy(); + distorted = blurred; + } + if (perspective) { + GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes); + } + if (boxes != nullptr) { + for (auto &b : *boxes) { + b.scale(1.0f / box_reduction); + if (b.width() <= 0) { + b.set_right(b.left() + 1); + } + } + } + if (invert && randomizer->SignedRand(1.0) < -0) { + pixInvert(distorted, distorted); + } + return distorted; +} + +// Distorts anything that has a non-null pointer with the same pseudo-random +// perspective distortion. Width and height only need to be set if there +// is no pix. If there is a pix, then they will be taken from there. +void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, + std::vector<TBOX> *boxes) { + if (pix != nullptr && *pix != nullptr) { + width = pixGetWidth(*pix); + height = pixGetHeight(*pix); + } + float *im_coeffs = nullptr; + float *box_coeffs = nullptr; + l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs); + if (pix != nullptr && *pix != nullptr) { + // Transform the image. + Image transformed = pixProjective(*pix, im_coeffs, incolor); + if (transformed == nullptr) { + tprintf("Projective transformation failed!!\n"); + return; + } + pix->destroy(); + *pix = transformed; + } + if (boxes != nullptr) { + // Transform the boxes. + for (auto &b : *boxes) { + int x1, y1, x2, y2; + const TBOX &box = b; + projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1); + projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2); + TBOX new_box1(x1, height - y2, x2, height - y1); + projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1); + projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2); + TBOX new_box2(x1, height - y1, x2, height - y2); + b = new_box1.bounding_union(new_box2); + } + } + lept_free(im_coeffs); + lept_free(box_coeffs); +} + +// Computes the coefficients of a randomized projective transformation. +// The image transform requires backward transformation coefficient, and the +// box transform the forward coefficients. +// Returns the incolor arg to pixProjective. +int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, + float **box_coeffs) { + // Setup "from" points. + Pta *src_pts = ptaCreate(4); + ptaAddPt(src_pts, 0.0f, 0.0f); + ptaAddPt(src_pts, width, 0.0f); + ptaAddPt(src_pts, width, height); + ptaAddPt(src_pts, 0.0f, height); + // Extract factors from pseudo-random sequence. + float factors[FN_NUM_FACTORS]; + float shear = 0.0f; // Shear is signed. + for (int i = 0; i < FN_NUM_FACTORS; ++i) { + // Everything is squared to make wild values rarer. + if (i == FN_SHEAR) { + // Shear is signed. + shear = randomizer->SignedRand(0.5 / 3.0); + shear = shear >= 0.0 ? shear * shear : -shear * shear; + // Keep the sheared points within the original rectangle. + if (shear < -factors[FN_X0]) { + shear = -factors[FN_X0]; + } + if (shear > factors[FN_X1]) { + shear = factors[FN_X1]; + } + factors[i] = shear; + } else if (i != FN_INCOLOR) { + factors[i] = fabs(randomizer->SignedRand(1.0)); + if (i <= FN_Y3) { + factors[i] *= 5.0 / 8.0; + } else { + factors[i] *= 0.5; + } + factors[i] *= factors[i]; + } + } + // Setup "to" points. + Pta *dest_pts = ptaCreate(4); + ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height); + ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height); + ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height); + ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height); + getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs); + getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs); + ptaDestroy(&src_pts); + ptaDestroy(&dest_pts); + return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK; +} + +} // namespace tesseract
