diff mupdf-source/thirdparty/tesseract/src/training/degradeimage.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/training/degradeimage.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,311 @@
+/**********************************************************************
+ * File:        degradeimage.cpp
+ * Description: Function to degrade an image (usually of text) as if it
+ *              has been printed and then scanned.
+ * Authors:     Ray Smith
+ *
+ * (C) Copyright 2013, Google Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **********************************************************************/
+
+#include "degradeimage.h"
+
+#include <allheaders.h> // from leptonica
+#include <cstdlib>
+#include "helpers.h" // For TRand.
+#include "rect.h"
+
+namespace tesseract {
+
+// A randomized perspective distortion can be applied to synthetic input.
+// The perspective distortion comes from leptonica, which uses 2 sets of 4
+// corners to determine the distortion. There are random values for each of
+// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
+// defined in terms of a single shear value. This reduces the degrees of
+// freedom enough to make the distortion more realistic than it would otherwise
+// be if all 8 coordinates could move independently.
+// One additional factor is used for the color of the pixels that don't exist
+// in the source image.
+// Name for each of the randomizing factors.
+enum FactorNames {
+  FN_INCOLOR,
+  FN_Y0,
+  FN_Y1,
+  FN_Y2,
+  FN_Y3,
+  FN_X0,
+  FN_X1,
+  FN_SHEAR,
+  // x2 = x1 - shear
+  // x3 = x0 + shear
+  FN_NUM_FACTORS
+};
+
+// Rotation is +/- kRotationRange radians.
+const float kRotationRange = 0.02f;
+// Number of grey levels to shift by for each exposure step.
+const int kExposureFactor = 16;
+// Salt and pepper noise is +/- kSaltnPepper.
+const int kSaltnPepper = 5;
+// Min sum of width + height on which to operate the ramp.
+const int kMinRampSize = 1000;
+
+// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
+// corresponding to darkening on the copier and <0 lighter and 0 not copied.
+// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
+// If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the
+// pix is rotated by *rotation else it is randomly rotated and *rotation is
+// modified.
+//
+// HOW IT WORKS:
+// Most of the process is really dictated by the fact that the minimum
+// available convolution is 3X3, which is too big really to simulate a
+// good quality print/scan process. (2X2 would be better.)
+// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
+// images generally biased to being too light, so most of the work is to make
+// them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
+// (using a greyscale erosion) one heavy (by being before convolution) and one
+// light (after convolution).
+// With no dilation, after covolution, the images are so light that a heavy
+// constant offset is required to make the 0 image look reasonable. A simple
+// constant offset multiple of exposure to undo this value is enough to achieve
+// all the required lighting. This gives the advantage that exposure level 1
+// with a single dilation gives a good impression of the broken-yet-too-dark
+// problem that is often seen in scans.
+// A small random rotation gives some varying greyscale values on the edges,
+// and some random salt and pepper noise on top helps to realistically jaggy-up
+// the edges.
+// Finally a greyscale ramp provides a continuum of effects between exposure
+// levels.
+Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) {
+  Image pix = pixConvertTo8(input, false);
+  input.destroy();
+  input = pix;
+  int width = pixGetWidth(input);
+  int height = pixGetHeight(input);
+
+  if (exposure >= 2) {
+    // An erosion simulates the spreading darkening of a dark copy.
+    // This is backwards to binary morphology,
+    // see http://www.leptonica.com/grayscale-morphology.html
+    pix = input;
+    input = pixErodeGray(pix, 3, 3);
+    pix.destroy();
+  }
+  // A convolution is essential to any mode as no scanner produces an
+  // image as sharp as the electronic image.
+  pix = pixBlockconv(input, 1, 1);
+  input.destroy();
+  // A small random rotation helps to make the edges jaggy in a realistic way.
+  if (rotation != nullptr) {
+    float radians_clockwise = 0.0f;
+    if (*rotation) {
+      radians_clockwise = *rotation;
+    } else if (randomizer != nullptr) {
+      radians_clockwise = randomizer->SignedRand(kRotationRange);
+    }
+
+    input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
+    // Rotate the boxes to match.
+    *rotation = radians_clockwise;
+    pix.destroy();
+  } else {
+    input = pix;
+  }
+
+  if (exposure >= 3 || exposure == 1) {
+    // Erosion after the convolution is not as heavy as before, so it is
+    // good for level 1 and in addition as a level 3.
+    // This is backwards to binary morphology,
+    // see http://www.leptonica.com/grayscale-morphology.html
+    pix = input;
+    input = pixErodeGray(pix, 3, 3);
+    pix.destroy();
+  }
+  // The convolution really needed to be 2x2 to be realistic enough, but
+  // we only have 3x3, so we have to bias the image darker or lose thin
+  // strokes.
+  int erosion_offset = 0;
+  // For light and 0 exposure, there is no dilation, so compensate for the
+  // convolution with a big darkening bias which is undone for lighter
+  // exposures.
+  if (exposure <= 0) {
+    erosion_offset = -3 * kExposureFactor;
+  }
+  // Add in a general offset of the greyscales for the exposure level so
+  // a threshold of 128 gives a reasonable binary result.
+  erosion_offset -= exposure * kExposureFactor;
+  // Add a gradual fade over the page and a small amount of salt and pepper
+  // noise to simulate noise in the sensor/paper fibres and varying
+  // illumination.
+  l_uint32 *data = pixGetData(input);
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      int pixel = GET_DATA_BYTE(data, x);
+      if (randomizer != nullptr) {
+        pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper;
+      }
+      if (height + width > kMinRampSize) {
+        pixel -= (2 * x + y) * 32 / (height + width);
+      }
+      pixel += erosion_offset;
+      if (pixel < 0) {
+        pixel = 0;
+      }
+      if (pixel > 255) {
+        pixel = 255;
+      }
+      SET_DATA_BYTE(data, x, pixel);
+    }
+    data += pixGetWpl(input);
+  }
+  return input;
+}
+
+// Creates and returns a Pix distorted by various means according to the bool
+// flags. If boxes is not nullptr, the boxes are resized/positioned according to
+// any spatial distortion and also by the integer reduction factor box_scale
+// so they will match what the network will output.
+// Returns nullptr on error. The returned Pix must be pixDestroyed.
+Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
+                         bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
+                         std::vector<TBOX> *boxes) {
+  Image distorted = pix.copy();
+  // Things to do to synthetic training data.
+  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
+    // TODO(rays) Cook noise in a more thread-safe manner than rand().
+    // Attempt to make the sequences reproducible.
+    srand(randomizer->IntRand());
+    Image pixn = pixAddGaussianNoise(distorted, 8.0);
+    distorted.destroy();
+    if (smooth_noise) {
+      distorted = pixBlockconv(pixn, 1, 1);
+      pixn.destroy();
+    } else {
+      distorted = pixn;
+    }
+  }
+  if (blur && randomizer->SignedRand(1.0) > 0.0) {
+    Image blurred = pixBlockconv(distorted, 1, 1);
+    distorted.destroy();
+    distorted = blurred;
+  }
+  if (perspective) {
+    GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
+  }
+  if (boxes != nullptr) {
+    for (auto &b : *boxes) {
+      b.scale(1.0f / box_reduction);
+      if (b.width() <= 0) {
+        b.set_right(b.left() + 1);
+      }
+    }
+  }
+  if (invert && randomizer->SignedRand(1.0) < -0) {
+    pixInvert(distorted, distorted);
+  }
+  return distorted;
+}
+
+// Distorts anything that has a non-null pointer with the same pseudo-random
+// perspective distortion. Width and height only need to be set if there
+// is no pix. If there is a pix, then they will be taken from there.
+void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
+                                   std::vector<TBOX> *boxes) {
+  if (pix != nullptr && *pix != nullptr) {
+    width = pixGetWidth(*pix);
+    height = pixGetHeight(*pix);
+  }
+  float *im_coeffs = nullptr;
+  float *box_coeffs = nullptr;
+  l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
+  if (pix != nullptr && *pix != nullptr) {
+    // Transform the image.
+    Image transformed = pixProjective(*pix, im_coeffs, incolor);
+    if (transformed == nullptr) {
+      tprintf("Projective transformation failed!!\n");
+      return;
+    }
+    pix->destroy();
+    *pix = transformed;
+  }
+  if (boxes != nullptr) {
+    // Transform the boxes.
+    for (auto &b : *boxes) {
+      int x1, y1, x2, y2;
+      const TBOX &box = b;
+      projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1);
+      projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2);
+      TBOX new_box1(x1, height - y2, x2, height - y1);
+      projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1);
+      projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2);
+      TBOX new_box2(x1, height - y1, x2, height - y2);
+      b = new_box1.bounding_union(new_box2);
+    }
+  }
+  lept_free(im_coeffs);
+  lept_free(box_coeffs);
+}
+
+// Computes the coefficients of a randomized projective transformation.
+// The image transform requires backward transformation coefficient, and the
+// box transform the forward coefficients.
+// Returns the incolor arg to pixProjective.
+int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs,
+                     float **box_coeffs) {
+  // Setup "from" points.
+  Pta *src_pts = ptaCreate(4);
+  ptaAddPt(src_pts, 0.0f, 0.0f);
+  ptaAddPt(src_pts, width, 0.0f);
+  ptaAddPt(src_pts, width, height);
+  ptaAddPt(src_pts, 0.0f, height);
+  // Extract factors from pseudo-random sequence.
+  float factors[FN_NUM_FACTORS];
+  float shear = 0.0f; // Shear is signed.
+  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
+    // Everything is squared to make wild values rarer.
+    if (i == FN_SHEAR) {
+      // Shear is signed.
+      shear = randomizer->SignedRand(0.5 / 3.0);
+      shear = shear >= 0.0 ? shear * shear : -shear * shear;
+      // Keep the sheared points within the original rectangle.
+      if (shear < -factors[FN_X0]) {
+        shear = -factors[FN_X0];
+      }
+      if (shear > factors[FN_X1]) {
+        shear = factors[FN_X1];
+      }
+      factors[i] = shear;
+    } else if (i != FN_INCOLOR) {
+      factors[i] = fabs(randomizer->SignedRand(1.0));
+      if (i <= FN_Y3) {
+        factors[i] *= 5.0 / 8.0;
+      } else {
+        factors[i] *= 0.5;
+      }
+      factors[i] *= factors[i];
+    }
+  }
+  // Setup "to" points.
+  Pta *dest_pts = ptaCreate(4);
+  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
+  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
+  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height);
+  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height);
+  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
+  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
+  ptaDestroy(&src_pts);
+  ptaDestroy(&dest_pts);
+  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
+}
+
+} // namespace tesseract