diff mupdf-source/thirdparty/tesseract/src/lstm/input.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/lstm/input.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,146 @@
+///////////////////////////////////////////////////////////////////////
+// File:        input.cpp
+// Description: Input layer class for neural network implementations.
+// Author:      Ray Smith
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "input.h"
+
+#include <allheaders.h>
+#include "imagedata.h"
+#include "pageres.h"
+#include "scrollview.h"
+
+namespace tesseract {
+
+// Max height for variable height inputs before scaling anyway.
+const int kMaxInputHeight = 48;
+
+Input::Input(const std::string &name, int ni, int no)
+    : Network(NT_INPUT, name, ni, no), cached_x_scale_(1) {}
+Input::Input(const std::string &name, const StaticShape &shape)
+    : Network(NT_INPUT, name, shape.height(), shape.depth()), shape_(shape), cached_x_scale_(1) {
+  if (shape.height() == 1) {
+    ni_ = shape.depth();
+  }
+}
+
+// Writes to the given file. Returns false in case of error.
+bool Input::Serialize(TFile *fp) const {
+  return Network::Serialize(fp) && shape_.Serialize(fp);
+}
+
+// Reads from the given file. Returns false in case of error.
+bool Input::DeSerialize(TFile *fp) {
+  return shape_.DeSerialize(fp);
+}
+
+// Returns an integer reduction factor that the network applies to the
+// time sequence. Assumes that any 2-d is already eliminated. Used for
+// scaling bounding boxes of truth data.
+int Input::XScaleFactor() const {
+  return 1;
+}
+
+// Provides the (minimum) x scale factor to the network (of interest only to
+// input units) so they can determine how to scale bounding boxes.
+void Input::CacheXScaleFactor(int factor) {
+  cached_x_scale_ = factor;
+}
+
+// Runs forward propagation of activations on the input line.
+// See Network for a detailed discussion of the arguments.
+void Input::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose,
+                    NetworkScratch *scratch, NetworkIO *output) {
+  *output = input;
+}
+
+// Runs backward propagation of errors on the deltas line.
+// See NetworkCpp for a detailed discussion of the arguments.
+bool Input::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
+                     NetworkIO *back_deltas) {
+  tprintf("Input::Backward should not be called!!\n");
+  return false;
+}
+
+// Creates and returns a Pix of appropriate size for the network from the
+// image_data. If non-null, *image_scale returns the image scale factor used.
+// Returns nullptr on error.
+/* static */
+Image Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
+                              TRand *randomizer, float *image_scale) {
+  // Note that NumInputs() is defined as input image height.
+  int target_height = network->NumInputs();
+  int width, height;
+  Image pix =
+      image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr);
+  if (pix == nullptr) {
+    tprintf("Bad pix from ImageData!\n");
+    return nullptr;
+  }
+  if (width < min_width || height < min_width) {
+    tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width);
+    pix.destroy();
+    return nullptr;
+  }
+  return pix;
+}
+
+// Converts the given pix to a NetworkIO of height and depth appropriate to the
+// given StaticShape:
+// If depth == 3, convert to 24 bit color, otherwise normalized grey.
+// Scale to target height, if the shape's height is > 1, or its depth if the
+// height == 1. If height == 0 then no scaling.
+// NOTE: It isn't safe for multiple threads to call this on the same pix.
+/* static */
+void Input::PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
+                            NetworkIO *input) {
+  bool color = shape.depth() == 3;
+  Image var_pix = pix;
+  int depth = pixGetDepth(var_pix);
+  Image normed_pix = nullptr;
+  // On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
+  // colormap, so we just have to deal with depth conversion here.
+  if (color) {
+    // Force RGB.
+    if (depth == 32) {
+      normed_pix = var_pix.clone();
+    } else {
+      normed_pix = pixConvertTo32(var_pix);
+    }
+  } else {
+    // Convert non-8-bit images to 8 bit.
+    if (depth == 8) {
+      normed_pix = var_pix.clone();
+    } else {
+      normed_pix = pixConvertTo8(var_pix, false);
+    }
+  }
+  int height = pixGetHeight(normed_pix);
+  int target_height = shape.height();
+  if (target_height == 1) {
+    target_height = shape.depth();
+  }
+  if (target_height != 0 && target_height != height) {
+    // Get the scaled image.
+    float im_factor = static_cast<float>(target_height) / height;
+    Image scaled_pix = pixScale(normed_pix, im_factor, im_factor);
+    normed_pix.destroy();
+    normed_pix = scaled_pix;
+  }
+  input->FromPix(shape, normed_pix, randomizer);
+  normed_pix.destroy();
+}
+
+} // namespace tesseract.