diff mupdf-source/thirdparty/tesseract/src/training/common/ctc.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/training/common/ctc.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,129 @@
+///////////////////////////////////////////////////////////////////////
+// File:        ctc.h
+// Description: Slightly improved standard CTC to compute the targets.
+// Author:      Ray Smith
+// Created:     Wed Jul 13 15:17:06 PDT 2016
+//
+// (C) Copyright 2016, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_LSTM_CTC_H_
+#define TESSERACT_LSTM_CTC_H_
+
+#include "export.h"
+#include "network.h"
+#include "networkio.h"
+#include "scrollview.h"
+
+namespace tesseract {
+
+// Class to encapsulate CTC and simple target generation.
+class TESS_COMMON_TRAINING_API CTC {
+public:
+  // Normalizes the probabilities such that no target has a prob below min_prob,
+  // and, provided that the initial total is at least min_total_prob, then all
+  // probs will sum to 1, otherwise to sum/min_total_prob. The maximum output
+  // probability is thus 1 - (num_classes-1)*min_prob.
+  static void NormalizeProbs(NetworkIO *probs) {
+    NormalizeProbs(probs->mutable_float_array());
+  }
+
+  // Builds a target using CTC. Slightly improved as follows:
+  // Includes normalizations and clipping for stability.
+  // labels should be pre-padded with nulls wherever desired, but they don't
+  // have to be between all labels. Allows for multi-label codes with no
+  // nulls between.
+  // labels can be longer than the time sequence, but the total number of
+  // essential labels (non-null plus nulls between equal labels) must not exceed
+  // the number of timesteps in outputs.
+  // outputs is the output of the network, and should have already been
+  // normalized with NormalizeProbs.
+  // On return targets is filled with the computed targets.
+  // Returns false if there is insufficient time for the labels.
+  static bool ComputeCTCTargets(const std::vector<int> &truth_labels, int null_char,
+                                const GENERIC_2D_ARRAY<float> &outputs, NetworkIO *targets);
+
+private:
+  // Constructor is private as the instance only holds information specific to
+  // the current labels, outputs etc, and is built by the static function.
+  CTC(const std::vector<int> &labels, int null_char, const GENERIC_2D_ARRAY<float> &outputs);
+
+  // Computes vectors of min and max label index for each timestep, based on
+  // whether skippability of nulls makes it possible to complete a valid path.
+  bool ComputeLabelLimits();
+  // Computes targets based purely on the labels by spreading the labels evenly
+  // over the available timesteps.
+  void ComputeSimpleTargets(GENERIC_2D_ARRAY<float> *targets) const;
+  // Computes mean positions and half widths of the simple targets by spreading
+  // the labels even over the available timesteps.
+  void ComputeWidthsAndMeans(std::vector<float> *half_widths, std::vector<int> *means) const;
+  // Calculates and returns a suitable fraction of the simple targets to add
+  // to the network outputs.
+  float CalculateBiasFraction();
+  // Runs the forward CTC pass, filling in log_probs.
+  void Forward(GENERIC_2D_ARRAY<double> *log_probs) const;
+  // Runs the backward CTC pass, filling in log_probs.
+  void Backward(GENERIC_2D_ARRAY<double> *log_probs) const;
+  // Normalizes and brings probs out of log space with a softmax over time.
+  void NormalizeSequence(GENERIC_2D_ARRAY<double> *probs) const;
+  // For each timestep computes the max prob for each class over all
+  // instances of the class in the labels_, and sets the targets to
+  // the max observed prob.
+  void LabelsToClasses(const GENERIC_2D_ARRAY<double> &probs, NetworkIO *targets) const;
+  // Normalizes the probabilities such that no target has a prob below min_prob,
+  // and, provided that the initial total is at least min_total_prob, then all
+  // probs will sum to 1, otherwise to sum/min_total_prob. The maximum output
+  // probability is thus 1 - (num_classes-1)*min_prob.
+  static void NormalizeProbs(GENERIC_2D_ARRAY<float> *probs);
+  // Returns true if the label at index is a needed null.
+  bool NeededNull(int index) const;
+  // Returns exp(clipped(x)), clipping x to a reasonable range to prevent over/
+  // underflow.
+  static double ClippedExp(double x) {
+    if (x < -kMaxExpArg_) {
+      return exp(-kMaxExpArg_);
+    }
+    if (x > kMaxExpArg_) {
+      return exp(kMaxExpArg_);
+    }
+    return exp(x);
+  }
+
+  // Minimum probability limit for softmax input to ctc_loss.
+  static const float kMinProb_;
+  // Maximum absolute argument to exp().
+  static const double kMaxExpArg_;
+  // Minimum probability for total prob in time normalization.
+  static const double kMinTotalTimeProb_;
+  // Minimum probability for total prob in final normalization.
+  static const double kMinTotalFinalProb_;
+
+  // The truth label indices that are to be matched to outputs_.
+  const std::vector<int> &labels_;
+  // The network outputs.
+  GENERIC_2D_ARRAY<float> outputs_;
+  // The null or "blank" label.
+  int null_char_;
+  // Number of timesteps in outputs_.
+  int num_timesteps_;
+  // Number of classes in outputs_.
+  int num_classes_;
+  // Number of labels in labels_.
+  int num_labels_;
+  // Min and max valid label indices for each timestep.
+  std::vector<int> min_labels_;
+  std::vector<int> max_labels_;
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_LSTM_CTC_H_