Python2/PyMuPDF: mupdf-source/thirdparty/tesseract/src/lstm/networkio.h comparison

comparison mupdf-source/thirdparty/tesseract/src/lstm/networkio.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children

comparison

equal deleted inserted replaced

-:1d09e1dec1d9
+:b50eed0cc0ef
+///////////////////////////////////////////////////////////////////////
+// File:        networkio.h
+// Description: Network input/output data, allowing float/int implementations.
+// Author:      Ray Smith
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+#ifndef TESSERACT_LSTM_NETWORKIO_H_
+#define TESSERACT_LSTM_NETWORKIO_H_
+#include "helpers.h"
+#include "image.h"
+#include "static_shape.h"
+#include "stridemap.h"
+#include "weightmatrix.h"
+#include <cmath>
+#include <cstdio>
+#include <vector>
+struct Pix;
+namespace tesseract {
+// Class to contain all the input/output of a network, allowing for fixed or
+// variable-strided 2d to 1d mapping, and float or int8_t values. Provides
+// enough calculating functions to hide the detail of the implementation.
+class TESS_API NetworkIO {
+public:
+  // Constructs an empty NetworkIO in float mode.
+  NetworkIO() : int_mode_(false) {}
+  // Resizes the array (and stride), avoiding realloc if possible, to the given
+  // size from various size specs:
+  // Same stride size, but given number of features.
+  void Resize(const NetworkIO &src, int num_features) {
+    ResizeToMap(src.int_mode(), src.stride_map(), num_features);
+  }
+  // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
+  void Resize2d(bool int_mode, int width, int num_features);
+  // Resizes forcing a float representation with the stridemap of src and the
+  // given number of features.
+  void ResizeFloat(const NetworkIO &src, int num_features) {
+    ResizeToMap(false, src.stride_map(), num_features);
+  }
+  // Resizes to a specific stride_map.
+  void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features);
+  // Shrinks image size by x_scale,y_scale, and use given number of features.
+  void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features);
+  // Resizes to just 1 x-coord, whatever the input.
+  void ResizeXTo1(const NetworkIO &src, int num_features);
+  // Initialize all the array to zero.
+  void Zero();
+  // Initializes to zero all elements of the array that do not correspond to
+  // valid image positions. (If a batch of different-sized images are packed
+  // together, then there will be padding pixels.)
+  void ZeroInvalidElements();
+  // Sets up the array from the given image, using the currently set int_mode_.
+  // If the image width doesn't match the shape, the image is truncated or
+  // padded with noise to match.
+  void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer);
+  // Sets up the array from the given set of images, using the currently set
+  // int_mode_. If the image width doesn't match the shape, the images are
+  // truncated or padded with noise to match.
+  void FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
+                 TRand *randomizer);
+  // Copies the given pix to *this at the given batch index, stretching and
+  // clipping the pixel values so that [black, black + 2*contrast] maps to the
+  // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
+  // This is a 2-d operation in the sense that the output depth is the number
+  // of input channels, the height is the height of the image, and the width
+  // is the width of the image, or truncated/padded with noise if the width
+  // is a fixed size.
+  void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
+  // Copies the given pix to *this at the given batch index, as Copy2DImage
+  // above, except that the output depth is the height of the input image, the
+  // output height is 1, and the output width as for Copy2DImage.
+  // The image is thus treated as a 1-d set of vertical pixel strips.
+  void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
+  // Helper stores the pixel value in i_ or f_ according to int_mode_.
+  // t: is the index from the StrideMap corresponding to the current
+  //   [batch,y,x] position
+  // f: is the index into the depth/channel
+  // pixel: the value of the pixel from the image (in one channel)
+  // black: the pixel value to map to the lowest of the range of *this
+  // contrast: the range of pixel values to stretch to half the range of *this.
+  void SetPixel(int t, int f, int pixel, float black, float contrast);
+  // Converts the array to a Pix. Must be pixDestroyed after use.
+  Image ToPix() const;
+  // Prints the first and last num timesteps of the array for each feature.
+  void Print(int num) const;
+  // Returns the timestep width (dim1 of whichever array is in use).
+  int Width() const {
+    return int_mode_ ? i_.dim1() : f_.dim1();
+  }
+  // Returns the number of features (dim2 of whichever array is in use).
+  int NumFeatures() const {
+    return int_mode_ ? i_.dim2() : f_.dim2();
+  }
+  // Accessor to a timestep of the float matrix. Asserts that *this is in
+  // float mode.
+  float *f(int t) {
+    ASSERT_HOST(!int_mode_);
+    return f_[t];
+  }
+  // Read-only accessor to a timestep of the float matrix. Asserts that *this
+  // is in float mode.
+  const float *f(int t) const {
+    ASSERT_HOST(!int_mode_);
+    return f_[t];
+  }
+  // Read-only accessor to a timestep of the int8_t matrix. Asserts that *this
+  // is in int mode.
+  const int8_t *i(int t) const {
+    ASSERT_HOST(int_mode_);
+    return i_[t];
+  }
+  // Returns true if the int8_t array (i_) is in use rather than the float
+  // array (f_).
+  bool int_mode() const {
+    return int_mode_;
+  }
+  // Selects which of i_ (true) and f_ (false) is in use. Only the flag is
+  // changed here; no data is converted or resized.
+  void set_int_mode(bool is_quantized) {
+    int_mode_ = is_quantized;
+  }
+  // Returns the stride map for the 2d input data.
+  const StrideMap &stride_map() const {
+    return stride_map_;
+  }
+  // Replaces the stride map with a copy of map. The arrays are not resized.
+  void set_stride_map(const StrideMap &map) {
+    stride_map_ = map;
+  }
+  // Read-only access to the underlying float array. No mode check is made.
+  const GENERIC_2D_ARRAY<float> &float_array() const {
+    return f_;
+  }
+  // Mutable access to the underlying float array. No mode check is made.
+  GENERIC_2D_ARRAY<float> *mutable_float_array() {
+    return &f_;
+  }
+  // Copies a single time step from src.
+  void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t);
+  // Copies a part of single time step from src.
+  void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src,
+                           int src_t, int src_offset);
+  // Zeroes a single time step: NumFeatures() elements of row t in whichever
+  // of i_ and f_ is in use.
+  void ZeroTimeStep(int t) {
+    if (int_mode_) {
+      memset(i_[t], 0, sizeof(*i_[t]) * NumFeatures());
+    } else {
+      memset(f_[t], 0, sizeof(*f_[t]) * NumFeatures());
+    }
+  }
+  // Sets the given range to random values.
+  void Randomize(int t, int offset, int num_features, TRand *randomizer);
+  // Helper returns the label and score of the best choice over a range.
+  int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating,
+                          float *certainty) const;
+  // Helper returns the rating and certainty of the choice over a range in t.
+  void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating,
+                       float *certainty) const;
+  // Returns the index (label) of the best value at the given timestep,
+  // and if not null, sets the score to the log of the corresponding value.
+  // Forwards to the 4-argument overload with -1 for both excluded labels.
+  int BestLabel(int t, float *score) const {
+    return BestLabel(t, -1, -1, score);
+  }
+  // Returns the index (label) of the best value at the given timestep,
+  // excluding not_this and not_that, and if not null, sets the score to the
+  // log of the corresponding value.
+  int BestLabel(int t, int not_this, int not_that, float *score) const;
+  // Returns the best start position out of range (into which both start and end
+  // must fit) to obtain the highest cumulative score for the given labels.
+  int PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const;
+  // Returns the cumulative score of the given labels starting at start, and
+  // using one label per time-step.
+  TFloat ScoreOfLabels(const std::vector<int> &labels, int start) const;
+  // Helper function sets all the outputs for a single timestep, such that
+  // label has value ok_score, and the other labels share 1 - ok_score.
+  // Assumes float mode.
+  void SetActivations(int t, int label, float ok_score);
+  // Modifies the values, only if needed, so that the given label is
+  // the winner at the given time step t.
+  // Assumes float mode.
+  void EnsureBestLabel(int t, int label);
+  // Helper function converts prob to certainty taking the minimum into account.
+  static float ProbToCertainty(float prob);
+  // Returns true if there is any bad value that is suspiciously like a GT
+  // error. Assuming that *this is the difference(gradient) between target
+  // and forward output, returns true if there is a large negative value
+  // (correcting a very confident output) for which there is no corresponding
+  // positive value in an adjacent timestep for the same feature index. This
+  // allows the box-truthed samples to make fine adjustments to position while
+  // stopping other disagreements of confident output with ground truth.
+  bool AnySuspiciousTruth(float confidence_thr) const;
+  // Reads a single timestep to floats in the range [-1, 1].
+  void ReadTimeStep(int t, TFloat *output) const;
+  // Adds a single timestep to floats.
+  void AddTimeStep(int t, TFloat *inout) const;
+  // Adds part of a single timestep to floats.
+  void AddTimeStepPart(int t, int offset, int num_features, float *inout) const;
+  // Writes a single timestep from floats in the range [-1, 1].
+  void WriteTimeStep(int t, const TFloat *input);
+  // Writes a single timestep from floats in the range [-1, 1] writing only
+  // num_features elements of input to (*this)[t], starting at offset.
+  void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input);
+  // Maxpools a single time step from src.
+  void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line);
+  // Runs maxpool backward, using maxes to index timesteps in *this.
+  void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY<int> &maxes);
+  // Returns the min over time of the maxes over features of the outputs.
+  float MinOfMaxes() const;
+  // Returns the maximum, as reported by Max() on whichever of i_ and f_ is in
+  // use.
+  float Max() const {
+    return int_mode_ ? i_.Max() : f_.Max();
+  }
+  // Computes combined results for a combiner that chooses between an existing
+  // input and itself, with an additional output to indicate the choice.
+  void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output);
+  // Computes deltas for a combiner that chooses between 2 sets of inputs.
+  void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output);
+  // Copies the array checking that the types match.
+  void CopyAll(const NetworkIO &src);
+  // Adds the array to a float array, with scaling to [-1, 1] if the src is int.
+  void AddAllToFloat(const NetworkIO &src);
+  // Subtracts the array from a float array. src must also be float.
+  void SubtractAllFromFloat(const NetworkIO &src);
+  // Copies src to *this, with maxabs normalization to match scale.
+  void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale);
+  // Multiplies the float data by the given factor. Operates on f_ without
+  // checking int_mode_.
+  void ScaleFloatBy(float factor) {
+    f_ *= factor;
+  }
+  // Copies src to *this with independent reversal of the y dimension.
+  void CopyWithYReversal(const NetworkIO &src);
+  // Copies src to *this with independent reversal of the x dimension.
+  void CopyWithXReversal(const NetworkIO &src);
+  // Copies src to *this with independent transpose of the x and y dimensions.
+  void CopyWithXYTranspose(const NetworkIO &src);
+  // Copies src to *this, at the given feature_offset, returning the total
+  // feature offset after the copy. Multiple calls will stack outputs from
+  // multiple sources in feature space.
+  int CopyPacking(const NetworkIO &src, int feature_offset);
+  // Opposite of CopyPacking, fills *this with a part of src, starting at
+  // feature_offset, and picking num_features. Resizes *this to match.
+  void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features);
+  // Transposes the float part of *this into dest.
+  void Transpose(TransposedArray *dest) const;
+  // Clips the content of a single time-step to +/-range.
+  void ClipVector(int t, float range);
+  // Applies Func to timestep t of *this (u) and multiplies the result by
+  // timestep t of v_io (v) component-wise, putting the product in *product.
+  // Both *this and v_io must be in float mode (enforced by ASSERT_HOST).
+  // product receives f_.dim2() TFloat values.
+  template <class Func>
+  void FuncMultiply(const NetworkIO &v_io, int t, TFloat *product) {
+    Func f;
+    // Int mode is rejected up front, so only the float path exists below.
+    ASSERT_HOST(!int_mode_);
+    ASSERT_HOST(!v_io.int_mode_);
+    int dim = f_.dim2();
+    const float *u = f_[t];
+    const float *v = v_io.f_[t];
+    for (int i = 0; i < dim; ++i) {
+      product[i] = f(u[i]) * v[i];
+    }
+  }
+  // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w,
+  // component-wise, putting the product in *product.
+  // All NetworkIOs are assumed to be float.
+  template <class Func>
+  void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const TFloat *w,
+                     TFloat *product) const {
+    ASSERT_HOST(!int_mode_);
+    ASSERT_HOST(!v_io.int_mode_);
+    Func f;
+    const float *u = f_[u_t];
+    const float *v = v_io.f_[v_t];
+    int dim = f_.dim2();
+    for (int i = 0; i < dim; ++i) {
+      product[i] = f(u[i]) * v[i] * w[i];
+    }
+  }
+  // Applies Func to *this (u) at timestep t, and multiplies the result by
+  // v[t] * w, component-wise, adding the product to *product.
+  // All NetworkIOs are assumed to be float.
+  template <class Func>
+  void FuncMultiply3Add(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
+    ASSERT_HOST(!int_mode_);
+    ASSERT_HOST(!v_io.int_mode_);
+    Func f;
+    const float *u = f_[t];
+    const float *v = v_io.f_[t];
+    int dim = f_.dim2();
+    for (int i = 0; i < dim; ++i) {
+      product[i] += f(u[i]) * v[i] * w[i];
+    }
+  }
+  // Applies Func1 to *this (u), Func2 to v, and multiplies the result by w,
+  // component-wise, putting the product in product, all at timestep t, except
+  // w, which is a simple array. All NetworkIOs are assumed to be float.
+  template <class Func1, class Func2>
+  void Func2Multiply3(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
+    ASSERT_HOST(!int_mode_);
+    ASSERT_HOST(!v_io.int_mode_);
+    Func1 f;
+    Func2 g;
+    const float *u = f_[t];
+    const float *v = v_io.f_[t];
+    int dim = f_.dim2();
+    for (int i = 0; i < dim; ++i) {
+      product[i] = f(u[i]) * g(v[i]) * w[i];
+    }
+  }
+
+private:
+  // Returns the padding required for the given number of features in order
+  // for the SIMD operations to be safe.
+  static int GetPadding(int num_features);
+  // Choice of float vs 8 bit int for data.
+  GENERIC_2D_ARRAY<float> f_;
+  GENERIC_2D_ARRAY<int8_t> i_;
+  // Which of f_ and i_ are we actually using.
+  bool int_mode_;
+  // Stride for 2d input data.
+  StrideMap stride_map_;
+};
+} // namespace tesseract.
+#endif // TESSERACT_LSTM_NETWORKIO_H_

Mercurial > hgrepos > Python2 > PyMuPDF

comparison mupdf-source/thirdparty/tesseract/src/lstm/networkio.h @ 2:b50eed0cc0ef upstream