Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/tesseract/src/lstm/networkio.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/lstm/networkio.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,1000 @@
+///////////////////////////////////////////////////////////////////////
+// File:        networkio.cpp
+// Description: Network input/output data, allowing float/int implementations.
+// Author:      Ray Smith
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "networkio.h"
+#include <cfloat> // for FLT_MAX
+#include <cmath>
+
+#include <allheaders.h>
+#include "functions.h"
+#include "statistc.h"
+#include "tprintf.h"
+
+namespace tesseract {
+
+// Floor for certainty values: ProbToCertainty() returns this for any prob <= kMinProb.
+const float kMinCertainty = -20.0f;
+// Probability whose natural log equals kMinCertainty, i.e. exp(kMinCertainty).
+const float kMinProb = std::exp(kMinCertainty);
+
+// Reshapes *this into a plain 2-d temp buffer of width x num_features,
+// replacing the stride map with a default one (no batches, no y-dim).
+// Only the array matching int_mode is resized.
+void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
+  stride_map_ = StrideMap();
+  int_mode_ = int_mode;
+  if (!int_mode_) {
+    f_.ResizeNoInit(width, num_features);
+    return;
+  }
+  i_.ResizeNoInit(width, num_features, GetPadding(num_features));
+}
+
+// Adopts the given stride_map and mode, resizes the matching array to
+// stride_map.Width() x num_features, then zeroes the invalid elements.
+void NetworkIO::ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features) {
+  // A crash in here with this == nullptr most likely means the call came
+  // through an uninitialized scratch element, ie someone should have called
+  // NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!!
+  stride_map_ = stride_map;
+  int_mode_ = int_mode;
+  const int width = stride_map.Width();
+  if (int_mode_) {
+    i_.ResizeNoInit(width, num_features, GetPadding(num_features));
+  } else {
+    f_.ResizeNoInit(width, num_features);
+  }
+  ZeroInvalidElements();
+}
+
+// Resizes *this to the stride map of src scaled by x_scale,y_scale, using
+// the mode of src and the given number of features.
+void NetworkIO::ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features) {
+  StrideMap scaled_map(src.stride_map_);
+  scaled_map.ScaleXY(x_scale, y_scale);
+  ResizeToMap(src.int_mode_, scaled_map, num_features);
+}
+
+// Resizes *this to the stride map of src with its width reduced to 1,
+// using the mode of src and the given number of features.
+void NetworkIO::ResizeXTo1(const NetworkIO &src, int num_features) {
+  StrideMap narrow_map(src.stride_map_);
+  narrow_map.ReduceWidthTo1();
+  ResizeToMap(src.int_mode_, narrow_map, num_features);
+}
+
+// Sets every timestep of the array to zero.
+void NetworkIO::Zero() {
+  // Zero out everything, one timestep at a time in case it is aligned.
+  const int num_steps = Width();
+  int t = 0;
+  while (t < num_steps) {
+    ZeroTimeStep(t);
+    ++t;
+  }
+}
+
+// Zeroes all elements of the array that do not correspond to valid image
+// positions, walking stride_map_ one batch entry at a time. (If a batch of
+// different-sized images are packed together, there will be padding pixels.)
+void NetworkIO::ZeroInvalidElements() {
+  int num_features = NumFeatures();
+  int full_width = stride_map_.Size(FD_WIDTH);
+  int full_height = stride_map_.Size(FD_HEIGHT);
+  StrideMap::Index b_index(stride_map_);
+  do {
+    int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1; // One past the last valid x of this entry.
+    if (end_x < full_width) {
+      // The width is small, so fill for every valid y.
+      StrideMap::Index y_index(b_index);
+      int fill_size = num_features * (full_width - end_x); // Elements to clear on each row.
+      do {
+        StrideMap::Index z_index(y_index);
+        z_index.AddOffset(end_x, FD_WIDTH); // Move to the first padding x on this row.
+        if (int_mode_) {
+          ZeroVector(fill_size, i_[z_index.t()]);
+        } else {
+          ZeroVector(fill_size, f_[z_index.t()]);
+        }
+      } while (y_index.AddOffset(1, FD_HEIGHT));
+    }
+    int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1; // One past the last valid y of this entry.
+    if (end_y < full_height) {
+      // The height is small, so fill in the space in one go.
+      StrideMap::Index y_index(b_index);
+      y_index.AddOffset(end_y, FD_HEIGHT);
+      int fill_size = num_features * full_width * (full_height - end_y); // NOTE(review): assumes timesteps are contiguous with num_features elements each - confirm.
+      if (int_mode_) {
+        ZeroVector(fill_size, i_[y_index.t()]);
+      } else {
+        ZeroVector(fill_size, f_[y_index.t()]);
+      }
+    }
+  } while (b_index.AddOffset(1, FD_BATCH));
+}
+
+// Helper estimates a black point and a white point for contrast-enhancing an
+// image. Only the middle row is sampled, on the assumption that the image
+// holds a single line of text, so that row passes through at least some of
+// it and its local minima and maxima are a good proxy for black and white
+// pixel samples. *black is set to mins.ile(0.25) and *white to
+// maxes.ile(0.75); a single sample of 0 (resp. 255) is substituted when no
+// local minimum (resp. maximum) was found.
+static void ComputeBlackWhite(Image pix, float *black, float *white) {
+  const int width = pixGetWidth(pix);
+  const int height = pixGetHeight(pix);
+  STATS mins(0, 255), maxes(0, 255);
+  if (width >= 3) {
+    l_uint32 *mid_row = pixGetData(pix) + pixGetWpl(pix) * (height / 2);
+    int left = GET_DATA_BYTE(mid_row, 0);
+    int centre = GET_DATA_BYTE(mid_row, 1);
+    for (int x = 2; x < width; ++x) {
+      const int right = GET_DATA_BYTE(mid_row, x);
+      // centre is an extremum when it is no higher (lower) than both
+      // neighbours and strictly different from at least one of them.
+      const bool not_above = centre <= left && centre <= right;
+      const bool not_below = centre >= left && centre >= right;
+      if (not_above && !not_below) {
+        // Local minimum.
+        mins.add(centre, 1);
+      }
+      if (not_below && !not_above) {
+        // Local maximum.
+        maxes.add(centre, 1);
+      }
+      left = centre;
+      centre = right;
+    }
+  }
+  if (mins.get_total() == 0) {
+    mins.add(0, 1);
+  }
+  if (maxes.get_total() == 0) {
+    maxes.add(255, 1);
+  }
+  *black = mins.ile(0.25);
+  *white = maxes.ile(0.75);
+}
+
+// Sets up the array from a single image, using the currently set int_mode_.
+// Delegates to FromPixes with a one-element batch, so a width mismatch with
+// the shape is handled there (image truncated or padded with noise).
+void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) {
+  std::vector<Image> single_batch;
+  single_batch.push_back(pix);
+  FromPixes(shape, single_batch, randomizer);
+}
+
+// Sets up the array from the given batch of images, using the currently set
+// int_mode_. A non-zero height/width in shape overrides the corresponding
+// image dimension, in which case the images are truncated or padded with
+// noise to match.
+void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
+                          TRand *randomizer) {
+  const int fixed_height = shape.height();
+  const int fixed_width = shape.width();
+  // First pass: collect the (height, width) of every batch entry.
+  std::vector<std::pair<int, int>> h_w_pairs;
+  for (auto &&pix : pixes) {
+    Image var_pix = pix;
+    const int pix_width = pixGetWidth(var_pix);
+    const int pix_height = pixGetHeight(var_pix);
+    h_w_pairs.emplace_back(fixed_height != 0 ? fixed_height : pix_height,
+                           fixed_width != 0 ? fixed_width : pix_width);
+  }
+  stride_map_.SetStride(h_w_pairs);
+  ResizeToMap(int_mode(), stride_map_, shape.depth());
+  // Second pass: copy the pixel data of each image into its batch slot.
+  const bool needs_black_white = shape.depth() != 3;
+  const bool one_dimensional = fixed_height == 1;
+  for (size_t b = 0; b < pixes.size(); ++b) {
+    Image pix = pixes[b];
+    float black = 0.0f;
+    float white = 255.0f;
+    if (needs_black_white) {
+      ComputeBlackWhite(pix, &black, &white);
+    }
+    float contrast = (white - black) / 2.0f;
+    if (contrast <= 0.0f) {
+      contrast = 1.0f;
+    }
+    if (one_dimensional) {
+      Copy1DGreyImage(b, pix, black, contrast, randomizer);
+    } else {
+      Copy2DImage(b, pix, black, contrast, randomizer);
+    }
+  }
+}
+
+// Copies the given pix to *this at the given batch index, passing each pixel
+// through SetPixel() so that [black, black + 2*contrast] maps to the
+// dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
+// With 3 features the RED..BLUE bytes of each pixel are stored as features
+// 0..2; otherwise only feature 0 is written. Rows are copied up to the image
+// height and columns up to the image width (truncated to the target width);
+// all remaining positions within the target size are filled by Randomize().
+void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) {
+  int width = pixGetWidth(pix);
+  int height = pixGetHeight(pix);
+  int wpl = pixGetWpl(pix);
+  StrideMap::Index index(stride_map_);
+  index.AddOffset(batch, FD_BATCH);
+  int t = index.t(); // Running timestep index, starting at the first position of this batch entry.
+  int target_height = stride_map_.Size(FD_HEIGHT);
+  int target_width = stride_map_.Size(FD_WIDTH);
+  int num_features = NumFeatures();
+  bool color = num_features == 3;
+  if (width > target_width) {
+    width = target_width;
+  }
+  uint32_t *line = pixGetData(pix);
+  for (int y = 0; y < target_height; ++y, line += wpl) { // line is only dereferenced while y < height.
+    int x = 0;
+    if (y < height) {
+      for (x = 0; x < width; ++x, ++t) {
+        if (color) {
+          int f = 0;
+          for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
+            int pixel = GET_DATA_BYTE(line + x, c); // Byte c of the 32 bit word at x.
+            SetPixel(t, f++, pixel, black, contrast);
+          }
+        } else {
+          int pixel = GET_DATA_BYTE(line, x); // NOTE(review): assumes an 8 bpp image here - confirm.
+          SetPixel(t, 0, pixel, black, contrast);
+        }
+      }
+    }
+    for (; x < target_width; ++x) { // Rest of the row (or the whole row when y >= height).
+      Randomize(t++, 0, num_features, randomizer);
+    }
+  }
+}
+
+// Copies the given pix to *this at the given batch index, treating the image
+// as a 1-d sequence of vertical pixel strips: each image column becomes one
+// timestep whose features are the pixels of that column, so the image height
+// must equal NumFeatures(). Pixel values are mapped via SetPixel() as for
+// Copy2DImage. Columns beyond the image width, up to the target width, are
+// filled by Randomize().
+void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast,
+                                TRand *randomizer) {
+  int width = pixGetWidth(pix);
+  const int height = pixGetHeight(pix);
+  ASSERT_HOST(height == NumFeatures());
+  const int wpl = pixGetWpl(pix);
+  StrideMap::Index index(stride_map_);
+  index.AddOffset(batch, FD_BATCH);
+  int t = index.t();
+  const int target_width = stride_map_.Size(FD_WIDTH);
+  if (target_width < width) {
+    width = target_width;
+  }
+  int x = 0;
+  // Columns that exist in the image.
+  while (x < width) {
+    for (int row = 0; row < height; ++row) {
+      uint32_t *line = pixGetData(pix) + wpl * row;
+      SetPixel(t, row, GET_DATA_BYTE(line, x), black, contrast);
+    }
+    ++x;
+    ++t;
+  }
+  // Remaining columns up to the target width.
+  while (x < target_width) {
+    Randomize(t, 0, height, randomizer);
+    ++t;
+    ++x;
+  }
+}
+
+// Helper stores a pixel value in i_ or f_ according to int_mode_.
+// t: the index from the StrideMap corresponding to the current
+//   [batch,y,x] position
+// f: the index into the depth/channel
+// pixel: the value of the pixel from the image (in one channel)
+// black: the pixel value that maps to -1
+// contrast: the span of pixel values that maps to one unit, so that
+//   black + 2*contrast maps to +1.
+// In int mode the result is scaled by INT8_MAX + 1, rounded and clipped to
+// [-INT8_MAX, INT8_MAX].
+void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
+  const float scaled = (pixel - black) / contrast - 1.0f;
+  if (!int_mode_) {
+    f_[t][f] = scaled;
+    return;
+  }
+  const int quantized = IntCastRounded((INT8_MAX + 1) * scaled);
+  i_[t][f] = ClipToRange<int>(quantized, -INT8_MAX, INT8_MAX);
+}
+
+// Converts the array to a new 32 bpp Pix, one band of im_height rows per feature. Must be pixDestroyed after use.
+Image NetworkIO::ToPix() const {
+  // Image size comes from the stride map; feature_factor becomes 3 only for color.
+  int im_width = stride_map_.Size(FD_WIDTH);
+  int im_height = stride_map_.Size(FD_HEIGHT);
+  int num_features = NumFeatures();
+  int feature_factor = 1;
+  if (num_features == 3) {
+    // Special hack for color: a single band with 3 features per pixel.
+    num_features = 1;
+    feature_factor = 3;
+  }
+  Image pix = pixCreate(im_width, im_height * num_features, 32);
+  StrideMap::Index index(stride_map_);
+  do {
+    int im_x = index.index(FD_WIDTH);
+    int top_im_y = index.index(FD_HEIGHT);
+    int im_y = top_im_y; // Advanced by im_height per feature to reach that feature's band.
+    int t = index.t();
+    if (int_mode_) {
+      const int8_t *features = i_[t];
+      for (int y = 0; y < num_features; ++y, im_y += im_height) {
+        int pixel = features[y * feature_factor];
+        // 1 or 2 features use greyscale.
+        int red = ClipToRange<int>(pixel + 128, 0, 255);
+        int green = red, blue = red;
+        if (feature_factor == 3) {
+          // With 3 features assume RGB color.
+          green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
+          blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
+        } else if (num_features > 3) {
+          // More than 3 features use false yellow/blue color, assuming a signed
+          // input in the range [-1,1].
+          red = abs(pixel) * 2; // NOTE(review): not clipped; a stored value of -128 would give 256 - confirm it cannot occur.
+          if (pixel >= 0) {
+            green = red;
+            blue = 0;
+          } else {
+            blue = red;
+            green = red = 0;
+          }
+        }
+        pixSetPixel(pix, im_x, im_y,
+                    (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
+      }
+    } else {
+      const float *features = f_[t];
+      for (int y = 0; y < num_features; ++y, im_y += im_height) {
+        float pixel = features[y * feature_factor];
+        // 1 or 2 features use greyscale.
+        int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
+        int green = red, blue = red;
+        if (feature_factor == 3) {
+          // With 3 features assume RGB color.
+          pixel = features[y * feature_factor + 1];
+          green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
+          pixel = features[y * feature_factor + 2];
+          blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
+        } else if (num_features > 3) {
+          // More than 3 features use false yellow/blue color, assuming a signed
+          // input in the range [-1,1].
+          red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255);
+          if (pixel >= 0) {
+            green = red;
+            blue = 0;
+          } else {
+            blue = red;
+            green = red = 0;
+          }
+        }
+        pixSetPixel(pix, im_x, im_y,
+                    (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
+      }
+    }
+  } while (index.Increment()); // Visits every position of the stride map, including all batch entries.
+  return pix;
+}
+
+// Prints one line per feature containing the values of the first num and
+// last num timesteps, or of all timesteps if num is 0. Int values are
+// printed divided by INT8_MAX.
+void NetworkIO::Print(int num) const {
+  const int num_features = NumFeatures();
+  for (int f = 0; f < num_features; ++f) {
+    for (int t = 0; t < Width(); ++t) {
+      const bool shown = num == 0 || t < num || t + num >= Width();
+      if (!shown) {
+        continue;
+      }
+      if (int_mode_) {
+        tprintf(" %g", static_cast<float>(i_[t][f]) / INT8_MAX);
+      } else {
+        tprintf(" %g", f_[t][f]);
+      }
+    }
+    tprintf("\n");
+  }
+}
+
+// Copies all dim2() features of timestep src_t of src to timestep dest_t of
+// *this. Both must be in the same mode (int or float).
+void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t) {
+  ASSERT_HOST(int_mode_ == src.int_mode_);
+  if (!int_mode_) {
+    const size_t num_bytes = f_.dim2() * sizeof(f_[0][0]);
+    memcpy(f_[dest_t], src.f_[src_t], num_bytes);
+    return;
+  }
+  const size_t num_bytes = i_.dim2() * sizeof(i_[0][0]);
+  memcpy(i_[dest_t], src.i_[src_t], num_bytes);
+}
+
+// Copies num_features features, starting at src_offset within timestep src_t
+// of src, to dest_offset within timestep dest_t of *this. Both must be in
+// the same mode (int or float).
+void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features,
+                                    const NetworkIO &src, int src_t, int src_offset) {
+  ASSERT_HOST(int_mode_ == src.int_mode_);
+  if (!int_mode_) {
+    float *to = f_[dest_t] + dest_offset;
+    const float *from = src.f_[src_t] + src_offset;
+    memcpy(to, from, num_features * sizeof(f_[0][0]));
+    return;
+  }
+  int8_t *to = i_[dest_t] + dest_offset;
+  const int8_t *from = src.i_[src_t] + src_offset;
+  memcpy(to, from, num_features * sizeof(i_[0][0]));
+}
+
+// Fills num_features features of timestep t, starting at offset, with values
+// from randomizer->SignedRand(): called with 1.0 in float mode, and with
+// INT8_MAX (then rounded) in int mode.
+void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) {
+  if (!int_mode_) {
+    // float mode.
+    float *dest = f_[t] + offset;
+    for (int k = 0; k < num_features; ++k) {
+      dest[k] = randomizer->SignedRand(1.0);
+    }
+    return;
+  }
+  int8_t *dest = i_[t] + offset;
+  for (int k = 0; k < num_features; ++k) {
+    dest[k] = IntCastRounded(randomizer->SignedRand(INT8_MAX));
+  }
+}
+
+// Helper returns the label with the lowest rating over [t_start, t_end),
+// as computed by ScoresOverRange, ignoring the labels not_this and null_ch.
+// On success *rating and *certainty hold the scores of the returned label.
+// Returns -1 without touching the outputs if the range is empty. Returns -1
+// with both outputs set to 0 if there is no candidate label at all (every
+// label excluded, or no features); previously that case called
+// ScoresOverRange with choice == -1, which read outside the array.
+// The first label wins ties.
+int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating,
+                                   float *certainty) const {
+  if (t_end <= t_start) {
+    return -1;
+  }
+  int max_char = -1;
+  float min_score = 0.0f;
+  float best_certainty = 0.0f;
+  const int num_features = NumFeatures();
+  for (int c = 0; c < num_features; ++c) {
+    if (c == not_this || c == null_ch) {
+      continue;
+    }
+    ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);
+    if (max_char < 0 || *rating < min_score) {
+      // Remember the scores of the winner so they need not be recomputed.
+      min_score = *rating;
+      best_certainty = *certainty;
+      max_char = c;
+    }
+  }
+  // The outputs currently hold the scores of the last candidate tried, so
+  // restore those of the winner (or zeros if there was no candidate).
+  *rating = min_score;
+  *certainty = best_certainty;
+  return max_char;
+}
+
+// Helper computes the rating and certainty of choice over [t_start, t_end) using a 3-state best path; float mode only.
+void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating,
+                                float *certainty) const {
+  ASSERT_HOST(!int_mode_);
+  *rating = 0.0f;
+  *certainty = 0.0f;
+  if (t_end <= t_start || t_end <= 0) {
+    return; // Empty range: outputs stay at 0.
+  }
+  float ratings[3] = {0.0f, 0.0f, 0.0f}; // Summed -certainty per state: [0] nulls only, [1] in choice, [2] nulls after choice.
+  float certs[3] = {0.0f, 0.0f, 0.0f}; // Minimum certainty seen along each state's path.
+  for (int t = t_start; t < t_end; ++t) {
+    const float *line = f_[t];
+    float score = ProbToCertainty(line[choice]);
+    float zero = ProbToCertainty(line[null_ch]);
+    if (t == t_start) {
+      ratings[2] = FLT_MAX; // State 2 cannot be occupied at the first timestep.
+      ratings[1] = -score;
+      certs[1] = score;
+    } else {
+      for (int i = 2; i >= 1; --i) { // Downward, so state 2 sees state 1 before state 1 is overwritten.
+        if (ratings[i] > ratings[i - 1]) {
+          ratings[i] = ratings[i - 1];
+          certs[i] = certs[i - 1];
+        }
+      }
+      ratings[2] -= zero;
+      if (zero < certs[2]) {
+        certs[2] = zero;
+      }
+      ratings[1] -= score;
+      if (score < certs[1]) {
+        certs[1] = score;
+      }
+    }
+    ratings[0] -= zero;
+    if (zero < certs[0]) {
+      certs[0] = zero;
+    }
+  }
+  int best_i = ratings[2] < ratings[1] ? 2 : 1; // State 0 (choice never emitted) is not a valid end state.
+  *rating = ratings[best_i] + t_end - t_start; // The number of timesteps is added to the summed cost.
+  *certainty = certs[best_i];
+}
+
+// Returns the index (label) of the largest value at timestep t, skipping
+// not_this and not_that, or -1 if no label qualifies. The first label wins
+// ties. If score is not null, it is set to ProbToCertainty of the winning
+// value. Float mode only.
+int NetworkIO::BestLabel(int t, int not_this, int not_that, float *score) const {
+  ASSERT_HOST(!int_mode_);
+  const float *probs = f_[t];
+  int winner = -1;
+  float winner_prob = -FLT_MAX;
+  for (int label = 0; label < f_.dim2(); ++label) {
+    if (label == not_this || label == not_that) {
+      continue;
+    }
+    if (probs[label] > winner_prob) {
+      winner_prob = probs[label];
+      winner = label;
+    }
+  }
+  if (score != nullptr) {
+    *score = ProbToCertainty(winner_prob);
+  }
+  return winner;
+}
+
+// Returns the start position in [start, end), such that all the labels fit
+// before end, that gives the highest ScoreOfLabels. The earliest position
+// wins ties. Returns -1 if the labels do not fit in the range.
+int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const {
+  const int length = labels.size();
+  const int last_start = end - length;
+  int best_start = -1;
+  TFloat best_score = 0;
+  int s = start;
+  while (s <= last_start) {
+    const TFloat score = ScoreOfLabels(labels, s);
+    if (best_start < 0 || score > best_score) {
+      best_score = score;
+      best_start = s;
+    }
+    ++s;
+  }
+  return best_start;
+}
+
+// Returns the sum of the float outputs for the given labels, taking one
+// label per timestep beginning at start.
+TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
+  TFloat total = 0;
+  int t = start;
+  for (int label : labels) {
+    total += f_(t, label);
+    ++t;
+  }
+  return total;
+}
+
+// Helper function sets all the outputs of timestep t so that label has the
+// value ok_score and each of the other labels has an equal share of
+// 1 - ok_score. Float mode only.
+void NetworkIO::SetActivations(int t, int label, float ok_score) {
+  ASSERT_HOST(!int_mode_);
+  const int num_classes = NumFeatures();
+  const float bad_score = (1.0f - ok_score) / (num_classes - 1);
+  float *targets = f_[t];
+  // Give everything the share first, then overwrite the chosen label.
+  for (int c = num_classes - 1; c >= 0; --c) {
+    targets[c] = bad_score;
+  }
+  targets[label] = ok_score;
+}
+
+// Modifies the values at timestep t, only if needed, so that the given
+// label becomes the winner there. Float mode only.
+void NetworkIO::EnsureBestLabel(int t, int label) {
+  ASSERT_HOST(!int_mode_);
+  if (BestLabel(t, nullptr) == label) {
+    return; // Already the winner.
+  }
+  // Output value needs enhancing. Divide every other element by 3 and move
+  // the wanted label two thirds of the way towards 1.
+  const int num_classes = NumFeatures();
+  float *targets = f_[t];
+  for (int c = 0; c < num_classes; ++c) {
+    if (c != label) {
+      targets[c] /= 3.0;
+    } else {
+      targets[c] += (1.0 - targets[c]) * (2 / 3.0);
+    }
+  }
+}
+
+// Helper function converts prob to a certainty: the natural log of prob,
+// except that anything not above kMinProb yields kMinCertainty.
+/* static */
+float NetworkIO::ProbToCertainty(float prob) {
+  if (prob > kMinProb) {
+    return std::log(prob);
+  }
+  return kMinCertainty;
+}
+
+// Returns true if there is any bad value that is suspiciously like a GT
+// error. Assuming that *this is the difference (gradient) between target
+// and forward output, returns true if some value is below -confidence_thr
+// (correcting a very confident output) while the same feature in each
+// existing adjacent timestep is below confidence_thr / 2, ie there is no
+// corresponding positive value next to it. This allows the box-truthed
+// samples to make fine adjustments to position while stopping other
+// disagreements of confident output with ground truth.
+bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
+  const int num_features = NumFeatures();
+  const int width = Width();
+  for (int t = 0; t < width; ++t) {
+    const float *grads = f_[t];
+    for (int f = 0; f < num_features; ++f) {
+      if (!(grads[f] < -confidence_thr)) {
+        continue;
+      }
+      // Correcting strong output. Check for movement.
+      const bool weak_before = t == 0 || f_[t - 1][f] < confidence_thr / 2;
+      if (!weak_before) {
+        continue;
+      }
+      const bool weak_after = t + 1 == width || f_[t + 1][f] < confidence_thr / 2;
+      if (weak_after) {
+        return true; // No strong positive on either side.
+      }
+    }
+  }
+  return false;
+}
+
+// Reads all the features of timestep t into output as TFloat. Int values
+// are divided by INT8_MAX; float values are copied unchanged.
+void NetworkIO::ReadTimeStep(int t, TFloat *output) const {
+  if (!int_mode_) {
+    const float *values = f_[t];
+    const int count = f_.dim2();
+    for (int k = 0; k < count; ++k) {
+      output[k] = static_cast<TFloat>(values[k]);
+    }
+    return;
+  }
+  const int8_t *values = i_[t];
+  const int count = i_.dim2();
+  for (int k = 0; k < count; ++k) {
+    output[k] = static_cast<TFloat>(values[k]) / INT8_MAX;
+  }
+}
+
+// Adds all the features of timestep t to inout. Int values are divided by
+// INT8_MAX before being added.
+void NetworkIO::AddTimeStep(int t, TFloat *inout) const {
+  const int num_features = NumFeatures();
+  if (!int_mode_) {
+    const float *values = f_[t];
+    for (int k = 0; k < num_features; ++k) {
+      inout[k] += values[k];
+    }
+    return;
+  }
+  const int8_t *values = i_[t];
+  for (int k = 0; k < num_features; ++k) {
+    inout[k] += static_cast<TFloat>(values[k]) / INT8_MAX;
+  }
+}
+
+// Adds num_features features of timestep t, starting at offset, to inout.
+// Int values are divided by INT8_MAX before being added.
+void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inout) const {
+  if (!int_mode_) {
+    const float *values = f_[t] + offset;
+    for (int k = 0; k < num_features; ++k) {
+      inout[k] += values[k];
+    }
+    return;
+  }
+  const int8_t *values = i_[t] + offset;
+  for (int k = 0; k < num_features; ++k) {
+    inout[k] += static_cast<float>(values[k]) / INT8_MAX;
+  }
+}
+
+// Writes all NumFeatures() features of timestep t from input, which holds
+// floats in the range [-1, 1].
+void NetworkIO::WriteTimeStep(int t, const TFloat *input) {
+  const int num_features = NumFeatures();
+  WriteTimeStepPart(t, 0, num_features, input);
+}
+
+// Writes num_features elements of input, floats in the range [-1, 1], to
+// (*this)[t], starting at offset. In int mode each value is scaled by
+// INT8_MAX, rounded and clipped to [-INT8_MAX, INT8_MAX].
+void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) {
+  if (!int_mode_) {
+    float *dest = f_[t] + offset;
+    for (int k = 0; k < num_features; ++k) {
+      dest[k] = static_cast<float>(input[k]);
+    }
+    return;
+  }
+  int8_t *dest = i_[t] + offset;
+  for (int k = 0; k < num_features; ++k) {
+    dest[k] = ClipToRange<int>(IntCastRounded(input[k] * INT8_MAX), -INT8_MAX, INT8_MAX);
+  }
+}
+
+// Maxpools timestep src_t of src into timestep dest_t of *this: every
+// feature for which src holds the larger value is replaced by it, and src_t
+// is recorded in max_line for that feature. Both must be in the same mode.
+void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line) {
+  ASSERT_HOST(int_mode_ == src.int_mode_);
+  if (!int_mode_) {
+    const int count = f_.dim2();
+    float *pooled = f_[dest_t];
+    const float *candidate = src.f_[src_t];
+    for (int k = 0; k < count; ++k) {
+      if (candidate[k] > pooled[k]) {
+        pooled[k] = candidate[k];
+        max_line[k] = src_t;
+      }
+    }
+    return;
+  }
+  const int count = i_.dim2();
+  int8_t *pooled = i_[dest_t];
+  const int8_t *candidate = src.i_[src_t];
+  for (int k = 0; k < count; ++k) {
+    if (candidate[k] > pooled[k]) {
+      pooled[k] = candidate[k];
+      max_line[k] = src_t;
+    }
+  }
+}
+
+// Runs maxpool backward: zeroes *this, then for every position t of fwd and
+// every feature, stores the value of fwd at the timestep of *this that
+// maxes records for that position and feature. Float mode only.
+void NetworkIO::MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY<int> &maxes) {
+  ASSERT_HOST(!int_mode_);
+  Zero();
+  StrideMap::Index index(fwd.stride_map_);
+  do {
+    const int t = index.t();
+    const int *source_steps = maxes[t];
+    const float *values = fwd.f_[t];
+    const int count = fwd.f_.dim2();
+    for (int k = 0; k < count; ++k) {
+      float *target = f_[source_steps[k]];
+      target[k] = values[k];
+    }
+  } while (index.Increment());
+}
+
+// Returns the min over time of the maxes over features of the outputs, or
+// 0 if there are no timesteps. Int values are used as stored (not scaled).
+float NetworkIO::MinOfMaxes() const {
+  float min_max = 0.0f;
+  const int width = Width();
+  const int num_features = NumFeatures();
+  for (int t = 0; t < width; ++t) {
+    // Largest feature value of this timestep.
+    float step_max = -FLT_MAX;
+    if (!int_mode_) {
+      const float *column = f_[t];
+      for (int k = 0; k < num_features; ++k) {
+        if (column[k] > step_max) {
+          step_max = column[k];
+        }
+      }
+    } else {
+      const int8_t *column = i_[t];
+      for (int k = 0; k < num_features; ++k) {
+        if (column[k] > step_max) {
+          step_max = column[k];
+        }
+      }
+    }
+    if (t == 0 || step_max < min_max) {
+      min_max = step_max;
+    }
+  }
+  return min_max;
+}
+
+// Computes combined results for a combiner that chooses between an existing
+// input and itself. combiner_output must have one more feature than
+// base_output; that extra feature is the weight given to base_output, with
+// the remainder of 1 given to the combiner's own outputs. *this is resized
+// to match base_output.
+void NetworkIO::CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output) {
+  // Number of outputs from base and final result.
+  const int no = base_output.NumFeatures();
+  ASSERT_HOST(combiner_output.NumFeatures() == no + 1);
+  Resize(base_output, no);
+  const int width = Width();
+  if (!int_mode_) {
+    for (int t = 0; t < width; ++t) {
+      float *out_line = f_[t];
+      const float *base_line = base_output.f_[t];
+      const float *comb_line = combiner_output.f_[t];
+      const float base_weight = comb_line[no];
+      const float boost_weight = 1.0f - base_weight;
+      for (int k = 0; k < no; ++k) {
+        out_line[k] = base_line[k] * base_weight + comb_line[k] * boost_weight;
+      }
+    }
+    return;
+  }
+  for (int t = 0; t < width; ++t) {
+    int8_t *out_line = i_[t];
+    const int8_t *base_line = base_output.i_[t];
+    const int8_t *comb_line = combiner_output.i_[t];
+    const float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;
+    const float boost_weight = 1.0f - base_weight;
+    for (int k = 0; k < no; ++k) {
+      out_line[k] = IntCastRounded(base_line[k] * base_weight + comb_line[k] * boost_weight);
+    }
+  }
+}
+
+// Computes deltas for a combiner that chooses between 2 sets of inputs, overwriting *this (the combiner output) in place.
+void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output) {
+  ASSERT_HOST(!int_mode_);
+  // Compute the deltas for the combiner.
+  int width = Width();
+  int no = NumFeatures() - 1; // The last feature of *this is the weight given to the base output.
+  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
+  ASSERT_HOST(base_output.NumFeatures() == no);
+  // Number of outputs from base and final result.
+  for (int t = 0; t < width; ++t) {
+    const float *delta_line = fwd_deltas.f_[t];
+    const float *base_line = base_output.f_[t];
+    float *comb_line = f_[t];
+    float base_weight = comb_line[no];
+    float boost_weight = 1.0f - base_weight;
+    float max_base_delta = 0.0; // Largest distance of the base output from the reconstructed target.
+    for (int i = 0; i < no; ++i) {
+      // What did the combiner actually produce?
+      float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
+      // Reconstruct the target from the delta.
+      float comb_target = delta_line[i] + output;
+      comb_line[i] = comb_target - comb_line[i]; // Output is replaced by its delta from here on.
+      float base_delta = std::fabs(comb_target - base_line[i]);
+      if (base_delta > max_base_delta) {
+        max_base_delta = base_delta;
+      }
+    }
+    if (max_base_delta >= 0.5) {
+      // The base network got it wrong. The combiner should output the right
+      // answer and 0 for the base network.
+      comb_line[no] = 0.0 - base_weight; // Delta towards a weight target of 0.
+    } else {
+      // The base network was right. The combiner should flag that.
+      for (int i = 0; i < no; ++i) {
+        // All other targets are 0.
+        if (comb_line[i] > 0.0) {
+          comb_line[i] -= 1.0;
+        }
+      }
+      comb_line[no] = 1.0 - base_weight; // Delta towards a weight target of 1.
+    }
+  }
+}
+
+// Copies the array contents of src to *this. Both must be in the same mode
+// (int or float), which is checked with ASSERT_HOST.
+// The float array is always copied, as before. In int mode the int array,
+// which is the one that holds the data in that mode, is now copied as well;
+// previously an int-mode copy left the destination data untouched.
+// stride_map_ is not copied by this method.
+void NetworkIO::CopyAll(const NetworkIO &src) {
+  ASSERT_HOST(src.int_mode_ == int_mode_);
+  f_ = src.f_;
+  if (int_mode_) {
+    i_ = src.i_;
+  }
+}
+
+// Checks that both *this and src are in float mode, then adds the float array of src to that of *this.
+void NetworkIO::AddAllToFloat(const NetworkIO &src) {
+  ASSERT_HOST(!int_mode_);
+  ASSERT_HOST(!src.int_mode_);
+  f_ += src.f_; // NOTE(review): presumably element-wise and requires equal sizes - confirm in the array class.
+}
+
+// Checks that both *this and src are in float mode, then subtracts the float array of src from that of *this.
+void NetworkIO::SubtractAllFromFloat(const NetworkIO &src) {
+  ASSERT_HOST(!int_mode_);
+  ASSERT_HOST(!src.int_mode_);
+  f_ -= src.f_; // NOTE(review): presumably element-wise and requires equal sizes - confirm in the array class.
+}
+
+// Copies src to *this, multiplying every value by
+// scale.f_.MaxAbs() / src.f_.MaxAbs() so that the copy matches scale in
+// maxabs. If src has a maxabs of 0, f_.Clear() is called instead. All three
+// must be in float mode and both maxabs values must be finite. *this is not
+// resized here.
+void NetworkIO::CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale) {
+  ASSERT_HOST(!int_mode_);
+  ASSERT_HOST(!src.int_mode_);
+  ASSERT_HOST(!scale.int_mode_);
+  const float src_max = src.f_.MaxAbs();
+  ASSERT_HOST(std::isfinite(src_max));
+  const float scale_max = scale.f_.MaxAbs();
+  ASSERT_HOST(std::isfinite(scale_max));
+  if (!(src_max > 0.0f)) {
+    f_.Clear();
+    return;
+  }
+  const float factor = scale_max / src_max;
+  const int width = src.Width();
+  const int count = src.f_.dim2();
+  for (int t = 0; t < width; ++t) {
+    const float *from = src.f_[t];
+    float *to = f_[t];
+    for (int k = 0; k < count; ++k) {
+      to[k] = from[k] * factor;
+    }
+  }
+}
+
+// Resizes *this to match src, then copies src with the y dimension reversed independently within each batch entry.
+void NetworkIO::CopyWithYReversal(const NetworkIO &src) {
+  int num_features = src.NumFeatures();
+  Resize(src, num_features);
+  StrideMap::Index b_index(src.stride_map_);
+  do {
+    int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1; // Number of x positions copied per row of this entry.
+    StrideMap::Index fwd_index(b_index);
+    StrideMap::Index rev_index(b_index);
+    rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT); // Start the destination at the last row.
+    do {
+      int fwd_t = fwd_index.t();
+      int rev_t = rev_index.t();
+      for (int x = 0; x < width; ++x) {
+        CopyTimeStepFrom(rev_t++, src, fwd_t++); // NOTE(review): assumes consecutive x are consecutive t - confirm.
+      }
+    } while (fwd_index.AddOffset(1, FD_HEIGHT) && rev_index.AddOffset(-1, FD_HEIGHT));
+  } while (b_index.AddOffset(1, FD_BATCH));
+}
+
+// Resizes *this to match src, then copies src with the x dimension reversed independently within each row of each batch entry.
+void NetworkIO::CopyWithXReversal(const NetworkIO &src) {
+  int num_features = src.NumFeatures();
+  Resize(src, num_features);
+  StrideMap::Index b_index(src.stride_map_);
+  do {
+    StrideMap::Index y_index(b_index);
+    do {
+      StrideMap::Index fwd_index(y_index);
+      StrideMap::Index rev_index(y_index);
+      rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH); // Start the destination at the last x.
+      do {
+        CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
+      } while (fwd_index.AddOffset(1, FD_WIDTH) && rev_index.AddOffset(-1, FD_WIDTH));
+    } while (y_index.AddOffset(1, FD_HEIGHT));
+  } while (b_index.AddOffset(1, FD_BATCH));
+}
+
+// Copies src to *this with the x and y dimensions swapped, independently for
+// each batch element. The stride map of *this becomes the XY-transposed copy
+// of src's stride map, and *this is resized to it in src's int/float mode.
+// Walking src along x advances the destination along y, and vice versa.
+void NetworkIO::CopyWithXYTranspose(const NetworkIO &src) {
+  const int feature_count = src.NumFeatures();
+  stride_map_ = src.stride_map_;
+  stride_map_.TransposeXY();
+  ResizeToMap(src.int_mode(), stride_map_, feature_count);
+  StrideMap::Index src_batch(src.stride_map_);
+  StrideMap::Index dest_batch(stride_map_);
+  bool more_batches = true;
+  while (more_batches) {
+    StrideMap::Index src_row(src_batch);
+    StrideMap::Index dest_col(dest_batch);
+    bool more_rows = true;
+    while (more_rows) {
+      StrideMap::Index src_col(src_row);
+      StrideMap::Index dest_row(dest_col);
+      bool more_cols = true;
+      while (more_cols) {
+        CopyTimeStepFrom(dest_row.t(), src, src_col.t());
+        more_cols = src_col.AddOffset(1, FD_WIDTH) && dest_row.AddOffset(1, FD_HEIGHT);
+      }
+      more_rows = src_row.AddOffset(1, FD_HEIGHT) && dest_col.AddOffset(1, FD_WIDTH);
+    }
+    more_batches = src_batch.AddOffset(1, FD_BATCH) && dest_batch.AddOffset(1, FD_BATCH);
+  }
+}
+
+// Copies src to *this, at the given feature_offset, returning the total
+// feature offset after the copy. Multiple calls will stack outputs from
+// multiple sources in feature space.
+// Requirements (asserted): *this and src are in the same int/float mode,
+// feature_offset is non-negative, src is no wider than *this, and the
+// src features fit in *this starting at feature_offset.
+// For any time-steps of *this beyond the width of src, the feature slice
+// [feature_offset, feature_offset + src.NumFeatures()) is zero-filled.
+int NetworkIO::CopyPacking(const NetworkIO &src, int feature_offset) {
+  ASSERT_HOST(int_mode_ == src.int_mode_);
+  ASSERT_HOST(feature_offset >= 0);
+  int width = src.Width();
+  ASSERT_HOST(width <= Width());
+  int num_features = src.NumFeatures();
+  ASSERT_HOST(num_features + feature_offset <= NumFeatures());
+  if (int_mode_) {
+    for (int t = 0; t < width; ++t) {
+      memcpy(i_[t] + feature_offset, src.i_[t], num_features * sizeof(i_[t][0]));
+    }
+    // Zero only the slice owned by this call. Starting at feature 0 would
+    // wipe features packed by an earlier call and leave this slice unset.
+    for (int t = width; t < i_.dim1(); ++t) {
+      memset(i_[t] + feature_offset, 0, num_features * sizeof(i_[t][0]));
+    }
+  } else {
+    for (int t = 0; t < width; ++t) {
+      memcpy(f_[t] + feature_offset, src.f_[t], num_features * sizeof(f_[t][0]));
+    }
+    // Same as the int branch: pad only this call's own feature slice.
+    for (int t = width; t < f_.dim1(); ++t) {
+      memset(f_[t] + feature_offset, 0, num_features * sizeof(f_[t][0]));
+    }
+  }
+  return num_features + feature_offset;
+}
+
+// Inverse of CopyPacking: resizes *this to match src with num_features
+// features, then fills each time-step with the num_features values of src
+// that start at feature_offset. The requested slice is asserted to lie
+// within src's features.
+void NetworkIO::CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features) {
+  Resize(src, num_features);
+  const int num_steps = src.Width();
+  ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());
+  if (!int_mode_) {
+    const size_t row_bytes = num_features * sizeof(f_[0][0]);
+    for (int step = 0; step < num_steps; ++step) {
+      memcpy(f_[step], src.f_[step] + feature_offset, row_bytes);
+    }
+    return;
+  }
+  const size_t row_bytes = num_features * sizeof(i_[0][0]);
+  for (int step = 0; step < num_steps; ++step) {
+    memcpy(i_[step], src.i_[step] + feature_offset, row_bytes);
+  }
+}
+
+// Writes the float data of *this into dest in transposed form. dest is
+// resized (without initialization) to NumFeatures() x Width(), and each
+// time-step of *this is passed to dest->WriteStrided with its index.
+void NetworkIO::Transpose(TransposedArray *dest) const {
+  const int num_steps = Width();
+  dest->ResizeNoInit(NumFeatures(), num_steps);
+  int step = 0;
+  while (step < num_steps) {
+    dest->WriteStrided(step, f_[step]);
+    ++step;
+  }
+}
+
+// Limits every value in time-step t to the interval [-range, range].
+// *this is asserted to be in float mode.
+void NetworkIO::ClipVector(int t, float range) {
+  ASSERT_HOST(!int_mode_);
+  float *row = f_[t];
+  const int row_len = f_.dim2();
+  const float lower = -range;
+  const float upper = range;
+  for (int k = 0; k < row_len; ++k) {
+    row[k] = ClipToRange<float>(row[k], lower, upper);
+  }
+}
+
+// Returns how many extra features must be appended to num_features so that
+// the SIMD operations are safe: the difference between
+// IntSimdMatrix::RoundInputs(num_features) and num_features, or 0 when no
+// IntSimdMatrix implementation is set.
+/* static */
+int NetworkIO::GetPadding(int num_features) {
+  if (!IntSimdMatrix::intSimdMatrix) {
+    return 0;
+  }
+  return IntSimdMatrix::intSimdMatrix->RoundInputs(num_features) - num_features;
+}
+
+} // namespace tesseract.
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children