view mupdf-source/thirdparty/tesseract/src/lstm/stridemap.h @ 46:7ee69f120f19 default tip

>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 17:17:30 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

///////////////////////////////////////////////////////////////////////
// File:        stridemap.h
// Description: Indexing into a 4-d tensor held in a 2-d Array.
// Author:      Ray Smith
//
// (C) Copyright 2016, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_LSTM_STRIDEMAP_H_
#define TESSERACT_LSTM_STRIDEMAP_H_

#include <cstring>
#include <utility>
#include <vector>

namespace tesseract {

// Names the flexible (non-depth) dimensions of the 'Tensor' in a NetworkIO.
// A NetworkIO is analogous to a TF Tensor, except that it always has exactly
// 4 dimensions, each with a fixed meaning. Its underlying representation is a
// 2-D array in which dim1 is always the product batch*height*width and dim2
// is always depth. FlexDimensions covers only batch, height and width, as
// used with the StrideMap, and therefore describes the runtime shape. The
// build-time shape is defined by StaticShape.
enum FlexDimensions {
  FD_BATCH = 0, // Index of the image within a batch of images.
  FD_HEIGHT,    // y-coordinate within the image.
  FD_WIDTH,     // x-coordinate within the image.
  FD_DIMSIZE,   // Number of flexible non-depth dimensions (used as array size).
};

// Encapsulation of information relating to the mapping from [batch][y][x] to
// the first index into the 2-d array underlying a NetworkIO.
class StrideMap {
public:
  // Class holding the non-depth indices.
  class Index {
  public:
    explicit Index(const StrideMap &stride_map) : stride_map_(&stride_map) {
      InitToFirst();
    }
    Index(const StrideMap &stride_map, int batch, int y, int x) : stride_map_(&stride_map) {
      indices_[FD_BATCH] = batch;
      indices_[FD_HEIGHT] = y;
      indices_[FD_WIDTH] = x;
      SetTFromIndices();
    }
    // Accesses the index to the underlying array.
    int t() const {
      return t_;
    }
    int index(FlexDimensions dimension) const {
      return indices_[dimension];
    }
    // Initializes the indices to the first valid location.
    void InitToFirst() {
      memset(indices_, 0, sizeof(indices_));
      t_ = 0;
    }
    // Initializes the indices to the last valid location.
    void InitToLast() {
      InitToLastOfBatch(MaxIndexOfDim(FD_BATCH));
    }
    // Returns true if *this is a valid index.
    bool IsValid() const;
    // Returns true if the index of the given dimension is the last.
    bool IsLast(FlexDimensions dimension) const;
    // Given that the dimensions up to and including dim-1 are valid, returns
    // the maximum index for dimension dim.
    int MaxIndexOfDim(FlexDimensions dim) const;
    // Adds the given offset to the given dimension. Returns true if the result
    // makes a valid index.
    bool AddOffset(int offset, FlexDimensions dimension);
    // Increments the index in some encapsulated way that guarantees to remain
    // valid until it returns false, meaning that the iteration is complete.
    bool Increment();
    // Decrements the index in some encapsulated way that guarantees to remain
    // valid until it returns false, meaning that the iteration (that started
    // with InitToLast()) is complete.
    bool Decrement();

  private:
    // Initializes the indices to the last valid location in the given batch
    // index.
    void InitToLastOfBatch(int batch);
    // Computes and sets t_ from the current indices_.
    void SetTFromIndices();

    // Map into which *this is an index.
    const StrideMap *stride_map_;
    // Index to the first dimension of the underlying array.
    int t_;
    // Indices into the individual dimensions.
    int indices_[FD_DIMSIZE];
  };

  StrideMap() {
    memset(shape_, 0, sizeof(shape_));
    memset(t_increments_, 0, sizeof(t_increments_));
  }
  // Default copy constructor and operator= are OK to use here!

  // Sets up the stride for the given array of height, width pairs.
  void SetStride(const std::vector<std::pair<int, int>> &h_w_pairs);
  // Scales width and height dimensions by the given factors.
  void ScaleXY(int x_factor, int y_factor);
  // Reduces width to 1, across the batch, whatever the input size.
  void ReduceWidthTo1();
  // Transposes the width and height dimensions.
  void TransposeXY();
  // Returns the size of the given dimension.
  int Size(FlexDimensions dimension) const {
    return shape_[dimension];
  }
  // Returns the total width required.
  int Width() const {
    return t_increments_[FD_BATCH] * shape_[FD_BATCH];
  }

private:
  // Computes t_increments_ from shape_.
  void ComputeTIncrements();

  // The size of each non-depth dimension.
  int shape_[FD_DIMSIZE];
  // Precomputed 't' increments for each dimension. This is the value of
  // the given dimension in the packed 3-d array that the shape_ represents.
  int t_increments_[FD_DIMSIZE];
  // Vector of size shape_[FD_BATCH] holds the height of each image in a batch.
  std::vector<int> heights_;
  // Vector of size shape_[FD_BATCH] holds the width of each image in a batch.
  std::vector<int> widths_;
};

} // namespace tesseract

#endif // TESSERACT_LSTM_STRIDEMAP_H_