diff mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,181 @@
+///////////////////////////////////////////////////////////////////////
+// File:        indexmapbidi.h
+// Description: Bi-directional mapping between a sparse and compact space.
+// Author:      rays@google.com (Ray Smith)
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
+#define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
+
+#include <tesseract/export.h> // for TESS_API
+
+#include <cstdint>  // for int32_t
+#include <cstdio>
+#include <vector>
+
+namespace tesseract {
+
+class IndexMapBiDi;
+
+// Bidirectional one-to-one mapping between a sparse and a compact discrete
+// space. Many entries in the sparse space are unmapped, but those that are
+// mapped have a 1-1 mapping to (and from) the compact space, where all
+// values are used. This is useful for forming subsets of larger collections,
+// such as subsets of character sets, or subsets of binary feature spaces.
+//
+// This base class provides basic functionality with binary search for the
+// SparseToCompact mapping to save memory.
+// For a faster inverse mapping, or to allow a many-to-one mapping, use
+// IndexMapBiDi below.
+// NOTE: there are currently no methods to setup an IndexMap on its own!
+// It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
+class TESS_API IndexMap {
+public:
+  virtual ~IndexMap();
+
+  // SparseToCompact takes a sparse index to an index in the compact space.
+  // Uses a binary search to find the result. For faster speed use
+  // IndexMapBiDi, but that takes more memory.
+  virtual int SparseToCompact(int sparse_index) const;
+
+  // CompactToSparse takes a compact index to the corresponding index in the
+  // sparse space.
+  int CompactToSparse(int compact_index) const {
+    return compact_map_[compact_index];
+  }
+  // The size of the sparse space.
+  virtual int SparseSize() const {
+    return sparse_size_;
+  }
+  // The size of the compact space.
+  int CompactSize() const {
+    return compact_map_.size();
+  }
+
+  // Copy from the input.
+  void CopyFrom(const IndexMap &src);
+  void CopyFrom(const IndexMapBiDi &src);
+
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(FILE *fp) const;
+  // Reads from the given file. Returns false in case of error.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  bool DeSerialize(bool swap, FILE *fp);
+
+protected:
+  // The sparse space covers integers in the range [0, sparse_size_-1].
+  int32_t sparse_size_;
+  // The compact space covers integers in the range [0, compact_map_.size()-1].
+  // Each element contains the corresponding sparse index.
+  std::vector<int32_t> compact_map_;
+};
+
+// Bidirectional many-to-one mapping between a sparse and a compact discrete
+// space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
+// of those that are, many may be mapped to the same compact index.
+// If the map is many-to-one, it is not possible to directly obtain all the
+// sparse indices that map to a single compact index.
+// This map is time- rather than space-efficient. It stores the entire sparse
+// space.
+// IndexMapBiDi may be initialized in one of 3 ways:
+// 1. Init(size, true);
+//    Setup();
+//    Sets a complete 1:1 mapping with no unmapped elements.
+// 2. Init(size, false);
+//    for ... SetMap(index, true);
+//    Setup();
+//    Specifies precisely which sparse indices are mapped. The mapping is 1:1.
+// 3. Either of the above, followed by:
+//    for ... Merge(index1, index2);
+//    CompleteMerges();
+//    Allows a many-to-one mapping by merging compact space indices.
+class TESS_API IndexMapBiDi : public IndexMap {
+public:
+  ~IndexMapBiDi() override;
+
+  // Top-level init function in a single call to initialize a map to select
+  // a single contiguous subrange [start, end) of the sparse space to be mapped
+  // 1 to 1 to the compact space, with all other elements of the sparse space
+  // left unmapped.
+  // No need to call Setup after this.
+  void InitAndSetupRange(int sparse_size, int start, int end);
+
+  // Initializes just the sparse_map_ to the given size with either all
+  // forward indices mapped (all_mapped = true) or none (all_mapped = false).
+  // Call Setup immediately after, or make calls to SetMap first to adjust the
+  // mapping and then call Setup before using the map.
+  void Init(int size, bool all_mapped);
+  // Sets a given index in the sparse_map_ to be mapped or not.
+  void SetMap(int sparse_index, bool mapped);
+  // Sets up the sparse_map_ and compact_map_ properly after Init and
+  // some calls to SetMap. Assumes an ordered 1-1 map from set indices
+  // in the sparse space to the compact space.
+  void Setup();
+
+  // Merges the two compact space indices. May be called many times, but
+  // the merges must be concluded by a call to CompleteMerges.
+  // Returns true if a merge was actually performed.
+  bool Merge(int compact_index1, int compact_index2);
+  // Returns true if the given compact index has been deleted.
+  bool IsCompactDeleted(int index) const {
+    return MasterCompactIndex(index) < 0;
+  }
+  // Completes one or more Merge operations by further compacting the
+  // compact space.
+  void CompleteMerges();
+
+  // SparseToCompact takes a sparse index to an index in the compact space.
+  int SparseToCompact(int sparse_index) const override {
+    return sparse_map_[sparse_index];
+  }
+  // The size of the sparse space.
+  int SparseSize() const override {
+    return sparse_map_.size();
+  }
+
+  // Copy from the input.
+  void CopyFrom(const IndexMapBiDi &src);
+
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(FILE *fp) const;
+  // Reads from the given file. Returns false in case of error.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  bool DeSerialize(bool swap, FILE *fp);
+
+  // Bulk calls to SparseToCompact.
+  // Maps the given array of sparse indices to an array of compact indices.
+  // Assumes the input is sorted. The output indices are sorted and uniqued.
+  // Return value is the number of "missed" features, being features that
+  // don't map to the compact feature space.
+  int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const;
+
+private:
+  // Returns the master compact index for a given compact index.
+  // During a multiple merge operation, several compact indices may be
+  // combined, so we need to be able to find the master of all.
+  int MasterCompactIndex(int compact_index) const {
+    while (compact_index >= 0 && sparse_map_[compact_map_[compact_index]] != compact_index) {
+      compact_index = sparse_map_[compact_map_[compact_index]];
+    }
+    return compact_index;
+  }
+
+  // Direct look-up of the compact index for each element in sparse space.
+  std::vector<int32_t> sparse_map_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_