diff mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: there is no version number in the expanded directory name now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.cpp Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,280 @@
+///////////////////////////////////////////////////////////////////////
+// File:        indexmapbidi.cpp
+// Description: Bi-directional mapping between a sparse and compact space.
+// Author:      rays@google.com (Ray Smith)
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "helpers.h"
+#include "indexmapbidi.h"
+#include "serialis.h"
+
+namespace tesseract {
+
+// Destructor.
+// It is defined here, so the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+IndexMap::~IndexMap() = default;
+
+// SparseToCompact takes a sparse index to an index in the compact space.
+// Uses a binary search to find the result. For faster speed use
+// IndexMapBiDi, but that takes more memory.
+int IndexMap::SparseToCompact(int sparse_index) const {
+  auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index);
+  if (pos > compact_map_.begin()) {
+    --pos;
+  }
+  auto result = pos - compact_map_.begin();
+  return compact_map_[result] == sparse_index ? result : -1;
+}
+
+// Copy from the input.
+void IndexMap::CopyFrom(const IndexMap &src) {
+  sparse_size_ = src.sparse_size_;
+  compact_map_ = src.compact_map_;
+}
+void IndexMap::CopyFrom(const IndexMapBiDi &src) {
+  sparse_size_ = src.SparseSize();
+  compact_map_ = src.compact_map_;
+}
+
+// Writes to the given file. Returns false in case of error.
+bool IndexMap::Serialize(FILE *fp) const {
+  return tesseract::Serialize(fp, &sparse_size_) && tesseract::Serialize(fp, compact_map_);
+}
+
+// Reads from the given file. Returns false in case of error.
+// If swap is true, assumes a big/little-endian swap is needed.
+bool IndexMap::DeSerialize(bool swap, FILE *fp) {
+  uint32_t sparse_size;
+  if (!tesseract::DeSerialize(fp, &sparse_size)) {
+    return false;
+  }
+  if (swap) {
+    ReverseN(&sparse_size, sizeof(sparse_size));
+  }
+  // Arbitrarily limit the number of elements to protect against bad data.
+  if (sparse_size > UINT16_MAX) {
+    return false;
+  }
+  sparse_size_ = sparse_size;
+  return tesseract::DeSerialize(swap, fp, compact_map_);
+}
+
+// Destructor.
+// It is defined here, so the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+IndexMapBiDi::~IndexMapBiDi() = default;
+
+// Top-level init function in a single call to initialize a map to select
+// a single contiguous subrange [start, end) of the sparse space to be mapped
+// 1 to 1 to the compact space, with all other elements of the sparse space
+// left unmapped.
+// No need to call Setup after this.
+void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
+  Init(sparse_size, false);
+  for (int i = start; i < end; ++i) {
+    SetMap(i, true);
+  }
+  Setup();
+}
+
+// Initializes just the sparse_map_ to the given size with either all
+// forward indices mapped (all_mapped = true) or none (all_mapped = false).
+// Call Setup immediately after, or make calls to SetMap first to adjust the
+// mapping and then call Setup before using the map.
+void IndexMapBiDi::Init(int size, bool all_mapped) {
+  if (!all_mapped) {
+    sparse_map_.clear();
+  }
+  sparse_map_.resize(size, -1);
+  if (all_mapped) {
+    for (int i = 0; i < size; ++i) {
+      sparse_map_[i] = i;
+    }
+  }
+}
+
+// Sets a given index in the sparse_map_ to be mapped or not.
+void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
+  sparse_map_[sparse_index] = mapped ? 0 : -1;
+}
+
+// Sets up the sparse_map_ and compact_map_ properly after Init and
+// some calls to SetMap. Assumes an ordered 1-1 map from set indices
+// in the forward map to the compact space.
+void IndexMapBiDi::Setup() {
+  int compact_size = 0;
+  for (int &i : sparse_map_) {
+    if (i >= 0) {
+      i = compact_size++;
+    }
+  }
+  compact_map_.clear();
+  compact_map_.resize(compact_size, -1);
+  for (size_t i = 0; i < sparse_map_.size(); ++i) {
+    if (sparse_map_[i] >= 0) {
+      compact_map_[sparse_map_[i]] = i;
+    }
+  }
+  sparse_size_ = sparse_map_.size();
+}
+
+// Copy from the input.
+void IndexMapBiDi::CopyFrom(const IndexMapBiDi &src) {
+  sparse_map_ = src.sparse_map_;
+  compact_map_ = src.compact_map_;
+  sparse_size_ = sparse_map_.size();
+}
+
+// Merges the two compact space indices. May be called many times, but
+// the merges must be concluded by a call to CompleteMerges.
+// Returns true if a merge was actually performed.
+bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
+  // Find the current master index for index1 and index2.
+  compact_index1 = MasterCompactIndex(compact_index1);
+  compact_index2 = MasterCompactIndex(compact_index2);
+  // Be sure that index1 < index2.
+  if (compact_index1 > compact_index2) {
+    int tmp = compact_index1;
+    compact_index1 = compact_index2;
+    compact_index2 = tmp;
+  } else if (compact_index1 == compact_index2) {
+    return false;
+  }
+  // To save iterating over all sparse_map_ entries, simply make the master
+  // entry for index2 point to index1.
+  // This leaves behind a potential chain of parents that needs to be chased,
+  // as above.
+  sparse_map_[compact_map_[compact_index2]] = compact_index1;
+  if (compact_index1 >= 0) {
+    compact_map_[compact_index2] = compact_map_[compact_index1];
+  }
+  return true;
+}
+
+// Completes one or more Merge operations by further compacting the
+// compact space. Unused compact space indices are removed, and the used
+// ones above shuffled down to fill the gaps.
+// Example:
+// Input sparse_map_: (x indicates -1)
+// x x 0 x 2 x x 4 x 0 x 2 x
+// Output sparse_map_:
+// x x 0 x 1 x x 2 x 0 x 1 x
+// Output compact_map_:
+// 2 4 7.
+void IndexMapBiDi::CompleteMerges() {
+  // Ensure each sparse_map_entry contains a master compact_map_ index.
+  int compact_size = 0;
+  for (int &i : sparse_map_) {
+    int compact_index = MasterCompactIndex(i);
+    i = compact_index;
+    if (compact_index >= compact_size) {
+      compact_size = compact_index + 1;
+    }
+  }
+  // Re-generate the compact_map leaving holes for unused indices.
+  compact_map_.clear();
+  compact_map_.resize(compact_size, -1);
+  for (size_t i = 0; i < sparse_map_.size(); ++i) {
+    if (sparse_map_[i] >= 0) {
+      if (compact_map_[sparse_map_[i]] == -1) {
+        compact_map_[sparse_map_[i]] = i;
+      }
+    }
+  }
+  // Compact the compact_map, leaving tmp_compact_map saying where each
+  // index went to in the compacted map.
+  std::vector<int32_t> tmp_compact_map(compact_size, -1);
+  compact_size = 0;
+  for (size_t i = 0; i < compact_map_.size(); ++i) {
+    if (compact_map_[i] >= 0) {
+      tmp_compact_map[i] = compact_size;
+      compact_map_[compact_size++] = compact_map_[i];
+    }
+  }
+  compact_map_.resize(compact_size);
+  // Now modify the entries in the sparse map to point to the new locations.
+  for (int &i : sparse_map_) {
+    if (i >= 0) {
+      i = tmp_compact_map[i];
+    }
+  }
+}
+
+// Writes to the given file. Returns false in case of error.
+bool IndexMapBiDi::Serialize(FILE *fp) const {
+  if (!IndexMap::Serialize(fp)) {
+    return false;
+  }
+  // Make a vector containing the rest of the map. If the map is many-to-one
+  // then each additional sparse entry needs to be stored.
+  // Normally we store only the compact map to save space.
+  std::vector<int32_t> remaining_pairs;
+  for (unsigned i = 0; i < sparse_map_.size(); ++i) {
+    if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) {
+      remaining_pairs.push_back(i);
+      remaining_pairs.push_back(sparse_map_[i]);
+    }
+  }
+  return tesseract::Serialize(fp, remaining_pairs);
+}
+
+// Reads from the given file. Returns false in case of error.
+// If swap is true, assumes a big/little-endian swap is needed.
+bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
+  if (!IndexMap::DeSerialize(swap, fp)) {
+    return false;
+  }
+  std::vector<int32_t> remaining_pairs;
+  if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) {
+    return false;
+  }
+  sparse_map_.clear();
+  sparse_map_.resize(sparse_size_, -1);
+  for (unsigned i = 0; i < compact_map_.size(); ++i) {
+    sparse_map_[compact_map_[i]] = i;
+  }
+  for (size_t i = 0; i < remaining_pairs.size(); ++i) {
+    int sparse_index = remaining_pairs[i++];
+    sparse_map_[sparse_index] = remaining_pairs[i];
+  }
+  return true;
+}
+
+// Bulk calls to SparseToCompact.
+// Maps the given array of sparse indices to an array of compact indices.
+// Assumes the input is sorted. The output indices are sorted and uniqued.
+// Return value is the number of "missed" features, being features that
+// don't map to the compact feature space.
+int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
+  compact->clear();
+  int num_features = sparse.size();
+  int missed_features = 0;
+  int prev_good_feature = -1;
+  for (int f = 0; f < num_features; ++f) {
+    int feature = sparse_map_[sparse[f]];
+    if (feature >= 0) {
+      if (feature != prev_good_feature) {
+        compact->push_back(feature);
+        prev_good_feature = feature;
+      }
+    } else {
+      ++missed_features;
+    }
+  }
+  return missed_features;
+}
+
+} // namespace tesseract.
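
For orientation, here is a minimal usage sketch of the class this changeset adds. It is illustrative only, not part of the diff, and assumes the accessors declared in `indexmapbidi.h` (`SparseToCompact`, `CompactToSparse`, `CompactSize`) alongside the `Init`/`SetMap`/`Setup` and `Merge`/`CompleteMerges` routines defined above, compiled against the bundled Tesseract sources.

```cpp
#include <cstdio>

#include "indexmapbidi.h"

int main() {
  tesseract::IndexMapBiDi map;

  // Sparse space of size 10; mark sparse indices 2, 5 and 7 as used,
  // then build the compact space: {2, 5, 7} -> {0, 1, 2}.
  map.Init(10, /*all_mapped=*/false);
  map.SetMap(2, true);
  map.SetMap(5, true);
  map.SetMap(7, true);
  map.Setup();

  printf("compact size: %d\n", map.CompactSize());             // 3
  printf("sparse 5  -> compact %d\n", map.SparseToCompact(5));  // 1
  printf("compact 2 -> sparse  %d\n", map.CompactToSparse(2));  // 7
  printf("sparse 4  -> compact %d\n", map.SparseToCompact(4));  // -1 (unmapped)

  // Merge compact indices 0 and 2, then renumber the compact space.
  // Sparse indices 2 and 7 now share compact index 0, and the compact
  // space shrinks from three entries to two.
  map.Merge(0, 2);
  map.CompleteMerges();
  printf("compact size: %d\n", map.CompactSize());             // 2
  printf("sparse 7  -> compact %d\n", map.SparseToCompact(7));  // 0
  printf("compact 0 -> sparse  %d\n", map.CompactToSparse(0));  // 2

  return 0;
}
```

Note that `Merge` only records the union by pointing the second index's master entry at the first; the compact space is not renumbered until `CompleteMerges`, which is why all merges must be concluded before the map is queried again.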
