Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: indexmapbidi.cpp | |
| 3 // Description: Bi-directional mapping between a sparse and compact space. | |
| 4 // Author: rays@google.com (Ray Smith) | |
| 5 // | |
| 6 // (C) Copyright 2010, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #include "helpers.h" | |
| 20 #include "indexmapbidi.h" | |
| 21 #include "serialis.h" | |
| 22 | |
| 23 namespace tesseract { | |
| 24 | |
| 25 // Destructor. | |
| 26 // It is defined here, so the compiler can create a single vtable | |
| 27 // instead of weak vtables in every compilation unit. | |
| 28 IndexMap::~IndexMap() = default; | |
| 29 | |
| 30 // SparseToCompact takes a sparse index to an index in the compact space. | |
| 31 // Uses a binary search to find the result. For faster speed use | |
| 32 // IndexMapBiDi, but that takes more memory. | |
| 33 int IndexMap::SparseToCompact(int sparse_index) const { | |
| 34 auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index); | |
| 35 if (pos > compact_map_.begin()) { | |
| 36 --pos; | |
| 37 } | |
| 38 auto result = pos - compact_map_.begin(); | |
| 39 return compact_map_[result] == sparse_index ? result : -1; | |
| 40 } | |
| 41 | |
| 42 // Copy from the input. | |
| 43 void IndexMap::CopyFrom(const IndexMap &src) { | |
| 44 sparse_size_ = src.sparse_size_; | |
| 45 compact_map_ = src.compact_map_; | |
| 46 } | |
| 47 void IndexMap::CopyFrom(const IndexMapBiDi &src) { | |
| 48 sparse_size_ = src.SparseSize(); | |
| 49 compact_map_ = src.compact_map_; | |
| 50 } | |
| 51 | |
| 52 // Writes to the given file. Returns false in case of error. | |
| 53 bool IndexMap::Serialize(FILE *fp) const { | |
| 54 return tesseract::Serialize(fp, &sparse_size_) && tesseract::Serialize(fp, compact_map_); | |
| 55 } | |
| 56 | |
| 57 // Reads from the given file. Returns false in case of error. | |
| 58 // If swap is true, assumes a big/little-endian swap is needed. | |
| 59 bool IndexMap::DeSerialize(bool swap, FILE *fp) { | |
| 60 uint32_t sparse_size; | |
| 61 if (!tesseract::DeSerialize(fp, &sparse_size)) { | |
| 62 return false; | |
| 63 } | |
| 64 if (swap) { | |
| 65 ReverseN(&sparse_size, sizeof(sparse_size)); | |
| 66 } | |
| 67 // Arbitrarily limit the number of elements to protect against bad data. | |
| 68 if (sparse_size > UINT16_MAX) { | |
| 69 return false; | |
| 70 } | |
| 71 sparse_size_ = sparse_size; | |
| 72 return tesseract::DeSerialize(swap, fp, compact_map_); | |
| 73 } | |
| 74 | |
| 75 // Destructor. | |
| 76 // It is defined here, so the compiler can create a single vtable | |
| 77 // instead of weak vtables in every compilation unit. | |
| 78 IndexMapBiDi::~IndexMapBiDi() = default; | |
| 79 | |
| 80 // Top-level init function in a single call to initialize a map to select | |
| 81 // a single contiguous subrange [start, end) of the sparse space to be mapped | |
| 82 // 1 to 1 to the compact space, with all other elements of the sparse space | |
| 83 // left unmapped. | |
| 84 // No need to call Setup after this. | |
| 85 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { | |
| 86 Init(sparse_size, false); | |
| 87 for (int i = start; i < end; ++i) { | |
| 88 SetMap(i, true); | |
| 89 } | |
| 90 Setup(); | |
| 91 } | |
| 92 | |
| 93 // Initializes just the sparse_map_ to the given size with either all | |
| 94 // forward indices mapped (all_mapped = true) or none (all_mapped = false). | |
| 95 // Call Setup immediately after, or make calls to SetMap first to adjust the | |
| 96 // mapping and then call Setup before using the map. | |
| 97 void IndexMapBiDi::Init(int size, bool all_mapped) { | |
| 98 if (!all_mapped) { | |
| 99 sparse_map_.clear(); | |
| 100 } | |
| 101 sparse_map_.resize(size, -1); | |
| 102 if (all_mapped) { | |
| 103 for (int i = 0; i < size; ++i) { | |
| 104 sparse_map_[i] = i; | |
| 105 } | |
| 106 } | |
| 107 } | |
| 108 | |
| 109 // Sets a given index in the sparse_map_ to be mapped or not. | |
| 110 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) { | |
| 111 sparse_map_[sparse_index] = mapped ? 0 : -1; | |
| 112 } | |
| 113 | |
| 114 // Sets up the sparse_map_ and compact_map_ properly after Init and | |
| 115 // some calls to SetMap. Assumes an ordered 1-1 map from set indices | |
| 116 // in the forward map to the compact space. | |
| 117 void IndexMapBiDi::Setup() { | |
| 118 int compact_size = 0; | |
| 119 for (int &i : sparse_map_) { | |
| 120 if (i >= 0) { | |
| 121 i = compact_size++; | |
| 122 } | |
| 123 } | |
| 124 compact_map_.clear(); | |
| 125 compact_map_.resize(compact_size, -1); | |
| 126 for (size_t i = 0; i < sparse_map_.size(); ++i) { | |
| 127 if (sparse_map_[i] >= 0) { | |
| 128 compact_map_[sparse_map_[i]] = i; | |
| 129 } | |
| 130 } | |
| 131 sparse_size_ = sparse_map_.size(); | |
| 132 } | |
| 133 | |
| 134 // Copy from the input. | |
| 135 void IndexMapBiDi::CopyFrom(const IndexMapBiDi &src) { | |
| 136 sparse_map_ = src.sparse_map_; | |
| 137 compact_map_ = src.compact_map_; | |
| 138 sparse_size_ = sparse_map_.size(); | |
| 139 } | |
| 140 | |
| 141 // Merges the two compact space indices. May be called many times, but | |
| 142 // the merges must be concluded by a call to CompleteMerges. | |
| 143 // Returns true if a merge was actually performed. | |
| 144 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) { | |
| 145 // Find the current master index for index1 and index2. | |
| 146 compact_index1 = MasterCompactIndex(compact_index1); | |
| 147 compact_index2 = MasterCompactIndex(compact_index2); | |
| 148 // Be sure that index1 < index2. | |
| 149 if (compact_index1 > compact_index2) { | |
| 150 int tmp = compact_index1; | |
| 151 compact_index1 = compact_index2; | |
| 152 compact_index2 = tmp; | |
| 153 } else if (compact_index1 == compact_index2) { | |
| 154 return false; | |
| 155 } | |
| 156 // To save iterating over all sparse_map_ entries, simply make the master | |
| 157 // entry for index2 point to index1. | |
| 158 // This leaves behind a potential chain of parents that needs to be chased, | |
| 159 // as above. | |
| 160 sparse_map_[compact_map_[compact_index2]] = compact_index1; | |
| 161 if (compact_index1 >= 0) { | |
| 162 compact_map_[compact_index2] = compact_map_[compact_index1]; | |
| 163 } | |
| 164 return true; | |
| 165 } | |
| 166 | |
| 167 // Completes one or more Merge operations by further compacting the | |
| 168 // compact space. Unused compact space indices are removed, and the used | |
| 169 // ones above shuffled down to fill the gaps. | |
| 170 // Example: | |
| 171 // Input sparse_map_: (x indicates -1) | |
| 172 // x x 0 x 2 x x 4 x 0 x 2 x | |
| 173 // Output sparse_map_: | |
| 174 // x x 0 x 1 x x 2 x 0 x 1 x | |
| 175 // Output compact_map_: | |
| 176 // 2 4 7. | |
| 177 void IndexMapBiDi::CompleteMerges() { | |
| 178 // Ensure each sparse_map_entry contains a master compact_map_ index. | |
| 179 int compact_size = 0; | |
| 180 for (int &i : sparse_map_) { | |
| 181 int compact_index = MasterCompactIndex(i); | |
| 182 i = compact_index; | |
| 183 if (compact_index >= compact_size) { | |
| 184 compact_size = compact_index + 1; | |
| 185 } | |
| 186 } | |
| 187 // Re-generate the compact_map leaving holes for unused indices. | |
| 188 compact_map_.clear(); | |
| 189 compact_map_.resize(compact_size, -1); | |
| 190 for (size_t i = 0; i < sparse_map_.size(); ++i) { | |
| 191 if (sparse_map_[i] >= 0) { | |
| 192 if (compact_map_[sparse_map_[i]] == -1) { | |
| 193 compact_map_[sparse_map_[i]] = i; | |
| 194 } | |
| 195 } | |
| 196 } | |
| 197 // Compact the compact_map, leaving tmp_compact_map saying where each | |
| 198 // index went to in the compacted map. | |
| 199 std::vector<int32_t> tmp_compact_map(compact_size, -1); | |
| 200 compact_size = 0; | |
| 201 for (size_t i = 0; i < compact_map_.size(); ++i) { | |
| 202 if (compact_map_[i] >= 0) { | |
| 203 tmp_compact_map[i] = compact_size; | |
| 204 compact_map_[compact_size++] = compact_map_[i]; | |
| 205 } | |
| 206 } | |
| 207 compact_map_.resize(compact_size); | |
| 208 // Now modify the entries in the sparse map to point to the new locations. | |
| 209 for (int &i : sparse_map_) { | |
| 210 if (i >= 0) { | |
| 211 i = tmp_compact_map[i]; | |
| 212 } | |
| 213 } | |
| 214 } | |
| 215 | |
| 216 // Writes to the given file. Returns false in case of error. | |
| 217 bool IndexMapBiDi::Serialize(FILE *fp) const { | |
| 218 if (!IndexMap::Serialize(fp)) { | |
| 219 return false; | |
| 220 } | |
| 221 // Make a vector containing the rest of the map. If the map is many-to-one | |
| 222 // then each additional sparse entry needs to be stored. | |
| 223 // Normally we store only the compact map to save space. | |
| 224 std::vector<int32_t> remaining_pairs; | |
| 225 for (unsigned i = 0; i < sparse_map_.size(); ++i) { | |
| 226 if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) { | |
| 227 remaining_pairs.push_back(i); | |
| 228 remaining_pairs.push_back(sparse_map_[i]); | |
| 229 } | |
| 230 } | |
| 231 return tesseract::Serialize(fp, remaining_pairs); | |
| 232 } | |
| 233 | |
| 234 // Reads from the given file. Returns false in case of error. | |
| 235 // If swap is true, assumes a big/little-endian swap is needed. | |
| 236 bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) { | |
| 237 if (!IndexMap::DeSerialize(swap, fp)) { | |
| 238 return false; | |
| 239 } | |
| 240 std::vector<int32_t> remaining_pairs; | |
| 241 if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) { | |
| 242 return false; | |
| 243 } | |
| 244 sparse_map_.clear(); | |
| 245 sparse_map_.resize(sparse_size_, -1); | |
| 246 for (unsigned i = 0; i < compact_map_.size(); ++i) { | |
| 247 sparse_map_[compact_map_[i]] = i; | |
| 248 } | |
| 249 for (size_t i = 0; i < remaining_pairs.size(); ++i) { | |
| 250 int sparse_index = remaining_pairs[i++]; | |
| 251 sparse_map_[sparse_index] = remaining_pairs[i]; | |
| 252 } | |
| 253 return true; | |
| 254 } | |
| 255 | |
| 256 // Bulk calls to SparseToCompact. | |
| 257 // Maps the given array of sparse indices to an array of compact indices. | |
| 258 // Assumes the input is sorted. The output indices are sorted and uniqued. | |
| 259 // Return value is the number of "missed" features, being features that | |
| 260 // don't map to the compact feature space. | |
| 261 int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const { | |
| 262 compact->clear(); | |
| 263 int num_features = sparse.size(); | |
| 264 int missed_features = 0; | |
| 265 int prev_good_feature = -1; | |
| 266 for (int f = 0; f < num_features; ++f) { | |
| 267 int feature = sparse_map_[sparse[f]]; | |
| 268 if (feature >= 0) { | |
| 269 if (feature != prev_good_feature) { | |
| 270 compact->push_back(feature); | |
| 271 prev_good_feature = feature; | |
| 272 } | |
| 273 } else { | |
| 274 ++missed_features; | |
| 275 } | |
| 276 } | |
| 277 return missed_features; | |
| 278 } | |
| 279 | |
| 280 } // namespace tesseract. |
