comparison mupdf-source/thirdparty/tesseract/src/ccutil/indexmapbidi.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: indexmapbidi.cpp
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author: rays@google.com (Ray Smith)
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #include "helpers.h"
20 #include "indexmapbidi.h"
21 #include "serialis.h"
22
23 namespace tesseract {
24
25 // Destructor.
26 // It is defined here, so the compiler can create a single vtable
27 // instead of weak vtables in every compilation unit.
28 IndexMap::~IndexMap() = default;
29
30 // SparseToCompact takes a sparse index to an index in the compact space.
31 // Uses a binary search to find the result. For faster speed use
32 // IndexMapBiDi, but that takes more memory.
33 int IndexMap::SparseToCompact(int sparse_index) const {
34 auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index);
35 if (pos > compact_map_.begin()) {
36 --pos;
37 }
38 auto result = pos - compact_map_.begin();
39 return compact_map_[result] == sparse_index ? result : -1;
40 }
41
42 // Copy from the input.
43 void IndexMap::CopyFrom(const IndexMap &src) {
44 sparse_size_ = src.sparse_size_;
45 compact_map_ = src.compact_map_;
46 }
47 void IndexMap::CopyFrom(const IndexMapBiDi &src) {
48 sparse_size_ = src.SparseSize();
49 compact_map_ = src.compact_map_;
50 }
51
52 // Writes to the given file. Returns false in case of error.
53 bool IndexMap::Serialize(FILE *fp) const {
54 return tesseract::Serialize(fp, &sparse_size_) && tesseract::Serialize(fp, compact_map_);
55 }
56
57 // Reads from the given file. Returns false in case of error.
58 // If swap is true, assumes a big/little-endian swap is needed.
59 bool IndexMap::DeSerialize(bool swap, FILE *fp) {
60 uint32_t sparse_size;
61 if (!tesseract::DeSerialize(fp, &sparse_size)) {
62 return false;
63 }
64 if (swap) {
65 ReverseN(&sparse_size, sizeof(sparse_size));
66 }
67 // Arbitrarily limit the number of elements to protect against bad data.
68 if (sparse_size > UINT16_MAX) {
69 return false;
70 }
71 sparse_size_ = sparse_size;
72 return tesseract::DeSerialize(swap, fp, compact_map_);
73 }
74
75 // Destructor.
76 // It is defined here, so the compiler can create a single vtable
77 // instead of weak vtables in every compilation unit.
78 IndexMapBiDi::~IndexMapBiDi() = default;
79
80 // Top-level init function in a single call to initialize a map to select
81 // a single contiguous subrange [start, end) of the sparse space to be mapped
82 // 1 to 1 to the compact space, with all other elements of the sparse space
83 // left unmapped.
84 // No need to call Setup after this.
85 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
86 Init(sparse_size, false);
87 for (int i = start; i < end; ++i) {
88 SetMap(i, true);
89 }
90 Setup();
91 }
92
93 // Initializes just the sparse_map_ to the given size with either all
94 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
95 // Call Setup immediately after, or make calls to SetMap first to adjust the
96 // mapping and then call Setup before using the map.
97 void IndexMapBiDi::Init(int size, bool all_mapped) {
98 if (!all_mapped) {
99 sparse_map_.clear();
100 }
101 sparse_map_.resize(size, -1);
102 if (all_mapped) {
103 for (int i = 0; i < size; ++i) {
104 sparse_map_[i] = i;
105 }
106 }
107 }
108
109 // Sets a given index in the sparse_map_ to be mapped or not.
110 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
111 sparse_map_[sparse_index] = mapped ? 0 : -1;
112 }
113
114 // Sets up the sparse_map_ and compact_map_ properly after Init and
115 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
116 // in the forward map to the compact space.
117 void IndexMapBiDi::Setup() {
118 int compact_size = 0;
119 for (int &i : sparse_map_) {
120 if (i >= 0) {
121 i = compact_size++;
122 }
123 }
124 compact_map_.clear();
125 compact_map_.resize(compact_size, -1);
126 for (size_t i = 0; i < sparse_map_.size(); ++i) {
127 if (sparse_map_[i] >= 0) {
128 compact_map_[sparse_map_[i]] = i;
129 }
130 }
131 sparse_size_ = sparse_map_.size();
132 }
133
134 // Copy from the input.
135 void IndexMapBiDi::CopyFrom(const IndexMapBiDi &src) {
136 sparse_map_ = src.sparse_map_;
137 compact_map_ = src.compact_map_;
138 sparse_size_ = sparse_map_.size();
139 }
140
141 // Merges the two compact space indices. May be called many times, but
142 // the merges must be concluded by a call to CompleteMerges.
143 // Returns true if a merge was actually performed.
144 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
145 // Find the current master index for index1 and index2.
146 compact_index1 = MasterCompactIndex(compact_index1);
147 compact_index2 = MasterCompactIndex(compact_index2);
148 // Be sure that index1 < index2.
149 if (compact_index1 > compact_index2) {
150 int tmp = compact_index1;
151 compact_index1 = compact_index2;
152 compact_index2 = tmp;
153 } else if (compact_index1 == compact_index2) {
154 return false;
155 }
156 // To save iterating over all sparse_map_ entries, simply make the master
157 // entry for index2 point to index1.
158 // This leaves behind a potential chain of parents that needs to be chased,
159 // as above.
160 sparse_map_[compact_map_[compact_index2]] = compact_index1;
161 if (compact_index1 >= 0) {
162 compact_map_[compact_index2] = compact_map_[compact_index1];
163 }
164 return true;
165 }
166
167 // Completes one or more Merge operations by further compacting the
168 // compact space. Unused compact space indices are removed, and the used
169 // ones above shuffled down to fill the gaps.
170 // Example:
171 // Input sparse_map_: (x indicates -1)
172 // x x 0 x 2 x x 4 x 0 x 2 x
173 // Output sparse_map_:
174 // x x 0 x 1 x x 2 x 0 x 1 x
175 // Output compact_map_:
176 // 2 4 7.
177 void IndexMapBiDi::CompleteMerges() {
178 // Ensure each sparse_map_entry contains a master compact_map_ index.
179 int compact_size = 0;
180 for (int &i : sparse_map_) {
181 int compact_index = MasterCompactIndex(i);
182 i = compact_index;
183 if (compact_index >= compact_size) {
184 compact_size = compact_index + 1;
185 }
186 }
187 // Re-generate the compact_map leaving holes for unused indices.
188 compact_map_.clear();
189 compact_map_.resize(compact_size, -1);
190 for (size_t i = 0; i < sparse_map_.size(); ++i) {
191 if (sparse_map_[i] >= 0) {
192 if (compact_map_[sparse_map_[i]] == -1) {
193 compact_map_[sparse_map_[i]] = i;
194 }
195 }
196 }
197 // Compact the compact_map, leaving tmp_compact_map saying where each
198 // index went to in the compacted map.
199 std::vector<int32_t> tmp_compact_map(compact_size, -1);
200 compact_size = 0;
201 for (size_t i = 0; i < compact_map_.size(); ++i) {
202 if (compact_map_[i] >= 0) {
203 tmp_compact_map[i] = compact_size;
204 compact_map_[compact_size++] = compact_map_[i];
205 }
206 }
207 compact_map_.resize(compact_size);
208 // Now modify the entries in the sparse map to point to the new locations.
209 for (int &i : sparse_map_) {
210 if (i >= 0) {
211 i = tmp_compact_map[i];
212 }
213 }
214 }
215
216 // Writes to the given file. Returns false in case of error.
217 bool IndexMapBiDi::Serialize(FILE *fp) const {
218 if (!IndexMap::Serialize(fp)) {
219 return false;
220 }
221 // Make a vector containing the rest of the map. If the map is many-to-one
222 // then each additional sparse entry needs to be stored.
223 // Normally we store only the compact map to save space.
224 std::vector<int32_t> remaining_pairs;
225 for (unsigned i = 0; i < sparse_map_.size(); ++i) {
226 if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) {
227 remaining_pairs.push_back(i);
228 remaining_pairs.push_back(sparse_map_[i]);
229 }
230 }
231 return tesseract::Serialize(fp, remaining_pairs);
232 }
233
234 // Reads from the given file. Returns false in case of error.
235 // If swap is true, assumes a big/little-endian swap is needed.
236 bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
237 if (!IndexMap::DeSerialize(swap, fp)) {
238 return false;
239 }
240 std::vector<int32_t> remaining_pairs;
241 if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) {
242 return false;
243 }
244 sparse_map_.clear();
245 sparse_map_.resize(sparse_size_, -1);
246 for (unsigned i = 0; i < compact_map_.size(); ++i) {
247 sparse_map_[compact_map_[i]] = i;
248 }
249 for (size_t i = 0; i < remaining_pairs.size(); ++i) {
250 int sparse_index = remaining_pairs[i++];
251 sparse_map_[sparse_index] = remaining_pairs[i];
252 }
253 return true;
254 }
255
256 // Bulk calls to SparseToCompact.
257 // Maps the given array of sparse indices to an array of compact indices.
258 // Assumes the input is sorted. The output indices are sorted and uniqued.
259 // Return value is the number of "missed" features, being features that
260 // don't map to the compact feature space.
261 int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
262 compact->clear();
263 int num_features = sparse.size();
264 int missed_features = 0;
265 int prev_good_feature = -1;
266 for (int f = 0; f < num_features; ++f) {
267 int feature = sparse_map_[sparse[f]];
268 if (feature >= 0) {
269 if (feature != prev_good_feature) {
270 compact->push_back(feature);
271 prev_good_feature = feature;
272 }
273 } else {
274 ++missed_features;
275 }
276 }
277 return missed_features;
278 }
279
280 } // namespace tesseract.