comparison mupdf-source/thirdparty/tesseract/src/classify/kdtree.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 ** Filename: kdtree.cpp
3 ** Purpose: Routines for managing K-D search trees
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 /*-----------------------------------------------------------------------------
19 Include Files and Type Defines
20 -----------------------------------------------------------------------------*/
21 #include "kdtree.h"
22
23 #include <algorithm>
24 #include <cfloat> // for FLT_MAX
25 #include <cmath>
26 #include <cstdio>
27
28 namespace tesseract {
29
30 #define Magnitude(X) ((X) < 0 ? -(X) : (X))
31 #define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
32
33 /*-----------------------------------------------------------------------------
34 Global Data Definitions and Declarations
35 -----------------------------------------------------------------------------*/
36 #define MINSEARCH (-FLT_MAX)
37 #define MAXSEARCH FLT_MAX
38
39 // Helper function to find the next essential dimension in a cycle.
40 static int NextLevel(KDTREE *tree, int level) {
41 do {
42 ++level;
43 if (level >= tree->KeySize) {
44 level = 0;
45 }
46 } while (tree->KeyDesc[level].NonEssential);
47 return level;
48 }
49
50 //-----------------------------------------------------------------------------
51 /** Store the k smallest-keyed key-value pairs. */
52 template <typename Key, typename Value>
53 class MinK {
54 public:
55 MinK(Key max_key, int k);
56 ~MinK();
57
58 struct Element {
59 Element() = default;
60 Element(const Key &k, const Value &v) : key(k), value(v) {}
61
62 Key key;
63 Value value;
64 };
65
66 bool insert(Key k, Value v);
67 const Key &max_insertable_key();
68
69 int elements_count() {
70 return elements_count_;
71 }
72 const Element *elements() {
73 return elements_;
74 }
75
76 private:
77 const Key max_key_; ///< the maximum possible Key
78 Element *elements_; ///< unsorted array of elements
79 int elements_count_; ///< the number of results collected so far
80 int k_; ///< the number of results we want from the search
81 int max_index_; ///< the index of the result with the largest key
82 };
83
84 template <typename Key, typename Value>
85 MinK<Key, Value>::MinK(Key max_key, int k)
86 : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
87 elements_ = new Element[k_];
88 }
89
90 template <typename Key, typename Value>
91 MinK<Key, Value>::~MinK() {
92 delete[] elements_;
93 }
94
95 template <typename Key, typename Value>
96 const Key &MinK<Key, Value>::max_insertable_key() {
97 if (elements_count_ < k_) {
98 return max_key_;
99 }
100 return elements_[max_index_].key;
101 }
102
103 template <typename Key, typename Value>
104 bool MinK<Key, Value>::insert(Key key, Value value) {
105 if (elements_count_ < k_) {
106 elements_[elements_count_++] = Element(key, value);
107 if (key > elements_[max_index_].key) {
108 max_index_ = elements_count_ - 1;
109 }
110 return true;
111 } else if (key < elements_[max_index_].key) {
112 // evict the largest element.
113 elements_[max_index_] = Element(key, value);
114 // recompute max_index_
115 for (int i = 0; i < elements_count_; i++) {
116 if (elements_[i].key > elements_[max_index_].key) {
117 max_index_ = i;
118 }
119 }
120 return true;
121 }
122 return false;
123 }
124
125 //-----------------------------------------------------------------------------
126 /** Helper class for searching for the k closest points to query_point in tree.
127 */
128 class KDTreeSearch {
129 public:
130 KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
131 ~KDTreeSearch();
132
133 /** Return the k nearest points' data. */
134 void Search(int *result_count, float *distances, void **results);
135
136 private:
137 void SearchRec(int Level, KDNODE *SubTree);
138 bool BoxIntersectsSearch(float *lower, float *upper);
139
140 KDTREE *tree_;
141 float *query_point_;
142 float *sb_min_; ///< search box minimum
143 float *sb_max_; ///< search box maximum
144 MinK<float, void *> results_;
145 };
146
147 KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
148 : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
149 sb_min_ = new float[tree->KeySize];
150 sb_max_ = new float[tree->KeySize];
151 }
152
153 KDTreeSearch::~KDTreeSearch() {
154 delete[] sb_min_;
155 delete[] sb_max_;
156 }
157
158 /// Locate the k_closest points to query_point_, and return their distances and
159 /// data into the given buffers.
160 void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
161 if (tree_->Root.Left == nullptr) {
162 *result_count = 0;
163 } else {
164 for (int i = 0; i < tree_->KeySize; i++) {
165 sb_min_[i] = tree_->KeyDesc[i].Min;
166 sb_max_[i] = tree_->KeyDesc[i].Max;
167 }
168 SearchRec(0, tree_->Root.Left);
169 int count = results_.elements_count();
170 *result_count = count;
171 for (int j = 0; j < count; j++) {
172 // Pre-cast to float64 as key is a template type and we have no control
173 // over its actual type.
174 distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
175 results[j] = results_.elements()[j].value;
176 }
177 }
178 }
179
180 /*-----------------------------------------------------------------------------
181 Public Code
182 -----------------------------------------------------------------------------*/
183 /// @return a new KDTREE based on the specified parameters.
184 /// @param KeySize # of dimensions in the K-D tree
185 /// @param KeyDesc array of params to describe key dimensions
186 KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
187 auto *KDTree = new KDTREE(KeySize);
188 for (int i = 0; i < KeySize; i++) {
189 KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
190 KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
191 if (KeyDesc[i].Circular) {
192 KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
193 KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
194 KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
195 KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
196 KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
197 } else {
198 KDTree->KeyDesc[i].Min = MINSEARCH;
199 KDTree->KeyDesc[i].Max = MAXSEARCH;
200 }
201 }
202 KDTree->Root.Left = nullptr;
203 KDTree->Root.Right = nullptr;
204 return KDTree;
205 }
206
207 /**
208 * This routine stores Data in the K-D tree specified by Tree
209 * using Key as an access key.
210 *
211 * @param Tree K-D tree in which data is to be stored
212 * @param Key ptr to key by which data can be retrieved
213 * @param Data ptr to data to be stored in the tree
214 */
215 void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) {
216 auto PtrToNode = &(Tree->Root.Left);
217 auto Node = *PtrToNode;
218 auto Level = NextLevel(Tree, -1);
219 while (Node != nullptr) {
220 if (Key[Level] < Node->BranchPoint) {
221 PtrToNode = &(Node->Left);
222 if (Key[Level] > Node->LeftBranch) {
223 Node->LeftBranch = Key[Level];
224 }
225 } else {
226 PtrToNode = &(Node->Right);
227 if (Key[Level] < Node->RightBranch) {
228 Node->RightBranch = Key[Level];
229 }
230 }
231 Level = NextLevel(Tree, Level);
232 Node = *PtrToNode;
233 }
234
235 *PtrToNode = new KDNODE(Tree, Key, Data, Level);
236 } /* KDStore */
237
238 /**
239 * This routine deletes a node from Tree. The node to be
240 * deleted is specified by the Key for the node and the Data
241 * contents of the node. These two pointers must be identical
242 * to the pointers that were used for the node when it was
243 * originally stored in the tree. A node will be deleted from
244 * the tree only if its key and data pointers are identical
245 * to Key and Data respectively. The tree is re-formed by removing
246 * the affected subtree and inserting all elements but the root.
247 *
248 * @param Tree K-D tree to delete node from
249 * @param Key key of node to be deleted
250 * @param Data data contents of node to be deleted
251 */
252 void KDDelete(KDTREE *Tree, float Key[], void *Data) {
253 int Level;
254 KDNODE *Current;
255 KDNODE *Father;
256
257 /* initialize search at root of tree */
258 Father = &(Tree->Root);
259 Current = Father->Left;
260 Level = NextLevel(Tree, -1);
261
262 /* search tree for node to be deleted */
263 while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
264 Father = Current;
265 if (Key[Level] < Current->BranchPoint) {
266 Current = Current->Left;
267 } else {
268 Current = Current->Right;
269 }
270
271 Level = NextLevel(Tree, Level);
272 }
273
274 if (Current != nullptr) { /* if node to be deleted was found */
275 if (Current == Father->Left) {
276 Father->Left = nullptr;
277 Father->LeftBranch = Tree->KeyDesc[Level].Min;
278 } else {
279 Father->Right = nullptr;
280 Father->RightBranch = Tree->KeyDesc[Level].Max;
281 }
282
283 InsertNodes(Tree, Current->Left);
284 InsertNodes(Tree, Current->Right);
285 delete Current;
286 }
287 } /* KDDelete */
288
289 /**
290 * This routine searches the K-D tree specified by Tree and
291 * finds the QuerySize nearest neighbors of Query. All neighbors
292 * must be within MaxDistance of Query. The data contents of
293 * the nearest neighbors
294 * are placed in NBuffer and their distances from Query are
295 * placed in DBuffer.
296 * @param Tree ptr to K-D tree to be searched
297 * @param Query ptr to query key (point in D-space)
298 * @param QuerySize number of nearest neighbors to be found
299 * @param MaxDistance all neighbors must be within this distance
300 * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
301 * @param DBuffer ptr to QuerySize buffer to hold distances
302 * from nearest neighbor to query point
303 * @param NumberOfResults [out] Number of nearest neighbors actually found
304 */
305 void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
306 int *NumberOfResults, void **NBuffer, float DBuffer[]) {
307 KDTreeSearch search(Tree, Query, QuerySize);
308 search.Search(NumberOfResults, DBuffer, NBuffer);
309 }
310
311 /*---------------------------------------------------------------------------*/
312 /** Walk a given Tree with action. */
313 void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context) {
314 if (Tree->Root.Left != nullptr) {
315 Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
316 }
317 }
318
319 /*-----------------------------------------------------------------------------
320 Private Code
321 -----------------------------------------------------------------------------*/
322
323 /*---------------------------------------------------------------------------*/
324 /**
325 * Recursively accumulate the k_closest points to query_point_ into results_.
326 * @param Level level in tree of sub-tree to be searched
327 * @param SubTree sub-tree to be searched
328 */
329 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
330 if (level >= tree_->KeySize) {
331 level = 0;
332 }
333
334 if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
335 return;
336 }
337
338 results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
339 sub_tree->Data);
340
341 if (query_point_[level] < sub_tree->BranchPoint) {
342 if (sub_tree->Left != nullptr) {
343 float tmp = sb_max_[level];
344 sb_max_[level] = sub_tree->LeftBranch;
345 SearchRec(NextLevel(tree_, level), sub_tree->Left);
346 sb_max_[level] = tmp;
347 }
348 if (sub_tree->Right != nullptr) {
349 float tmp = sb_min_[level];
350 sb_min_[level] = sub_tree->RightBranch;
351 SearchRec(NextLevel(tree_, level), sub_tree->Right);
352 sb_min_[level] = tmp;
353 }
354 } else {
355 if (sub_tree->Right != nullptr) {
356 float tmp = sb_min_[level];
357 sb_min_[level] = sub_tree->RightBranch;
358 SearchRec(NextLevel(tree_, level), sub_tree->Right);
359 sb_min_[level] = tmp;
360 }
361 if (sub_tree->Left != nullptr) {
362 float tmp = sb_max_[level];
363 sb_max_[level] = sub_tree->LeftBranch;
364 SearchRec(NextLevel(tree_, level), sub_tree->Left);
365 sb_max_[level] = tmp;
366 }
367 }
368 }
369
370 /*---------------------------------------------------------------------------*/
371 /**
372 *Returns the Euclidean distance squared between p1 and p2 for all essential
373 * dimensions.
374 * @param k keys are in k-space
375 * @param dim dimension descriptions (essential, circular, etc)
376 * @param p1,p2 two different points in K-D space
377 */
378 float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
379 float total_distance = 0;
380
381 for (; k > 0; k--, p1++, p2++, dim++) {
382 if (dim->NonEssential) {
383 continue;
384 }
385
386 float dimension_distance = *p1 - *p2;
387
388 /* if this dimension is circular - check wraparound distance */
389 if (dim->Circular) {
390 dimension_distance = Magnitude(dimension_distance);
391 float wrap_distance = dim->Max - dim->Min - dimension_distance;
392 dimension_distance = std::min(dimension_distance, wrap_distance);
393 }
394
395 total_distance += dimension_distance * dimension_distance;
396 }
397 return total_distance;
398 }
399
400 float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
401 return std::sqrt(DistanceSquared(k, dim, p1, p2));
402 }
403
404 /*---------------------------------------------------------------------------*/
405 /// Return whether the query region (the smallest known circle about
406 /// query_point_ containing results->k_ points) intersects the box specified
407 /// between lower and upper. For circular dimensions, we also check the point
408 /// one wrap distance away from the query.
409 bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
410 float *query = query_point_;
411 // Compute the sum in higher precision.
412 double total_distance = 0.0;
413 double radius_squared =
414 static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
415 PARAM_DESC *dim = &tree_->KeyDesc[0];
416
417 for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
418 if (dim->NonEssential) {
419 continue;
420 }
421
422 float dimension_distance;
423 if (*query < *lower) {
424 dimension_distance = *lower - *query;
425 } else if (*query > *upper) {
426 dimension_distance = *query - *upper;
427 } else {
428 dimension_distance = 0;
429 }
430
431 /* if this dimension is circular - check wraparound distance */
432 if (dim->Circular) {
433 float wrap_distance = FLT_MAX;
434 if (*query < *lower) {
435 wrap_distance = *query + dim->Max - dim->Min - *upper;
436 } else if (*query > *upper) {
437 wrap_distance = *lower - (*query - (dim->Max - dim->Min));
438 }
439 dimension_distance = std::min(dimension_distance, wrap_distance);
440 }
441
442 total_distance += static_cast<double>(dimension_distance) * dimension_distance;
443 if (total_distance >= radius_squared) {
444 return false;
445 }
446 }
447 return true;
448 }
449
450 /*---------------------------------------------------------------------------*/
451 /**
452 * Walk a tree, calling action once on each node.
453 *
454 * Operation:
455 * This routine walks through the specified sub_tree and invokes action
456 * action at each node as follows:
457 * action(context, data, level)
458 * data the data contents of the node being visited,
459 * level is the level of the node in the tree with the root being level 0.
460 * @param tree root of the tree being walked.
461 * @param action action to be performed at every node
462 * @param context action's context
463 * @param sub_tree ptr to root of subtree to be walked
464 * @param level current level in the tree for this node
465 */
466 void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) {
467 (*action)(context, sub_tree->Data, level);
468 if (sub_tree->Left != nullptr) {
469 Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
470 }
471 if (sub_tree->Right != nullptr) {
472 Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
473 }
474 }
475
476 /** Given a subtree nodes, insert all of its elements into tree. */
477 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
478 if (nodes == nullptr) {
479 return;
480 }
481
482 KDStore(tree, nodes->Key, nodes->Data);
483 InsertNodes(tree, nodes->Left);
484 InsertNodes(tree, nodes->Right);
485 }
486
487 } // namespace tesseract