Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/classify/kdtree.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 ** Filename: kdtree.cpp | |
| 3 ** Purpose: Routines for managing K-D search trees | |
| 4 ** Author: Dan Johnson | |
| 5 ** | |
| 6 ** (c) Copyright Hewlett-Packard Company, 1988. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 ******************************************************************************/ | |
| 17 | |
| 18 /*----------------------------------------------------------------------------- | |
| 19 Include Files and Type Defines | |
| 20 -----------------------------------------------------------------------------*/ | |
| 21 #include "kdtree.h" | |
| 22 | |
| 23 #include <algorithm> | |
| 24 #include <cfloat> // for FLT_MAX | |
| 25 #include <cmath> | |
| 26 #include <cstdio> | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 #define Magnitude(X) ((X) < 0 ? -(X) : (X)) | |
| 31 #define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D))) | |
| 32 | |
| 33 /*----------------------------------------------------------------------------- | |
| 34 Global Data Definitions and Declarations | |
| 35 -----------------------------------------------------------------------------*/ | |
| 36 #define MINSEARCH (-FLT_MAX) | |
| 37 #define MAXSEARCH FLT_MAX | |
| 38 | |
| 39 // Helper function to find the next essential dimension in a cycle. | |
| 40 static int NextLevel(KDTREE *tree, int level) { | |
| 41 do { | |
| 42 ++level; | |
| 43 if (level >= tree->KeySize) { | |
| 44 level = 0; | |
| 45 } | |
| 46 } while (tree->KeyDesc[level].NonEssential); | |
| 47 return level; | |
| 48 } | |
| 49 | |
| 50 //----------------------------------------------------------------------------- | |
| 51 /** Store the k smallest-keyed key-value pairs. */ | |
| 52 template <typename Key, typename Value> | |
| 53 class MinK { | |
| 54 public: | |
| 55 MinK(Key max_key, int k); | |
| 56 ~MinK(); | |
| 57 | |
| 58 struct Element { | |
| 59 Element() = default; | |
| 60 Element(const Key &k, const Value &v) : key(k), value(v) {} | |
| 61 | |
| 62 Key key; | |
| 63 Value value; | |
| 64 }; | |
| 65 | |
| 66 bool insert(Key k, Value v); | |
| 67 const Key &max_insertable_key(); | |
| 68 | |
| 69 int elements_count() { | |
| 70 return elements_count_; | |
| 71 } | |
| 72 const Element *elements() { | |
| 73 return elements_; | |
| 74 } | |
| 75 | |
| 76 private: | |
| 77 const Key max_key_; ///< the maximum possible Key | |
| 78 Element *elements_; ///< unsorted array of elements | |
| 79 int elements_count_; ///< the number of results collected so far | |
| 80 int k_; ///< the number of results we want from the search | |
| 81 int max_index_; ///< the index of the result with the largest key | |
| 82 }; | |
| 83 | |
| 84 template <typename Key, typename Value> | |
| 85 MinK<Key, Value>::MinK(Key max_key, int k) | |
| 86 : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { | |
| 87 elements_ = new Element[k_]; | |
| 88 } | |
| 89 | |
| 90 template <typename Key, typename Value> | |
| 91 MinK<Key, Value>::~MinK() { | |
| 92 delete[] elements_; | |
| 93 } | |
| 94 | |
| 95 template <typename Key, typename Value> | |
| 96 const Key &MinK<Key, Value>::max_insertable_key() { | |
| 97 if (elements_count_ < k_) { | |
| 98 return max_key_; | |
| 99 } | |
| 100 return elements_[max_index_].key; | |
| 101 } | |
| 102 | |
| 103 template <typename Key, typename Value> | |
| 104 bool MinK<Key, Value>::insert(Key key, Value value) { | |
| 105 if (elements_count_ < k_) { | |
| 106 elements_[elements_count_++] = Element(key, value); | |
| 107 if (key > elements_[max_index_].key) { | |
| 108 max_index_ = elements_count_ - 1; | |
| 109 } | |
| 110 return true; | |
| 111 } else if (key < elements_[max_index_].key) { | |
| 112 // evict the largest element. | |
| 113 elements_[max_index_] = Element(key, value); | |
| 114 // recompute max_index_ | |
| 115 for (int i = 0; i < elements_count_; i++) { | |
| 116 if (elements_[i].key > elements_[max_index_].key) { | |
| 117 max_index_ = i; | |
| 118 } | |
| 119 } | |
| 120 return true; | |
| 121 } | |
| 122 return false; | |
| 123 } | |
| 124 | |
| 125 //----------------------------------------------------------------------------- | |
| 126 /** Helper class for searching for the k closest points to query_point in tree. | |
| 127 */ | |
| 128 class KDTreeSearch { | |
| 129 public: | |
| 130 KDTreeSearch(KDTREE *tree, float *query_point, int k_closest); | |
| 131 ~KDTreeSearch(); | |
| 132 | |
| 133 /** Return the k nearest points' data. */ | |
| 134 void Search(int *result_count, float *distances, void **results); | |
| 135 | |
| 136 private: | |
| 137 void SearchRec(int Level, KDNODE *SubTree); | |
| 138 bool BoxIntersectsSearch(float *lower, float *upper); | |
| 139 | |
| 140 KDTREE *tree_; | |
| 141 float *query_point_; | |
| 142 float *sb_min_; ///< search box minimum | |
| 143 float *sb_max_; ///< search box maximum | |
| 144 MinK<float, void *> results_; | |
| 145 }; | |
| 146 | |
| 147 KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest) | |
| 148 : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) { | |
| 149 sb_min_ = new float[tree->KeySize]; | |
| 150 sb_max_ = new float[tree->KeySize]; | |
| 151 } | |
| 152 | |
| 153 KDTreeSearch::~KDTreeSearch() { | |
| 154 delete[] sb_min_; | |
| 155 delete[] sb_max_; | |
| 156 } | |
| 157 | |
| 158 /// Locate the k_closest points to query_point_, and return their distances and | |
| 159 /// data into the given buffers. | |
| 160 void KDTreeSearch::Search(int *result_count, float *distances, void **results) { | |
| 161 if (tree_->Root.Left == nullptr) { | |
| 162 *result_count = 0; | |
| 163 } else { | |
| 164 for (int i = 0; i < tree_->KeySize; i++) { | |
| 165 sb_min_[i] = tree_->KeyDesc[i].Min; | |
| 166 sb_max_[i] = tree_->KeyDesc[i].Max; | |
| 167 } | |
| 168 SearchRec(0, tree_->Root.Left); | |
| 169 int count = results_.elements_count(); | |
| 170 *result_count = count; | |
| 171 for (int j = 0; j < count; j++) { | |
| 172 // Pre-cast to float64 as key is a template type and we have no control | |
| 173 // over its actual type. | |
| 174 distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key))); | |
| 175 results[j] = results_.elements()[j].value; | |
| 176 } | |
| 177 } | |
| 178 } | |
| 179 | |
| 180 /*----------------------------------------------------------------------------- | |
| 181 Public Code | |
| 182 -----------------------------------------------------------------------------*/ | |
| 183 /// @return a new KDTREE based on the specified parameters. | |
| 184 /// @param KeySize # of dimensions in the K-D tree | |
| 185 /// @param KeyDesc array of params to describe key dimensions | |
| 186 KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { | |
| 187 auto *KDTree = new KDTREE(KeySize); | |
| 188 for (int i = 0; i < KeySize; i++) { | |
| 189 KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; | |
| 190 KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; | |
| 191 if (KeyDesc[i].Circular) { | |
| 192 KDTree->KeyDesc[i].Min = KeyDesc[i].Min; | |
| 193 KDTree->KeyDesc[i].Max = KeyDesc[i].Max; | |
| 194 KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; | |
| 195 KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; | |
| 196 KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; | |
| 197 } else { | |
| 198 KDTree->KeyDesc[i].Min = MINSEARCH; | |
| 199 KDTree->KeyDesc[i].Max = MAXSEARCH; | |
| 200 } | |
| 201 } | |
| 202 KDTree->Root.Left = nullptr; | |
| 203 KDTree->Root.Right = nullptr; | |
| 204 return KDTree; | |
| 205 } | |
| 206 | |
| 207 /** | |
| 208 * This routine stores Data in the K-D tree specified by Tree | |
| 209 * using Key as an access key. | |
| 210 * | |
| 211 * @param Tree K-D tree in which data is to be stored | |
| 212 * @param Key ptr to key by which data can be retrieved | |
| 213 * @param Data ptr to data to be stored in the tree | |
| 214 */ | |
| 215 void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) { | |
| 216 auto PtrToNode = &(Tree->Root.Left); | |
| 217 auto Node = *PtrToNode; | |
| 218 auto Level = NextLevel(Tree, -1); | |
| 219 while (Node != nullptr) { | |
| 220 if (Key[Level] < Node->BranchPoint) { | |
| 221 PtrToNode = &(Node->Left); | |
| 222 if (Key[Level] > Node->LeftBranch) { | |
| 223 Node->LeftBranch = Key[Level]; | |
| 224 } | |
| 225 } else { | |
| 226 PtrToNode = &(Node->Right); | |
| 227 if (Key[Level] < Node->RightBranch) { | |
| 228 Node->RightBranch = Key[Level]; | |
| 229 } | |
| 230 } | |
| 231 Level = NextLevel(Tree, Level); | |
| 232 Node = *PtrToNode; | |
| 233 } | |
| 234 | |
| 235 *PtrToNode = new KDNODE(Tree, Key, Data, Level); | |
| 236 } /* KDStore */ | |
| 237 | |
| 238 /** | |
| 239 * This routine deletes a node from Tree. The node to be | |
| 240 * deleted is specified by the Key for the node and the Data | |
| 241 * contents of the node. These two pointers must be identical | |
| 242 * to the pointers that were used for the node when it was | |
| 243 * originally stored in the tree. A node will be deleted from | |
| 244 * the tree only if its key and data pointers are identical | |
| 245 * to Key and Data respectively. The tree is re-formed by removing | |
| 246 * the affected subtree and inserting all elements but the root. | |
| 247 * | |
| 248 * @param Tree K-D tree to delete node from | |
| 249 * @param Key key of node to be deleted | |
| 250 * @param Data data contents of node to be deleted | |
| 251 */ | |
| 252 void KDDelete(KDTREE *Tree, float Key[], void *Data) { | |
| 253 int Level; | |
| 254 KDNODE *Current; | |
| 255 KDNODE *Father; | |
| 256 | |
| 257 /* initialize search at root of tree */ | |
| 258 Father = &(Tree->Root); | |
| 259 Current = Father->Left; | |
| 260 Level = NextLevel(Tree, -1); | |
| 261 | |
| 262 /* search tree for node to be deleted */ | |
| 263 while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) { | |
| 264 Father = Current; | |
| 265 if (Key[Level] < Current->BranchPoint) { | |
| 266 Current = Current->Left; | |
| 267 } else { | |
| 268 Current = Current->Right; | |
| 269 } | |
| 270 | |
| 271 Level = NextLevel(Tree, Level); | |
| 272 } | |
| 273 | |
| 274 if (Current != nullptr) { /* if node to be deleted was found */ | |
| 275 if (Current == Father->Left) { | |
| 276 Father->Left = nullptr; | |
| 277 Father->LeftBranch = Tree->KeyDesc[Level].Min; | |
| 278 } else { | |
| 279 Father->Right = nullptr; | |
| 280 Father->RightBranch = Tree->KeyDesc[Level].Max; | |
| 281 } | |
| 282 | |
| 283 InsertNodes(Tree, Current->Left); | |
| 284 InsertNodes(Tree, Current->Right); | |
| 285 delete Current; | |
| 286 } | |
| 287 } /* KDDelete */ | |
| 288 | |
| 289 /** | |
| 290 * This routine searches the K-D tree specified by Tree and | |
| 291 * finds the QuerySize nearest neighbors of Query. All neighbors | |
| 292 * must be within MaxDistance of Query. The data contents of | |
| 293 * the nearest neighbors | |
| 294 * are placed in NBuffer and their distances from Query are | |
| 295 * placed in DBuffer. | |
| 296 * @param Tree ptr to K-D tree to be searched | |
| 297 * @param Query ptr to query key (point in D-space) | |
| 298 * @param QuerySize number of nearest neighbors to be found | |
| 299 * @param MaxDistance all neighbors must be within this distance | |
| 300 * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors | |
| 301 * @param DBuffer ptr to QuerySize buffer to hold distances | |
| 302 * from nearest neighbor to query point | |
| 303 * @param NumberOfResults [out] Number of nearest neighbors actually found | |
| 304 */ | |
| 305 void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, | |
| 306 int *NumberOfResults, void **NBuffer, float DBuffer[]) { | |
| 307 KDTreeSearch search(Tree, Query, QuerySize); | |
| 308 search.Search(NumberOfResults, DBuffer, NBuffer); | |
| 309 } | |
| 310 | |
| 311 /*---------------------------------------------------------------------------*/ | |
| 312 /** Walk a given Tree with action. */ | |
| 313 void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context) { | |
| 314 if (Tree->Root.Left != nullptr) { | |
| 315 Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); | |
| 316 } | |
| 317 } | |
| 318 | |
| 319 /*----------------------------------------------------------------------------- | |
| 320 Private Code | |
| 321 -----------------------------------------------------------------------------*/ | |
| 322 | |
| 323 /*---------------------------------------------------------------------------*/ | |
| 324 /** | |
| 325 * Recursively accumulate the k_closest points to query_point_ into results_. | |
| 326 * @param Level level in tree of sub-tree to be searched | |
| 327 * @param SubTree sub-tree to be searched | |
| 328 */ | |
| 329 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { | |
| 330 if (level >= tree_->KeySize) { | |
| 331 level = 0; | |
| 332 } | |
| 333 | |
| 334 if (!BoxIntersectsSearch(sb_min_, sb_max_)) { | |
| 335 return; | |
| 336 } | |
| 337 | |
| 338 results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key), | |
| 339 sub_tree->Data); | |
| 340 | |
| 341 if (query_point_[level] < sub_tree->BranchPoint) { | |
| 342 if (sub_tree->Left != nullptr) { | |
| 343 float tmp = sb_max_[level]; | |
| 344 sb_max_[level] = sub_tree->LeftBranch; | |
| 345 SearchRec(NextLevel(tree_, level), sub_tree->Left); | |
| 346 sb_max_[level] = tmp; | |
| 347 } | |
| 348 if (sub_tree->Right != nullptr) { | |
| 349 float tmp = sb_min_[level]; | |
| 350 sb_min_[level] = sub_tree->RightBranch; | |
| 351 SearchRec(NextLevel(tree_, level), sub_tree->Right); | |
| 352 sb_min_[level] = tmp; | |
| 353 } | |
| 354 } else { | |
| 355 if (sub_tree->Right != nullptr) { | |
| 356 float tmp = sb_min_[level]; | |
| 357 sb_min_[level] = sub_tree->RightBranch; | |
| 358 SearchRec(NextLevel(tree_, level), sub_tree->Right); | |
| 359 sb_min_[level] = tmp; | |
| 360 } | |
| 361 if (sub_tree->Left != nullptr) { | |
| 362 float tmp = sb_max_[level]; | |
| 363 sb_max_[level] = sub_tree->LeftBranch; | |
| 364 SearchRec(NextLevel(tree_, level), sub_tree->Left); | |
| 365 sb_max_[level] = tmp; | |
| 366 } | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 /*---------------------------------------------------------------------------*/ | |
| 371 /** | |
| 372 *Returns the Euclidean distance squared between p1 and p2 for all essential | |
| 373 * dimensions. | |
| 374 * @param k keys are in k-space | |
| 375 * @param dim dimension descriptions (essential, circular, etc) | |
| 376 * @param p1,p2 two different points in K-D space | |
| 377 */ | |
| 378 float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) { | |
| 379 float total_distance = 0; | |
| 380 | |
| 381 for (; k > 0; k--, p1++, p2++, dim++) { | |
| 382 if (dim->NonEssential) { | |
| 383 continue; | |
| 384 } | |
| 385 | |
| 386 float dimension_distance = *p1 - *p2; | |
| 387 | |
| 388 /* if this dimension is circular - check wraparound distance */ | |
| 389 if (dim->Circular) { | |
| 390 dimension_distance = Magnitude(dimension_distance); | |
| 391 float wrap_distance = dim->Max - dim->Min - dimension_distance; | |
| 392 dimension_distance = std::min(dimension_distance, wrap_distance); | |
| 393 } | |
| 394 | |
| 395 total_distance += dimension_distance * dimension_distance; | |
| 396 } | |
| 397 return total_distance; | |
| 398 } | |
| 399 | |
| 400 float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) { | |
| 401 return std::sqrt(DistanceSquared(k, dim, p1, p2)); | |
| 402 } | |
| 403 | |
| 404 /*---------------------------------------------------------------------------*/ | |
| 405 /// Return whether the query region (the smallest known circle about | |
| 406 /// query_point_ containing results->k_ points) intersects the box specified | |
| 407 /// between lower and upper. For circular dimensions, we also check the point | |
| 408 /// one wrap distance away from the query. | |
| 409 bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) { | |
| 410 float *query = query_point_; | |
| 411 // Compute the sum in higher precision. | |
| 412 double total_distance = 0.0; | |
| 413 double radius_squared = | |
| 414 static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key(); | |
| 415 PARAM_DESC *dim = &tree_->KeyDesc[0]; | |
| 416 | |
| 417 for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) { | |
| 418 if (dim->NonEssential) { | |
| 419 continue; | |
| 420 } | |
| 421 | |
| 422 float dimension_distance; | |
| 423 if (*query < *lower) { | |
| 424 dimension_distance = *lower - *query; | |
| 425 } else if (*query > *upper) { | |
| 426 dimension_distance = *query - *upper; | |
| 427 } else { | |
| 428 dimension_distance = 0; | |
| 429 } | |
| 430 | |
| 431 /* if this dimension is circular - check wraparound distance */ | |
| 432 if (dim->Circular) { | |
| 433 float wrap_distance = FLT_MAX; | |
| 434 if (*query < *lower) { | |
| 435 wrap_distance = *query + dim->Max - dim->Min - *upper; | |
| 436 } else if (*query > *upper) { | |
| 437 wrap_distance = *lower - (*query - (dim->Max - dim->Min)); | |
| 438 } | |
| 439 dimension_distance = std::min(dimension_distance, wrap_distance); | |
| 440 } | |
| 441 | |
| 442 total_distance += static_cast<double>(dimension_distance) * dimension_distance; | |
| 443 if (total_distance >= radius_squared) { | |
| 444 return false; | |
| 445 } | |
| 446 } | |
| 447 return true; | |
| 448 } | |
| 449 | |
| 450 /*---------------------------------------------------------------------------*/ | |
| 451 /** | |
| 452 * Walk a tree, calling action once on each node. | |
| 453 * | |
| 454 * Operation: | |
| 455 * This routine walks through the specified sub_tree and invokes action | |
| 456 * action at each node as follows: | |
| 457 * action(context, data, level) | |
| 458 * data the data contents of the node being visited, | |
| 459 * level is the level of the node in the tree with the root being level 0. | |
| 460 * @param tree root of the tree being walked. | |
| 461 * @param action action to be performed at every node | |
| 462 * @param context action's context | |
| 463 * @param sub_tree ptr to root of subtree to be walked | |
| 464 * @param level current level in the tree for this node | |
| 465 */ | |
| 466 void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) { | |
| 467 (*action)(context, sub_tree->Data, level); | |
| 468 if (sub_tree->Left != nullptr) { | |
| 469 Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level)); | |
| 470 } | |
| 471 if (sub_tree->Right != nullptr) { | |
| 472 Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); | |
| 473 } | |
| 474 } | |
| 475 | |
| 476 /** Given a subtree nodes, insert all of its elements into tree. */ | |
| 477 void InsertNodes(KDTREE *tree, KDNODE *nodes) { | |
| 478 if (nodes == nullptr) { | |
| 479 return; | |
| 480 } | |
| 481 | |
| 482 KDStore(tree, nodes->Key, nodes->Data); | |
| 483 InsertNodes(tree, nodes->Left); | |
| 484 InsertNodes(tree, nodes->Right); | |
| 485 } | |
| 486 | |
| 487 } // namespace tesseract |
