diff mupdf-source/thirdparty/tesseract/src/classify/kdtree.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/kdtree.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,487 @@
+/******************************************************************************
+ **  Filename:  kdtree.cpp
+ **  Purpose:   Routines for managing K-D search trees
+ **  Author:    Dan Johnson
+ **
+ **  (c) Copyright Hewlett-Packard Company, 1988.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ ******************************************************************************/
+
+/*-----------------------------------------------------------------------------
+          Include Files and Type Defines
+-----------------------------------------------------------------------------*/
+#include "kdtree.h"
+
+#include <algorithm>
+#include <cfloat> // for FLT_MAX
+#include <cmath>
+#include <cstdio>
+
+namespace tesseract {
+
+#define Magnitude(X) ((X) < 0 ? -(X) : (X))
+#define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
+
+/*-----------------------------------------------------------------------------
+        Global Data Definitions and Declarations
+-----------------------------------------------------------------------------*/
+#define MINSEARCH (-FLT_MAX)
+#define MAXSEARCH FLT_MAX
+
+// Helper function to find the next essential dimension in a cycle.
+static int NextLevel(KDTREE *tree, int level) {
+  do {
+    ++level;
+    if (level >= tree->KeySize) {
+      level = 0;
+    }
+  } while (tree->KeyDesc[level].NonEssential);
+  return level;
+}
+
+//-----------------------------------------------------------------------------
+/**  Store the k smallest-keyed key-value pairs. */
+template <typename Key, typename Value>
+class MinK {
+public:
+  MinK(Key max_key, int k);
+  ~MinK();
+
+  struct Element {
+    Element() = default;
+    Element(const Key &k, const Value &v) : key(k), value(v) {}
+
+    Key key;
+    Value value;
+  };
+
+  bool insert(Key k, Value v);
+  const Key &max_insertable_key();
+
+  int elements_count() {
+    return elements_count_;
+  }
+  const Element *elements() {
+    return elements_;
+  }
+
+private:
+  const Key max_key_;  ///< the maximum possible Key
+  Element *elements_;  ///< unsorted array of elements
+  int elements_count_; ///< the number of results collected so far
+  int k_;              ///< the number of results we want from the search
+  int max_index_;      ///< the index of the result with the largest key
+};
+
+template <typename Key, typename Value>
+MinK<Key, Value>::MinK(Key max_key, int k)
+    : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
+  elements_ = new Element[k_];
+}
+
+template <typename Key, typename Value>
+MinK<Key, Value>::~MinK() {
+  delete[] elements_;
+}
+
+template <typename Key, typename Value>
+const Key &MinK<Key, Value>::max_insertable_key() {
+  if (elements_count_ < k_) {
+    return max_key_;
+  }
+  return elements_[max_index_].key;
+}
+
+template <typename Key, typename Value>
+bool MinK<Key, Value>::insert(Key key, Value value) {
+  if (elements_count_ < k_) {
+    elements_[elements_count_++] = Element(key, value);
+    if (key > elements_[max_index_].key) {
+      max_index_ = elements_count_ - 1;
+    }
+    return true;
+  } else if (key < elements_[max_index_].key) {
+    // evict the largest element.
+    elements_[max_index_] = Element(key, value);
+    // recompute max_index_
+    for (int i = 0; i < elements_count_; i++) {
+      if (elements_[i].key > elements_[max_index_].key) {
+        max_index_ = i;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+//-----------------------------------------------------------------------------
+/** Helper class for searching for the k closest points to query_point in tree.
+ */
+class KDTreeSearch {
+public:
+  KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
+  ~KDTreeSearch();
+
+  /** Return the k nearest points' data. */
+  void Search(int *result_count, float *distances, void **results);
+
+private:
+  void SearchRec(int Level, KDNODE *SubTree);
+  bool BoxIntersectsSearch(float *lower, float *upper);
+
+  KDTREE *tree_;
+  float *query_point_;
+  float *sb_min_; ///< search box minimum
+  float *sb_max_; ///< search box maximum
+  MinK<float, void *> results_;
+};
+
+KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
+    : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
+  sb_min_ = new float[tree->KeySize];
+  sb_max_ = new float[tree->KeySize];
+}
+
+KDTreeSearch::~KDTreeSearch() {
+  delete[] sb_min_;
+  delete[] sb_max_;
+}
+
+/// Locate the k_closest points to query_point_, and return their distances and
+/// data into the given buffers.
+void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
+  if (tree_->Root.Left == nullptr) {
+    *result_count = 0;
+  } else {
+    for (int i = 0; i < tree_->KeySize; i++) {
+      sb_min_[i] = tree_->KeyDesc[i].Min;
+      sb_max_[i] = tree_->KeyDesc[i].Max;
+    }
+    SearchRec(0, tree_->Root.Left);
+    int count = results_.elements_count();
+    *result_count = count;
+    for (int j = 0; j < count; j++) {
+      // Pre-cast to float64 as key is a template type and we have no control
+      // over its actual type.
+      distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
+      results[j] = results_.elements()[j].value;
+    }
+  }
+}
+
+/*-----------------------------------------------------------------------------
+              Public Code
+-----------------------------------------------------------------------------*/
+/// @return a new KDTREE based on the specified parameters.
+/// @param KeySize  # of dimensions in the K-D tree
+/// @param KeyDesc  array of params to describe key dimensions
+KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
+  auto *KDTree = new KDTREE(KeySize);
+  for (int i = 0; i < KeySize; i++) {
+    KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
+    KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
+    if (KeyDesc[i].Circular) {
+      KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
+      KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
+      KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
+      KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
+      KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
+    } else {
+      KDTree->KeyDesc[i].Min = MINSEARCH;
+      KDTree->KeyDesc[i].Max = MAXSEARCH;
+    }
+  }
+  KDTree->Root.Left = nullptr;
+  KDTree->Root.Right = nullptr;
+  return KDTree;
+}
+
+/**
+ * This routine stores Data in the K-D tree specified by Tree
+ * using Key as an access key.
+ *
+ * @param Tree    K-D tree in which data is to be stored
+ * @param Key    ptr to key by which data can be retrieved
+ * @param Data    ptr to data to be stored in the tree
+ */
+void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) {
+  auto PtrToNode = &(Tree->Root.Left);
+  auto Node = *PtrToNode;
+  auto Level = NextLevel(Tree, -1);
+  while (Node != nullptr) {
+    if (Key[Level] < Node->BranchPoint) {
+      PtrToNode = &(Node->Left);
+      if (Key[Level] > Node->LeftBranch) {
+        Node->LeftBranch = Key[Level];
+      }
+    } else {
+      PtrToNode = &(Node->Right);
+      if (Key[Level] < Node->RightBranch) {
+        Node->RightBranch = Key[Level];
+      }
+    }
+    Level = NextLevel(Tree, Level);
+    Node = *PtrToNode;
+  }
+
+  *PtrToNode = new KDNODE(Tree, Key, Data, Level);
+} /* KDStore */
+
+/**
+ * This routine deletes a node from Tree.  The node to be
+ * deleted is specified by the Key for the node and the Data
+ * contents of the node.  These two pointers must be identical
+ * to the pointers that were used for the node when it was
+ * originally stored in the tree.  A node will be deleted from
+ * the tree only if its key and data pointers are identical
+ * to Key and Data respectively.  The tree is re-formed by removing
+ * the affected subtree and inserting all elements but the root.
+ *
+ * @param Tree K-D tree to delete node from
+ * @param Key key of node to be deleted
+ * @param Data data contents of node to be deleted
+ */
+void KDDelete(KDTREE *Tree, float Key[], void *Data) {
+  int Level;
+  KDNODE *Current;
+  KDNODE *Father;
+
+  /* initialize search at root of tree */
+  Father = &(Tree->Root);
+  Current = Father->Left;
+  Level = NextLevel(Tree, -1);
+
+  /* search tree for node to be deleted */
+  while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
+    Father = Current;
+    if (Key[Level] < Current->BranchPoint) {
+      Current = Current->Left;
+    } else {
+      Current = Current->Right;
+    }
+
+    Level = NextLevel(Tree, Level);
+  }
+
+  if (Current != nullptr) { /* if node to be deleted was found */
+    if (Current == Father->Left) {
+      Father->Left = nullptr;
+      Father->LeftBranch = Tree->KeyDesc[Level].Min;
+    } else {
+      Father->Right = nullptr;
+      Father->RightBranch = Tree->KeyDesc[Level].Max;
+    }
+
+    InsertNodes(Tree, Current->Left);
+    InsertNodes(Tree, Current->Right);
+    delete Current;
+  }
+} /* KDDelete */
+
+/**
+ * This routine searches the K-D tree specified by Tree and
+ * finds the QuerySize nearest neighbors of Query.  All neighbors
+ * must be within MaxDistance of Query.  The data contents of
+ * the nearest neighbors
+ * are placed in NBuffer and their distances from Query are
+ * placed in DBuffer.
+ * @param Tree    ptr to K-D tree to be searched
+ * @param Query    ptr to query key (point in D-space)
+ * @param QuerySize  number of nearest neighbors to be found
+ * @param MaxDistance  all neighbors must be within this distance
+ * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
+ * @param DBuffer ptr to QuerySize buffer to hold distances
+ *          from nearest neighbor to query point
+ * @param NumberOfResults [out] Number of nearest neighbors actually found
+ */
+void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
+                             int *NumberOfResults, void **NBuffer, float DBuffer[]) {
+  KDTreeSearch search(Tree, Query, QuerySize);
+  search.Search(NumberOfResults, DBuffer, NBuffer);
+}
+
+/*---------------------------------------------------------------------------*/
+/** Walk a given Tree with action. */
+void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context) {
+  if (Tree->Root.Left != nullptr) {
+    Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
+  }
+}
+
+/*-----------------------------------------------------------------------------
+              Private Code
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Recursively accumulate the k_closest points to query_point_ into results_.
+ * @param Level  level in tree of sub-tree to be searched
+ * @param SubTree  sub-tree to be searched
+ */
+void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
+  if (level >= tree_->KeySize) {
+    level = 0;
+  }
+
+  if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
+    return;
+  }
+
+  results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
+                  sub_tree->Data);
+
+  if (query_point_[level] < sub_tree->BranchPoint) {
+    if (sub_tree->Left != nullptr) {
+      float tmp = sb_max_[level];
+      sb_max_[level] = sub_tree->LeftBranch;
+      SearchRec(NextLevel(tree_, level), sub_tree->Left);
+      sb_max_[level] = tmp;
+    }
+    if (sub_tree->Right != nullptr) {
+      float tmp = sb_min_[level];
+      sb_min_[level] = sub_tree->RightBranch;
+      SearchRec(NextLevel(tree_, level), sub_tree->Right);
+      sb_min_[level] = tmp;
+    }
+  } else {
+    if (sub_tree->Right != nullptr) {
+      float tmp = sb_min_[level];
+      sb_min_[level] = sub_tree->RightBranch;
+      SearchRec(NextLevel(tree_, level), sub_tree->Right);
+      sb_min_[level] = tmp;
+    }
+    if (sub_tree->Left != nullptr) {
+      float tmp = sb_max_[level];
+      sb_max_[level] = sub_tree->LeftBranch;
+      SearchRec(NextLevel(tree_, level), sub_tree->Left);
+      sb_max_[level] = tmp;
+    }
+  }
+}
+
+/*---------------------------------------------------------------------------*/
+/**
+ *Returns the Euclidean distance squared between p1 and p2 for all essential
+ * dimensions.
+ * @param k      keys are in k-space
+ * @param dim    dimension descriptions (essential, circular, etc)
+ * @param p1,p2  two different points in K-D space
+ */
+float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
+  float total_distance = 0;
+
+  for (; k > 0; k--, p1++, p2++, dim++) {
+    if (dim->NonEssential) {
+      continue;
+    }
+
+    float dimension_distance = *p1 - *p2;
+
+    /* if this dimension is circular - check wraparound distance */
+    if (dim->Circular) {
+      dimension_distance = Magnitude(dimension_distance);
+      float wrap_distance = dim->Max - dim->Min - dimension_distance;
+      dimension_distance = std::min(dimension_distance, wrap_distance);
+    }
+
+    total_distance += dimension_distance * dimension_distance;
+  }
+  return total_distance;
+}
+
+float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
+  return std::sqrt(DistanceSquared(k, dim, p1, p2));
+}
+
+/*---------------------------------------------------------------------------*/
+/// Return whether the query region (the smallest known circle about
+/// query_point_ containing results->k_ points) intersects the box specified
+/// between lower and upper.  For circular dimensions, we also check the point
+/// one wrap distance away from the query.
+bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
+  float *query = query_point_;
+  // Compute the sum in higher precision.
+  double total_distance = 0.0;
+  double radius_squared =
+      static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
+  PARAM_DESC *dim = &tree_->KeyDesc[0];
+
+  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
+    if (dim->NonEssential) {
+      continue;
+    }
+
+    float dimension_distance;
+    if (*query < *lower) {
+      dimension_distance = *lower - *query;
+    } else if (*query > *upper) {
+      dimension_distance = *query - *upper;
+    } else {
+      dimension_distance = 0;
+    }
+
+    /* if this dimension is circular - check wraparound distance */
+    if (dim->Circular) {
+      float wrap_distance = FLT_MAX;
+      if (*query < *lower) {
+        wrap_distance = *query + dim->Max - dim->Min - *upper;
+      } else if (*query > *upper) {
+        wrap_distance = *lower - (*query - (dim->Max - dim->Min));
+      }
+      dimension_distance = std::min(dimension_distance, wrap_distance);
+    }
+
+    total_distance += static_cast<double>(dimension_distance) * dimension_distance;
+    if (total_distance >= radius_squared) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/*---------------------------------------------------------------------------*/
+/**
+ * Walk a tree, calling action once on each node.
+ *
+ * Operation:
+ *   This routine walks through the specified sub_tree and invokes action
+ *   action at each node as follows:
+ *       action(context, data, level)
+ *   data  the data contents of the node being visited,
+ *   level is the level of the node in the tree with the root being level 0.
+ * @param tree  root of the tree being walked.
+ * @param action  action to be performed at every node
+ * @param context  action's context
+ * @param sub_tree  ptr to root of subtree to be walked
+ * @param level  current level in the tree for this node
+ */
+void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) {
+  (*action)(context, sub_tree->Data, level);
+  if (sub_tree->Left != nullptr) {
+    Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
+  }
+  if (sub_tree->Right != nullptr) {
+    Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
+  }
+}
+
+/** Given a subtree nodes, insert all of its elements into tree. */
+void InsertNodes(KDTREE *tree, KDNODE *nodes) {
+  if (nodes == nullptr) {
+    return;
+  }
+
+  KDStore(tree, nodes->Key, nodes->Data);
+  InsertNodes(tree, nodes->Left);
+  InsertNodes(tree, nodes->Right);
+}
+
+} // namespace tesseract