Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/genericheap.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2012 Google Inc. All Rights Reserved. | |
| 2 // Author: rays@google.com (Ray Smith) | |
| 3 /////////////////////////////////////////////////////////////////////// | |
| 4 // File: genericheap.h | |
| 5 // Description: Template heap class. | |
| 6 // Author: Ray Smith, based on Dan Johnson's original code. | |
| 7 // | |
| 8 // (C) Copyright 2012, Google Inc. | |
| 9 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 10 // you may not use this file except in compliance with the License. | |
| 11 // You may obtain a copy of the License at | |
| 12 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 13 // Unless required by applicable law or agreed to in writing, software | |
| 14 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 16 // See the License for the specific language governing permissions and | |
| 17 // limitations under the License. | |
| 18 // | |
| 19 /////////////////////////////////////////////////////////////////////// | |
| 20 | |
| 21 #ifndef TESSERACT_CCUTIL_GENERICHEAP_H_ | |
| 22 #define TESSERACT_CCUTIL_GENERICHEAP_H_ | |
| 23 | |
| 24 #include "errcode.h" | |
| 25 | |
| 26 #include <vector> | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 // GenericHeap requires 1 template argument: | |
| 31 // Pair will normally be either KDPairInc<Key, Data> or KDPairDec<Key, Data> | |
| 32 // for some arbitrary Key and scalar, smart pointer, or non-ownership pointer | |
| 33 // Data type, according to whether a MIN heap or a MAX heap is desired, | |
| 34 // respectively. Using KDPtrPairInc<Key, Data> or KDPtrPairDec<Key, Data>, | |
| 35 // GenericHeap can also handle simple Data pointers and own them. | |
| 36 // If no additional data is required, Pair can also be a scalar, since | |
| 37 // GenericHeap doesn't look inside it except for operator<. | |
| 38 // | |
| 39 // The heap is stored as a packed binary tree in an array hosted by a | |
| 40 // vector<Pair>, with the invariant that the children of each node are | |
| 41 // both NOT Pair::operator< the parent node. KDPairInc defines Pair::operator< | |
| 42 // to use Key::operator< to generate a MIN heap and KDPairDec defines | |
| 43 // Pair::operator< to use Key::operator> to generate a MAX heap by reversing | |
| 44 // all the comparisons. | |
| 45 // See http://en.wikipedia.org/wiki/Heap_(data_structure) for more detail on | |
| 46 // the basic heap implementation. | |
| 47 // | |
| 48 // Insertion and removal are both O(log n) and, unlike the STL heap, an | |
| 49 // explicit Reshuffle function allows a node to be repositioned in time O(log n) | |
| 50 // after changing its value. | |
| 51 // | |
| 52 // Accessing the element for revaluation is a more complex matter, since the | |
| 53 // index and pointer can be changed arbitrarily by heap operations. | |
| 54 // Revaluation can be done by making the Data type in the Pair derived from or | |
| 55 // contain a DoublePtr as its first data element, making it possible to convert | |
| 56 // the pointer to a Pair using KDPairInc::RecastDataPointer. | |
| 57 template <typename Pair> | |
| 58 class GenericHeap { | |
| 59 public: | |
| 60 GenericHeap() = default; | |
| 61 // The initial size is only a vector::reserve. It is not enforced as | |
| 62 // the size limit of the heap. Caller must implement their own enforcement. | |
| 63 explicit GenericHeap(int initial_size) { | |
| 64 heap_.reserve(initial_size); | |
| 65 } | |
| 66 | |
| 67 // Simple accessors. | |
| 68 bool empty() const { | |
| 69 return heap_.empty(); | |
| 70 } | |
| 71 int size() const { | |
| 72 return heap_.size(); | |
| 73 } | |
| 74 int size_reserved() const { | |
| 75 return heap_.size_reserved(); | |
| 76 } | |
| 77 void clear() { | |
| 78 // Clear truncates to 0 to keep the number reserved in tact. | |
| 79 heap_.clear(); | |
| 80 } | |
| 81 // Provides access to the underlying vector. | |
| 82 // Caution! any changes that modify the keys will invalidate the heap! | |
| 83 std::vector<Pair> &heap() { | |
| 84 return heap_; | |
| 85 } | |
| 86 // Provides read-only access to an element of the underlying vector. | |
| 87 const Pair &get(int index) const { | |
| 88 return heap_[index]; | |
| 89 } | |
| 90 | |
| 91 // Add entry to the heap, keeping the smallest item at the top, by operator<. | |
| 92 // Note that *entry is used as the source of operator=, but it is non-const | |
| 93 // to allow for a smart pointer to be contained within. | |
| 94 // Time = O(log n). | |
| 95 void Push(Pair *entry) { | |
| 96 int hole_index = heap_.size(); | |
| 97 // Make a hole in the end of heap_ and sift it up to be the correct | |
| 98 // location for the new *entry. To avoid needing a default constructor | |
| 99 // for primitive types, and to allow for use of DoublePtr in the Pair | |
| 100 // somewhere, we have to incur a double copy here. | |
| 101 heap_.push_back(*entry); | |
| 102 *entry = heap_.back(); | |
| 103 hole_index = SiftUp(hole_index, *entry); | |
| 104 heap_[hole_index] = *entry; | |
| 105 } | |
| 106 | |
| 107 // Get the value of the top (smallest, defined by operator< ) element. | |
| 108 const Pair &PeekTop() const { | |
| 109 return heap_[0]; | |
| 110 } | |
| 111 // Get the value of the worst (largest, defined by operator< ) element. | |
| 112 const Pair &PeekWorst() const { | |
| 113 return heap_[IndexOfWorst()]; | |
| 114 } | |
| 115 | |
| 116 // Removes the top element of the heap. If entry is not nullptr, the element | |
| 117 // is copied into *entry, otherwise it is discarded. | |
| 118 // Returns false if the heap was already empty. | |
| 119 // Time = O(log n). | |
| 120 bool Pop(Pair *entry) { | |
| 121 int new_size = heap_.size() - 1; | |
| 122 if (new_size < 0) { | |
| 123 return false; // Already empty. | |
| 124 } | |
| 125 if (entry != nullptr) { | |
| 126 *entry = heap_[0]; | |
| 127 } | |
| 128 if (new_size > 0) { | |
| 129 // Sift the hole at the start of the heap_ downwards to match the last | |
| 130 // element. | |
| 131 Pair hole_pair = heap_[new_size]; | |
| 132 heap_.resize(new_size); | |
| 133 int hole_index = SiftDown(0, hole_pair); | |
| 134 heap_[hole_index] = std::move(hole_pair); | |
| 135 } else { | |
| 136 heap_.resize(new_size); | |
| 137 } | |
| 138 return true; | |
| 139 } | |
| 140 | |
| 141 // Removes the MAXIMUM element of the heap. (MIN from a MAX heap.) If entry is | |
| 142 // not nullptr, the element is copied into *entry, otherwise it is discarded. | |
| 143 // Time = O(n). Returns false if the heap was already empty. | |
| 144 bool PopWorst(Pair *entry) { | |
| 145 int worst_index = IndexOfWorst(); | |
| 146 if (worst_index < 0) { | |
| 147 return false; // It cannot be empty! | |
| 148 } | |
| 149 // Extract the worst element from the heap, leaving a hole at worst_index. | |
| 150 if (entry != nullptr) { | |
| 151 *entry = heap_[worst_index]; | |
| 152 } | |
| 153 int heap_size = heap_.size() - 1; | |
| 154 if (heap_size > 0) { | |
| 155 // Sift the hole upwards to match the last element of the heap_ | |
| 156 Pair hole_pair = heap_[heap_size]; | |
| 157 int hole_index = SiftUp(worst_index, hole_pair); | |
| 158 heap_[hole_index] = hole_pair; | |
| 159 } | |
| 160 heap_.resize(heap_size); | |
| 161 return true; | |
| 162 } | |
| 163 | |
| 164 // Returns the index of the worst element. Time = O(n/2). | |
| 165 int IndexOfWorst() const { | |
| 166 int heap_size = heap_.size(); | |
| 167 if (heap_size == 0) { | |
| 168 return -1; // It cannot be empty! | |
| 169 } | |
| 170 | |
| 171 // Find the maximum element. Its index is guaranteed to be greater than | |
| 172 // the index of the parent of the last element, since by the heap invariant | |
| 173 // the parent must be less than or equal to the children. | |
| 174 int worst_index = heap_size - 1; | |
| 175 int end_parent = ParentNode(worst_index); | |
| 176 for (int i = worst_index - 1; i > end_parent; --i) { | |
| 177 if (heap_[worst_index] < heap_[i]) { | |
| 178 worst_index = i; | |
| 179 } | |
| 180 } | |
| 181 return worst_index; | |
| 182 } | |
| 183 | |
| 184 // The pointed-to Pair has changed its key value, so the location of pair | |
| 185 // is reshuffled to maintain the heap invariant. | |
| 186 // Must be a valid pointer to an element of the heap_! | |
| 187 // Caution! Since GenericHeap is based on vector, reallocs may occur | |
| 188 // whenever the vector is extended and elements may get shuffled by any | |
| 189 // Push or Pop operation. Therefore use this function only if Data in Pair is | |
| 190 // of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as | |
| 191 // its first element. Reshuffles the heap to maintain the invariant. | |
| 192 // Time = O(log n). | |
| 193 void Reshuffle(Pair *pair) { | |
| 194 int index = pair - &heap_[0]; | |
| 195 Pair hole_pair = heap_[index]; | |
| 196 index = SiftDown(index, hole_pair); | |
| 197 index = SiftUp(index, hole_pair); | |
| 198 heap_[index] = std::move(hole_pair); | |
| 199 } | |
| 200 | |
| 201 private: | |
| 202 // A hole in the heap exists at hole_index, and we want to fill it with the | |
| 203 // given pair. SiftUp sifts the hole upward to the correct position and | |
| 204 // returns the destination index without actually putting pair there. | |
| 205 int SiftUp(int hole_index, const Pair &pair) { | |
| 206 int parent; | |
| 207 while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) { | |
| 208 heap_[hole_index] = heap_[parent]; | |
| 209 hole_index = parent; | |
| 210 } | |
| 211 return hole_index; | |
| 212 } | |
| 213 | |
| 214 // A hole in the heap exists at hole_index, and we want to fill it with the | |
| 215 // given pair. SiftDown sifts the hole downward to the correct position and | |
| 216 // returns the destination index without actually putting pair there. | |
| 217 int SiftDown(int hole_index, const Pair &pair) { | |
| 218 int heap_size = heap_.size(); | |
| 219 int child; | |
| 220 while ((child = LeftChild(hole_index)) < heap_size) { | |
| 221 if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) { | |
| 222 ++child; | |
| 223 } | |
| 224 if (heap_[child] < pair) { | |
| 225 heap_[hole_index] = heap_[child]; | |
| 226 hole_index = child; | |
| 227 } else { | |
| 228 break; | |
| 229 } | |
| 230 } | |
| 231 return hole_index; | |
| 232 } | |
| 233 | |
| 234 // Functions to navigate the tree. Unlike the original implementation, we | |
| 235 // store the root at index 0. | |
| 236 int ParentNode(int index) const { | |
| 237 return (index + 1) / 2 - 1; | |
| 238 } | |
| 239 int LeftChild(int index) const { | |
| 240 return index * 2 + 1; | |
| 241 } | |
| 242 | |
| 243 private: | |
| 244 std::vector<Pair> heap_; | |
| 245 }; | |
| 246 | |
| 247 } // namespace tesseract | |
| 248 | |
| 249 #endif // TESSERACT_CCUTIL_GENERICHEAP_H_ |
