Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/sorthelper.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: sorthelper.h | |
| 3 // Description: Generic sort and maxfinding class. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2010, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_ | |
| 20 #define TESSERACT_CCUTIL_SORTHELPER_H_ | |
| 21 | |
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <vector>
| 24 | |
| 25 namespace tesseract { | |
| 26 | |
// Generic class to provide functions based on a <value,count> pair.
// T is the value type.
// The class keeps a count of each distinct value and can return the most
// frequent value or an array of the values with counts, sorted either by
// count or by value.
// Note that this class uses linear search for adding. It is better
// to use the STATS class to get the mode of a large number of values
// in a small space. SortHelper is better to get the mode of a small number
// of values from a large space.
// Requirements on T: copy-constructible and assignable, comparable with
// operator== (used by Add) and operator< (used by the by-value ordering).
template <typename T>
class SortHelper {
public:
  // Simple pair class to hold the values and counts.
  template <typename PairT>
  struct SortPair {
    PairT value;
    int count;
  };

  // qsort-style comparator that orders by decreasing count.
  // v1 and v2 must point at SortPair<T> objects.
  // Returns <0 if *v1 sorts before *v2, >0 if after, 0 if the counts tie.
  static int SortPairsByCount(const void *v1, const void *v2) {
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
    // Compare rather than subtract: p2->count - p1->count overflows (UB)
    // when the two counts are far apart.
    if (p1->count > p2->count) {
      return -1;
    }
    if (p1->count < p2->count) {
      return 1;
    }
    return 0;
  }

  // qsort-style comparator that orders by decreasing value.
  // v1 and v2 must point at SortPair<T> objects.
  // Returns <0 if *v1 sorts before *v2, >0 if after, 0 if equivalent.
  static int SortPairsByValue(const void *v1, const void *v2) {
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
    // Use operator< instead of testing the sign of a difference: a
    // difference is never negative for unsigned T and can overflow for
    // signed T.
    if (p2->value < p1->value) {
      return -1;
    }
    if (p1->value < p2->value) {
      return 1;
    }
    return 0;
  }

  // Constructor takes a hint of the array size, but it need not be accurate.
  // A zero or negative hint is ignored.
  explicit SortHelper(int sizehint) {
    // A negative int would convert to a huge size_t and make reserve throw.
    if (sizehint > 0) {
      counts_.reserve(static_cast<size_t>(sizehint));
    }
  }

  // Add a value that may be a duplicate of an existing value.
  // If value is already present its count is increased by count, otherwise
  // a new <value,count> entry is appended.
  // Uses a linear search.
  void Add(T value, int count) {
    // Linear search for value.
    for (auto &entry : counts_) {
      if (entry.value == value) {
        entry.count += count;
        return;
      }
    }
    counts_.push_back(SortPair<T>{value, count});
  }

  // Returns the frequency of the most frequent value.
  // If max_value is not nullptr, *max_value is set to the most frequent
  // value (the first one added, if several share the best count).
  // If the array is empty, returns -INT32_MAX and max_value is unchanged.
  int MaxCount(T *max_value) const {
    int best_count = -INT32_MAX;
    for (const auto &entry : counts_) {
      if (entry.count > best_count) {
        best_count = entry.count;
        if (max_value != nullptr) {
          *max_value = entry.value;
        }
      }
    }
    return best_count;
  }

  // Sorts the data array in place by decreasing frequency and returns it.
  // The relative order of entries with equal counts is unspecified.
  const std::vector<SortPair<T>> &SortByCount() {
    // std::vector has no sort() member; use std::sort and reuse the
    // qsort-style comparator so the ordering is defined in one place.
    std::sort(counts_.begin(), counts_.end(),
              [](const SortPair<T> &a, const SortPair<T> &b) {
                return SortPairsByCount(&a, &b) < 0;
              });
    return counts_;
  }

  // Sorts the data array in place by decreasing value and returns it.
  const std::vector<SortPair<T>> &SortByValue() {
    std::sort(counts_.begin(), counts_.end(),
              [](const SortPair<T> &a, const SortPair<T> &b) {
                return SortPairsByValue(&a, &b) < 0;
              });
    return counts_;
  }

private:
  // One entry per distinct value seen by Add, in insertion order until one
  // of the SortBy* methods reorders it.
  std::vector<SortPair<T>> counts_;
};
| 113 | |
| 114 } // namespace tesseract | |
| 115 | |
| 116 #endif // TESSERACT_CCUTIL_SORTHELPER_H_. |
