Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/helpers.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: helpers.h | |
| 4 * Description: General utility functions | |
| 5 * Author: Daria Antonova | |
| 6 * | |
| 7 * (c) Copyright 2009, Google Inc. | |
| 8 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 ** you may not use this file except in compliance with the License. | |
| 10 ** You may obtain a copy of the License at | |
| 11 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 ** Unless required by applicable law or agreed to in writing, software | |
| 13 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 ** See the License for the specific language governing permissions and | |
| 16 ** limitations under the License. | |
| 17 * | |
| 18 *****************************************************************************/ | |
| 19 | |
| 20 #ifndef TESSERACT_CCUTIL_HELPERS_H_ | |
| 21 #define TESSERACT_CCUTIL_HELPERS_H_ | |
| 22 | |
| 23 #include <cassert> | |
| 24 #include <climits> // for INT_MIN, INT_MAX | |
| 25 #include <cmath> // std::isfinite | |
| 26 #include <cstdio> | |
| 27 #include <cstring> | |
| 28 #include <algorithm> // for std::find | |
| 29 #include <functional> | |
| 30 #include <random> | |
| 31 #include <string> | |
| 32 #include <vector> | |
| 33 | |
| 34 #include "serialis.h" | |
| 35 | |
| 36 namespace tesseract { | |
| 37 | |
// Returns a newly allocated, NUL-terminated char array holding the same
// characters as the given std::string. The array is created with new[] and
// is not released by this function.
// TODO: Remove this function once the related code has been converted
// to use std::string.
inline char *copy_string(const std::string &from) {
  const auto num_chars = from.size();
  char *duplicate = new char[num_chars + 1];
  std::memcpy(duplicate, from.data(), num_chars);
  duplicate[num_chars] = '\0';
  return duplicate;
}
| 48 | |
// Returns true if at least one element of data compares equal to value.
template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
  for (const auto &element : data) {
    if (element == value) {
      return true;
    }
  }
  return false;
}
| 53 | |
// Breaks s into the pieces separated by the character c.
// Empty pieces (from leading, trailing or consecutive separators) are not
// included in the result.
inline const std::vector<std::string> split(const std::string &s, char c) {
  std::vector<std::string> tokens;
  std::string::size_type begin = 0;
  while (begin < s.size()) {
    // Locate the end of the current piece: the next separator or end of s.
    std::string::size_type end = s.find(c, begin);
    if (end == std::string::npos) {
      end = s.size();
    }
    if (end > begin) {
      tokens.push_back(s.substr(begin, end - begin));
    }
    begin = end + 1;
  }
  return tokens;
}
| 70 | |
// Pseudo-random number source built on the standard library's
// std::minstd_rand linear congruential engine.
class TRand {
public:
  // Re-seeds the engine with the given value.
  void set_seed(uint64_t seed) {
    engine_.seed(seed);
  }
  // Re-seeds the engine with the std::hash value of the given string.
  void set_seed(const std::string &str) {
    const auto hashed = std::hash<std::string>{}(str);
    set_seed(static_cast<uint64_t>(hashed));
  }

  // Returns the next output of the engine, which lies in the range
  // 0 to INT32_MAX.
  int32_t IntRand() {
    const auto raw = engine_();
    return static_cast<int32_t>(raw);
  }
  // Returns a floating point value in the range [-range, range].
  double SignedRand(double range) {
    const double width = range * 2.0;
    return width * IntRand() / INT32_MAX - range;
  }
  // Returns a floating point value in the range [0, range].
  double UnsignedRand(double range) {
    const double scaled = range * IntRand();
    return scaled / INT32_MAX;
  }

private:
  // The underlying generator; default-constructed until set_seed is called.
  std::minstd_rand engine_;
};
| 100 | |
// Removes every trailing '\n' and '\r' character from the end of the
// NUL-terminated string, in place, by overwriting them with '\0'.
// str must point to a writable, NUL-terminated buffer; it is not checked
// for null.
inline void chomp_string(char *str) {
  // Keep the length as std::size_t: narrowing it to int would give a wrong
  // index for strings longer than INT_MAX characters.
  std::size_t length = std::strlen(str);
  while (length > 0 && (str[length - 1] == '\n' || str[length - 1] == '\r')) {
    str[--length] = '\0';
  }
}
| 108 | |
// Rounds n up to a multiple of block_size: for non-negative n and positive
// block_size this is the smallest multiple of block_size that is >= n.
inline int RoundUp(int n, int block_size) {
  const int num_blocks = (n + block_size - 1) / block_size;
  return num_blocks * block_size;
}
| 113 | |
// Returns x limited to the interval [lower_bound, upper_bound]: values below
// lower_bound yield lower_bound, values above upper_bound yield upper_bound,
// anything else is returned unchanged. The lower bound is tested first.
template <typename T>
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
  return x < lower_bound ? lower_bound : (x > upper_bound ? upper_bound : x);
}
| 125 | |
// Widens the range [*lower_bound, *upper_bound] so that it contains x.
// A bound is only overwritten when x lies strictly outside it.
template <typename T1, typename T2>
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
  T2 &low = *lower_bound;
  T2 &high = *upper_bound;
  if (x < low) {
    low = x;
  }
  if (x > high) {
    high = x;
  }
}
| 136 | |
// Widens the range [*lower_bound, *upper_bound] so that it contains the
// range [x_lo, x_hi]: *lower_bound is lowered to x_lo if x_lo is smaller,
// and *upper_bound is raised to x_hi if x_hi is larger.
template <typename T1, typename T2>
inline void UpdateRange(const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound) {
  T2 &low = *lower_bound;
  T2 &high = *upper_bound;
  if (x_lo < low) {
    low = x_lo;
  }
  if (x_hi > high) {
    high = x_hi;
  }
}
| 147 | |
// Narrows the range [*lower2, *upper2] to its overlap with [lower1, upper1],
// storing the result back through lower2 and upper2.
// When the two ranges do not overlap, the result has *lower2 > *upper2.
template <typename T>
inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2) {
  T &low = *lower2;
  T &high = *upper2;
  if (lower1 > low) {
    low = lower1;
  }
  if (upper1 < high) {
    high = upper1;
  }
}
| 160 | |
// Proper modulo arithmetic operator. Returns a mod b that works for -ve a.
// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for
// some integer n. (For negative b the result takes the sign of b instead.)
// b must be non-zero, and a % b must itself be defined (i.e. not
// INT_MIN % -1).
inline int Modulo(int a, int b) {
  int remainder = a % b;
  // The built-in % gives a remainder with the sign of a. Shift it by b only
  // when its sign disagrees with b: the two operands then have opposite
  // signs, so the addition cannot overflow even for b close to INT_MAX
  // (unlike the unconditional (a % b + b) % b formulation).
  if (remainder != 0 && (remainder < 0) != (b < 0)) {
    remainder += b;
  }
  return remainder;
}
| 167 | |
// Integer division with rounding to the nearest integer that also works for
// negative input, without double counting at 0.
// Plain truncating division gives 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = 0,
// -3/3 = -1 and -4/3 = -1, whereas this function gives
// 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
// A negative b is handled by dividing by -b and negating the result.
inline int DivRounded(int a, int b) {
  const bool negate = b < 0;
  const int divisor = negate ? -b : b;
  const int half = divisor / 2;
  // Push the numerator half a divisor away from zero before truncating.
  const int biased = a >= 0 ? a + half : a - half;
  const int quotient = biased / divisor;
  return negate ? -quotient : quotient;
}
| 179 | |
// Converts a double to int by adding 0.5 to its magnitude and truncating,
// so that halfway cases round away from zero.
// Asserts that x is finite and lies strictly between INT_MIN and INT_MAX.
inline int IntCastRounded(double x) {
  assert(std::isfinite(x));
  assert(x < INT_MAX);
  assert(x > INT_MIN);
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  const double magnitude = -x;
  return -static_cast<int>(magnitude + 0.5);
}
| 187 | |
// Converts a float to int by adding 0.5 to its magnitude and truncating,
// so that halfway cases round away from zero. Asserts that x is finite.
inline int IntCastRounded(float x) {
  assert(std::isfinite(x));
  if (x >= 0.0F) {
    return static_cast<int>(x + 0.5F);
  }
  const float magnitude = -x;
  return -static_cast<int>(magnitude + 0.5F);
}
| 193 | |
// Reverses, in place, the order of the num_bytes bytes starting at ptr, for
// switching a value between big- and little-endian representation.
// Asserts that num_bytes is 1, 2, 4 or 8.
inline void ReverseN(void *ptr, int num_bytes) {
  assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
  char *bytes = static_cast<char *>(ptr);
  // Walk inwards from both ends, exchanging the outermost pair each time.
  for (int front = 0, back = num_bytes - 1; front < back; ++front, --back) {
    const char saved = bytes[front];
    bytes[front] = bytes[back];
    bytes[back] = saved;
  }
}
| 205 | |
| 206 // Reverse the order of bytes in a 32 bit quantity for big/little-endian switch. | |
| 207 inline void Reverse32(void *ptr) { | |
| 208 ReverseN(ptr, 4); | |
| 209 } | |
| 210 | |
| 211 // Reads a vector of simple types from the given file. Assumes that bitwise | |
| 212 // read/write will work with ReverseN according to sizeof(T). | |
| 213 // Returns false in case of error. | |
| 214 // If swap is true, assumes a big/little-endian swap is needed. | |
| 215 template <typename T> | |
| 216 bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) { | |
| 217 uint32_t size; | |
| 218 if (fread(&size, sizeof(size), 1, fp) != 1) { | |
| 219 return false; | |
| 220 } | |
| 221 if (swap) { | |
| 222 Reverse32(&size); | |
| 223 } | |
| 224 // Arbitrarily limit the number of elements to protect against bad data. | |
| 225 assert(size <= UINT16_MAX); | |
| 226 if (size > UINT16_MAX) { | |
| 227 return false; | |
| 228 } | |
| 229 // TODO: optimize. | |
| 230 data.resize(size); | |
| 231 if (size > 0) { | |
| 232 if (fread(&data[0], sizeof(T), size, fp) != size) { | |
| 233 return false; | |
| 234 } | |
| 235 if (swap) { | |
| 236 for (uint32_t i = 0; i < size; ++i) { | |
| 237 ReverseN(&data[i], sizeof(T)); | |
| 238 } | |
| 239 } | |
| 240 } | |
| 241 return true; | |
| 242 } | |
| 243 | |
| 244 // Writes a vector of simple types to the given file. Assumes that bitwise | |
| 245 // read/write of T will work. Returns false in case of error. | |
| 246 template <typename T> | |
| 247 bool Serialize(FILE *fp, const std::vector<T> &data) { | |
| 248 uint32_t size = data.size(); | |
| 249 if (fwrite(&size, sizeof(size), 1, fp) != 1) { | |
| 250 return false; | |
| 251 } else if constexpr (std::is_class<T>::value) { | |
| 252 // Serialize a tesseract class. | |
| 253 for (auto &item : data) { | |
| 254 if (!item.Serialize(fp)) { | |
| 255 return false; | |
| 256 } | |
| 257 } | |
| 258 } else if constexpr (std::is_pointer<T>::value) { | |
| 259 // Serialize pointers. | |
| 260 for (auto &item : data) { | |
| 261 uint8_t non_null = (item != nullptr); | |
| 262 if (!Serialize(fp, &non_null)) { | |
| 263 return false; | |
| 264 } | |
| 265 if (non_null) { | |
| 266 if (!item->Serialize(fp)) { | |
| 267 return false; | |
| 268 } | |
| 269 } | |
| 270 } | |
| 271 } else if (size > 0) { | |
| 272 if (fwrite(&data[0], sizeof(T), size, fp) != size) { | |
| 273 return false; | |
| 274 } | |
| 275 } | |
| 276 return true; | |
| 277 } | |
| 278 | |
| 279 } // namespace tesseract | |
| 280 | |
| 281 #endif // TESSERACT_CCUTIL_HELPERS_H_ |
