Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccutil/helpers.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccutil/helpers.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,281 @@ +/****************************************************************************** + * + * File: helpers.h + * Description: General utility functions + * Author: Daria Antonova + * + * (c) Copyright 2009, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *****************************************************************************/ + +#ifndef TESSERACT_CCUTIL_HELPERS_H_ +#define TESSERACT_CCUTIL_HELPERS_H_ + +#include <cassert> +#include <climits> // for INT_MIN, INT_MAX +#include <cmath> // std::isfinite +#include <cstdio> +#include <cstring> +#include <algorithm> // for std::find +#include <functional> +#include <random> +#include <string> +#include <vector> + +#include "serialis.h" + +namespace tesseract { + +// Copy a std::string to a newly allocated char *. +// TODO: Remove this function once the related code has been converted +// to use std::string. +inline char *copy_string(const std::string &from) { + auto length = from.length(); + char *target_string = new char[length + 1]; + from.copy(target_string, length); + target_string[length] = '\0'; + return target_string; +} + +template <class T> +inline bool contains(const std::vector<T> &data, const T &value) { + return std::find(data.begin(), data.end(), value) != data.end(); +} + +inline const std::vector<std::string> split(const std::string &s, char c) { + std::string buff; + std::vector<std::string> v; + for (auto n : s) { + if (n != c) { + buff += n; + } else if (n == c && !buff.empty()) { + v.push_back(buff); + buff.clear(); + } + } + if (!buff.empty()) { + v.push_back(buff); + } + return v; +} + +// A simple linear congruential random number generator. +class TRand { +public: + // Sets the seed to the given value. + void set_seed(uint64_t seed) { + e.seed(seed); + } + // Sets the seed using a hash of a string. + void set_seed(const std::string &str) { + std::hash<std::string> hasher; + set_seed(static_cast<uint64_t>(hasher(str))); + } + + // Returns an integer in the range 0 to INT32_MAX. + int32_t IntRand() { + return e(); + } + // Returns a floating point value in the range [-range, range]. + double SignedRand(double range) { + return range * 2.0 * IntRand() / INT32_MAX - range; + } + // Returns a floating point value in the range [0, range]. + double UnsignedRand(double range) { + return range * IntRand() / INT32_MAX; + } + +private: + std::minstd_rand e; +}; + +// Remove newline (if any) at the end of the string. +inline void chomp_string(char *str) { + int last_index = static_cast<int>(strlen(str)) - 1; + while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) { + str[last_index--] = '\0'; + } +} + +// return the smallest multiple of block_size greater than or equal to n. +inline int RoundUp(int n, int block_size) { + return block_size * ((n + block_size - 1) / block_size); +} + +// Clip a numeric value to the interval [lower_bound, upper_bound]. +template <typename T> +inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { + if (x < lower_bound) { + return lower_bound; + } + if (x > upper_bound) { + return upper_bound; + } + return x; +} + +// Extend the range [lower_bound, upper_bound] to include x. +template <typename T1, typename T2> +inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { + if (x < *lower_bound) { + *lower_bound = x; + } + if (x > *upper_bound) { + *upper_bound = x; + } +} + +// Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi. +template <typename T1, typename T2> +inline void UpdateRange(const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound) { + if (x_lo < *lower_bound) { + *lower_bound = x_lo; + } + if (x_hi > *upper_bound) { + *upper_bound = x_hi; + } +} + +// Intersect the range [*lower2, *upper2] with the range [lower1, upper1], +// putting the result back in [*lower2, *upper2]. +// If non-intersecting ranges are given, we end up with *lower2 > *upper2. +template <typename T> +inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2) { + if (lower1 > *lower2) { + *lower2 = lower1; + } + if (upper1 < *upper2) { + *upper2 = upper1; + } +} + +// Proper modulo arithmetic operator. Returns a mod b that works for -ve a. +// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for +// some integer n. +inline int Modulo(int a, int b) { + return (a % b + b) % b; +} + +// Integer division operator with rounding that works for negative input. +// Returns a divided by b, rounded to the nearest integer, without double +// counting at 0. With simple rounding 1/3 = 0, 0/3 = 0 -1/3 = 0, -2/3 = 0, +// -3/3 = 0 and -4/3 = -1. +// I want 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1. +inline int DivRounded(int a, int b) { + if (b < 0) { + return -DivRounded(a, -b); + } + return a >= 0 ? (a + b / 2) / b : (a - b / 2) / b; +} + +// Return a double cast to int with rounding. +inline int IntCastRounded(double x) { + assert(std::isfinite(x)); + assert(x < INT_MAX); + assert(x > INT_MIN); + return x >= 0.0 ? static_cast<int>(x + 0.5) : -static_cast<int>(-x + 0.5); +} + +// Return a float cast to int with rounding. +inline int IntCastRounded(float x) { + assert(std::isfinite(x)); + return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F); +} + +// Reverse the order of bytes in a n byte quantity for big/little-endian switch. +inline void ReverseN(void *ptr, int num_bytes) { + assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8); + char *cptr = static_cast<char *>(ptr); + int halfsize = num_bytes / 2; + for (int i = 0; i < halfsize; ++i) { + char tmp = cptr[i]; + cptr[i] = cptr[num_bytes - 1 - i]; + cptr[num_bytes - 1 - i] = tmp; + } +} + +// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch. +inline void Reverse32(void *ptr) { + ReverseN(ptr, 4); +} + +// Reads a vector of simple types from the given file. Assumes that bitwise +// read/write will work with ReverseN according to sizeof(T). +// Returns false in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +template <typename T> +bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) { + uint32_t size; + if (fread(&size, sizeof(size), 1, fp) != 1) { + return false; + } + if (swap) { + Reverse32(&size); + } + // Arbitrarily limit the number of elements to protect against bad data. + assert(size <= UINT16_MAX); + if (size > UINT16_MAX) { + return false; + } + // TODO: optimize. + data.resize(size); + if (size > 0) { + if (fread(&data[0], sizeof(T), size, fp) != size) { + return false; + } + if (swap) { + for (uint32_t i = 0; i < size; ++i) { + ReverseN(&data[i], sizeof(T)); + } + } + } + return true; +} + +// Writes a vector of simple types to the given file. Assumes that bitwise +// read/write of T will work. Returns false in case of error. +template <typename T> +bool Serialize(FILE *fp, const std::vector<T> &data) { + uint32_t size = data.size(); + if (fwrite(&size, sizeof(size), 1, fp) != 1) { + return false; + } else if constexpr (std::is_class<T>::value) { + // Serialize a tesseract class. + for (auto &item : data) { + if (!item.Serialize(fp)) { + return false; + } + } + } else if constexpr (std::is_pointer<T>::value) { + // Serialize pointers. + for (auto &item : data) { + uint8_t non_null = (item != nullptr); + if (!Serialize(fp, &non_null)) { + return false; + } + if (non_null) { + if (!item->Serialize(fp)) { + return false; + } + } + } + } else if (size > 0) { + if (fwrite(&data[0], sizeof(T), size, fp) != size) { + return false; + } + } + return true; +} + +} // namespace tesseract + +#endif // TESSERACT_CCUTIL_HELPERS_H_
