Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccutil/serialis.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccutil/serialis.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,234 @@ +/********************************************************************** + * File: serialis.h (Formerly serialmac.h) + * Description: Inline routines and macros for serialisation functions + * Author: Phil Cheatle + * + * (C) Copyright 1990, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef SERIALIS_H +#define SERIALIS_H + +#include <tesseract/baseapi.h> // FileReader +#include <cstdint> // uint8_t +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <type_traits> +#include <vector> // std::vector + +namespace tesseract { + +// Return number of elements of an array. +template <typename T, size_t N> +constexpr size_t countof(T const (&)[N]) noexcept { + return N; +} + +// Function to write a std::vector<char> to a whole file. +// Returns false on failure. +using FileWriter = bool (*)(const std::vector<char> &data, const char *filename); + +TESS_API +bool LoadDataFromFile(const char *filename, std::vector<char> *data); +TESS_API +bool SaveDataToFile(const std::vector<char> &data, const char *filename); + +// Deserialize data from file. +template <typename T> +bool DeSerialize(FILE *fp, T *data, size_t n = 1) { + return fread(data, sizeof(T), n, fp) == n; +} + +// Serialize data to file. +template <typename T> +bool Serialize(FILE *fp, const T *data, size_t n = 1) { + return fwrite(data, sizeof(T), n, fp) == n; +} + +// Simple file class. +// Allows for portable file input from memory and from foreign file systems. +class TESS_API TFile { +public: + TFile(); + ~TFile(); + + // All the Open methods load the whole file into memory for reading. + // Opens a file with a supplied reader, or nullptr to use the default. + // Note that mixed read/write is not supported. + bool Open(const char *filename, FileReader reader); + // From an existing memory buffer. + bool Open(const char *data, size_t size); + // From an open file and an end offset. + bool Open(FILE *fp, int64_t end_offset); + // Sets the value of the swap flag, so that FReadEndian does the right thing. + void set_swap(bool value) { + swap_ = value; + } + + // Deserialize data. + bool DeSerializeSize(int32_t *data); + bool DeSerializeSkip(size_t size = 1); + bool DeSerialize(std::string &data); + bool DeSerialize(std::vector<char> &data); + //bool DeSerialize(std::vector<std::string> &data); + template <typename T> + bool DeSerialize(T *data, size_t count = 1) { + return FReadEndian(data, sizeof(T), count) == count; + } + template <typename T> + bool DeSerialize(std::vector<T> &data) { + uint32_t size; + if (!DeSerialize(&size)) { + return false; + } else if (size == 0) { + data.clear(); + } else if (size > 50000000) { + // Arbitrarily limit the number of elements to protect against bad data. + return false; + } else if constexpr (std::is_same<T, std::string>::value) { + // Deserialize a string. + // TODO: optimize. + data.resize(size); + for (auto &item : data) { + if (!DeSerialize(item)) { + return false; + } + } + } else if constexpr (std::is_class<T>::value) { + // Deserialize a tesseract class. + // TODO: optimize. + data.resize(size); + for (auto &item : data) { + if (!item.DeSerialize(this)) { + return false; + } + } + } else if constexpr (std::is_pointer<T>::value) { + // Deserialize pointers. + // TODO: optimize. + data.resize(size); + for (uint32_t i = 0; i < size; i++) { + uint8_t non_null; + if (!DeSerialize(&non_null)) { + return false; + } + if (non_null) { + typedef typename std::remove_pointer<T>::type ST; + auto item = new ST; + if (!item->DeSerialize(this)) { + delete item; + return false; + } + data[i] = item; + } + } + } else { + // Deserialize a non-class. + // TODO: optimize. + data.resize(size); + return DeSerialize(&data[0], size); + } + return true; + } + + // Serialize data. + bool Serialize(const std::string &data); + bool Serialize(const std::vector<char> &data); + template <typename T> + bool Serialize(const T *data, size_t count = 1) { + return FWrite(data, sizeof(T), count) == count; + } + template <typename T> + bool Serialize(const std::vector<T> &data) { + // Serialize number of elements first. + uint32_t size = data.size(); + if (!Serialize(&size)) { + return false; + } else if constexpr (std::is_same<T, std::string>::value) { + // Serialize strings. + for (auto &&string : data) { + if (!Serialize(string)) { + return false; + } + } + } else if constexpr (std::is_class<T>::value) { + // Serialize a tesseract class. + for (auto &item : data) { + if (!item.Serialize(this)) { + return false; + } + } + } else if constexpr (std::is_pointer<T>::value) { + // Serialize pointers. + for (auto &item : data) { + uint8_t non_null = (item != nullptr); + if (!Serialize(&non_null)) { + return false; + } + if (non_null) { + if (!item->Serialize(this)) { + return false; + } + } + } + } else if (size > 0) { + // Serialize a non-class. + return Serialize(&data[0], size); + } + return true; + } + + // Skip data. + bool Skip(size_t count); + + // Reads a line like fgets. Returns nullptr on EOF, otherwise buffer. + // Reads at most buffer_size bytes, including '\0' terminator, even if + // the line is longer. Does nothing if buffer_size <= 0. + char *FGets(char *buffer, int buffer_size); + // Replicates fread, followed by a swap of the bytes if needed, returning the + // number of items read. If swap_ is true then the count items will each have + // size bytes reversed. + size_t FReadEndian(void *buffer, size_t size, size_t count); + // Replicates fread, returning the number of items read. + size_t FRead(void *buffer, size_t size, size_t count); + // Resets the TFile as if it has been Opened, but nothing read. + // Only allowed while reading! + void Rewind(); + + // Open for writing. Either supply a non-nullptr data with OpenWrite before + // calling FWrite, (no close required), or supply a nullptr data to OpenWrite + // and call CloseWrite to write to a file after the FWrites. + void OpenWrite(std::vector<char> *data); + bool CloseWrite(const char *filename, FileWriter writer); + + // Replicates fwrite, returning the number of items written. + // To use fprintf, use snprintf and FWrite. + size_t FWrite(const void *buffer, size_t size, size_t count); + +private: + // The buffered data from the file. + std::vector<char> *data_ = nullptr; + // The number of bytes used so far. + unsigned offset_ = 0; + // True if the data_ pointer is owned by *this. + bool data_is_owned_ = false; + // True if the TFile is open for writing. + bool is_writing_ = false; + // True if bytes need to be swapped in FReadEndian. + bool swap_ = false; +}; + +} // namespace tesseract. + +#endif
