Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/serialis.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: serialis.h (Formerly serialmac.h) | |
| 3 * Description: Inline routines and macros for serialisation functions | |
| 4 * Author: Phil Cheatle | |
| 5 * | |
| 6 * (C) Copyright 1990, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #ifndef SERIALIS_H | |
| 20 #define SERIALIS_H | |
| 21 | |
| 22 #include <tesseract/baseapi.h> // FileReader | |
| 23 #include <cstdint> // uint8_t | |
| 24 #include <cstdio> | |
| 25 #include <cstdlib> | |
| 26 #include <cstring> | |
| 27 #include <type_traits> | |
| 28 #include <vector> // std::vector | |
| 29 | |
| 30 namespace tesseract { | |
| 31 | |
// Yields the element count N of a built-in array of type T[N].
// Only genuine arrays bind to the parameter; a pointer will not compile.
template <typename T, size_t N>
constexpr auto countof(const T (&)[N]) noexcept -> size_t {
  return N;
}
| 37 | |
| 38 // Function to write a std::vector<char> to a whole file. | |
| 39 // Returns false on failure. | |
| 40 using FileWriter = bool (*)(const std::vector<char> &data, const char *filename); | |
| 41 | |
| 42 TESS_API | |
| 43 bool LoadDataFromFile(const char *filename, std::vector<char> *data); | |
| 44 TESS_API | |
| 45 bool SaveDataToFile(const std::vector<char> &data, const char *filename); | |
| 46 | |
// Reads n items of type T from the stdio stream fp into the buffer at data
// with a single fread call (raw bytes, no byte swapping).
// Returns true only if all n items were read.
template <typename T>
bool DeSerialize(FILE *fp, T *data, size_t n = 1) {
  const size_t items_read = fread(data, sizeof(T), n, fp);
  return items_read == n;
}
| 52 | |
// Writes n items of type T from the buffer at data to the stdio stream fp
// with a single fwrite call (raw bytes, native byte order).
// Returns true only if all n items were written.
template <typename T>
bool Serialize(FILE *fp, const T *data, size_t n = 1) {
  const size_t items_written = fwrite(data, sizeof(T), n, fp);
  return items_written == n;
}
| 58 | |
| 59 // Simple file class. | |
| 60 // Allows for portable file input from memory and from foreign file systems. | |
| 61 class TESS_API TFile { | |
| 62 public: | |
| 63 TFile(); | |
| 64 ~TFile(); | |
| 65 | |
| 66 // All the Open methods load the whole file into memory for reading. | |
| 67 // Opens a file with a supplied reader, or nullptr to use the default. | |
| 68 // Note that mixed read/write is not supported. | |
| 69 bool Open(const char *filename, FileReader reader); | |
| 70 // From an existing memory buffer. | |
| 71 bool Open(const char *data, size_t size); | |
| 72 // From an open file and an end offset. | |
| 73 bool Open(FILE *fp, int64_t end_offset); | |
| 74 // Sets the value of the swap flag, so that FReadEndian does the right thing. | |
| 75 void set_swap(bool value) { | |
| 76 swap_ = value; | |
| 77 } | |
| 78 | |
| 79 // Deserialize data. | |
| 80 bool DeSerializeSize(int32_t *data); | |
| 81 bool DeSerializeSkip(size_t size = 1); | |
| 82 bool DeSerialize(std::string &data); | |
| 83 bool DeSerialize(std::vector<char> &data); | |
| 84 //bool DeSerialize(std::vector<std::string> &data); | |
| 85 template <typename T> | |
| 86 bool DeSerialize(T *data, size_t count = 1) { | |
| 87 return FReadEndian(data, sizeof(T), count) == count; | |
| 88 } | |
| 89 template <typename T> | |
| 90 bool DeSerialize(std::vector<T> &data) { | |
| 91 uint32_t size; | |
| 92 if (!DeSerialize(&size)) { | |
| 93 return false; | |
| 94 } else if (size == 0) { | |
| 95 data.clear(); | |
| 96 } else if (size > 50000000) { | |
| 97 // Arbitrarily limit the number of elements to protect against bad data. | |
| 98 return false; | |
| 99 } else if constexpr (std::is_same<T, std::string>::value) { | |
| 100 // Deserialize a string. | |
| 101 // TODO: optimize. | |
| 102 data.resize(size); | |
| 103 for (auto &item : data) { | |
| 104 if (!DeSerialize(item)) { | |
| 105 return false; | |
| 106 } | |
| 107 } | |
| 108 } else if constexpr (std::is_class<T>::value) { | |
| 109 // Deserialize a tesseract class. | |
| 110 // TODO: optimize. | |
| 111 data.resize(size); | |
| 112 for (auto &item : data) { | |
| 113 if (!item.DeSerialize(this)) { | |
| 114 return false; | |
| 115 } | |
| 116 } | |
| 117 } else if constexpr (std::is_pointer<T>::value) { | |
| 118 // Deserialize pointers. | |
| 119 // TODO: optimize. | |
| 120 data.resize(size); | |
| 121 for (uint32_t i = 0; i < size; i++) { | |
| 122 uint8_t non_null; | |
| 123 if (!DeSerialize(&non_null)) { | |
| 124 return false; | |
| 125 } | |
| 126 if (non_null) { | |
| 127 typedef typename std::remove_pointer<T>::type ST; | |
| 128 auto item = new ST; | |
| 129 if (!item->DeSerialize(this)) { | |
| 130 delete item; | |
| 131 return false; | |
| 132 } | |
| 133 data[i] = item; | |
| 134 } | |
| 135 } | |
| 136 } else { | |
| 137 // Deserialize a non-class. | |
| 138 // TODO: optimize. | |
| 139 data.resize(size); | |
| 140 return DeSerialize(&data[0], size); | |
| 141 } | |
| 142 return true; | |
| 143 } | |
| 144 | |
| 145 // Serialize data. | |
| 146 bool Serialize(const std::string &data); | |
| 147 bool Serialize(const std::vector<char> &data); | |
| 148 template <typename T> | |
| 149 bool Serialize(const T *data, size_t count = 1) { | |
| 150 return FWrite(data, sizeof(T), count) == count; | |
| 151 } | |
| 152 template <typename T> | |
| 153 bool Serialize(const std::vector<T> &data) { | |
| 154 // Serialize number of elements first. | |
| 155 uint32_t size = data.size(); | |
| 156 if (!Serialize(&size)) { | |
| 157 return false; | |
| 158 } else if constexpr (std::is_same<T, std::string>::value) { | |
| 159 // Serialize strings. | |
| 160 for (auto &&string : data) { | |
| 161 if (!Serialize(string)) { | |
| 162 return false; | |
| 163 } | |
| 164 } | |
| 165 } else if constexpr (std::is_class<T>::value) { | |
| 166 // Serialize a tesseract class. | |
| 167 for (auto &item : data) { | |
| 168 if (!item.Serialize(this)) { | |
| 169 return false; | |
| 170 } | |
| 171 } | |
| 172 } else if constexpr (std::is_pointer<T>::value) { | |
| 173 // Serialize pointers. | |
| 174 for (auto &item : data) { | |
| 175 uint8_t non_null = (item != nullptr); | |
| 176 if (!Serialize(&non_null)) { | |
| 177 return false; | |
| 178 } | |
| 179 if (non_null) { | |
| 180 if (!item->Serialize(this)) { | |
| 181 return false; | |
| 182 } | |
| 183 } | |
| 184 } | |
| 185 } else if (size > 0) { | |
| 186 // Serialize a non-class. | |
| 187 return Serialize(&data[0], size); | |
| 188 } | |
| 189 return true; | |
| 190 } | |
| 191 | |
| 192 // Skip data. | |
| 193 bool Skip(size_t count); | |
| 194 | |
| 195 // Reads a line like fgets. Returns nullptr on EOF, otherwise buffer. | |
| 196 // Reads at most buffer_size bytes, including '\0' terminator, even if | |
| 197 // the line is longer. Does nothing if buffer_size <= 0. | |
| 198 char *FGets(char *buffer, int buffer_size); | |
| 199 // Replicates fread, followed by a swap of the bytes if needed, returning the | |
| 200 // number of items read. If swap_ is true then the count items will each have | |
| 201 // size bytes reversed. | |
| 202 size_t FReadEndian(void *buffer, size_t size, size_t count); | |
| 203 // Replicates fread, returning the number of items read. | |
| 204 size_t FRead(void *buffer, size_t size, size_t count); | |
| 205 // Resets the TFile as if it has been Opened, but nothing read. | |
| 206 // Only allowed while reading! | |
| 207 void Rewind(); | |
| 208 | |
| 209 // Open for writing. Either supply a non-nullptr data with OpenWrite before | |
| 210 // calling FWrite, (no close required), or supply a nullptr data to OpenWrite | |
| 211 // and call CloseWrite to write to a file after the FWrites. | |
| 212 void OpenWrite(std::vector<char> *data); | |
| 213 bool CloseWrite(const char *filename, FileWriter writer); | |
| 214 | |
| 215 // Replicates fwrite, returning the number of items written. | |
| 216 // To use fprintf, use snprintf and FWrite. | |
| 217 size_t FWrite(const void *buffer, size_t size, size_t count); | |
| 218 | |
| 219 private: | |
| 220 // The buffered data from the file. | |
| 221 std::vector<char> *data_ = nullptr; | |
| 222 // The number of bytes used so far. | |
| 223 unsigned offset_ = 0; | |
| 224 // True if the data_ pointer is owned by *this. | |
| 225 bool data_is_owned_ = false; | |
| 226 // True if the TFile is open for writing. | |
| 227 bool is_writing_ = false; | |
| 228 // True if bytes need to be swapped in FReadEndian. | |
| 229 bool swap_ = false; | |
| 230 }; | |
| 231 | |
| 232 } // namespace tesseract. | |
| 233 | |
| 234 #endif |
