comparison mupdf-source/thirdparty/tesseract/src/ccutil/serialis.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /**********************************************************************
2 * File: serialis.h (Formerly serialmac.h)
3 * Description: Inline routines and macros for serialisation functions
4 * Author: Phil Cheatle
5 *
6 * (C) Copyright 1990, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 #ifndef SERIALIS_H
20 #define SERIALIS_H
21
22 #include <tesseract/baseapi.h> // FileReader
23 #include <cstdint> // uint8_t
24 #include <cstdio>
25 #include <cstdlib>
26 #include <cstring>
27 #include <type_traits>
28 #include <vector> // std::vector
29
30 namespace tesseract {
31
// Yields the number of elements of a built-in array. The extent N is deduced
// from the argument's type, so the array itself is never read.
template <typename T, size_t N>
constexpr auto countof(const T (&)[N]) noexcept -> size_t {
  return N;
}
37
38 // Function to write a std::vector<char> to a whole file.
39 // Returns false on failure.
40 using FileWriter = bool (*)(const std::vector<char> &data, const char *filename);
41
42 TESS_API
43 bool LoadDataFromFile(const char *filename, std::vector<char> *data);
44 TESS_API
45 bool SaveDataToFile(const std::vector<char> &data, const char *filename);
46
// Reads n objects of type T from fp directly into the memory at data.
// Returns true only when fread delivered all n items.
template <typename T>
bool DeSerialize(FILE *fp, T *data, size_t n = 1) {
  const size_t items_read = fread(data, sizeof(T), n, fp);
  return items_read == n;
}
52
// Writes n objects of type T from the memory at data to fp.
// Returns true only when fwrite accepted all n items.
template <typename T>
bool Serialize(FILE *fp, const T *data, size_t n = 1) {
  const size_t items_written = fwrite(data, sizeof(T), n, fp);
  return items_written == n;
}
58
59 // Simple file class.
60 // Allows for portable file input from memory and from foreign file systems.
61 class TESS_API TFile {
62 public:
63 TFile();
64 ~TFile();
65
66 // All the Open methods load the whole file into memory for reading.
67 // Opens a file with a supplied reader, or nullptr to use the default.
68 // Note that mixed read/write is not supported.
69 bool Open(const char *filename, FileReader reader);
70 // From an existing memory buffer.
71 bool Open(const char *data, size_t size);
72 // From an open file and an end offset.
73 bool Open(FILE *fp, int64_t end_offset);
74 // Sets the value of the swap flag, so that FReadEndian does the right thing.
75 void set_swap(bool value) {
76 swap_ = value;
77 }
78
79 // Deserialize data.
80 bool DeSerializeSize(int32_t *data);
81 bool DeSerializeSkip(size_t size = 1);
82 bool DeSerialize(std::string &data);
83 bool DeSerialize(std::vector<char> &data);
84 //bool DeSerialize(std::vector<std::string> &data);
85 template <typename T>
86 bool DeSerialize(T *data, size_t count = 1) {
87 return FReadEndian(data, sizeof(T), count) == count;
88 }
89 template <typename T>
90 bool DeSerialize(std::vector<T> &data) {
91 uint32_t size;
92 if (!DeSerialize(&size)) {
93 return false;
94 } else if (size == 0) {
95 data.clear();
96 } else if (size > 50000000) {
97 // Arbitrarily limit the number of elements to protect against bad data.
98 return false;
99 } else if constexpr (std::is_same<T, std::string>::value) {
100 // Deserialize a string.
101 // TODO: optimize.
102 data.resize(size);
103 for (auto &item : data) {
104 if (!DeSerialize(item)) {
105 return false;
106 }
107 }
108 } else if constexpr (std::is_class<T>::value) {
109 // Deserialize a tesseract class.
110 // TODO: optimize.
111 data.resize(size);
112 for (auto &item : data) {
113 if (!item.DeSerialize(this)) {
114 return false;
115 }
116 }
117 } else if constexpr (std::is_pointer<T>::value) {
118 // Deserialize pointers.
119 // TODO: optimize.
120 data.resize(size);
121 for (uint32_t i = 0; i < size; i++) {
122 uint8_t non_null;
123 if (!DeSerialize(&non_null)) {
124 return false;
125 }
126 if (non_null) {
127 typedef typename std::remove_pointer<T>::type ST;
128 auto item = new ST;
129 if (!item->DeSerialize(this)) {
130 delete item;
131 return false;
132 }
133 data[i] = item;
134 }
135 }
136 } else {
137 // Deserialize a non-class.
138 // TODO: optimize.
139 data.resize(size);
140 return DeSerialize(&data[0], size);
141 }
142 return true;
143 }
144
145 // Serialize data.
146 bool Serialize(const std::string &data);
147 bool Serialize(const std::vector<char> &data);
148 template <typename T>
149 bool Serialize(const T *data, size_t count = 1) {
150 return FWrite(data, sizeof(T), count) == count;
151 }
152 template <typename T>
153 bool Serialize(const std::vector<T> &data) {
154 // Serialize number of elements first.
155 uint32_t size = data.size();
156 if (!Serialize(&size)) {
157 return false;
158 } else if constexpr (std::is_same<T, std::string>::value) {
159 // Serialize strings.
160 for (auto &&string : data) {
161 if (!Serialize(string)) {
162 return false;
163 }
164 }
165 } else if constexpr (std::is_class<T>::value) {
166 // Serialize a tesseract class.
167 for (auto &item : data) {
168 if (!item.Serialize(this)) {
169 return false;
170 }
171 }
172 } else if constexpr (std::is_pointer<T>::value) {
173 // Serialize pointers.
174 for (auto &item : data) {
175 uint8_t non_null = (item != nullptr);
176 if (!Serialize(&non_null)) {
177 return false;
178 }
179 if (non_null) {
180 if (!item->Serialize(this)) {
181 return false;
182 }
183 }
184 }
185 } else if (size > 0) {
186 // Serialize a non-class.
187 return Serialize(&data[0], size);
188 }
189 return true;
190 }
191
192 // Skip data.
193 bool Skip(size_t count);
194
195 // Reads a line like fgets. Returns nullptr on EOF, otherwise buffer.
196 // Reads at most buffer_size bytes, including '\0' terminator, even if
197 // the line is longer. Does nothing if buffer_size <= 0.
198 char *FGets(char *buffer, int buffer_size);
199 // Replicates fread, followed by a swap of the bytes if needed, returning the
200 // number of items read. If swap_ is true then the count items will each have
201 // size bytes reversed.
202 size_t FReadEndian(void *buffer, size_t size, size_t count);
203 // Replicates fread, returning the number of items read.
204 size_t FRead(void *buffer, size_t size, size_t count);
205 // Resets the TFile as if it has been Opened, but nothing read.
206 // Only allowed while reading!
207 void Rewind();
208
209 // Open for writing. Either supply a non-nullptr data with OpenWrite before
210 // calling FWrite, (no close required), or supply a nullptr data to OpenWrite
211 // and call CloseWrite to write to a file after the FWrites.
212 void OpenWrite(std::vector<char> *data);
213 bool CloseWrite(const char *filename, FileWriter writer);
214
215 // Replicates fwrite, returning the number of items written.
216 // To use fprintf, use snprintf and FWrite.
217 size_t FWrite(const void *buffer, size_t size, size_t count);
218
219 private:
220 // The buffered data from the file.
221 std::vector<char> *data_ = nullptr;
222 // The number of bytes used so far.
223 unsigned offset_ = 0;
224 // True if the data_ pointer is owned by *this.
225 bool data_is_owned_ = false;
226 // True if the TFile is open for writing.
227 bool is_writing_ = false;
228 // True if bytes need to be swapped in FReadEndian.
229 bool swap_ = false;
230 };
231
232 } // namespace tesseract.
233
234 #endif