Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccutil/serialis.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: serialis.cpp (Formerly serialmac.h) | |
| 3 * Description: Inline routines and macros for serialisation functions | |
| 4 * Author: Phil Cheatle | |
| 5 * | |
| 6 * (C) Copyright 1990, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "serialis.h" | |
| 20 | |
| 21 #include "errcode.h" | |
| 22 | |
| 23 #include "helpers.h" // for ReverseN | |
| 24 | |
| 25 #include <climits> // for INT_MAX | |
| 26 #include <cstdio> | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
// Default FileReader: reads the entire contents of filename into *data.
// Returns true only when the file could be opened, reported a size strictly
// between 0 and LONG_MAX, and was read completely. An empty file therefore
// yields false.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
  FILE *stream = fopen(filename, "rb");
  if (stream == nullptr) {
    return false;
  }
  // Find the file length by seeking to the end, then return to the start.
  fseek(stream, 0, SEEK_END);
  const long length = std::ftell(stream);
  fseek(stream, 0, SEEK_SET);
  bool ok = false;
  // Opening a directory on Linux reports LONG_MAX as the size; reject it here.
  // A failed ftell (-1) and a zero-length file fail the same test.
  if (length > 0 && length < LONG_MAX) {
    // Keep one spare byte of capacity so that a caller can append a '\0'
    // character without forcing a reallocation.
    data->reserve(length + 1);
    data->resize(length); // TODO: optimize no init
    const size_t bytes_read = fread(data->data(), 1, length, stream);
    ok = static_cast<long>(bytes_read) == length;
  }
  fclose(stream);
  return ok;
}
| 50 | |
// Default FileWriter: writes the bytes of data to filename, creating or
// truncating the file. Returns false if the file cannot be opened, if fewer
// than data.size() bytes are written, or if closing the file fails.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // An empty vector has no element 0 to take the address of, so it is
  // handled without touching the storage; otherwise data() is used.
  const bool written =
      data.empty() || fwrite(data.data(), 1, data.size(), fp) == data.size();
  // Buffered output may only reach the file during fclose, so a failure
  // there (for example a full disk) has to be reported as well.
  const bool closed = fclose(fp) == 0;
  return written && closed;
}
| 62 | |
| 63 TFile::TFile() { | |
| 64 } | |
| 65 | |
| 66 TFile::~TFile() { | |
| 67 if (data_is_owned_) { | |
| 68 delete data_; | |
| 69 } | |
| 70 } | |
| 71 | |
| 72 bool TFile::DeSerializeSize(int32_t *pSize) { | |
| 73 uint32_t size; | |
| 74 if (FReadEndian(&size, sizeof(size), 1) != 1) { | |
| 75 return false; | |
| 76 } | |
| 77 if (size > data_->size() / 4) { | |
| 78 // Reverse endianness. | |
| 79 swap_ = !swap_; | |
| 80 ReverseN(&size, 4); | |
| 81 } | |
| 82 *pSize = size; | |
| 83 return true; | |
| 84 } | |
| 85 | |
| 86 bool TFile::DeSerializeSkip(size_t size) { | |
| 87 uint32_t len; | |
| 88 if (!DeSerialize(&len)) { | |
| 89 return false; | |
| 90 } | |
| 91 return Skip(len * size); | |
| 92 } | |
| 93 | |
| 94 bool TFile::DeSerialize(std::string &data) { | |
| 95 uint32_t size; | |
| 96 if (!DeSerialize(&size)) { | |
| 97 return false; | |
| 98 } else if (size > 0) { | |
| 99 // TODO: optimize. | |
| 100 data.resize(size); | |
| 101 return DeSerialize(&data[0], size); | |
| 102 } | |
| 103 data.clear(); | |
| 104 return true; | |
| 105 } | |
| 106 | |
| 107 bool TFile::Serialize(const std::string &data) { | |
| 108 uint32_t size = data.size(); | |
| 109 return Serialize(&size) && Serialize(data.c_str(), size); | |
| 110 } | |
| 111 | |
| 112 bool TFile::DeSerialize(std::vector<char> &data) { | |
| 113 uint32_t size; | |
| 114 if (!DeSerialize(&size)) { | |
| 115 return false; | |
| 116 } else if (size > 0) { | |
| 117 // TODO: optimize. | |
| 118 data.resize(size); | |
| 119 return DeSerialize(&data[0], data.size()); | |
| 120 } | |
| 121 data.clear(); | |
| 122 return true; | |
| 123 } | |
| 124 | |
| 125 bool TFile::Serialize(const std::vector<char> &data) { | |
| 126 uint32_t size = data.size(); | |
| 127 if (!Serialize(&size)) { | |
| 128 return false; | |
| 129 } else if (size > 0) { | |
| 130 return Serialize(&data[0], size); | |
| 131 } | |
| 132 return true; | |
| 133 } | |
| 134 | |
| 135 bool TFile::Skip(size_t count) { | |
| 136 offset_ += count; | |
| 137 return true; | |
| 138 } | |
| 139 | |
| 140 bool TFile::Open(const char *filename, FileReader reader) { | |
| 141 if (!data_is_owned_) { | |
| 142 data_ = new std::vector<char>; | |
| 143 data_is_owned_ = true; | |
| 144 } | |
| 145 offset_ = 0; | |
| 146 is_writing_ = false; | |
| 147 swap_ = false; | |
| 148 if (reader == nullptr) { | |
| 149 return LoadDataFromFile(filename, data_); | |
| 150 } else { | |
| 151 return (*reader)(filename, data_); | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 bool TFile::Open(const char *data, size_t size) { | |
| 156 offset_ = 0; | |
| 157 if (!data_is_owned_) { | |
| 158 data_ = new std::vector<char>; | |
| 159 data_is_owned_ = true; | |
| 160 } | |
| 161 is_writing_ = false; | |
| 162 swap_ = false; | |
| 163 data_->resize(size); // TODO: optimize no init | |
| 164 memcpy(&(*data_)[0], data, size); | |
| 165 return true; | |
| 166 } | |
| 167 | |
| 168 bool TFile::Open(FILE *fp, int64_t end_offset) { | |
| 169 offset_ = 0; | |
| 170 auto current_pos = std::ftell(fp); | |
| 171 if (current_pos < 0) { | |
| 172 // ftell failed. | |
| 173 return false; | |
| 174 } | |
| 175 if (end_offset < 0) { | |
| 176 if (fseek(fp, 0, SEEK_END)) { | |
| 177 return false; | |
| 178 } | |
| 179 end_offset = ftell(fp); | |
| 180 if (fseek(fp, current_pos, SEEK_SET)) { | |
| 181 return false; | |
| 182 } | |
| 183 } | |
| 184 size_t size = end_offset - current_pos; | |
| 185 is_writing_ = false; | |
| 186 swap_ = false; | |
| 187 if (!data_is_owned_) { | |
| 188 data_ = new std::vector<char>; | |
| 189 data_is_owned_ = true; | |
| 190 } | |
| 191 data_->resize(size); // TODO: optimize no init | |
| 192 return fread(&(*data_)[0], 1, size, fp) == size; | |
| 193 } | |
| 194 | |
| 195 char *TFile::FGets(char *buffer, int buffer_size) { | |
| 196 ASSERT_HOST(!is_writing_); | |
| 197 int size = 0; | |
| 198 while (size + 1 < buffer_size && offset_ < data_->size()) { | |
| 199 buffer[size++] = (*data_)[offset_++]; | |
| 200 if ((*data_)[offset_ - 1] == '\n') { | |
| 201 break; | |
| 202 } | |
| 203 } | |
| 204 if (size < buffer_size) { | |
| 205 buffer[size] = '\0'; | |
| 206 } | |
| 207 return size > 0 ? buffer : nullptr; | |
| 208 } | |
| 209 | |
| 210 size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) { | |
| 211 auto num_read = FRead(buffer, size, count); | |
| 212 if (swap_ && size != 1) { | |
| 213 char *char_buffer = static_cast<char *>(buffer); | |
| 214 for (size_t i = 0; i < num_read; ++i, char_buffer += size) { | |
| 215 ReverseN(char_buffer, size); | |
| 216 } | |
| 217 } | |
| 218 return num_read; | |
| 219 } | |
| 220 | |
| 221 size_t TFile::FRead(void *buffer, size_t size, size_t count) { | |
| 222 ASSERT_HOST(!is_writing_); | |
| 223 ASSERT_HOST(size > 0); | |
| 224 size_t required_size; | |
| 225 if (SIZE_MAX / size <= count) { | |
| 226 // Avoid integer overflow. | |
| 227 required_size = data_->size() - offset_; | |
| 228 } else { | |
| 229 required_size = size * count; | |
| 230 if (data_->size() - offset_ < required_size) { | |
| 231 required_size = data_->size() - offset_; | |
| 232 } | |
| 233 } | |
| 234 if (required_size > 0 && buffer != nullptr) { | |
| 235 memcpy(buffer, &(*data_)[offset_], required_size); | |
| 236 } | |
| 237 offset_ += required_size; | |
| 238 return required_size / size; | |
| 239 } | |
| 240 | |
| 241 void TFile::Rewind() { | |
| 242 ASSERT_HOST(!is_writing_); | |
| 243 offset_ = 0; | |
| 244 } | |
| 245 | |
| 246 void TFile::OpenWrite(std::vector<char> *data) { | |
| 247 offset_ = 0; | |
| 248 if (data != nullptr) { | |
| 249 if (data_is_owned_) { | |
| 250 delete data_; | |
| 251 } | |
| 252 data_ = data; | |
| 253 data_is_owned_ = false; | |
| 254 } else if (!data_is_owned_) { | |
| 255 data_ = new std::vector<char>; | |
| 256 data_is_owned_ = true; | |
| 257 } | |
| 258 is_writing_ = true; | |
| 259 swap_ = false; | |
| 260 data_->clear(); | |
| 261 } | |
| 262 | |
| 263 bool TFile::CloseWrite(const char *filename, FileWriter writer) { | |
| 264 ASSERT_HOST(is_writing_); | |
| 265 if (writer == nullptr) { | |
| 266 return SaveDataToFile(*data_, filename); | |
| 267 } else { | |
| 268 return (*writer)(*data_, filename); | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 size_t TFile::FWrite(const void *buffer, size_t size, size_t count) { | |
| 273 ASSERT_HOST(is_writing_); | |
| 274 ASSERT_HOST(size > 0); | |
| 275 ASSERT_HOST(SIZE_MAX / size > count); | |
| 276 size_t total = size * count; | |
| 277 const char *buf = static_cast<const char *>(buffer); | |
| 278 // This isn't very efficient, but memory is so fast compared to disk | |
| 279 // that it is relatively unimportant, and very simple. | |
| 280 for (size_t i = 0; i < total; ++i) { | |
| 281 data_->push_back(buf[i]); | |
| 282 } | |
| 283 return count; | |
| 284 } | |
| 285 | |
| 286 } // namespace tesseract. |
