Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccutil/serialis.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccutil/serialis.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,286 @@ +/********************************************************************** + * File: serialis.cpp (Formerly serialmac.h) + * Description: Inline routines and macros for serialisation functions + * Author: Phil Cheatle + * + * (C) Copyright 1990, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "serialis.h" + +#include "errcode.h" + +#include "helpers.h" // for ReverseN + +#include <climits> // for INT_MAX +#include <cstdio> + +namespace tesseract { + +// The default FileReader loads the whole file into the vector of char, +// returning false on error. +bool LoadDataFromFile(const char *filename, std::vector<char> *data) { + bool result = false; + FILE *fp = fopen(filename, "rb"); + if (fp != nullptr) { + fseek(fp, 0, SEEK_END); + auto size = std::ftell(fp); + fseek(fp, 0, SEEK_SET); + // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here. 
+ if (size > 0 && size < LONG_MAX) { + // reserve an extra byte in case caller wants to append a '\0' character + data->reserve(size + 1); + data->resize(size); // TODO: optimize no init + result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size; + } + fclose(fp); + } + return result; +} + +// The default FileWriter writes the vector of char to the filename file, +// returning false on error. +bool SaveDataToFile(const std::vector<char> &data, const char *filename) { + FILE *fp = fopen(filename, "wb"); + if (fp == nullptr) { + return false; + } + bool result = fwrite(&data[0], 1, data.size(), fp) == data.size(); + fclose(fp); + return result; +} + +TFile::TFile() { +} + +TFile::~TFile() { + if (data_is_owned_) { + delete data_; + } +} + +bool TFile::DeSerializeSize(int32_t *pSize) { + uint32_t size; + if (FReadEndian(&size, sizeof(size), 1) != 1) { + return false; + } + if (size > data_->size() / 4) { + // Reverse endianness. + swap_ = !swap_; + ReverseN(&size, 4); + } + *pSize = size; + return true; +} + +bool TFile::DeSerializeSkip(size_t size) { + uint32_t len; + if (!DeSerialize(&len)) { + return false; + } + return Skip(len * size); +} + +bool TFile::DeSerialize(std::string &data) { + uint32_t size; + if (!DeSerialize(&size)) { + return false; + } else if (size > 0) { + // TODO: optimize. + data.resize(size); + return DeSerialize(&data[0], size); + } + data.clear(); + return true; +} + +bool TFile::Serialize(const std::string &data) { + uint32_t size = data.size(); + return Serialize(&size) && Serialize(data.c_str(), size); +} + +bool TFile::DeSerialize(std::vector<char> &data) { + uint32_t size; + if (!DeSerialize(&size)) { + return false; + } else if (size > 0) { + // TODO: optimize. 
+ data.resize(size); + return DeSerialize(&data[0], data.size()); + } + data.clear(); + return true; +} + +bool TFile::Serialize(const std::vector<char> &data) { + uint32_t size = data.size(); + if (!Serialize(&size)) { + return false; + } else if (size > 0) { + return Serialize(&data[0], size); + } + return true; +} + +bool TFile::Skip(size_t count) { + offset_ += count; + return true; +} + +bool TFile::Open(const char *filename, FileReader reader) { + if (!data_is_owned_) { + data_ = new std::vector<char>; + data_is_owned_ = true; + } + offset_ = 0; + is_writing_ = false; + swap_ = false; + if (reader == nullptr) { + return LoadDataFromFile(filename, data_); + } else { + return (*reader)(filename, data_); + } +} + +bool TFile::Open(const char *data, size_t size) { + offset_ = 0; + if (!data_is_owned_) { + data_ = new std::vector<char>; + data_is_owned_ = true; + } + is_writing_ = false; + swap_ = false; + data_->resize(size); // TODO: optimize no init + memcpy(&(*data_)[0], data, size); + return true; +} + +bool TFile::Open(FILE *fp, int64_t end_offset) { + offset_ = 0; + auto current_pos = std::ftell(fp); + if (current_pos < 0) { + // ftell failed. + return false; + } + if (end_offset < 0) { + if (fseek(fp, 0, SEEK_END)) { + return false; + } + end_offset = ftell(fp); + if (fseek(fp, current_pos, SEEK_SET)) { + return false; + } + } + size_t size = end_offset - current_pos; + is_writing_ = false; + swap_ = false; + if (!data_is_owned_) { + data_ = new std::vector<char>; + data_is_owned_ = true; + } + data_->resize(size); // TODO: optimize no init + return fread(&(*data_)[0], 1, size, fp) == size; +} + +char *TFile::FGets(char *buffer, int buffer_size) { + ASSERT_HOST(!is_writing_); + int size = 0; + while (size + 1 < buffer_size && offset_ < data_->size()) { + buffer[size++] = (*data_)[offset_++]; + if ((*data_)[offset_ - 1] == '\n') { + break; + } + } + if (size < buffer_size) { + buffer[size] = '\0'; + } + return size > 0 ? 
buffer : nullptr; +} + +size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) { + auto num_read = FRead(buffer, size, count); + if (swap_ && size != 1) { + char *char_buffer = static_cast<char *>(buffer); + for (size_t i = 0; i < num_read; ++i, char_buffer += size) { + ReverseN(char_buffer, size); + } + } + return num_read; +} + +size_t TFile::FRead(void *buffer, size_t size, size_t count) { + ASSERT_HOST(!is_writing_); + ASSERT_HOST(size > 0); + size_t required_size; + if (SIZE_MAX / size <= count) { + // Avoid integer overflow. + required_size = data_->size() - offset_; + } else { + required_size = size * count; + if (data_->size() - offset_ < required_size) { + required_size = data_->size() - offset_; + } + } + if (required_size > 0 && buffer != nullptr) { + memcpy(buffer, &(*data_)[offset_], required_size); + } + offset_ += required_size; + return required_size / size; +} + +void TFile::Rewind() { + ASSERT_HOST(!is_writing_); + offset_ = 0; +} + +void TFile::OpenWrite(std::vector<char> *data) { + offset_ = 0; + if (data != nullptr) { + if (data_is_owned_) { + delete data_; + } + data_ = data; + data_is_owned_ = false; + } else if (!data_is_owned_) { + data_ = new std::vector<char>; + data_is_owned_ = true; + } + is_writing_ = true; + swap_ = false; + data_->clear(); +} + +bool TFile::CloseWrite(const char *filename, FileWriter writer) { + ASSERT_HOST(is_writing_); + if (writer == nullptr) { + return SaveDataToFile(*data_, filename); + } else { + return (*writer)(*data_, filename); + } +} + +size_t TFile::FWrite(const void *buffer, size_t size, size_t count) { + ASSERT_HOST(is_writing_); + ASSERT_HOST(size > 0); + ASSERT_HOST(SIZE_MAX / size > count); + size_t total = size * count; + const char *buf = static_cast<const char *>(buffer); + // This isn't very efficient, but memory is so fast compared to disk + // that it is relatively unimportant, and very simple. 
+ for (size_t i = 0; i < total; ++i) { + data_->push_back(buf[i]); + } + return count; +} + +} // namespace tesseract.
