Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/api/renderer.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/api/renderer.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,244 @@ +/////////////////////////////////////////////////////////////////////// +// File: renderer.cpp +// Description: Rendering interface to inject into TessBaseAPI +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif +#include <tesseract/baseapi.h> +#include <tesseract/renderer.h> +#include <cstring> +#include <memory> // std::unique_ptr +#include <string> // std::string +#include "serialis.h" // Serialize + +namespace tesseract { + +/********************************************************************** + * Base Renderer interface implementation + **********************************************************************/ +TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension) + : next_(nullptr) + , fout_(stdout) + , file_extension_(extension) + , title_("") + , imagenum_(-1) + , happy_(true) { + if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { + std::string outfile = std::string(outputbase) + "." 
+ extension; + fout_ = fopen(outfile.c_str(), "wb"); + if (fout_ == nullptr) { + happy_ = false; + } + } +} + +TessResultRenderer::~TessResultRenderer() { + if (fout_ != nullptr) { + if (fout_ != stdout) { + fclose(fout_); + } else { + clearerr(fout_); + } + } + delete next_; +} + +void TessResultRenderer::insert(TessResultRenderer *next) { + if (next == nullptr) { + return; + } + + TessResultRenderer *remainder = next_; + next_ = next; + if (remainder) { + while (next->next_ != nullptr) { + next = next->next_; + } + next->next_ = remainder; + } +} + +bool TessResultRenderer::BeginDocument(const char *title) { + if (!happy_) { + return false; + } + title_ = title; + imagenum_ = -1; + bool ok = BeginDocumentHandler(); + if (next_) { + ok = next_->BeginDocument(title) && ok; + } + return ok; +} + +bool TessResultRenderer::AddImage(TessBaseAPI *api) { + if (!happy_) { + return false; + } + ++imagenum_; + bool ok = AddImageHandler(api); + if (next_) { + ok = next_->AddImage(api) && ok; + } + return ok; +} + +bool TessResultRenderer::EndDocument() { + if (!happy_) { + return false; + } + bool ok = EndDocumentHandler(); + if (next_) { + ok = next_->EndDocument() && ok; + } + return ok; +} + +void TessResultRenderer::AppendString(const char *s) { + if (s == nullptr) { + return; + } + AppendData(s, strlen(s)); +} + +void TessResultRenderer::AppendData(const char *s, int len) { + if (!tesseract::Serialize(fout_, s, len)) { + happy_ = false; + } + fflush(fout_); +} + +bool TessResultRenderer::BeginDocumentHandler() { + return happy_; +} + +bool TessResultRenderer::EndDocumentHandler() { + return happy_; +} + +/********************************************************************** + * UTF8 Text Renderer interface implementation + **********************************************************************/ +TessTextRenderer::TessTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "txt") {} + +bool TessTextRenderer::AddImageHandler(TessBaseAPI *api) { + const 
std::unique_ptr<const char[]> utf8(api->GetUTF8Text()); + if (utf8 == nullptr) { + return false; + } + + const char *pageSeparator = api->GetStringVariable("page_separator"); + if (pageSeparator != nullptr && *pageSeparator != '\0' && imagenum() > 0) { + AppendString(pageSeparator); + } + + AppendString(utf8.get()); + + return true; +} + +/********************************************************************** + * TSV Text Renderer interface implementation + **********************************************************************/ +TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") { + font_info_ = false; +} + +TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) + : TessResultRenderer(outputbase, "tsv") { + font_info_ = font_info; +} + +bool TessTsvRenderer::BeginDocumentHandler() { + // Output TSV column headings + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + "num\tleft\ttop\twidth\theight\tconf\ttext\n"); + return true; +} + +bool TessTsvRenderer::EndDocumentHandler() { + return true; +} + +bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) { + const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum())); + if (tsv == nullptr) { + return false; + } + + AppendString(tsv.get()); + + return true; +} + +/********************************************************************** + * UNLV Text Renderer interface implementation + **********************************************************************/ +TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "unlv") {} + +bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) { + const std::unique_ptr<const char[]> unlv(api->GetUNLVText()); + if (unlv == nullptr) { + return false; + } + + AppendString(unlv.get()); + + return true; +} + +/********************************************************************** + * BoxText Renderer interface implementation + 
**********************************************************************/ +TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "box") {} + +bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) { + const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum())); + if (text == nullptr) { + return false; + } + + AppendString(text.get()); + + return true; +} + +#ifndef DISABLED_LEGACY_ENGINE + +/********************************************************************** + * Osd Text Renderer interface implementation + **********************************************************************/ +TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {} + +bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) { + const std::unique_ptr<const char[]> osd(api->GetOsdText(imagenum())); + if (osd == nullptr) { + return false; + } + + AppendString(osd.get()); + + return true; +} + +#endif // ndef DISABLED_LEGACY_ENGINE + +} // namespace tesseract
