diff mupdf-source/thirdparty/tesseract/src/api/renderer.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/api/renderer.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,244 @@
+///////////////////////////////////////////////////////////////////////
+// File:        renderer.cpp
+// Description: Rendering interface to inject into TessBaseAPI
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+#include <tesseract/baseapi.h>
+#include <tesseract/renderer.h>
+#include <cstring>
+#include <memory>     // std::unique_ptr
+#include <string>     // std::string
+#include "serialis.h" // Serialize
+
+namespace tesseract {
+
+/**********************************************************************
+ * Base Renderer interface implementation
+ **********************************************************************/
+// Builds a renderer that writes to stdout when outputbase is "-" or "stdout",
+// and otherwise to the file "<outputbase>.<extension>" opened in "wb" mode.
+TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension)
+    : next_(nullptr)
+    , fout_(stdout)
+    , file_extension_(extension)
+    , title_("")
+    , imagenum_(-1)
+    , happy_(true) {
+  if (strcmp(outputbase, "-") != 0 && strcmp(outputbase, "stdout") != 0) {
+    const std::string path = std::string(outputbase) + "." + extension;
+    fout_ = fopen(path.c_str(), "wb");
+    happy_ = (fout_ != nullptr); // a failed open marks the renderer as unusable
+  }
+}
+
+// Releases the output stream, then deletes the renderer chained behind this one.
+TessResultRenderer::~TessResultRenderer() {
+  if (fout_ == stdout) {
+    // stdout is not closed here; only its error and end-of-file indicators are reset.
+    clearerr(fout_);
+  } else if (fout_ != nullptr) {
+    fclose(fout_);
+  }
+  delete next_;
+}
+
+// Links the chain headed by `next` directly after this renderer; whatever followed
+// this renderer before is re-attached behind the last element of the new chain.
+void TessResultRenderer::insert(TessResultRenderer *next) {
+  if (next == nullptr) {
+    return;
+  }
+  TessResultRenderer *const displaced = next_;
+  next_ = next;
+  if (displaced != nullptr) {
+    TessResultRenderer *tail = next;
+    for (; tail->next_ != nullptr; tail = tail->next_) {}
+    tail->next_ = displaced;
+  }
+}
+
+// Starts a new document here and then on the chained renderer, which is called even
+// if this renderer's own handler fails. Returns false at once if happy_ is false.
+bool TessResultRenderer::BeginDocument(const char *title) {
+  if (!happy_) {
+    return false;
+  }
+  title_ = title;
+  imagenum_ = -1; // AddImage() increments this before calling its handler
+  const bool self_ok = BeginDocumentHandler();
+  const bool rest_ok = (next_ == nullptr) || next_->BeginDocument(title);
+  return rest_ok && self_ok;
+}
+
+// Advances imagenum_, runs this renderer's AddImageHandler and then the chained
+// renderer's AddImage with the same api; true only if both succeed, false if !happy_.
+bool TessResultRenderer::AddImage(TessBaseAPI *api) {
+  if (!happy_) {
+    return false;
+  }
+  ++imagenum_;
+  const bool self_ok = AddImageHandler(api);
+  const bool rest_ok = (next_ == nullptr) || next_->AddImage(api);
+  return rest_ok && self_ok;
+}
+
+// Finishes the document on this renderer, then on the chained renderer.
+// Returns true only if both report success; returns false at once if !happy_.
+bool TessResultRenderer::EndDocument() {
+  if (!happy_) {
+    return false;
+  }
+  const bool self_ok = EndDocumentHandler();
+  const bool rest_ok = (next_ == nullptr) || next_->EndDocument();
+  return rest_ok && self_ok;
+}
+
+// Appends the C string s (terminator excluded) via AppendData; null is a no-op.
+void TessResultRenderer::AppendString(const char *s) {
+  if (s != nullptr) {
+    AppendData(s, strlen(s));
+  }
+}
+
+// Writes len bytes from s to fout_ and then flushes it. happy_ is cleared if either
+// step fails: buffered bytes can still be lost at flush time (e.g. on a full disk).
+void TessResultRenderer::AppendData(const char *s, int len) {
+  const bool written = tesseract::Serialize(fout_, s, len);
+  happy_ = (fflush(fout_) == 0) && written && happy_; // flush runs even after a failed write
+}
+
+// Base-class document-start step: writes nothing to the output and simply
+// returns the current value of happy_.
+bool TessResultRenderer::BeginDocumentHandler() { return happy_; }
+
+// Base-class document-end step: writes nothing to the output and simply
+// returns the current value of happy_.
+bool TessResultRenderer::EndDocumentHandler() { return happy_; }
+
+/**********************************************************************
+ * UTF8 Text Renderer interface implementation
+ **********************************************************************/
+TessTextRenderer::TessTextRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "txt") {} // output: "<outputbase>.txt", or stdout
+
+// Appends the page's UTF-8 text, preceded by the "page_separator" value when that
+// value is non-empty and imagenum() > 0. Returns false if the API yields no text.
+bool TessTextRenderer::AddImageHandler(TessBaseAPI *api) {
+  const std::unique_ptr<const char[]> page_text(api->GetUTF8Text());
+  if (!page_text) {
+    return false;
+  }
+  const char *const separator = api->GetStringVariable("page_separator");
+  const bool has_separator = separator != nullptr && separator[0] != '\0';
+  if (has_separator && imagenum() > 0) {
+    AppendString(separator);
+  }
+  AppendString(page_text.get());
+  return true;
+}
+
+/**********************************************************************
+ * TSV Text Renderer interface implementation
+ **********************************************************************/
+// TSV renderer with output "<outputbase>.tsv" (or stdout); font_info_ starts as false.
+TessTsvRenderer::TessTsvRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "tsv"), font_info_(false) {}
+
+// TSV renderer with output "<outputbase>.tsv" (or stdout); font_info is stored in
+// font_info_. NOTE(review): font_info_ is not read in the visible part of this file.
+TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
+    : TessResultRenderer(outputbase, "tsv"), font_info_(font_info) {}
+
+// Writes the tab-separated column-heading row; returns true unconditionally.
+bool TessTsvRenderer::BeginDocumentHandler() {
+  static const char kColumnHeadings[] = "level\tpage_num\tblock_num\tpar_num\tline_num\t"
+                                        "word_num\tleft\ttop\twidth\theight\tconf\ttext\n";
+  AppendString(kColumnHeadings);
+  return true;
+}
+
+// TSV document-end step: writes nothing to the output and unconditionally
+// returns true.
+bool TessTsvRenderer::EndDocumentHandler() { return true; }
+
+// Appends the text returned by GetTSVText(imagenum()) to the output.
+// Returns false, writing nothing, when the API returns a null pointer.
+bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) {
+  const std::unique_ptr<const char[]> page_rows(api->GetTSVText(imagenum()));
+  const bool have_rows = page_rows != nullptr;
+  if (have_rows) {
+    AppendString(page_rows.get());
+  }
+  return have_rows;
+}
+
+/**********************************************************************
+ * UNLV Text Renderer interface implementation
+ **********************************************************************/
+TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "unlv") {} // output: "<outputbase>.unlv", or stdout
+
+// Appends the text returned by GetUNLVText() to the output.
+// Returns false, writing nothing, when the API returns a null pointer.
+bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) {
+  const std::unique_ptr<const char[]> page_text(api->GetUNLVText());
+  const bool have_text = page_text != nullptr;
+  if (have_text) {
+    AppendString(page_text.get());
+  }
+  return have_text;
+}
+
+/**********************************************************************
+ * BoxText Renderer interface implementation
+ **********************************************************************/
+TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "box") {} // output: "<outputbase>.box", or stdout
+
+// Appends the text returned by GetBoxText(imagenum()) to the output.
+// Returns false, writing nothing, when the API returns a null pointer.
+bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) {
+  const std::unique_ptr<const char[]> box_text(api->GetBoxText(imagenum()));
+  const bool have_text = box_text != nullptr;
+  if (have_text) {
+    AppendString(box_text.get());
+  }
+  return have_text;
+}
+
+#ifndef DISABLED_LEGACY_ENGINE
+
+/**********************************************************************
+ * Osd Text Renderer interface implementation (output: "<outputbase>.osd", or stdout)
+ **********************************************************************/
+TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {}
+
+// Appends the text returned by GetOsdText(imagenum()) to the output.
+// Returns false, writing nothing, when the API returns a null pointer.
+bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) {
+  const std::unique_ptr<const char[]> osd_text(api->GetOsdText(imagenum()));
+  const bool have_text = osd_text != nullptr;
+  if (have_text) {
+    AppendString(osd_text.get());
+  }
+  return have_text;
+}
+
+#endif // ndef DISABLED_LEGACY_ENGINE
+
+} // namespace tesseract