diff mupdf-source/thirdparty/tesseract/src/ccstruct/ocrpara.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/ocrpara.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,93 @@
+/////////////////////////////////////////////////////////////////////
+// File:        ocrpara.cpp
+// Description: OCR Paragraph Output Type
+// Author:      David Eger
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "ocrpara.h"
+
+#include "host.h" // For NearlyEqual()
+
+#include <cstdio>
+
+namespace tesseract {
+
+using tesseract::JUSTIFICATION_CENTER;
+using tesseract::JUSTIFICATION_LEFT;
+using tesseract::JUSTIFICATION_RIGHT;
+using tesseract::JUSTIFICATION_UNKNOWN;
+
+// Maps a paragraph justification value to the fixed upper-case label used in
+// ParagraphModel::ToString(). Any value other than LEFT, RIGHT or CENTER is
+// reported as "UNKNOWN".
+static const char *ParagraphJustificationToString(tesseract::ParagraphJustification justification) {
+  if (justification == JUSTIFICATION_LEFT) {
+    return "LEFT";
+  }
+  if (justification == JUSTIFICATION_RIGHT) {
+    return "RIGHT";
+  }
+  if (justification == JUSTIFICATION_CENTER) {
+    return "CENTER";
+  }
+  return "UNKNOWN";
+}
+
+// Checks whether a line with the given margins and indents matches this
+// model's first-line geometry.
+//  - Left justified:  lmargin + lindent is compared with margin_ + first_indent_.
+//  - Right justified: rmargin + rindent is compared with margin_ + first_indent_.
+//  - Centered:        lindent is compared with rindent, using twice the tolerance.
+// All comparisons go through NearlyEqual(). Any other justification yields false.
+bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const {
+  if (justification_ == JUSTIFICATION_LEFT) {
+    const auto expected_offset = margin_ + first_indent_;
+    return NearlyEqual(lmargin + lindent, expected_offset, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_RIGHT) {
+    const auto expected_offset = margin_ + first_indent_;
+    return NearlyEqual(rmargin + rindent, expected_offset, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_CENTER) {
+    // Only the balance between the two indents matters for centered text.
+    return NearlyEqual(lindent, rindent, tolerance_ * 2);
+  }
+  // Not expected to be reached with a properly initialised model.
+  return false;
+}
+
+// Checks whether a line with the given margins and indents matches this
+// model's body-line geometry.
+//  - Left justified:  lmargin + lindent is compared with margin_ + body_indent_.
+//  - Right justified: rmargin + rindent is compared with margin_ + body_indent_.
+//  - Centered:        lindent is compared with rindent, using twice the tolerance.
+// All comparisons go through NearlyEqual(). Any other justification yields false.
+bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const {
+  if (justification_ == JUSTIFICATION_LEFT) {
+    const auto expected_offset = margin_ + body_indent_;
+    return NearlyEqual(lmargin + lindent, expected_offset, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_RIGHT) {
+    const auto expected_offset = margin_ + body_indent_;
+    return NearlyEqual(rmargin + rindent, expected_offset, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_CENTER) {
+    // Only the balance between the two indents matters for centered text.
+    return NearlyEqual(lindent, rindent, tolerance_ * 2);
+  }
+  // Not expected to be reached with a properly initialised model.
+  return false;
+}
+
+// Decides whether two paragraph models are close enough to be treated as
+// comparable. Models with different justifications never are. Centered and
+// unknown-justification models are comparable as soon as the justifications
+// agree. Otherwise both the first-line offset (margin_ + first_indent_) and the
+// body offset (margin_ + body_indent_) must be NearlyEqual() within a quarter
+// of the two models' summed tolerances.
+bool ParagraphModel::Comparable(const ParagraphModel &other) const {
+  if (justification_ != other.justification_) {
+    return false;
+  }
+  switch (justification_) {
+    case JUSTIFICATION_CENTER:
+    case JUSTIFICATION_UNKNOWN:
+      // No indent comparison is made for these justifications.
+      return true;
+    default:
+      break;
+  }
+  const int shared_tolerance = (tolerance_ + other.tolerance_) / 4;
+  if (!NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_,
+                   shared_tolerance)) {
+    return false;
+  }
+  return NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_,
+                     shared_tolerance);
+}
+
+// Renders the model as a single human-readable line listing margin, first-line
+// indent, body indent and the justification label. snprintf() bounds the write
+// to the size of the local buffer.
+std::string ParagraphModel::ToString() const {
+  char text[200];
+  snprintf(text, sizeof(text), "margin: %d, first_indent: %d, body_indent: %d, alignment: %s",
+           margin_, first_indent_, body_indent_, ParagraphJustificationToString(justification_));
+  return text;
+}
+
+} // namespace tesseract