diff mupdf-source/thirdparty/tesseract/src/ccstruct/seam.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/seam.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,206 @@
+/******************************************************************************
+ *
+ * File:        seam.h
+ * Author:      Mark Seaman, SW Productivity
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+#ifndef SEAM_H
+#define SEAM_H
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "blobs.h"
+#include "split.h"
+
+namespace tesseract {
+
+using PRIORITY = float; // Type alias: PRIORITY is a float (the type SEAM uses for priority_).
+
+// A SEAM bundles a priority, a location and up to kMaxNumSplits SPLITs,
+// together with the widthp_/widthn_ blob range described on those members.
+class SEAM {
+public:
+  // Builds a seam that carries no splits.
+  SEAM(float priority, const TPOINT &location)
+      : priority_(priority), location_(location), num_splits_(0) {}
+  // Builds a seam that carries exactly one split: start from the empty seam,
+  // then store the split in the first slot.
+  SEAM(float priority, const TPOINT &location, const SPLIT &split)
+      : SEAM(priority, location) {
+    splits_[0] = split;
+    num_splits_ = 1;
+  }
+  // The compiler-generated copy constructor, operator= and destructor suffice.
+
+  // Accessors.
+  float priority() const {
+    return priority_;
+  }
+  void set_priority(float priority) {
+    priority_ = priority;
+  }
+  bool HasAnySplits() const {
+    return num_splits_ != 0;
+  }
+
+  // Bounding box enclosing every point in the seam.
+  TBOX bounding_box() const;
+
+  // Reports whether other may be merged into *this. All of the following must
+  // hold: the two locations are strictly closer than max_x_dist in x, the
+  // combined split count fits in kMaxNumSplits, the summed priorities are
+  // strictly below max_total_priority, no splits overlap vertically, and no
+  // split positions are shared.
+  bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const {
+    const int x_gap = location_.x - other.location_.x;
+    if (x_gap <= -max_x_dist || x_gap >= max_x_dist) {
+      return false;
+    }
+    if (num_splits_ + other.num_splits_ > kMaxNumSplits) {
+      return false;
+    }
+    // Negated '<' (rather than '>=') so that a NaN sum is rejected as well.
+    if (!(priority_ + other.priority_ < max_total_priority)) {
+      return false;
+    }
+    if (OverlappingSplits(other)) {
+      return false;
+    }
+    return !SharesPosition(other);
+  }
+
+  // Merges other into *this. Only works if CombineableWith returned true.
+  // Priorities are added, the location becomes the sum of both locations
+  // divided by 2, and other's splits are appended while free slots remain.
+  void CombineWith(const SEAM &other) {
+    priority_ += other.priority_;
+    location_ += other.location_;
+    location_ /= 2;
+
+    uint8_t src = 0;
+    while (src < other.num_splits_ && num_splits_ < kMaxNumSplits) {
+      splits_[num_splits_] = other.splits_[src];
+      ++num_splits_;
+      ++src;
+    }
+  }
+
+  // True when every split of *this SEAM is contained by the given blob
+  // (trivially true for a seam without splits).
+  bool ContainedByBlob(const TBLOB &blob) const {
+    const SPLIT *const end = splits_ + num_splits_;
+    for (const SPLIT *split = splits_; split != end; ++split) {
+      if (!split->ContainedByBlob(blob)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // True when any split of this SEAM uses the given EDGEPT. Only the EDGEPT
+  // pointer is checked, not the coordinates.
+  bool UsesPoint(const EDGEPT *point) const {
+    const SPLIT *const end = splits_ + num_splits_;
+    for (const SPLIT *split = splits_; split != end; ++split) {
+      if (split->UsesPoint(point)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // True when *this and other have any point in common, judged by coordinates.
+  bool SharesPosition(const SEAM &other) const {
+    for (int mine = 0; mine < num_splits_; ++mine) {
+      const SPLIT &split = splits_[mine];
+      for (int theirs = 0; theirs < other.num_splits_; ++theirs) {
+        if (split.SharesPosition(other.splits_[theirs])) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+  // True when some split of *this overlaps some split of other vertically.
+  bool OverlappingSplits(const SEAM &other) const {
+    const SPLIT *const own_end = splits_ + num_splits_;
+    const SPLIT *const other_end = other.splits_ + other.num_splits_;
+    for (const SPLIT *own = splits_; own != own_end; ++own) {
+      TBOX own_box = own->bounding_box();
+      for (const SPLIT *theirs = other.splits_; theirs != other_end; ++theirs) {
+        TBOX their_box = theirs->bounding_box();
+        if (own_box.y_overlap(their_box)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  // Marks both end points of every split as chopped, so the segments made by
+  // the cut never get split further by another seam in the future.
+  void Finalize() {
+    for (int s = 0; s < num_splits_; ++s) {
+      SPLIT &split = splits_[s];
+      split.point1->MarkChop();
+      split.point2->MarkChop();
+    }
+  }
+
+  // Returns true if the splits in *this SEAM look sound: they cross no
+  // outlines and do not chop off any ridiculously small pieces.
+  bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
+
+  // Works out the widthp_/widthn_ range of every existing SEAM and of *this,
+  // which is about to be inserted at insert_index. A false return means one
+  // of the computations failed, which indicates an invalid chop.
+  // widthn_/widthp_ are written only when modify is true.
+  bool PrepareToInsertSeam(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                           int insert_index, bool modify);
+  // Works out the widthp_/widthn_ range. Returns false when not all the splits
+  // are accounted for. widthn_/widthp_ are written only when modify is true.
+  bool FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify);
+
+  // Applies the splits held by *this SEAM, dividing the blob into two blobs.
+  void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const;
+  // Reverses ApplySeam by removing the seam between the two blobs. The result
+  // is a single blob; other_blob is deleted.
+  void UndoSeam(TBLOB *blob, TBLOB *other_blob) const;
+
+  // Prints everything in *this SEAM.
+  void Print(const char *label) const;
+  // Prints a collection of SEAMs.
+  static void PrintSeams(const char *label, const std::vector<SEAM *> &seams);
+#ifndef GRAPHICS_DISABLED
+  // Draws the seam in the given window.
+  void Mark(ScrollView *window) const;
+#endif
+
+  // Breaks up the blobs in this chain so that they are all independent.
+  // This operation should undo the effect of joining pieces (see JoinPieces).
+  static void BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                          int first, int last);
+  // Joins a group of base level pieces into a single blob that can then be
+  // classified.
+  static void JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                         int first, int last);
+
+  // Hides the seam so the outlines appear not to be cut by it.
+  void Hide() const;
+  // Reverses Hide, so the outlines are cut by the seam again.
+  void Reveal() const;
+
+  // Computes and returns the full priority of *this SEAM without storing it.
+  // The arguments are config parameters defined in Wordrec; prefix each name
+  // with chop_ to get the parameter name.
+  float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
+                     double center_knob, double width_change_knob) const;
+
+private:
+  // Upper bound on the number of splits one SEAM can hold.
+  static const uint8_t kMaxNumSplits = 3;
+  // Priority of this split. Lower is better.
+  float priority_;
+  // Position of the middle of the seam.
+  TPOINT location_;
+  // Together these give a range such that every split in *this SEAM lies
+  // within blobs in [index - widthn_, index + widthp_], where index is the
+  // position of this SEAM in the seams vector.
+  uint8_t widthp_ = 0;
+  uint8_t widthn_ = 0;
+  // How many entries of splits_ are in use.
+  uint8_t num_splits_;
+  // The pairs of points forming the two ends of each split in the SEAM.
+  SPLIT splits_[kMaxNumSplits];
+};
+
+// Declaration only; the definition is not part of this header.
+// NOTE(review): presumably fills *seam_array with the initial SEAMs for word --
+// confirm against the definition before relying on this.
+void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
+
+} // namespace tesseract
+
+#endif