diff mupdf-source/thirdparty/tesseract/src/wordrec/chop.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/wordrec/chop.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,320 @@
+/******************************************************************************
+ *
+ * File:        chop.cpp  (Formerly chop.c)
+ * Author:      Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+
+/*----------------------------------------------------------------------
+              I n c l u d e s
+----------------------------------------------------------------------*/
+
+#define _USE_MATH_DEFINES // for M_PI
+#include "chop.h"
+#include <cmath> // for M_PI
+#include "outlines.h"
+#include "plotedges.h"
+#include "wordrec.h"
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+namespace tesseract {
+
+// Show if the line is going in the positive or negative X direction.
+static int direction(const EDGEPT *point) {
+  //* direction to return
+  int dir = 0;
+  //* prev point
+  const EDGEPT *prev = point->prev;
+  //* next point
+  const EDGEPT *next = point->next;
+
+  if (((prev->pos.x <= point->pos.x) && (point->pos.x < next->pos.x)) ||
+      ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) {
+    dir = 1;
+  }
+  if (((prev->pos.x >= point->pos.x) && (point->pos.x > next->pos.x)) ||
+      ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) {
+    dir = -1;
+  }
+
+  return dir;
+}
+
+/**
+ * @name point_priority
+ *
+ * Assign a priority to and edge point that might be used as part of a
+ * split. The argument should be of type EDGEPT.
+ */
+PRIORITY Wordrec::point_priority(EDGEPT *point) {
+  return static_cast<PRIORITY>(angle_change(point->prev, point, point->next));
+}
+
+/**
+ * @name add_point_to_list
+ *
+ * Add an edge point to a POINT_GROUP containing a list of other points.
+ */
+void Wordrec::add_point_to_list(PointHeap *point_heap, EDGEPT *point) {
+  if (point_heap->size() < MAX_NUM_POINTS - 2) {
+    PointPair pair(point_priority(point), point);
+    point_heap->Push(&pair);
+  }
+
+#ifndef GRAPHICS_DISABLED
+  if (chop_debug > 2) {
+    mark_outline(point);
+  }
+#endif
+}
+
+// Returns true if the edgept supplied as input is an inside angle.  This
+// is determined by the angular change of the vectors from point to point.
+bool Wordrec::is_inside_angle(EDGEPT *pt) {
+  return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
+}
+
+/**
+ * @name angle_change
+ *
+ * Return the change in angle (degrees) of the line segments between
+ * points one and two, and two and three.
+ */
+int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
+  VECTOR vector1;
+  VECTOR vector2;
+
+  int angle;
+
+  /* Compute angle */
+  vector1.x = point2->pos.x - point1->pos.x;
+  vector1.y = point2->pos.y - point1->pos.y;
+  vector2.x = point3->pos.x - point2->pos.x;
+  vector2.y = point3->pos.y - point2->pos.y;
+  /* Use cross product */
+  float length = std::sqrt(static_cast<float>(vector1.length2()) * vector2.length2());
+  if (static_cast<int>(length) == 0) {
+    return (0);
+  }
+  auto f = vector1.cross(vector2) / length;
+  // Avoid FP exception in std::asin caused by illegal values of f
+  // (caused by rounding errors).
+  if (f <= -1.0f) {
+    angle = -90;
+  } else if (f >= 1.0f) {
+    angle = 90;
+  } else {
+    angle = static_cast<int>(floor(std::asin(f) / M_PI * 180.0 + 0.5));
+    // Use dot product.
+    if (vector1.dot(vector2) < 0) {
+      angle = 180 - angle;
+    }
+    // Adjust angle.
+    if (angle > 180) {
+      angle -= 360;
+    } else if (angle <= -180) {
+      angle += 360;
+    }
+  }
+  return angle;
+}
+
+/**
+ * @name pick_close_point
+ *
+ * Choose the edge point that is closest to the critical point.  This
+ * point may not be exactly vertical from the critical point.
+ */
+EDGEPT *Wordrec::pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) {
+  EDGEPT *best_point = nullptr;
+  int this_distance;
+  bool found_better;
+
+  do {
+    found_better = false;
+
+    this_distance = edgept_dist(critical_point, vertical_point);
+    if (this_distance <= *best_dist) {
+      if (!(same_point(critical_point->pos, vertical_point->pos) ||
+            same_point(critical_point->pos, vertical_point->next->pos) ||
+            (best_point && same_point(best_point->pos, vertical_point->pos)) ||
+            is_exterior_point(critical_point, vertical_point))) {
+        *best_dist = this_distance;
+        best_point = vertical_point;
+        if (chop_vertical_creep) {
+          found_better = true;
+        }
+      }
+    }
+    vertical_point = vertical_point->next;
+  } while (found_better == true);
+
+  return (best_point);
+}
+
+/**
+ * @name prioritize_points
+ *
+ * Find a list of edge points from the outer outline of this blob.  For
+ * each of these points assign a priority.  Sort these points using a
+ * heap structure so that they can be visited in order.
+ */
+void Wordrec::prioritize_points(TESSLINE *outline, PointHeap *points) {
+  EDGEPT *this_point;
+  EDGEPT *local_min = nullptr;
+  EDGEPT *local_max = nullptr;
+
+  this_point = outline->loop;
+  local_min = this_point;
+  local_max = this_point;
+  do {
+    if (this_point->vec.y < 0) {
+      /* Look for minima */
+      if (local_max != nullptr) {
+        new_max_point(local_max, points);
+      } else if (is_inside_angle(this_point)) {
+        add_point_to_list(points, this_point);
+      }
+      local_max = nullptr;
+      local_min = this_point->next;
+    } else if (this_point->vec.y > 0) {
+      /* Look for maxima */
+      if (local_min != nullptr) {
+        new_min_point(local_min, points);
+      } else if (is_inside_angle(this_point)) {
+        add_point_to_list(points, this_point);
+      }
+      local_min = nullptr;
+      local_max = this_point->next;
+    } else {
+      /* Flat area */
+      if (local_max != nullptr) {
+        if (local_max->prev->vec.y != 0) {
+          new_max_point(local_max, points);
+        }
+        local_max = this_point->next;
+        local_min = nullptr;
+      } else {
+        if (local_min->prev->vec.y != 0) {
+          new_min_point(local_min, points);
+        }
+        local_min = this_point->next;
+        local_max = nullptr;
+      }
+    }
+
+    /* Next point */
+    this_point = this_point->next;
+  } while (this_point != outline->loop);
+}
+
+/**
+ * @name new_min_point
+ *
+ * Found a new minimum point try to decide whether to save it or not.
+ * Return the new value for the local minimum.  If a point is saved then
+ * the local minimum is reset to nullptr.
+ */
+void Wordrec::new_min_point(EDGEPT *local_min, PointHeap *points) {
+  int16_t dir;
+
+  dir = direction(local_min);
+
+  if (dir < 0) {
+    add_point_to_list(points, local_min);
+    return;
+  }
+
+  if (dir == 0 && point_priority(local_min) < 0) {
+    add_point_to_list(points, local_min);
+    return;
+  }
+}
+
+/**
+ * @name new_max_point
+ *
+ * Found a new minimum point try to decide whether to save it or not.
+ * Return the new value for the local minimum.  If a point is saved then
+ * the local minimum is reset to nullptr.
+ */
+void Wordrec::new_max_point(EDGEPT *local_max, PointHeap *points) {
+  int16_t dir;
+
+  dir = direction(local_max);
+
+  if (dir > 0) {
+    add_point_to_list(points, local_max);
+    return;
+  }
+
+  if (dir == 0 && point_priority(local_max) < 0) {
+    add_point_to_list(points, local_max);
+    return;
+  }
+}
+
+/**
+ * @name vertical_projection_point
+ *
+ * For one point on the outline, find the corresponding point on the
+ * other side of the outline that is a likely projection for a split
+ * point.  This is done by iterating through the edge points until the
+ * X value of the point being looked at is greater than the X value of
+ * the split point.  Ensure that the point being returned is not right
+ * next to the split point.  Return the edge point in *best_point as
+ * a result, and any points that were newly created are also saved on
+ * the new_points list.
+ */
+void Wordrec::vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
+                                        EDGEPT **best_point, EDGEPT_CLIST *new_points) {
+  EDGEPT *p;           /* Iterator */
+  EDGEPT *this_edgept; /* Iterator */
+  EDGEPT_C_IT new_point_it(new_points);
+  int x = split_point->pos.x;     /* X value of vertical */
+  int best_dist = LARGE_DISTANCE; /* Best point found */
+
+  if (*best_point != nullptr) {
+    best_dist = edgept_dist(split_point, *best_point);
+  }
+
+  p = target_point;
+  /* Look at each edge point */
+  do {
+    if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) &&
+        !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) &&
+        !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) {
+      if (near_point(split_point, p, p->next, &this_edgept)) {
+        new_point_it.add_before_then_move(this_edgept);
+      }
+
+      if (*best_point == nullptr) {
+        best_dist = edgept_dist(split_point, this_edgept);
+      }
+
+      this_edgept = pick_close_point(split_point, this_edgept, &best_dist);
+      if (this_edgept) {
+        *best_point = this_edgept;
+      }
+    }
+
+    p = p->next;
+  } while (p != target_point);
+}
+
+} // namespace tesseract