diff mupdf-source/thirdparty/tesseract/src/classify/mfx.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/classify/mfx.cpp	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,144 @@
+/******************************************************************************
+ ** Filename:       mfx.c
+ ** Purpose:        Micro feature extraction routines
+ ** Author:         Dan Johnson
+ **
+ ** (c) Copyright Hewlett-Packard Company, 1988.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *****************************************************************************/
+
+#include "mfx.h"
+
+#include "clusttool.h" //NEEDED
+#include "intfx.h"
+#include "mfdefs.h"
+#include "mfoutline.h"
+#include "normalis.h"
+#include "params.h"
+
+namespace tesseract {
+
+/* old numbers corresponded to 10.0 degrees and 80.0 degrees */
+double_VAR(classify_min_slope, 0.414213562, "Slope below which lines are called horizontal");
+double_VAR(classify_max_slope, 2.414213562, "Slope above which lines are called vertical");
+
+/*----------------------------------------------------------------------------
+          Private Function Prototypes
+-----------------------------------------------------------------------------*/
+
+MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES MicroFeatures);
+
+MicroFeature ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
+
+/*----------------------------------------------------------------------------
+            Public Code
+----------------------------------------------------------------------------*/
+
+/**
+ * This routine extracts micro-features from the specified
+ * blob and returns a list of the micro-features.  All
+ * micro-features are normalized according to the specified
+ * line statistics.
+ * @param Blob blob to extract micro-features from
+ * @param cn_denorm control parameter to feature extractor
+ * @return List of micro-features extracted from the blob.
+ */
+MICROFEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM &cn_denorm) {
+  MICROFEATURES MicroFeatures;
+  LIST Outlines;
+  LIST RemainingOutlines;
+
+  if (Blob != nullptr) {
+    Outlines = ConvertBlob(Blob);
+
+    RemainingOutlines = Outlines;
+    iterate(RemainingOutlines) {
+      auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
+      CharNormalizeOutline(Outline, cn_denorm);
+    }
+
+    RemainingOutlines = Outlines;
+    iterate(RemainingOutlines) {
+      auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
+      FindDirectionChanges(Outline, classify_min_slope, classify_max_slope);
+      MarkDirectionChanges(Outline);
+      MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures);
+    }
+    FreeOutlines(Outlines);
+  }
+  return MicroFeatures;
+} /* BlobMicroFeatures */
+
+/*---------------------------------------------------------------------------
+            Private Code
+---------------------------------------------------------------------------*/
+
+/**
+ * Convert Outline to MicroFeatures
+ * @param Outline         outline to extract micro-features from
+ * @param MicroFeatures   list of micro-features to add to
+ * @return List of micro-features with new features added to front.
+ * @note Globals: none
+ */
+MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES MicroFeatures) {
+  MFOUTLINE Current;
+  MFOUTLINE Last;
+  MFOUTLINE First;
+
+  if (DegenerateOutline(Outline)) {
+    return (MicroFeatures);
+  }
+
+  First = NextExtremity(Outline);
+  Last = First;
+  do {
+    Current = NextExtremity(Last);
+    if (!PointAt(Current)->Hidden) {
+      auto NewFeature = ExtractMicroFeature(Last, Current);
+      MicroFeatures.push_front(NewFeature);
+    }
+    Last = Current;
+  } while (Last != First);
+
+  return MicroFeatures;
+} /* ConvertToMicroFeatures */
+
+/**
+ * This routine computes the feature parameters which describe
+ * the micro-feature that starts and Start and ends at End.
+ * A new micro-feature is allocated, filled with the feature
+ * parameters, and returned.  The routine assumes that
+ * Start and End are not the same point.  If they are the
+ * same point, nullptr is returned, a warning message is
+ * printed, and the current outline is dumped to stdout.
+ * @param Start starting point of micro-feature
+ * @param End ending point of micro-feature
+ * @return New micro-feature or nullptr if the feature was rejected.
+ * @note Globals: none
+ */
+MicroFeature ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) {
+  MFEDGEPT *P1, *P2;
+
+  P1 = PointAt(Start);
+  P2 = PointAt(End);
+
+  MicroFeature NewFeature;
+  NewFeature[(int)MicroFeatureParameter::MFXPosition] = AverageOf(P1->Point.x, P2->Point.x);
+  NewFeature[(int)MicroFeatureParameter::MFYPosition] = AverageOf(P1->Point.y, P2->Point.y);
+  NewFeature[(int)MicroFeatureParameter::MFLength] = DistanceBetween(P1->Point, P2->Point);
+  NewFeature[(int)MicroFeatureParameter::MFDirection] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0);
+  NewFeature[(int)MicroFeatureParameter::MFBulge1] = 0.0f;  // deprecated
+  NewFeature[(int)MicroFeatureParameter::MFBulge2] = 0.0f; // deprecated
+
+  return NewFeature;
+} /* ExtractMicroFeature */
+
+} // namespace tesseract