diff mupdf-source/thirdparty/tesseract/src/textord/alignedblob.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/textord/alignedblob.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,118 @@
+///////////////////////////////////////////////////////////////////////
+// File:        alignedblob.h
+// Description: A class to find vertically aligned blobs in a BBGrid,
+//              and a struct to hold control parameters.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+#define TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"
+#include "tabvector.h"
+
+namespace tesseract {
+
+extern INT_VAR_H(textord_debug_bugs);
+extern INT_VAR_H(textord_debug_tabfind);
+extern BOOL_VAR_H(textord_debug_printable);
+
+// Simple structure to hold the search parameters for AlignedBlob.
+// The members are mostly derived from constants, which are
+// conditioned on the alignment parameter.
+// For finding vertical lines, a different set of constants are
+// used, conditioned on the different constructor.
+struct AlignedBlobParams {
+  // Constructor to set the parameters for finding aligned and ragged tabs.
+  // Vertical_x and vertical_y are the current estimates of the true vertical
+  // direction (up) in the image. Height is the height of the starter blob.
+  // v_gap_multiple is the multiple of height that will be used as a limit
+  // on vertical gap before giving up and calling the line ended.
+  // resolution is the original image resolution, and align0 indicates the
+  // type of tab stop to be found.
+  AlignedBlobParams(int vertical_x, int vertical_y, int height, int v_gap_multiple,
+                    int min_gutter_width, int resolution, TabAlignment alignment0);
+  // Constructor to set the parameters for finding vertical lines.
+  // Vertical_x and vertical_y are the current estimates of the true vertical
+  // direction (up) in the image. Width is the width of the starter blob.
+  AlignedBlobParams(int vertical_x, int vertical_y, int width);
+
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  void set_vertical(int vertical_x, int vertical_y);
+
+  double gutter_fraction; // Multiple of height used for min_gutter.
+  bool right_tab;         // We are looking at right edges.
+  bool ragged;            // We are looking for a ragged (vs aligned) edge.
+  TabAlignment alignment; // The type we are trying to produce.
+  TabType confirmed_type; // Type to flag blobs if accepted.
+  int max_v_gap;          // Max vertical gap to be tolerated.
+  int min_gutter;         // Minimum gutter between columns.
+  // Tolerances allowed on horizontal alignment of aligned edges.
+  int l_align_tolerance; // Left edges.
+  int r_align_tolerance; // Right edges.
+  // Conditions for accepting a line.
+  int min_points; // Minimum number of points to be OK.
+  int min_length; // Min length of completed line.
+
+  ICOORD vertical; // Current estimate of logical vertical.
+};
+
+// The AlignedBlob class contains code to find vertically aligned blobs.
+// This is factored out into a separate class, so it can be used by both
+// vertical line finding (LineFind) and tabstop finding (TabFind).
+class TESS_API AlignedBlob : public BlobGrid {
+public:
+  AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright);
+  ~AlignedBlob() override;
+
+  // Return true if the given coordinates are within the test rectangle
+  // and the debug level is at least the given detail level.
+  static bool WithinTestRegion(int detail_level, int x, int y);
+
+  // Display the tab codes of the BLOBNBOXes in this grid.
+  ScrollView *DisplayTabs(const char *window_name, ScrollView *tab_win);
+
+  // Finds a vector corresponding to a set of vertically aligned blob edges
+  // running through the given box. The type of vector returned and the
+  // search parameters are determined by the AlignedBlobParams.
+  // vertical_x and y are updated with an estimate of the real
+  // vertical direction. (skew finding.)
+  // Returns nullptr if no decent vector can be found.
+  TabVector *FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x,
+                                   int *vertical_y);
+
+private:
+  // Find a set of blobs that are aligned in the given vertical
+  // direction with the given blob. Returns a list of aligned
+  // blobs and the number in the list.
+  // For other parameters see FindAlignedBlob below.
+  int AlignTabs(const AlignedBlobParams &params, bool top_to_bottom, BLOBNBOX *bbox,
+                BLOBNBOX_CLIST *good_points, int *end_y);
+
+  // Search vertically for a blob that is aligned with the input bbox.
+  // The search parameters are determined by AlignedBlobParams.
+  // top_to_bottom tells whether to search down or up.
+  // The return value is nullptr if nothing was found in the search box
+  // or if a blob was found in the gutter. On a nullptr return, end_y
+  // is set to the edge of the search box or the leading edge of the
+  // gutter blob if one was found.
+  BLOBNBOX *FindAlignedBlob(const AlignedBlobParams &p, bool top_to_bottom, BLOBNBOX *bbox,
+                            int x_start, int *end_y);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_