comparison mupdf-source/thirdparty/tesseract/src/textord/alignedblob.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: alignedblob.h
3 // Description: A class to find vertically aligned blobs in a BBGrid,
4 // and a struct to hold control parameters.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 #ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_
21 #define TESSERACT_TEXTORD_ALIGNEDBLOB_H_
22
23 #include "bbgrid.h"
24 #include "blobbox.h"
25 #include "tabvector.h"
26
27 namespace tesseract {
28
29 extern INT_VAR_H(textord_debug_bugs); // NOTE(review): integer debug parameter; definition is not in this header -- confirm where it lives.
30 extern INT_VAR_H(textord_debug_tabfind); // NOTE(review): integer debug parameter; presumably the tab-finding debug level -- confirm.
31 extern BOOL_VAR_H(textord_debug_printable); // NOTE(review): boolean debug parameter; exact effect not visible here -- confirm.
32
33 // Simple structure to hold the search parameters for AlignedBlob.
34 // The members are mostly derived from constants, which are
35 // conditioned on the alignment parameter.
36 // For finding vertical lines, a different set of constants is
37 // used, selected by calling the other (three-argument) constructor.
38 struct AlignedBlobParams {
39 // Constructor to set the parameters for finding aligned and ragged tabs.
40 // vertical_x and vertical_y are the current estimates of the true vertical
41 // direction (up) in the image. height is the height of the starter blob.
42 // v_gap_multiple is the multiple of height that will be used as a limit
43 // on vertical gap before giving up and calling the line ended.
44 // resolution is the original image resolution, and alignment0 indicates the
45 // type of tab stop to be found. NOTE(review): min_gutter_width is presumably a lower bound applied to min_gutter -- confirm in the implementation.
46 AlignedBlobParams(int vertical_x, int vertical_y, int height, int v_gap_multiple,
47 int min_gutter_width, int resolution, TabAlignment alignment0);
48 // Constructor to set the parameters for finding vertical lines.
49 // vertical_x and vertical_y are the current estimates of the true vertical
50 // direction (up) in the image. width is the width of the starter blob.
51 AlignedBlobParams(int vertical_x, int vertical_y, int width);
52
53 // Fit the vertical vector (vertical_x, vertical_y) into an ICOORD, which is 16 bit.
54 void set_vertical(int vertical_x, int vertical_y);
55
56 double gutter_fraction; // Multiple of height used for min_gutter.
57 bool right_tab; // We are looking at right edges.
58 bool ragged; // We are looking for a ragged (vs aligned) edge.
59 TabAlignment alignment; // The type of alignment we are trying to produce.
60 TabType confirmed_type; // Type to flag blobs with if accepted.
61 int max_v_gap; // Max vertical gap to be tolerated.
62 int min_gutter; // Minimum gutter between columns.
63 // Tolerances allowed on horizontal alignment of aligned edges.
64 int l_align_tolerance; // Tolerance for left edges.
65 int r_align_tolerance; // Tolerance for right edges.
66 // Conditions for accepting a line.
67 int min_points; // Minimum number of points for the line to be OK.
68 int min_length; // Minimum length of a completed line.
69
70 ICOORD vertical; // Current estimate of logical vertical.
71 };
72
73 // The AlignedBlob class contains code to find vertically aligned blobs.
74 // This is factored out into a separate class, so it can be used by both
75 // vertical line finding (LineFind) and tabstop finding (TabFind).
76 class TESS_API AlignedBlob : public BlobGrid {
77 public:
78 AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright);
79 ~AlignedBlob() override;
80
81 // Return true if the given coordinates (x, y) are within the test rectangle
82 // and the debug level is at least the given detail_level.
83 static bool WithinTestRegion(int detail_level, int x, int y);
84
85 // Display the tab codes of the BLOBNBOXes in this grid.
86 ScrollView *DisplayTabs(const char *window_name, ScrollView *tab_win);
87
88 // Finds a vector corresponding to a set of vertically aligned blob edges
89 // running through the given box (bbox). The type of vector returned and the
90 // search parameters are determined by align_params, which is passed by value.
91 // *vertical_x and *vertical_y are updated with an estimate of the real
92 // vertical direction. (skew finding.)
93 // Returns nullptr if no decent vector can be found.
94 TabVector *FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x,
95 int *vertical_y);
96
97 private:
98 // Find a set of blobs that are aligned in the given vertical
99 // direction with the given blob (bbox). The aligned blobs are returned
100 // in good_points, and the return value is the number in the list.
101 // For other parameters see FindAlignedBlob below.
102 int AlignTabs(const AlignedBlobParams &params, bool top_to_bottom, BLOBNBOX *bbox,
103 BLOBNBOX_CLIST *good_points, int *end_y);
104
105 // Search vertically for a blob that is aligned with the input bbox.
106 // The search parameters are determined by the AlignedBlobParams p.
107 // top_to_bottom tells whether to search down or up.
108 // The return value is nullptr if nothing was found in the search box
109 // or if a blob was found in the gutter. On a nullptr return, *end_y
110 // is set to the edge of the search box or the leading edge of the
111 // gutter blob if one was found.
112 BLOBNBOX *FindAlignedBlob(const AlignedBlobParams &p, bool top_to_bottom, BLOBNBOX *bbox,
113 int x_start, int *end_y);
114 };
115
116 } // namespace tesseract.
117
118 #endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_