comparison mupdf-source/thirdparty/tesseract/src/ccstruct/split.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 *
3 * File: split.h
4 * Author: Mark Seaman, SW Productivity
5 * Status: Reusable Software Component
6 *
7 * (c) Copyright 1987, Hewlett-Packard Company.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 *****************************************************************************/
19 #ifndef SPLIT_H
20 #define SPLIT_H
21
#include "blobs.h"  // for EDGEPT, TBLOB, TESSLINE
#include "params.h" // for BOOL_VAR_H, BoolParam
#include "rect.h"   // for TBOX

#include <algorithm> // for std::max, std::min
25
26 namespace tesseract {
27
28 class ScrollView;
29
30 /*----------------------------------------------------------------------
31 T y p e s
32 ----------------------------------------------------------------------*/
33 struct SPLIT {
34 SPLIT() : point1(nullptr), point2(nullptr) {}
35 SPLIT(EDGEPT *pt1, EDGEPT *pt2) : point1(pt1), point2(pt2) {}
36
37 // Returns the bounding box of all the points in the split.
38 TBOX bounding_box() const {
39 return TBOX(std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
40 std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
41 }
42
43 // Returns the bounding box of the outline from point1 to point2.
44 TBOX Box12() const {
45 return point1->SegmentBox(point2);
46 }
47 // Returns the bounding box of the outline from point1 to point1.
48 TBOX Box21() const {
49 return point2->SegmentBox(point1);
50 }
51 // Returns the bounding box of the out
52
53 // Hides the SPLIT so the outlines appear not to be cut by it.
54 void Hide() const;
55 // Undoes hide, so the outlines are cut by the SPLIT.
56 void Reveal() const;
57
58 // Returns true if the given EDGEPT is used by this SPLIT, checking only
59 // the EDGEPT pointer, not the coordinates.
60 bool UsesPoint(const EDGEPT *point) const {
61 return point1 == point || point2 == point;
62 }
63 // Returns true if the other SPLIT has any position shared with *this.
64 bool SharesPosition(const SPLIT &other) const {
65 return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
66 point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
67 }
68 // Returns true if both points are contained within the blob.
69 bool ContainedByBlob(const TBLOB &blob) const {
70 return blob.Contains(point1->pos) && blob.Contains(point2->pos);
71 }
72 // Returns true if both points are contained within the outline.
73 bool ContainedByOutline(const TESSLINE &outline) const {
74 return outline.Contains(point1->pos) && outline.Contains(point2->pos);
75 }
76 // Compute a split priority based on the bounding boxes of the parts.
77 // The arguments here are config parameters defined in Wordrec. Add chop_
78 // to the beginning of the name.
79 float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
80 double center_knob, double width_change_knob) const;
81 // Returns true if *this SPLIT appears OK in the sense that it does not cross
82 // any outlines and does not chop off any ridiculously small pieces.
83 bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
84 // Returns true if the split generates a small chunk in terms of either area
85 // or number of points.
86 bool IsLittleChunk(int min_points, int min_area) const;
87
88 void Print() const;
89 #ifndef GRAPHICS_DISABLED
90 // Draws the split in the given window.
91 void Mark(ScrollView *window) const;
92 #endif
93
94 // Creates two outlines out of one by splitting the original one in half.
95 // Inserts the resulting outlines into the given list.
96 void SplitOutlineList(TESSLINE *outlines) const;
97 // Makes a split between these two edge points, but does not affect the
98 // outlines to which they belong.
99 void SplitOutline() const;
100 // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
101 // the split, but possibly leaving some duplicate outlines.
102 void UnsplitOutlineList(TBLOB *blob) const;
103 // Removes the split that was put between these two points.
104 void UnsplitOutlines() const;
105
106 EDGEPT *point1;
107 EDGEPT *point2;
108 };
109
110 /*----------------------------------------------------------------------
111 V a r i a b l e s
112 ----------------------------------------------------------------------*/
113
114 extern BOOL_VAR_H(wordrec_display_splits);
115
116 /*----------------------------------------------------------------------
117 F u n c t i o n s
118 ----------------------------------------------------------------------*/
119 EDGEPT *make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev);
120
121 void remove_edgept(EDGEPT *point);
122
123 } // namespace tesseract
124
125 #endif