comparison mupdf-source/thirdparty/tesseract/src/ccstruct/seam.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /******************************************************************************
2 *
3 * File: seam.cpp (Formerly seam.c)
4 * Author: Mark Seaman, OCR Technology
5 *
6 * (c) Copyright 1987, Hewlett-Packard Company.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 *****************************************************************************/
18 /*----------------------------------------------------------------------
19 I n c l u d e s
20 ----------------------------------------------------------------------*/
21 #include "seam.h"
22
23 #include "blobs.h"
24 #include "tprintf.h"
25
26 namespace tesseract {
27
28 /*----------------------------------------------------------------------
29 Public Function Code
30 ----------------------------------------------------------------------*/
31
32 // Returns the bounding box of all the points in the seam.
33 TBOX SEAM::bounding_box() const {
34 TBOX box(location_.x, location_.y, location_.x, location_.y);
35 for (int s = 0; s < num_splits_; ++s) {
36 box += splits_[s].bounding_box();
37 }
38 return box;
39 }
40
41 // Returns true if the splits in *this SEAM appear OK in the sense that they
42 // do not cross any outlines and do not chop off any ridiculously small
43 // pieces.
44 bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
45 // TODO(rays) Try testing all the splits. Duplicating original code for now,
46 // which tested only the first.
47 return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
48 }
49
50 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
51 // seam, which is about to be inserted at insert_index. Returns false if
52 // any of the computations fails, as this indicates an invalid chop.
53 // widthn_/widthp_ are only changed if modify is true.
54 bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
55 const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
56 for (int s = 0; s < insert_index; ++s) {
57 if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
58 return false;
59 }
60 }
61 if (!FindBlobWidth(blobs, insert_index, modify)) {
62 return false;
63 }
64 for (unsigned s = insert_index; s < seams.size(); ++s) {
65 if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
66 return false;
67 }
68 }
69 return true;
70 }
71
72 // Computes the widthp_/widthn_ range. Returns false if not all the splits
73 // are accounted for. widthn_/widthp_ are only changed if modify is true.
74 bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
75 int num_found = 0;
76 if (modify) {
77 widthp_ = 0;
78 widthn_ = 0;
79 }
80 for (int s = 0; s < num_splits_; ++s) {
81 const SPLIT &split = splits_[s];
82 bool found_split = split.ContainedByBlob(*blobs[index]);
83 // Look right.
84 for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
85 found_split = split.ContainedByBlob(*blobs[b]);
86 if (found_split && b - index > widthp_ && modify) {
87 widthp_ = b - index;
88 }
89 }
90 // Look left.
91 for (int b = index - 1; !found_split && b >= 0; --b) {
92 found_split = split.ContainedByBlob(*blobs[b]);
93 if (found_split && index - b > widthn_ && modify) {
94 widthn_ = index - b;
95 }
96 }
97 if (found_split) {
98 ++num_found;
99 }
100 }
101 return num_found == num_splits_;
102 }
103
104 // Splits this blob into two blobs by applying the splits included in
105 // *this SEAM
106 void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
107 for (int s = 0; s < num_splits_; ++s) {
108 splits_[s].SplitOutlineList(blob->outlines);
109 }
110 blob->ComputeBoundingBoxes();
111
112 divide_blobs(blob, other_blob, italic_blob, location_);
113
114 blob->EliminateDuplicateOutlines();
115 other_blob->EliminateDuplicateOutlines();
116
117 blob->CorrectBlobOrder(other_blob);
118 }
119
120 // Undoes ApplySeam by removing the seam between these two blobs.
121 // Produces one blob as a result, and deletes other_blob.
122 void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
123 if (blob->outlines == nullptr) {
124 blob->outlines = other_blob->outlines;
125 other_blob->outlines = nullptr;
126 }
127
128 TESSLINE *outline = blob->outlines;
129 while (outline->next) {
130 outline = outline->next;
131 }
132 outline->next = other_blob->outlines;
133 other_blob->outlines = nullptr;
134 delete other_blob;
135
136 for (int s = 0; s < num_splits_; ++s) {
137 splits_[s].UnsplitOutlineList(blob);
138 }
139 blob->ComputeBoundingBoxes();
140 blob->EliminateDuplicateOutlines();
141 }
142
143 // Prints everything in *this SEAM.
144 void SEAM::Print(const char *label) const {
145 tprintf("%s", label);
146 tprintf(" %6.2f @ (%d,%d), p=%u, n=%u ", priority_, location_.x, location_.y, widthp_, widthn_);
147 for (int s = 0; s < num_splits_; ++s) {
148 splits_[s].Print();
149 if (s + 1 < num_splits_) {
150 tprintf(", ");
151 }
152 }
153 tprintf("\n");
154 }
155
156 // Prints a collection of SEAMs.
157 /* static */
158 void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
159 if (!seams.empty()) {
160 tprintf("%s\n", label);
161 for (unsigned x = 0; x < seams.size(); ++x) {
162 tprintf("%2u: ", x);
163 seams[x]->Print("");
164 }
165 tprintf("\n");
166 }
167 }
168
169 #ifndef GRAPHICS_DISABLED
170 // Draws the seam in the given window.
171 void SEAM::Mark(ScrollView *window) const {
172 for (int s = 0; s < num_splits_; ++s) {
173 splits_[s].Mark(window);
174 }
175 }
176 #endif
177
178 // Break up the blobs in this chain so that they are all independent.
179 // This operation should undo the affect of join_pieces.
180 /* static */
181 void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
182 int first, int last) {
183 for (int x = first; x < last; ++x) {
184 seams[x]->Reveal();
185 }
186
187 TESSLINE *outline = blobs[first]->outlines;
188 int next_blob = first + 1;
189
190 while (outline != nullptr && next_blob <= last) {
191 if (outline->next == blobs[next_blob]->outlines) {
192 outline->next = nullptr;
193 outline = blobs[next_blob]->outlines;
194 ++next_blob;
195 } else {
196 outline = outline->next;
197 }
198 }
199 }
200
201 // Join a group of base level pieces into a single blob that can then
202 // be classified.
203 /* static */
204 void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
205 int first, int last) {
206 TESSLINE *outline = blobs[first]->outlines;
207 if (!outline) {
208 return;
209 }
210
211 for (int x = first; x < last; ++x) {
212 SEAM *seam = seams[x];
213 if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
214 seam->Hide();
215 }
216 while (outline->next) {
217 outline = outline->next;
218 }
219 outline->next = blobs[x + 1]->outlines;
220 }
221 }
222
223 // Hides the seam so the outlines appear not to be cut by it.
224 void SEAM::Hide() const {
225 for (int s = 0; s < num_splits_; ++s) {
226 splits_[s].Hide();
227 }
228 }
229
230 // Undoes hide, so the outlines are cut by the seam.
231 void SEAM::Reveal() const {
232 for (int s = 0; s < num_splits_; ++s) {
233 splits_[s].Reveal();
234 }
235 }
236
237 // Computes and returns, but does not set, the full priority of *this SEAM.
238 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
239 double center_knob, double width_change_knob) const {
240 if (num_splits_ == 0) {
241 return 0.0f;
242 }
243 for (int s = 1; s < num_splits_; ++s) {
244 splits_[s].SplitOutline();
245 }
246 float full_priority =
247 priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob,
248 width_change_knob);
249 for (int s = num_splits_ - 1; s >= 1; --s) {
250 splits_[s].UnsplitOutlines();
251 }
252 return full_priority;
253 }
254
255 /**
256 * @name start_seam_list
257 *
258 * Initialize a list of seams that match the original number of blobs
259 * present in the starting segmentation. Each of the seams created
260 * by this routine have location information only.
261 */
262 void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
263 seam_array->clear();
264 TPOINT location;
265
266 for (unsigned b = 1; b < word->NumBlobs(); ++b) {
267 TBOX bbox = word->blobs[b - 1]->bounding_box();
268 TBOX nbox = word->blobs[b]->bounding_box();
269 location.x = (bbox.right() + nbox.left()) / 2;
270 location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
271 seam_array->push_back(new SEAM(0.0f, location));
272 }
273 }
274
275 } // namespace tesseract