Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/seam.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: seam.cpp (Formerly seam.c) | |
| 4 * Author: Mark Seaman, OCR Technology | |
| 5 * | |
| 6 * (c) Copyright 1987, Hewlett-Packard Company. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 *****************************************************************************/ | |
| 18 /*---------------------------------------------------------------------- | |
| 19 I n c l u d e s | |
| 20 ----------------------------------------------------------------------*/ | |
| 21 #include "seam.h" | |
| 22 | |
| 23 #include "blobs.h" | |
| 24 #include "tprintf.h" | |
| 25 | |
| 26 namespace tesseract { | |
| 27 | |
| 28 /*---------------------------------------------------------------------- | |
| 29 Public Function Code | |
| 30 ----------------------------------------------------------------------*/ | |
| 31 | |
| 32 // Returns the bounding box of all the points in the seam. | |
| 33 TBOX SEAM::bounding_box() const { | |
| 34 TBOX box(location_.x, location_.y, location_.x, location_.y); | |
| 35 for (int s = 0; s < num_splits_; ++s) { | |
| 36 box += splits_[s].bounding_box(); | |
| 37 } | |
| 38 return box; | |
| 39 } | |
| 40 | |
| 41 // Returns true if the splits in *this SEAM appear OK in the sense that they | |
| 42 // do not cross any outlines and do not chop off any ridiculously small | |
| 43 // pieces. | |
| 44 bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const { | |
| 45 // TODO(rays) Try testing all the splits. Duplicating original code for now, | |
| 46 // which tested only the first. | |
| 47 return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area); | |
| 48 } | |
| 49 | |
| 50 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this | |
| 51 // seam, which is about to be inserted at insert_index. Returns false if | |
| 52 // any of the computations fails, as this indicates an invalid chop. | |
| 53 // widthn_/widthp_ are only changed if modify is true. | |
| 54 bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams, | |
| 55 const std::vector<TBLOB *> &blobs, int insert_index, bool modify) { | |
| 56 for (int s = 0; s < insert_index; ++s) { | |
| 57 if (!seams[s]->FindBlobWidth(blobs, s, modify)) { | |
| 58 return false; | |
| 59 } | |
| 60 } | |
| 61 if (!FindBlobWidth(blobs, insert_index, modify)) { | |
| 62 return false; | |
| 63 } | |
| 64 for (unsigned s = insert_index; s < seams.size(); ++s) { | |
| 65 if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) { | |
| 66 return false; | |
| 67 } | |
| 68 } | |
| 69 return true; | |
| 70 } | |
| 71 | |
| 72 // Computes the widthp_/widthn_ range. Returns false if not all the splits | |
| 73 // are accounted for. widthn_/widthp_ are only changed if modify is true. | |
| 74 bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) { | |
| 75 int num_found = 0; | |
| 76 if (modify) { | |
| 77 widthp_ = 0; | |
| 78 widthn_ = 0; | |
| 79 } | |
| 80 for (int s = 0; s < num_splits_; ++s) { | |
| 81 const SPLIT &split = splits_[s]; | |
| 82 bool found_split = split.ContainedByBlob(*blobs[index]); | |
| 83 // Look right. | |
| 84 for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) { | |
| 85 found_split = split.ContainedByBlob(*blobs[b]); | |
| 86 if (found_split && b - index > widthp_ && modify) { | |
| 87 widthp_ = b - index; | |
| 88 } | |
| 89 } | |
| 90 // Look left. | |
| 91 for (int b = index - 1; !found_split && b >= 0; --b) { | |
| 92 found_split = split.ContainedByBlob(*blobs[b]); | |
| 93 if (found_split && index - b > widthn_ && modify) { | |
| 94 widthn_ = index - b; | |
| 95 } | |
| 96 } | |
| 97 if (found_split) { | |
| 98 ++num_found; | |
| 99 } | |
| 100 } | |
| 101 return num_found == num_splits_; | |
| 102 } | |
| 103 | |
| 104 // Splits this blob into two blobs by applying the splits included in | |
| 105 // *this SEAM | |
| 106 void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const { | |
| 107 for (int s = 0; s < num_splits_; ++s) { | |
| 108 splits_[s].SplitOutlineList(blob->outlines); | |
| 109 } | |
| 110 blob->ComputeBoundingBoxes(); | |
| 111 | |
| 112 divide_blobs(blob, other_blob, italic_blob, location_); | |
| 113 | |
| 114 blob->EliminateDuplicateOutlines(); | |
| 115 other_blob->EliminateDuplicateOutlines(); | |
| 116 | |
| 117 blob->CorrectBlobOrder(other_blob); | |
| 118 } | |
| 119 | |
| 120 // Undoes ApplySeam by removing the seam between these two blobs. | |
| 121 // Produces one blob as a result, and deletes other_blob. | |
| 122 void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const { | |
| 123 if (blob->outlines == nullptr) { | |
| 124 blob->outlines = other_blob->outlines; | |
| 125 other_blob->outlines = nullptr; | |
| 126 } | |
| 127 | |
| 128 TESSLINE *outline = blob->outlines; | |
| 129 while (outline->next) { | |
| 130 outline = outline->next; | |
| 131 } | |
| 132 outline->next = other_blob->outlines; | |
| 133 other_blob->outlines = nullptr; | |
| 134 delete other_blob; | |
| 135 | |
| 136 for (int s = 0; s < num_splits_; ++s) { | |
| 137 splits_[s].UnsplitOutlineList(blob); | |
| 138 } | |
| 139 blob->ComputeBoundingBoxes(); | |
| 140 blob->EliminateDuplicateOutlines(); | |
| 141 } | |
| 142 | |
| 143 // Prints everything in *this SEAM. | |
| 144 void SEAM::Print(const char *label) const { | |
| 145 tprintf("%s", label); | |
| 146 tprintf(" %6.2f @ (%d,%d), p=%u, n=%u ", priority_, location_.x, location_.y, widthp_, widthn_); | |
| 147 for (int s = 0; s < num_splits_; ++s) { | |
| 148 splits_[s].Print(); | |
| 149 if (s + 1 < num_splits_) { | |
| 150 tprintf(", "); | |
| 151 } | |
| 152 } | |
| 153 tprintf("\n"); | |
| 154 } | |
| 155 | |
| 156 // Prints a collection of SEAMs. | |
| 157 /* static */ | |
| 158 void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) { | |
| 159 if (!seams.empty()) { | |
| 160 tprintf("%s\n", label); | |
| 161 for (unsigned x = 0; x < seams.size(); ++x) { | |
| 162 tprintf("%2u: ", x); | |
| 163 seams[x]->Print(""); | |
| 164 } | |
| 165 tprintf("\n"); | |
| 166 } | |
| 167 } | |
| 168 | |
| 169 #ifndef GRAPHICS_DISABLED | |
| 170 // Draws the seam in the given window. | |
| 171 void SEAM::Mark(ScrollView *window) const { | |
| 172 for (int s = 0; s < num_splits_; ++s) { | |
| 173 splits_[s].Mark(window); | |
| 174 } | |
| 175 } | |
| 176 #endif | |
| 177 | |
| 178 // Break up the blobs in this chain so that they are all independent. | |
| 179 // This operation should undo the affect of join_pieces. | |
| 180 /* static */ | |
| 181 void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs, | |
| 182 int first, int last) { | |
| 183 for (int x = first; x < last; ++x) { | |
| 184 seams[x]->Reveal(); | |
| 185 } | |
| 186 | |
| 187 TESSLINE *outline = blobs[first]->outlines; | |
| 188 int next_blob = first + 1; | |
| 189 | |
| 190 while (outline != nullptr && next_blob <= last) { | |
| 191 if (outline->next == blobs[next_blob]->outlines) { | |
| 192 outline->next = nullptr; | |
| 193 outline = blobs[next_blob]->outlines; | |
| 194 ++next_blob; | |
| 195 } else { | |
| 196 outline = outline->next; | |
| 197 } | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 // Join a group of base level pieces into a single blob that can then | |
| 202 // be classified. | |
| 203 /* static */ | |
| 204 void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs, | |
| 205 int first, int last) { | |
| 206 TESSLINE *outline = blobs[first]->outlines; | |
| 207 if (!outline) { | |
| 208 return; | |
| 209 } | |
| 210 | |
| 211 for (int x = first; x < last; ++x) { | |
| 212 SEAM *seam = seams[x]; | |
| 213 if (x - seam->widthn_ >= first && x + seam->widthp_ < last) { | |
| 214 seam->Hide(); | |
| 215 } | |
| 216 while (outline->next) { | |
| 217 outline = outline->next; | |
| 218 } | |
| 219 outline->next = blobs[x + 1]->outlines; | |
| 220 } | |
| 221 } | |
| 222 | |
| 223 // Hides the seam so the outlines appear not to be cut by it. | |
| 224 void SEAM::Hide() const { | |
| 225 for (int s = 0; s < num_splits_; ++s) { | |
| 226 splits_[s].Hide(); | |
| 227 } | |
| 228 } | |
| 229 | |
| 230 // Undoes hide, so the outlines are cut by the seam. | |
| 231 void SEAM::Reveal() const { | |
| 232 for (int s = 0; s < num_splits_; ++s) { | |
| 233 splits_[s].Reveal(); | |
| 234 } | |
| 235 } | |
| 236 | |
| 237 // Computes and returns, but does not set, the full priority of *this SEAM. | |
| 238 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, | |
| 239 double center_knob, double width_change_knob) const { | |
| 240 if (num_splits_ == 0) { | |
| 241 return 0.0f; | |
| 242 } | |
| 243 for (int s = 1; s < num_splits_; ++s) { | |
| 244 splits_[s].SplitOutline(); | |
| 245 } | |
| 246 float full_priority = | |
| 247 priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob, | |
| 248 width_change_knob); | |
| 249 for (int s = num_splits_ - 1; s >= 1; --s) { | |
| 250 splits_[s].UnsplitOutlines(); | |
| 251 } | |
| 252 return full_priority; | |
| 253 } | |
| 254 | |
| 255 /** | |
| 256 * @name start_seam_list | |
| 257 * | |
| 258 * Initialize a list of seams that match the original number of blobs | |
| 259 * present in the starting segmentation. Each of the seams created | |
| 260 * by this routine have location information only. | |
| 261 */ | |
| 262 void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) { | |
| 263 seam_array->clear(); | |
| 264 TPOINT location; | |
| 265 | |
| 266 for (unsigned b = 1; b < word->NumBlobs(); ++b) { | |
| 267 TBOX bbox = word->blobs[b - 1]->bounding_box(); | |
| 268 TBOX nbox = word->blobs[b]->bounding_box(); | |
| 269 location.x = (bbox.right() + nbox.left()) / 2; | |
| 270 location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; | |
| 271 seam_array->push_back(new SEAM(0.0f, location)); | |
| 272 } | |
| 273 } | |
| 274 | |
| 275 } // namespace tesseract |
