Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccstruct/blobs.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /****************************************************************************** | |
| 2 * | |
| 3 * File: blobs.cpp (Formerly blobs.c) | |
| 4 * Description: Blob definition | |
| 5 * Author: Mark Seaman, OCR Technology | |
| 6 * | |
| 7 * (c) Copyright 1989, Hewlett-Packard Company. | |
| 8 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 ** you may not use this file except in compliance with the License. | |
| 10 ** You may obtain a copy of the License at | |
| 11 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 ** Unless required by applicable law or agreed to in writing, software | |
| 13 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 ** See the License for the specific language governing permissions and | |
| 16 ** limitations under the License. | |
| 17 * | |
| 18 *****************************************************************************/ | |
| 19 | |
| 20 /*---------------------------------------------------------------------- | |
| 21 I n c l u d e s | |
| 22 ----------------------------------------------------------------------*/ | |
| 23 // Include automatically generated configuration file if running autoconf. | |
| 24 #ifdef HAVE_CONFIG_H | |
| 25 # include "config_auto.h" | |
| 26 #endif | |
| 27 | |
| 28 #include "blobs.h" | |
| 29 | |
| 30 #include "ccstruct.h" | |
| 31 #include "clst.h" | |
| 32 #include "linlsq.h" | |
| 33 #include "normalis.h" | |
| 34 #include "ocrblock.h" | |
| 35 #include "ocrrow.h" | |
| 36 #include "points.h" | |
| 37 #include "polyaprx.h" | |
| 38 #include "werd.h" | |
| 39 | |
| 40 #include "helpers.h" | |
| 41 | |
| 42 #include <algorithm> | |
| 43 | |
| 44 namespace tesseract { | |
| 45 | |
| 46 // A Vector representing the "vertical" direction when measuring the | |
| 47 // divisiblity of blobs into multiple blobs just by separating outlines. | |
| 48 // See divisible_blob below for the use. | |
| 49 const TPOINT kDivisibleVerticalUpright(0, 1); | |
| 50 // A vector representing the "vertical" direction for italic text for use | |
| 51 // when separating outlines. Using it actually deteriorates final accuracy, | |
| 52 // so it is only used for ApplyBoxes chopping to get a better segmentation. | |
| 53 const TPOINT kDivisibleVerticalItalic(1, 5); | |
| 54 | |
| 55 /*---------------------------------------------------------------------- | |
| 56 F u n c t i o n s | |
| 57 ----------------------------------------------------------------------*/ | |
| 58 | |
| 59 // Returns true when the two line segments cross each other. | |
| 60 // (Moved from outlines.cpp). | |
| 61 // Finds where the projected lines would cross and then checks to see if the | |
| 62 // point of intersection lies on both of the line segments. If it does | |
| 63 // then these two segments cross. | |
| 64 /* static */ | |
| 65 bool TPOINT::IsCrossed(const TPOINT &a0, const TPOINT &a1, const TPOINT &b0, const TPOINT &b1) { | |
| 66 TPOINT b0a1, b0a0, a1b1, b0b1, a1a0; | |
| 67 | |
| 68 b0a1.x = a1.x - b0.x; | |
| 69 b0a0.x = a0.x - b0.x; | |
| 70 a1b1.x = b1.x - a1.x; | |
| 71 b0b1.x = b1.x - b0.x; | |
| 72 a1a0.x = a0.x - a1.x; | |
| 73 b0a1.y = a1.y - b0.y; | |
| 74 b0a0.y = a0.y - b0.y; | |
| 75 a1b1.y = b1.y - a1.y; | |
| 76 b0b1.y = b1.y - b0.y; | |
| 77 a1a0.y = a0.y - a1.y; | |
| 78 | |
| 79 int b0a1xb0b1 = b0a1.cross(b0b1); | |
| 80 int b0b1xb0a0 = b0b1.cross(b0a0); | |
| 81 int a1b1xa1a0 = a1b1.cross(a1a0); | |
| 82 // For clarity, we want a1a0.cross(a1b0) here but we have b0a1 instead of a1b0 | |
| 83 // so use -a1b0.cross(b0a1) instead, which is the same. | |
| 84 int a1a0xa1b0 = -a1a0.cross(b0a1); | |
| 85 | |
| 86 return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) && | |
| 87 ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0)); | |
| 88 } | |
| 89 | |
| 90 // Consume the circular list of EDGEPTs to make a TESSLINE. | |
| 91 TESSLINE *TESSLINE::BuildFromOutlineList(EDGEPT *outline) { | |
| 92 auto *result = new TESSLINE; | |
| 93 result->loop = outline; | |
| 94 if (outline->src_outline != nullptr) { | |
| 95 // ASSUMPTION: This function is only ever called from ApproximateOutline | |
| 96 // and therefore either all points have a src_outline or all do not. | |
| 97 // Just as SetupFromPos sets the vectors from the vertices, setup the | |
| 98 // step_count members to indicate the (positive) number of original | |
| 99 // C_OUTLINE steps to the next vertex. | |
| 100 EDGEPT *pt = outline; | |
| 101 do { | |
| 102 pt->step_count = pt->next->start_step - pt->start_step; | |
| 103 if (pt->step_count < 0) { | |
| 104 pt->step_count += pt->src_outline->pathlength(); | |
| 105 } | |
| 106 pt = pt->next; | |
| 107 } while (pt != outline); | |
| 108 } | |
| 109 result->SetupFromPos(); | |
| 110 return result; | |
| 111 } | |
| 112 | |
| 113 // Copies the data and the outline, but leaves next untouched. | |
| 114 void TESSLINE::CopyFrom(const TESSLINE &src) { | |
| 115 Clear(); | |
| 116 topleft = src.topleft; | |
| 117 botright = src.botright; | |
| 118 start = src.start; | |
| 119 is_hole = src.is_hole; | |
| 120 if (src.loop != nullptr) { | |
| 121 EDGEPT *prevpt = nullptr; | |
| 122 EDGEPT *newpt = nullptr; | |
| 123 EDGEPT *srcpt = src.loop; | |
| 124 do { | |
| 125 newpt = new EDGEPT(*srcpt); | |
| 126 if (prevpt == nullptr) { | |
| 127 loop = newpt; | |
| 128 } else { | |
| 129 newpt->prev = prevpt; | |
| 130 prevpt->next = newpt; | |
| 131 } | |
| 132 prevpt = newpt; | |
| 133 srcpt = srcpt->next; | |
| 134 } while (srcpt != src.loop); | |
| 135 loop->prev = newpt; | |
| 136 newpt->next = loop; | |
| 137 } | |
| 138 } | |
| 139 | |
| 140 // Deletes owned data. | |
| 141 void TESSLINE::Clear() { | |
| 142 if (loop == nullptr) { | |
| 143 return; | |
| 144 } | |
| 145 | |
| 146 EDGEPT *this_edge = loop; | |
| 147 do { | |
| 148 EDGEPT *next_edge = this_edge->next; | |
| 149 delete this_edge; | |
| 150 this_edge = next_edge; | |
| 151 } while (this_edge != loop); | |
| 152 loop = nullptr; | |
| 153 } | |
| 154 | |
| 155 // Normalize in-place using the DENORM. | |
| 156 void TESSLINE::Normalize(const DENORM &denorm) { | |
| 157 EDGEPT *pt = loop; | |
| 158 do { | |
| 159 denorm.LocalNormTransform(pt->pos, &pt->pos); | |
| 160 pt = pt->next; | |
| 161 } while (pt != loop); | |
| 162 SetupFromPos(); | |
| 163 } | |
| 164 | |
| 165 // Rotates by the given rotation in place. | |
| 166 void TESSLINE::Rotate(const FCOORD rot) { | |
| 167 EDGEPT *pt = loop; | |
| 168 do { | |
| 169 int tmp = static_cast<int>(floor(pt->pos.x * rot.x() - pt->pos.y * rot.y() + 0.5)); | |
| 170 pt->pos.y = static_cast<int>(floor(pt->pos.y * rot.x() + pt->pos.x * rot.y() + 0.5)); | |
| 171 pt->pos.x = tmp; | |
| 172 pt = pt->next; | |
| 173 } while (pt != loop); | |
| 174 SetupFromPos(); | |
| 175 } | |
| 176 | |
| 177 // Moves by the given vec in place. | |
| 178 void TESSLINE::Move(const ICOORD vec) { | |
| 179 EDGEPT *pt = loop; | |
| 180 do { | |
| 181 pt->pos.x += vec.x(); | |
| 182 pt->pos.y += vec.y(); | |
| 183 pt = pt->next; | |
| 184 } while (pt != loop); | |
| 185 SetupFromPos(); | |
| 186 } | |
| 187 | |
| 188 // Scales by the given factor in place. | |
| 189 void TESSLINE::Scale(float factor) { | |
| 190 EDGEPT *pt = loop; | |
| 191 do { | |
| 192 pt->pos.x = static_cast<int>(floor(pt->pos.x * factor + 0.5)); | |
| 193 pt->pos.y = static_cast<int>(floor(pt->pos.y * factor + 0.5)); | |
| 194 pt = pt->next; | |
| 195 } while (pt != loop); | |
| 196 SetupFromPos(); | |
| 197 } | |
| 198 | |
| 199 // Sets up the start and vec members of the loop from the pos members. | |
| 200 void TESSLINE::SetupFromPos() { | |
| 201 EDGEPT *pt = loop; | |
| 202 do { | |
| 203 pt->vec.x = pt->next->pos.x - pt->pos.x; | |
| 204 pt->vec.y = pt->next->pos.y - pt->pos.y; | |
| 205 pt = pt->next; | |
| 206 } while (pt != loop); | |
| 207 start = pt->pos; | |
| 208 ComputeBoundingBox(); | |
| 209 } | |
| 210 | |
| 211 // Recomputes the bounding box from the points in the loop. | |
| 212 void TESSLINE::ComputeBoundingBox() { | |
| 213 int minx = INT32_MAX; | |
| 214 int miny = INT32_MAX; | |
| 215 int maxx = -INT32_MAX; | |
| 216 int maxy = -INT32_MAX; | |
| 217 | |
| 218 // Find boundaries. | |
| 219 start = loop->pos; | |
| 220 EDGEPT *this_edge = loop; | |
| 221 do { | |
| 222 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { | |
| 223 if (this_edge->pos.x < minx) { | |
| 224 minx = this_edge->pos.x; | |
| 225 } | |
| 226 if (this_edge->pos.y < miny) { | |
| 227 miny = this_edge->pos.y; | |
| 228 } | |
| 229 if (this_edge->pos.x > maxx) { | |
| 230 maxx = this_edge->pos.x; | |
| 231 } | |
| 232 if (this_edge->pos.y > maxy) { | |
| 233 maxy = this_edge->pos.y; | |
| 234 } | |
| 235 } | |
| 236 this_edge = this_edge->next; | |
| 237 } while (this_edge != loop); | |
| 238 // Reset bounds. | |
| 239 topleft.x = minx; | |
| 240 topleft.y = maxy; | |
| 241 botright.x = maxx; | |
| 242 botright.y = miny; | |
| 243 } | |
| 244 | |
| 245 // Computes the min and max cross product of the outline points with the | |
| 246 // given vec and returns the results in min_xp and max_xp. Geometrically | |
| 247 // this is the left and right edge of the outline perpendicular to the | |
| 248 // given direction, but to get the distance units correct, you would | |
| 249 // have to divide by the modulus of vec. | |
| 250 void TESSLINE::MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const { | |
| 251 *min_xp = INT32_MAX; | |
| 252 *max_xp = INT32_MIN; | |
| 253 EDGEPT *this_edge = loop; | |
| 254 do { | |
| 255 if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) { | |
| 256 int product = this_edge->pos.cross(vec); | |
| 257 UpdateRange(product, min_xp, max_xp); | |
| 258 } | |
| 259 this_edge = this_edge->next; | |
| 260 } while (this_edge != loop); | |
| 261 } | |
| 262 | |
| 263 TBOX TESSLINE::bounding_box() const { | |
| 264 return TBOX(topleft.x, botright.y, botright.x, topleft.y); | |
| 265 } | |
| 266 | |
| 267 #ifndef GRAPHICS_DISABLED | |
| 268 void TESSLINE::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) { | |
| 269 if (is_hole) { | |
| 270 window->Pen(child_color); | |
| 271 } else { | |
| 272 window->Pen(color); | |
| 273 } | |
| 274 window->SetCursor(start.x, start.y); | |
| 275 EDGEPT *pt = loop; | |
| 276 do { | |
| 277 bool prev_hidden = pt->IsHidden(); | |
| 278 pt = pt->next; | |
| 279 if (prev_hidden) { | |
| 280 window->SetCursor(pt->pos.x, pt->pos.y); | |
| 281 } else { | |
| 282 window->DrawTo(pt->pos.x, pt->pos.y); | |
| 283 } | |
| 284 } while (pt != loop); | |
| 285 } | |
| 286 #endif // !GRAPHICS_DISABLED | |
| 287 | |
| 288 // Returns the first non-hidden EDGEPT that has a different src_outline to | |
| 289 // its predecessor, or, if all the same, the lowest indexed point. | |
| 290 EDGEPT *TESSLINE::FindBestStartPt() const { | |
| 291 EDGEPT *best_start = loop; | |
| 292 int best_step = loop->start_step; | |
| 293 // Iterate the polygon. | |
| 294 EDGEPT *pt = loop; | |
| 295 do { | |
| 296 if (pt->IsHidden()) { | |
| 297 continue; | |
| 298 } | |
| 299 if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) { | |
| 300 return pt; // Qualifies as the best. | |
| 301 } | |
| 302 if (pt->start_step < best_step) { | |
| 303 best_step = pt->start_step; | |
| 304 best_start = pt; | |
| 305 } | |
| 306 } while ((pt = pt->next) != loop); | |
| 307 return best_start; | |
| 308 } | |
| 309 | |
| 310 // Iterate the given list of outlines, converting to TESSLINE by polygonal | |
| 311 // approximation and recursively any children, returning the current tail | |
| 312 // of the resulting list of TESSLINEs. | |
| 313 static TESSLINE **ApproximateOutlineList(bool allow_detailed_fx, C_OUTLINE_LIST *outlines, | |
| 314 bool children, TESSLINE **tail) { | |
| 315 C_OUTLINE_IT ol_it(outlines); | |
| 316 for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { | |
| 317 C_OUTLINE *outline = ol_it.data(); | |
| 318 if (outline->pathlength() > 0) { | |
| 319 TESSLINE *tessline = ApproximateOutline(allow_detailed_fx, outline); | |
| 320 tessline->is_hole = children; | |
| 321 *tail = tessline; | |
| 322 tail = &tessline->next; | |
| 323 } | |
| 324 if (!outline->child()->empty()) { | |
| 325 tail = ApproximateOutlineList(allow_detailed_fx, outline->child(), true, tail); | |
| 326 } | |
| 327 } | |
| 328 return tail; | |
| 329 } | |
| 330 | |
| 331 // Factory to build a TBLOB from a C_BLOB with polygonal approximation along | |
| 332 // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB | |
| 333 // contain pointers to the input C_OUTLINEs that enable higher-resolution | |
| 334 // feature extraction that does not use the polygonal approximation. | |
| 335 TBLOB *TBLOB::PolygonalCopy(bool allow_detailed_fx, C_BLOB *src) { | |
| 336 auto *tblob = new TBLOB; | |
| 337 ApproximateOutlineList(allow_detailed_fx, src->out_list(), false, &tblob->outlines); | |
| 338 return tblob; | |
| 339 } | |
| 340 | |
| 341 // Factory builds a blob with no outlines, but copies the other member data. | |
| 342 TBLOB *TBLOB::ShallowCopy(const TBLOB &src) { | |
| 343 auto *blob = new TBLOB; | |
| 344 blob->denorm_ = src.denorm_; | |
| 345 return blob; | |
| 346 } | |
| 347 | |
| 348 // Normalizes the blob for classification only if needed. | |
| 349 // (Normally this means a non-zero classify rotation.) | |
| 350 // If no Normalization is needed, then nullptr is returned, and the input blob | |
| 351 // can be used directly. Otherwise a new TBLOB is returned which must be | |
| 352 // deleted after use. | |
| 353 TBLOB *TBLOB::ClassifyNormalizeIfNeeded() const { | |
| 354 TBLOB *rotated_blob = nullptr; | |
| 355 // If necessary, copy the blob and rotate it. The rotation is always | |
| 356 // +/- 90 degrees, as 180 was already taken care of. | |
| 357 if (denorm_.block() != nullptr && denorm_.block()->classify_rotation().y() != 0.0) { | |
| 358 TBOX box = bounding_box(); | |
| 359 int x_middle = (box.left() + box.right()) / 2; | |
| 360 int y_middle = (box.top() + box.bottom()) / 2; | |
| 361 rotated_blob = new TBLOB(*this); | |
| 362 const FCOORD &rotation = denorm_.block()->classify_rotation(); | |
| 363 // Move the rotated blob back to the same y-position so that we | |
| 364 // can still distinguish similar glyphs with different y-position. | |
| 365 float target_y = | |
| 366 kBlnBaselineOffset + (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); | |
| 367 rotated_blob->Normalize(nullptr, &rotation, &denorm_, x_middle, y_middle, 1.0f, 1.0f, 0.0f, | |
| 368 target_y, denorm_.inverse(), denorm_.pix()); | |
| 369 } | |
| 370 return rotated_blob; | |
| 371 } | |
| 372 | |
| 373 // Copies the data and the outline, but leaves next untouched. | |
| 374 void TBLOB::CopyFrom(const TBLOB &src) { | |
| 375 Clear(); | |
| 376 TESSLINE *prev_outline = nullptr; | |
| 377 for (TESSLINE *srcline = src.outlines; srcline != nullptr; srcline = srcline->next) { | |
| 378 auto *new_outline = new TESSLINE(*srcline); | |
| 379 if (outlines == nullptr) { | |
| 380 outlines = new_outline; | |
| 381 } else { | |
| 382 prev_outline->next = new_outline; | |
| 383 } | |
| 384 prev_outline = new_outline; | |
| 385 } | |
| 386 denorm_ = src.denorm_; | |
| 387 } | |
| 388 | |
| 389 // Deletes owned data. | |
| 390 void TBLOB::Clear() { | |
| 391 for (TESSLINE *next_outline = nullptr; outlines != nullptr; outlines = next_outline) { | |
| 392 next_outline = outlines->next; | |
| 393 delete outlines; | |
| 394 } | |
| 395 } | |
| 396 | |
| 397 // Sets up the built-in DENORM and normalizes the blob in-place. | |
| 398 // For parameters see DENORM::SetupNormalization, plus the inverse flag for | |
| 399 // this blob and the Pix for the full image. | |
| 400 void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, | |
| 401 float x_origin, float y_origin, float x_scale, float y_scale, | |
| 402 float final_xshift, float final_yshift, bool inverse, Image pix) { | |
| 403 denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale, | |
| 404 final_xshift, final_yshift); | |
| 405 denorm_.set_inverse(inverse); | |
| 406 denorm_.set_pix(pix); | |
| 407 // TODO(rays) outline->Normalize is more accurate, but breaks tests due | |
| 408 // the changes it makes. Reinstate this code with a retraining. | |
| 409 // The reason this change is troublesome is that it normalizes for the | |
| 410 // baseline value computed independently at each x-coord. If the baseline | |
| 411 // is not horizontal, this introduces shear into the normalized blob, which | |
| 412 // is useful on the rare occasions that the baseline is really curved, but | |
| 413 // the baselines need to be stabilized the rest of the time. | |
| 414 #if 0 | |
| 415 for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { | |
| 416 outline->Normalize(denorm_); | |
| 417 } | |
| 418 #else | |
| 419 denorm_.LocalNormBlob(this); | |
| 420 #endif | |
| 421 } | |
| 422 | |
| 423 // Rotates by the given rotation in place. | |
| 424 void TBLOB::Rotate(const FCOORD rotation) { | |
| 425 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 426 outline->Rotate(rotation); | |
| 427 } | |
| 428 } | |
| 429 | |
| 430 // Moves by the given vec in place. | |
| 431 void TBLOB::Move(const ICOORD vec) { | |
| 432 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 433 outline->Move(vec); | |
| 434 } | |
| 435 } | |
| 436 | |
| 437 // Scales by the given factor in place. | |
| 438 void TBLOB::Scale(float factor) { | |
| 439 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 440 outline->Scale(factor); | |
| 441 } | |
| 442 } | |
| 443 | |
| 444 // Recomputes the bounding boxes of the outlines. | |
| 445 void TBLOB::ComputeBoundingBoxes() { | |
| 446 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 447 outline->ComputeBoundingBox(); | |
| 448 } | |
| 449 } | |
| 450 | |
| 451 // Returns the number of outlines. | |
| 452 int TBLOB::NumOutlines() const { | |
| 453 int result = 0; | |
| 454 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 455 ++result; | |
| 456 } | |
| 457 return result; | |
| 458 } | |
| 459 | |
| 460 /********************************************************************** | |
| 461 * TBLOB::bounding_box() | |
| 462 * | |
| 463 * Compute the bounding_box of a compound blob, defined to be the | |
| 464 * bounding box of the union of all top-level outlines in the blob. | |
| 465 **********************************************************************/ | |
| 466 TBOX TBLOB::bounding_box() const { | |
| 467 if (outlines == nullptr) { | |
| 468 return TBOX(0, 0, 0, 0); | |
| 469 } | |
| 470 TESSLINE *outline = outlines; | |
| 471 TBOX box = outline->bounding_box(); | |
| 472 for (outline = outline->next; outline != nullptr; outline = outline->next) { | |
| 473 box += outline->bounding_box(); | |
| 474 } | |
| 475 return box; | |
| 476 } | |
| 477 | |
| 478 // Finds and deletes any duplicate outlines in this blob, without deleting | |
| 479 // their EDGEPTs. | |
| 480 void TBLOB::EliminateDuplicateOutlines() { | |
| 481 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 482 TESSLINE *last_outline = outline; | |
| 483 for (TESSLINE *other_outline = outline->next; other_outline != nullptr; | |
| 484 last_outline = other_outline, other_outline = other_outline->next) { | |
| 485 if (outline->SameBox(*other_outline)) { | |
| 486 last_outline->next = other_outline->next; | |
| 487 // This doesn't leak - the outlines share the EDGEPTs. | |
| 488 other_outline->loop = nullptr; | |
| 489 delete other_outline; | |
| 490 other_outline = last_outline; | |
| 491 // If it is part of a cut, then it can't be a hole any more. | |
| 492 outline->is_hole = false; | |
| 493 } | |
| 494 } | |
| 495 } | |
| 496 } | |
| 497 | |
| 498 // Swaps the outlines of *this and next if needed to keep the centers in | |
| 499 // increasing x. | |
| 500 void TBLOB::CorrectBlobOrder(TBLOB *next) { | |
| 501 TBOX box = bounding_box(); | |
| 502 TBOX next_box = next->bounding_box(); | |
| 503 if (box.x_middle() > next_box.x_middle()) { | |
| 504 std::swap(outlines, next->outlines); | |
| 505 } | |
| 506 } | |
| 507 | |
| 508 #ifndef GRAPHICS_DISABLED | |
| 509 void TBLOB::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) { | |
| 510 for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) { | |
| 511 outline->plot(window, color, child_color); | |
| 512 } | |
| 513 } | |
| 514 #endif // !GRAPHICS_DISABLED | |
| 515 | |
| 516 // Computes the center of mass and second moments for the old baseline and | |
| 517 // 2nd moment normalizations. Returns the outline length. | |
| 518 // The input denorm should be the normalizations that have been applied from | |
| 519 // the image to the current state of this TBLOB. | |
| 520 int TBLOB::ComputeMoments(FCOORD *center, FCOORD *second_moments) const { | |
| 521 // Compute 1st and 2nd moments of the original outline. | |
| 522 LLSQ accumulator; | |
| 523 TBOX box = bounding_box(); | |
| 524 // Iterate the outlines, accumulating edges relative the box.botleft(). | |
| 525 CollectEdges(box, nullptr, &accumulator, nullptr, nullptr); | |
| 526 *center = accumulator.mean_point() + box.botleft(); | |
| 527 // The 2nd moments are just the standard deviation of the point positions. | |
| 528 double x2nd = sqrt(accumulator.x_variance()); | |
| 529 double y2nd = sqrt(accumulator.y_variance()); | |
| 530 if (x2nd < 1.0) { | |
| 531 x2nd = 1.0; | |
| 532 } | |
| 533 if (y2nd < 1.0) { | |
| 534 y2nd = 1.0; | |
| 535 } | |
| 536 second_moments->set_x(x2nd); | |
| 537 second_moments->set_y(y2nd); | |
| 538 return accumulator.count(); | |
| 539 } | |
| 540 | |
| 541 // Computes the precise bounding box of the coords that are generated by | |
| 542 // GetEdgeCoords. This may be different from the bounding box of the polygon. | |
| 543 void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const { | |
| 544 TBOX box = bounding_box(); | |
| 545 *precise_box = TBOX(); | |
| 546 CollectEdges(box, precise_box, nullptr, nullptr, nullptr); | |
| 547 precise_box->move(box.botleft()); | |
| 548 } | |
| 549 | |
| 550 // Adds edges to the given vectors. | |
| 551 // For all the edge steps in all the outlines, or polygonal approximation | |
| 552 // where there are no edge steps, collects the steps into x_coords/y_coords. | |
| 553 // x_coords is a collection of the x-coords of vertical edges for each | |
| 554 // y-coord starting at box.bottom(). | |
| 555 // y_coords is a collection of the y-coords of horizontal edges for each | |
| 556 // x-coord starting at box.left(). | |
| 557 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. | |
| 558 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. | |
| 559 void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords, | |
| 560 std::vector<std::vector<int>> &y_coords) const { | |
| 561 x_coords.clear(); | |
| 562 x_coords.resize(box.height()); | |
| 563 y_coords.clear(); | |
| 564 y_coords.resize(box.width()); | |
| 565 CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords); | |
| 566 // Sort the output vectors. | |
| 567 for (auto &coord : x_coords) { | |
| 568 std::sort(coord.begin(), coord.end()); | |
| 569 } | |
| 570 for (auto &coord : y_coords) { | |
| 571 std::sort(coord.begin(), coord.end()); | |
| 572 } | |
| 573 } | |
| 574 | |
| 575 // Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over | |
| 576 // the integer coordinate grid to properly weight long vectors. | |
| 577 static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator) { | |
| 578 FCOORD step(pt2); | |
| 579 step -= pt1; | |
| 580 int xstart = IntCastRounded(std::min(pt1.x(), pt2.x())); | |
| 581 int xend = IntCastRounded(std::max(pt1.x(), pt2.x())); | |
| 582 int ystart = IntCastRounded(std::min(pt1.y(), pt2.y())); | |
| 583 int yend = IntCastRounded(std::max(pt1.y(), pt2.y())); | |
| 584 if (xstart == xend && ystart == yend) { | |
| 585 return; // Nothing to do. | |
| 586 } | |
| 587 double weight = step.length() / (xend - xstart + yend - ystart); | |
| 588 // Compute and save the y-position at the middle of each x-step. | |
| 589 for (int x = xstart; x < xend; ++x) { | |
| 590 double y = pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x(); | |
| 591 accumulator->add(x + 0.5, y, weight); | |
| 592 } | |
| 593 // Compute and save the x-position at the middle of each y-step. | |
| 594 for (int y = ystart; y < yend; ++y) { | |
| 595 double x = pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y(); | |
| 596 accumulator->add(x, y + 0.5, weight); | |
| 597 } | |
| 598 } | |
| 599 | |
| 600 // Adds any edges from a single segment of outline between pt1 and pt2 to | |
| 601 // the x_coords, y_coords vectors. pt1 and pt2 should be relative to the | |
| 602 // bottom-left of the bounding box, hence indices to x_coords, y_coords | |
| 603 // are clipped to ([0,x_limit], [0,y_limit]). | |
| 604 // See GetEdgeCoords above for a description of x_coords, y_coords. | |
| 605 static void SegmentCoords(const FCOORD &pt1, const FCOORD &pt2, int x_limit, int y_limit, | |
| 606 std::vector<std::vector<int>> *x_coords, | |
| 607 std::vector<std::vector<int>> *y_coords) { | |
| 608 FCOORD step(pt2); | |
| 609 step -= pt1; | |
| 610 int start = ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit); | |
| 611 int end = ClipToRange(IntCastRounded(std::max(pt1.x(), pt2.x())), 0, x_limit); | |
| 612 for (int x = start; x < end; ++x) { | |
| 613 int y = IntCastRounded(pt1.y() + step.y() * (x + 0.5 - pt1.x()) / step.x()); | |
| 614 (*y_coords)[x].push_back(y); | |
| 615 } | |
| 616 start = ClipToRange(IntCastRounded(std::min(pt1.y(), pt2.y())), 0, y_limit); | |
| 617 end = ClipToRange(IntCastRounded(std::max(pt1.y(), pt2.y())), 0, y_limit); | |
| 618 for (int y = start; y < end; ++y) { | |
| 619 int x = IntCastRounded(pt1.x() + step.x() * (y + 0.5 - pt1.y()) / step.y()); | |
| 620 (*x_coords)[y].push_back(x); | |
| 621 } | |
| 622 } | |
| 623 | |
| 624 // Adds any edges from a single segment of outline between pt1 and pt2 to | |
| 625 // the bbox such that it guarantees to contain anything produced by | |
| 626 // SegmentCoords. | |
| 627 static void SegmentBBox(const FCOORD &pt1, const FCOORD &pt2, TBOX *bbox) { | |
| 628 FCOORD step(pt2); | |
| 629 step -= pt1; | |
| 630 int x1 = IntCastRounded(std::min(pt1.x(), pt2.x())); | |
| 631 int x2 = IntCastRounded(std::max(pt1.x(), pt2.x())); | |
| 632 if (x2 > x1) { | |
| 633 int y1 = IntCastRounded(pt1.y() + step.y() * (x1 + 0.5 - pt1.x()) / step.x()); | |
| 634 int y2 = IntCastRounded(pt1.y() + step.y() * (x2 - 0.5 - pt1.x()) / step.x()); | |
| 635 TBOX point(x1, std::min(y1, y2), x2, std::max(y1, y2)); | |
| 636 *bbox += point; | |
| 637 } | |
| 638 int y1 = IntCastRounded(std::min(pt1.y(), pt2.y())); | |
| 639 int y2 = IntCastRounded(std::max(pt1.y(), pt2.y())); | |
| 640 if (y2 > y1) { | |
| 641 int x1 = IntCastRounded(pt1.x() + step.x() * (y1 + 0.5 - pt1.y()) / step.y()); | |
| 642 int x2 = IntCastRounded(pt1.x() + step.x() * (y2 - 0.5 - pt1.y()) / step.y()); | |
| 643 TBOX point(std::min(x1, x2), y1, std::max(x1, x2), y2); | |
| 644 *bbox += point; | |
| 645 } | |
| 646 } | |
| 647 | |
| 648 // Collects edges into the given bounding box, LLSQ accumulator and/or x_coords, | |
| 649 // y_coords vectors. | |
| 650 // For a description of x_coords/y_coords, see GetEdgeCoords above. | |
| 651 // Startpt to lastpt, inclusive, MUST have the same src_outline member, | |
| 652 // which may be nullptr. The vector from lastpt to its next is included in | |
| 653 // the accumulation. Hidden edges should be excluded by the caller. | |
| 654 // The input denorm should be the normalizations that have been applied from | |
| 655 // the image to the current state of the TBLOB from which startpt, lastpt come. | |
| 656 // box is the bounding box of the blob from which the EDGEPTs are taken and | |
| 657 // indices into x_coords, y_coords are offset by box.botleft(). | |
| 658 static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm, | |
| 659 const TBOX &box, TBOX *bounding_box, LLSQ *accumulator, | |
| 660 std::vector<std::vector<int>> *x_coords, | |
| 661 std::vector<std::vector<int>> *y_coords) { | |
| 662 const C_OUTLINE *outline = startpt->src_outline; | |
| 663 int x_limit = box.width() - 1; | |
| 664 int y_limit = box.height() - 1; | |
| 665 if (outline != nullptr) { | |
| 666 // Use higher-resolution edge points stored on the outline. | |
| 667 // The outline coordinates may not match the binary image because of the | |
| 668 // rotation for vertical text lines, but the root_denorm IS the matching | |
| 669 // start of the DENORM chain. | |
| 670 const DENORM *root_denorm = denorm.RootDenorm(); | |
| 671 int step_length = outline->pathlength(); | |
| 672 int start_index = startpt->start_step; | |
| 673 // Note that if this run straddles the wrap-around point of the outline, | |
| 674 // that lastpt->start_step may have a lower index than startpt->start_step, | |
| 675 // and we want to use an end_index that allows us to use a positive | |
| 676 // increment, so we add step_length if necessary, but that may be beyond the | |
| 677 // bounds of the outline steps/ due to wrap-around, so we use % step_length | |
| 678 // everywhere, except for start_index. | |
| 679 int end_index = lastpt->start_step + lastpt->step_count; | |
| 680 if (end_index <= start_index) { | |
| 681 end_index += step_length; | |
| 682 } | |
| 683 // pos is the integer coordinates of the binary image steps. | |
| 684 ICOORD pos = outline->position_at_index(start_index); | |
| 685 FCOORD origin(box.left(), box.bottom()); | |
| 686 // f_pos is a floating-point version of pos that offers improved edge | |
| 687 // positioning using greyscale information or smoothing of edge steps. | |
| 688 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index); | |
| 689 // pos_normed is f_pos after the appropriate normalization, and relative | |
| 690 // to origin. | |
| 691 // prev_normed is the previous value of pos_normed. | |
| 692 FCOORD prev_normed; | |
| 693 denorm.NormTransform(root_denorm, f_pos, &prev_normed); | |
| 694 prev_normed -= origin; | |
| 695 for (int index = start_index; index < end_index; ++index) { | |
| 696 ICOORD step = outline->step(index % step_length); | |
| 697 // Only use the point if its edge strength is positive. This excludes | |
| 698 // points that don't provide useful information, eg | |
| 699 // ___________ | |
| 700 // |___________ | |
| 701 // The vertical step provides only noisy, damaging information, as even | |
| 702 // with a greyscale image, the positioning of the edge there may be a | |
| 703 // fictitious extrapolation, so previous processing has eliminated it. | |
| 704 if (outline->edge_strength_at_index(index % step_length) > 0) { | |
| 705 FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, index % step_length); | |
| 706 FCOORD pos_normed; | |
| 707 denorm.NormTransform(root_denorm, f_pos, &pos_normed); | |
| 708 pos_normed -= origin; | |
| 709 // Accumulate the information that is selected by the caller. | |
| 710 if (bounding_box != nullptr) { | |
| 711 SegmentBBox(pos_normed, prev_normed, bounding_box); | |
| 712 } | |
| 713 if (accumulator != nullptr) { | |
| 714 SegmentLLSQ(pos_normed, prev_normed, accumulator); | |
| 715 } | |
| 716 if (x_coords != nullptr && y_coords != nullptr) { | |
| 717 SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, y_coords); | |
| 718 } | |
| 719 prev_normed = pos_normed; | |
| 720 } | |
| 721 pos += step; | |
| 722 } | |
| 723 } else { | |
| 724 // There is no outline, so we are forced to use the polygonal approximation. | |
| 725 const EDGEPT *endpt = lastpt->next; | |
| 726 const EDGEPT *pt = startpt; | |
| 727 do { | |
| 728 FCOORD next_pos(pt->next->pos.x - box.left(), pt->next->pos.y - box.bottom()); | |
| 729 FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom()); | |
| 730 if (bounding_box != nullptr) { | |
| 731 SegmentBBox(next_pos, pos, bounding_box); | |
| 732 } | |
| 733 if (accumulator != nullptr) { | |
| 734 SegmentLLSQ(next_pos, pos, accumulator); | |
| 735 } | |
| 736 if (x_coords != nullptr && y_coords != nullptr) { | |
| 737 SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords); | |
| 738 } | |
| 739 } while ((pt = pt->next) != endpt); | |
| 740 } | |
| 741 } | |
| 742 | |
| 743 // For all the edge steps in all the outlines, or polygonal approximation | |
| 744 // where there are no edge steps, collects the steps into the bounding_box, | |
| 745 // llsq and/or the x_coords/y_coords. Both are used in different kinds of | |
| 746 // normalization. | |
| 747 // For a description of x_coords, y_coords, see GetEdgeCoords above. | |
| 748 void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq, | |
| 749 std::vector<std::vector<int>> *x_coords, | |
| 750 std::vector<std::vector<int>> *y_coords) const { | |
| 751 // Iterate the outlines. | |
| 752 for (const TESSLINE *ol = outlines; ol != nullptr; ol = ol->next) { | |
| 753 // Iterate the polygon. | |
| 754 EDGEPT *loop_pt = ol->FindBestStartPt(); | |
| 755 EDGEPT *pt = loop_pt; | |
| 756 if (pt == nullptr) { | |
| 757 continue; | |
| 758 } | |
| 759 do { | |
| 760 if (pt->IsHidden()) { | |
| 761 continue; | |
| 762 } | |
| 763 // Find a run of equal src_outline. | |
| 764 EDGEPT *last_pt = pt; | |
| 765 do { | |
| 766 last_pt = last_pt->next; | |
| 767 } while (last_pt != loop_pt && !last_pt->IsHidden() && | |
| 768 last_pt->src_outline == pt->src_outline); | |
| 769 last_pt = last_pt->prev; | |
| 770 CollectEdgesOfRun(pt, last_pt, denorm_, box, bounding_box, llsq, x_coords, y_coords); | |
| 771 pt = last_pt; | |
| 772 } while ((pt = pt->next) != loop_pt); | |
| 773 } | |
| 774 } | |
| 775 | |
| 776 // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal | |
| 777 // approximation along the way. | |
| 778 TWERD *TWERD::PolygonalCopy(bool allow_detailed_fx, WERD *src) { | |
| 779 auto *tessword = new TWERD; | |
| 780 tessword->latin_script = src->flag(W_SCRIPT_IS_LATIN); | |
| 781 C_BLOB_IT b_it(src->cblob_list()); | |
| 782 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { | |
| 783 C_BLOB *blob = b_it.data(); | |
| 784 TBLOB *tblob = TBLOB::PolygonalCopy(allow_detailed_fx, blob); | |
| 785 tessword->blobs.push_back(tblob); | |
| 786 } | |
| 787 return tessword; | |
| 788 } | |
| 789 | |
| 790 // Baseline normalizes the blobs in-place, recording the normalization in the | |
| 791 // DENORMs in the blobs. | |
| 792 void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height, | |
| 793 float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, | |
| 794 const TBOX *norm_box, DENORM *word_denorm) { | |
| 795 TBOX word_box = bounding_box(); | |
| 796 if (norm_box != nullptr) { | |
| 797 word_box = *norm_box; | |
| 798 } | |
| 799 float word_middle = (word_box.left() + word_box.right()) / 2.0f; | |
| 800 float input_y_offset = 0.0f; | |
| 801 auto final_y_offset = static_cast<float>(kBlnBaselineOffset); | |
| 802 float scale = kBlnXHeight / x_height; | |
| 803 if (row == nullptr) { | |
| 804 word_middle = word_box.left(); | |
| 805 input_y_offset = word_box.bottom(); | |
| 806 final_y_offset = 0.0f; | |
| 807 } else { | |
| 808 input_y_offset = row->base_line(word_middle) + baseline_shift; | |
| 809 } | |
| 810 for (auto blob : blobs) { | |
| 811 TBOX blob_box = blob->bounding_box(); | |
| 812 float mid_x = (blob_box.left() + blob_box.right()) / 2.0f; | |
| 813 float baseline = input_y_offset; | |
| 814 float blob_scale = scale; | |
| 815 if (numeric_mode) { | |
| 816 baseline = blob_box.bottom(); | |
| 817 blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), scale, scale * 1.5f); | |
| 818 } else if (row != nullptr) { | |
| 819 baseline = row->base_line(mid_x) + baseline_shift; | |
| 820 } | |
| 821 // The image will be 8-bit grey if the input was grey or color. Note that in | |
| 822 // a grey image 0 is black and 255 is white. If the input was binary, then | |
| 823 // the pix will be binary and 0 is white, with 1 being black. | |
| 824 // To tell the difference pixGetDepth() will return 8 or 1. | |
| 825 // The inverse flag will be true iff the word has been determined to be | |
| 826 // white on black, and is independent of whether the pix is 8 bit or 1 bit. | |
| 827 blob->Normalize(block, nullptr, nullptr, word_middle, baseline, blob_scale, blob_scale, 0.0f, | |
| 828 final_y_offset, inverse, pix); | |
| 829 } | |
| 830 if (word_denorm != nullptr) { | |
| 831 word_denorm->SetupNormalization(block, nullptr, nullptr, word_middle, input_y_offset, scale, | |
| 832 scale, 0.0f, final_y_offset); | |
| 833 word_denorm->set_inverse(inverse); | |
| 834 word_denorm->set_pix(pix); | |
| 835 } | |
| 836 } | |
| 837 | |
| 838 // Copies the data and the blobs, but leaves next untouched. | |
| 839 void TWERD::CopyFrom(const TWERD &src) { | |
| 840 Clear(); | |
| 841 latin_script = src.latin_script; | |
| 842 for (auto blob : src.blobs) { | |
| 843 auto *new_blob = new TBLOB(*blob); | |
| 844 blobs.push_back(new_blob); | |
| 845 } | |
| 846 } | |
| 847 | |
| 848 // Deletes owned data. | |
| 849 void TWERD::Clear() { | |
| 850 for (auto blob : blobs) { | |
| 851 delete blob; | |
| 852 } | |
| 853 blobs.clear(); | |
| 854 } | |
| 855 | |
| 856 // Recomputes the bounding boxes of the blobs. | |
| 857 void TWERD::ComputeBoundingBoxes() { | |
| 858 for (auto &blob : blobs) { | |
| 859 blob->ComputeBoundingBoxes(); | |
| 860 } | |
| 861 } | |
| 862 | |
| 863 TBOX TWERD::bounding_box() const { | |
| 864 TBOX result; | |
| 865 for (auto blob : blobs) { | |
| 866 TBOX box = blob->bounding_box(); | |
| 867 result += box; | |
| 868 } | |
| 869 return result; | |
| 870 } | |
| 871 | |
| 872 // Merges the blobs from start to end, not including end, and deletes | |
| 873 // the blobs between start and end. | |
| 874 void TWERD::MergeBlobs(unsigned start, unsigned end) { | |
| 875 if (end > blobs.size()) { | |
| 876 end = blobs.size(); | |
| 877 } | |
| 878 if (start >= end) { | |
| 879 return; // Nothing to do. | |
| 880 } | |
| 881 TESSLINE *outline = blobs[start]->outlines; | |
| 882 for (auto i = start + 1; i < end; ++i) { | |
| 883 TBLOB *next_blob = blobs[i]; | |
| 884 // Take the outlines from the next blob. | |
| 885 if (outline == nullptr) { | |
| 886 blobs[start]->outlines = next_blob->outlines; | |
| 887 outline = blobs[start]->outlines; | |
| 888 } else { | |
| 889 while (outline->next != nullptr) { | |
| 890 outline = outline->next; | |
| 891 } | |
| 892 outline->next = next_blob->outlines; | |
| 893 next_blob->outlines = nullptr; | |
| 894 } | |
| 895 // Delete the next blob and move on. | |
| 896 delete next_blob; | |
| 897 blobs[i] = nullptr; | |
| 898 } | |
| 899 // Remove dead blobs from the vector. | |
| 900 // TODO: optimize. | |
| 901 for (auto i = start + 1; i < end && start + 1 < blobs.size(); ++i) { | |
| 902 blobs.erase(blobs.begin() + start + 1); | |
| 903 } | |
| 904 } | |
| 905 | |
| 906 #ifndef GRAPHICS_DISABLED | |
| 907 void TWERD::plot(ScrollView *window) { | |
| 908 ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); | |
| 909 for (auto &blob : blobs) { | |
| 910 blob->plot(window, color, ScrollView::BROWN); | |
| 911 color = WERD::NextColor(color); | |
| 912 } | |
| 913 } | |
| 914 #endif // !GRAPHICS_DISABLED | |
| 915 | |
| 916 /********************************************************************** | |
| 917 * divisible_blob | |
| 918 * | |
| 919 * Returns true if the blob contains multiple outlines than can be | |
| 920 * separated using divide_blobs. Sets the location to be used in the | |
| 921 * call to divide_blobs. | |
| 922 **********************************************************************/ | |
| 923 bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location) { | |
| 924 if (blob->outlines == nullptr || blob->outlines->next == nullptr) { | |
| 925 return false; // Need at least 2 outlines for it to be possible. | |
| 926 } | |
| 927 int max_gap = 0; | |
| 928 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; | |
| 929 for (TESSLINE *outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) { | |
| 930 if (outline1->is_hole) { | |
| 931 continue; // Holes do not count as separable. | |
| 932 } | |
| 933 TPOINT mid_pt1((outline1->topleft.x + outline1->botright.x) / 2, | |
| 934 (outline1->topleft.y + outline1->botright.y) / 2); | |
| 935 int mid_prod1 = mid_pt1.cross(vertical); | |
| 936 int min_prod1, max_prod1; | |
| 937 outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1); | |
| 938 for (TESSLINE *outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) { | |
| 939 if (outline2->is_hole) { | |
| 940 continue; // Holes do not count as separable. | |
| 941 } | |
| 942 TPOINT mid_pt2((outline2->topleft.x + outline2->botright.x) / 2, | |
| 943 (outline2->topleft.y + outline2->botright.y) / 2); | |
| 944 int mid_prod2 = mid_pt2.cross(vertical); | |
| 945 int min_prod2, max_prod2; | |
| 946 outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2); | |
| 947 int mid_gap = abs(mid_prod2 - mid_prod1); | |
| 948 int overlap = std::min(max_prod1, max_prod2) - std::max(min_prod1, min_prod2); | |
| 949 if (mid_gap - overlap / 4 > max_gap) { | |
| 950 max_gap = mid_gap - overlap / 4; | |
| 951 *location = mid_pt1; | |
| 952 *location += mid_pt2; | |
| 953 *location /= 2; | |
| 954 } | |
| 955 } | |
| 956 } | |
| 957 // Use the y component of the vertical vector as an approximation to its | |
| 958 // length. | |
| 959 return max_gap > vertical.y; | |
| 960 } | |
| 961 | |
| 962 /********************************************************************** | |
| 963 * divide_blobs | |
| 964 * | |
| 965 * Create two blobs by grouping the outlines in the appropriate blob. | |
| 966 * The outlines that are beyond the location point are moved to the | |
| 967 * other blob. The ones whose x location is less than that point are | |
| 968 * retained in the original blob. | |
| 969 **********************************************************************/ | |
| 970 void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location) { | |
| 971 TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright; | |
| 972 TESSLINE *outline1 = nullptr; | |
| 973 TESSLINE *outline2 = nullptr; | |
| 974 | |
| 975 TESSLINE *outline = blob->outlines; | |
| 976 blob->outlines = nullptr; | |
| 977 int location_prod = location.cross(vertical); | |
| 978 | |
| 979 while (outline != nullptr) { | |
| 980 TPOINT mid_pt((outline->topleft.x + outline->botright.x) / 2, | |
| 981 (outline->topleft.y + outline->botright.y) / 2); | |
| 982 int mid_prod = mid_pt.cross(vertical); | |
| 983 if (mid_prod < location_prod) { | |
| 984 // Outline is in left blob. | |
| 985 if (outline1) { | |
| 986 outline1->next = outline; | |
| 987 } else { | |
| 988 blob->outlines = outline; | |
| 989 } | |
| 990 outline1 = outline; | |
| 991 } else { | |
| 992 // Outline is in right blob. | |
| 993 if (outline2) { | |
| 994 outline2->next = outline; | |
| 995 } else { | |
| 996 other_blob->outlines = outline; | |
| 997 } | |
| 998 outline2 = outline; | |
| 999 } | |
| 1000 outline = outline->next; | |
| 1001 } | |
| 1002 | |
| 1003 if (outline1) { | |
| 1004 outline1->next = nullptr; | |
| 1005 } | |
| 1006 if (outline2) { | |
| 1007 outline2->next = nullptr; | |
| 1008 } | |
| 1009 } | |
| 1010 | |
| 1011 } // namespace tesseract |
