Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/colpartitiongrid.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: colpartitiongrid.cpp | |
| 3 // Description: Class collecting code that acts on a BBGrid of ColPartitions. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2009, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifdef HAVE_CONFIG_H | |
| 20 # include "config_auto.h" | |
| 21 #endif | |
| 22 | |
| 23 #include "colpartitiongrid.h" | |
| 24 #include "colpartitionset.h" | |
| 25 #include "imagefind.h" | |
| 26 | |
| 27 #include <algorithm> | |
| 28 #include <utility> | |
| 29 | |
| 30 namespace tesseract { | |
| 31 | |
| 32 // Max pad factor used to search the neighbourhood of a partition to smooth | |
| 33 // partition types. | |
| 34 const int kMaxPadFactor = 6; | |
| 35 // Max multiple of size (min(height, width)) for the distance of the nearest | |
| 36 // neighbour for the change of type to be used. | |
| 37 const int kMaxNeighbourDistFactor = 4; | |
| 38 // Maximum number of lines in a credible figure caption. | |
| 39 const int kMaxCaptionLines = 7; | |
| 40 // Min ratio between biggest and smallest gap to bound a caption. | |
| 41 const double kMinCaptionGapRatio = 2.0; | |
| 42 // Min ratio between biggest gap and mean line height to bound a caption. | |
| 43 const double kMinCaptionGapHeightRatio = 0.5; | |
| 44 // Min fraction of ColPartition height to be overlapping for margin purposes. | |
| 45 const double kMarginOverlapFraction = 0.25; | |
| 46 // Size ratio required to consider an unmerged overlapping partition to be big. | |
| 47 const double kBigPartSizeRatio = 1.75; | |
| 48 // Fraction of gridsize to allow arbitrary overlap between partitions. | |
| 49 const double kTinyEnoughTextlineOverlapFraction = 0.25; | |
| 50 // Max vertical distance of neighbouring ColPartition as a multiple of | |
| 51 // partition height for it to be a partner. | |
| 52 // TODO(rays) fix the problem that causes a larger number to not work well. | |
| 53 // The value needs to be larger as sparse text blocks in a page that gets | |
| 54 // marked as single column will not find adjacent lines as partners, and | |
| 55 // will merge horizontally distant, but aligned lines. See rep.4B3 p5. | |
| 56 // The value needs to be small because double-spaced legal docs written | |
| 57 // in a single column, but justified courier have widely spaced lines | |
| 58 // that need to get merged before they partner-up with the lines above | |
| 59 // and below. See legal.3B5 p13/17. Neither of these should depend on | |
| 60 // the value of kMaxPartitionSpacing to be successful, and ColPartition | |
| 61 // merging needs attention to fix this problem. | |
| 62 const double kMaxPartitionSpacing = 1.75; | |
| 63 // Margin by which text has to beat image or vice-versa to make a firm | |
| 64 // decision in GridSmoothNeighbour. | |
| 65 const int kSmoothDecisionMargin = 4; | |
| 66 | |
| 67 ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft, | |
| 68 const ICOORD &tright) | |
| 69 : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>( | |
| 70 gridsize, bleft, tright) {} | |
| 71 | |
| 72 // Handles a click event in a display window. | |
| 73 void ColPartitionGrid::HandleClick(int x, int y) { | |
| 74 BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, | |
| 75 y); | |
| 76 // Run a radial search for partitions that overlap. | |
| 77 ColPartitionGridSearch radsearch(this); | |
| 78 radsearch.SetUniqueMode(true); | |
| 79 radsearch.StartRadSearch(x, y, 1); | |
| 80 ColPartition *neighbour; | |
| 81 FCOORD click(x, y); | |
| 82 while ((neighbour = radsearch.NextRadSearch()) != nullptr) { | |
| 83 const TBOX &nbox = neighbour->bounding_box(); | |
| 84 if (nbox.contains(click)) { | |
| 85 tprintf("Block box:"); | |
| 86 neighbour->bounding_box().print(); | |
| 87 neighbour->Print(); | |
| 88 } | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 // Merges ColPartitions in the grid that look like they belong in the same | |
| 93 // textline. | |
| 94 // For all partitions in the grid, calls the box_cb permanent callback | |
| 95 // to compute the search box, searches the box, and if a candidate is found, | |
| 96 // calls the confirm_cb to check any more rules. If the confirm_cb returns | |
| 97 // true, then the partitions are merged. | |
| 98 // Both callbacks are deleted before returning. | |
| 99 void ColPartitionGrid::Merges( | |
| 100 const std::function<bool(ColPartition *, TBOX *)> &box_cb, | |
| 101 const std::function<bool(const ColPartition *, const ColPartition *)> | |
| 102 &confirm_cb) { | |
| 103 // Iterate the ColPartitions in the grid. | |
| 104 ColPartitionGridSearch gsearch(this); | |
| 105 gsearch.StartFullSearch(); | |
| 106 ColPartition *part; | |
| 107 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 108 if (MergePart(box_cb, confirm_cb, part)) { | |
| 109 gsearch.RepositionIterator(); | |
| 110 } | |
| 111 } | |
| 112 } | |
| 113 | |
| 114 // For the given partition, calls the box_cb permanent callback | |
| 115 // to compute the search box, searches the box, and if a candidate is found, | |
| 116 // calls the confirm_cb to check any more rules. If the confirm_cb returns | |
| 117 // true, then the partitions are merged. | |
| 118 // Returns true if the partition is consumed by one or more merges. | |
| 119 bool ColPartitionGrid::MergePart( | |
| 120 const std::function<bool(ColPartition *, TBOX *)> &box_cb, | |
| 121 const std::function<bool(const ColPartition *, const ColPartition *)> | |
| 122 &confirm_cb, | |
| 123 ColPartition *part) { | |
| 124 if (part->IsUnMergeableType()) { | |
| 125 return false; | |
| 126 } | |
| 127 bool any_done = false; | |
| 128 // Repeatedly merge part while we find a best merge candidate that works. | |
| 129 bool merge_done = false; | |
| 130 do { | |
| 131 merge_done = false; | |
| 132 TBOX box = part->bounding_box(); | |
| 133 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); | |
| 134 if (debug) { | |
| 135 tprintf("Merge candidate:"); | |
| 136 box.print(); | |
| 137 } | |
| 138 // Set up a rectangle search bounded by the part. | |
| 139 if (!box_cb(part, &box)) { | |
| 140 continue; | |
| 141 } | |
| 142 // Create a list of merge candidates. | |
| 143 ColPartition_CLIST merge_candidates; | |
| 144 FindMergeCandidates(part, box, debug, &merge_candidates); | |
| 145 // Find the best merge candidate based on minimal overlap increase. | |
| 146 int overlap_increase; | |
| 147 ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug, | |
| 148 confirm_cb, &overlap_increase); | |
| 149 if (neighbour != nullptr && overlap_increase <= 0) { | |
| 150 if (debug) { | |
| 151 tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", | |
| 152 part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), | |
| 153 overlap_increase); | |
| 154 } | |
| 155 // Looks like a good candidate so merge it. | |
| 156 RemoveBBox(neighbour); | |
| 157 // We will modify the box of part, so remove it from the grid, merge | |
| 158 // it and then re-insert it into the grid. | |
| 159 RemoveBBox(part); | |
| 160 part->Absorb(neighbour, nullptr); | |
| 161 InsertBBox(true, true, part); | |
| 162 merge_done = true; | |
| 163 any_done = true; | |
| 164 } else if (neighbour != nullptr) { | |
| 165 if (debug) { | |
| 166 tprintf("Overlapped when merged with increase %d: ", overlap_increase); | |
| 167 neighbour->bounding_box().print(); | |
| 168 } | |
| 169 } else if (debug) { | |
| 170 tprintf("No candidate neighbour returned\n"); | |
| 171 } | |
| 172 } while (merge_done); | |
| 173 return any_done; | |
| 174 } | |
| 175 | |
| 176 // Returns true if the given part and merge candidate might believably | |
| 177 // be part of a single text line according to the default rules. | |
| 178 // In general we only want to merge partitions that look like they | |
| 179 // are on the same text line, ie their median limits overlap, but we have | |
| 180 // to make exceptions for diacritics and stray punctuation. | |
| 181 static bool OKMergeCandidate(const ColPartition *part, | |
| 182 const ColPartition *candidate, bool debug) { | |
| 183 const TBOX &part_box = part->bounding_box(); | |
| 184 if (candidate == part) { | |
| 185 return false; // Ignore itself. | |
| 186 } | |
| 187 if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) { | |
| 188 return false; // Don't mix inappropriate types. | |
| 189 } | |
| 190 | |
| 191 const TBOX &c_box = candidate->bounding_box(); | |
| 192 if (debug) { | |
| 193 tprintf("Examining merge candidate:"); | |
| 194 c_box.print(); | |
| 195 } | |
| 196 // Candidates must be within a reasonable distance. | |
| 197 if (candidate->IsVerticalType() || part->IsVerticalType()) { | |
| 198 int h_dist = -part->HCoreOverlap(*candidate); | |
| 199 if (h_dist >= std::max(part_box.width(), c_box.width()) / 2) { | |
| 200 if (debug) { | |
| 201 tprintf("Too far away: h_dist = %d\n", h_dist); | |
| 202 } | |
| 203 return false; | |
| 204 } | |
| 205 } else { | |
| 206 // Coarse filter by vertical distance between partitions. | |
| 207 int v_dist = -part->VCoreOverlap(*candidate); | |
| 208 if (v_dist >= std::max(part_box.height(), c_box.height()) / 2) { | |
| 209 if (debug) { | |
| 210 tprintf("Too far away: v_dist = %d\n", v_dist); | |
| 211 } | |
| 212 return false; | |
| 213 } | |
| 214 // Candidates must either overlap in median y, | |
| 215 // or part or candidate must be an acceptable diacritic. | |
| 216 if (!part->VSignificantCoreOverlap(*candidate) && | |
| 217 !part->OKDiacriticMerge(*candidate, debug) && | |
| 218 !candidate->OKDiacriticMerge(*part, debug)) { | |
| 219 if (debug) { | |
| 220 tprintf("Candidate fails overlap and diacritic tests!\n"); | |
| 221 } | |
| 222 return false; | |
| 223 } | |
| 224 } | |
| 225 return true; | |
| 226 } | |
| 227 | |
| 228 // Helper function to compute the increase in overlap of the parts list of | |
| 229 // Colpartitions with the combination of merge1 and merge2, compared to | |
| 230 // the overlap with them uncombined. | |
| 231 // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap | |
| 232 // as the pixel overlap limit. merge1 and merge2 must both be non-nullptr. | |
| 233 static int IncreaseInOverlap(const ColPartition *merge1, | |
| 234 const ColPartition *merge2, int ok_overlap, | |
| 235 ColPartition_CLIST *parts) { | |
| 236 ASSERT_HOST(merge1 != nullptr && merge2 != nullptr); | |
| 237 int total_area = 0; | |
| 238 ColPartition_C_IT it(parts); | |
| 239 TBOX merged_box(merge1->bounding_box()); | |
| 240 merged_box += merge2->bounding_box(); | |
| 241 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 242 ColPartition *part = it.data(); | |
| 243 if (part == merge1 || part == merge2) { | |
| 244 continue; | |
| 245 } | |
| 246 TBOX part_box = part->bounding_box(); | |
| 247 // Compute the overlap of the merged box with part. | |
| 248 int overlap_area = part_box.intersection(merged_box).area(); | |
| 249 if (overlap_area > 0 && | |
| 250 !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) { | |
| 251 total_area += overlap_area; | |
| 252 // Subtract the overlap of merge1 and merge2 individually. | |
| 253 overlap_area = part_box.intersection(merge1->bounding_box()).area(); | |
| 254 if (overlap_area > 0) { | |
| 255 total_area -= overlap_area; | |
| 256 } | |
| 257 TBOX intersection_box = part_box.intersection(merge2->bounding_box()); | |
| 258 overlap_area = intersection_box.area(); | |
| 259 if (overlap_area > 0) { | |
| 260 total_area -= overlap_area; | |
| 261 // Add back the 3-way area. | |
| 262 intersection_box &= merge1->bounding_box(); // In-place intersection. | |
| 263 overlap_area = intersection_box.area(); | |
| 264 if (overlap_area > 0) { | |
| 265 total_area += overlap_area; | |
| 266 } | |
| 267 } | |
| 268 } | |
| 269 } | |
| 270 return total_area; | |
| 271 } | |
| 272 | |
| 273 // Helper function to test that each partition in candidates is either a | |
| 274 // good diacritic merge with part or an OK merge candidate with all others | |
| 275 // in the candidates list. | |
| 276 // ASCII Art Scenario: | |
| 277 // We sometimes get text such as "join-this" where the - is actually a long | |
| 278 // dash culled from a standard set of extra characters that don't match the | |
| 279 // font of the text. This makes its strokewidth not match and forms a broken | |
| 280 // set of 3 partitions for "join", "-" and "this" and the dash may slightly | |
| 281 // overlap BOTH words. | |
| 282 // ------- ------- | |
| 283 // | ==== | | |
| 284 // ------- ------- | |
| 285 // The standard merge rule: "you can merge 2 partitions as long as there is | |
| 286 // no increase in overlap elsewhere" fails miserably here. Merge any pair | |
| 287 // of partitions and the combined box overlaps more with the third than | |
| 288 // before. To allow the merge, we need to consider whether it is safe to | |
| 289 // merge everything, without merging separate text lines. For that we need | |
| 290 // everything to be an OKMergeCandidate (which is supposed to prevent | |
| 291 // separate text lines merging), but this is hard for diacritics to satisfy, | |
| 292 // so an alternative to being OKMergeCandidate with everything is to be an | |
| 293 // OKDiacriticMerge with part as the base character. | |
| 294 static bool TestCompatibleCandidates(const ColPartition &part, bool debug, | |
| 295 ColPartition_CLIST *candidates) { | |
| 296 ColPartition_C_IT it(candidates); | |
| 297 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 298 ColPartition *candidate = it.data(); | |
| 299 if (!candidate->OKDiacriticMerge(part, false)) { | |
| 300 ColPartition_C_IT it2(it); | |
| 301 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { | |
| 302 ColPartition *candidate2 = it2.data(); | |
| 303 if (candidate2 != candidate && | |
| 304 !OKMergeCandidate(candidate, candidate2, false)) { | |
| 305 if (debug) { | |
| 306 tprintf("NC overlap failed:Candidate:"); | |
| 307 candidate2->bounding_box().print(); | |
| 308 tprintf("fails to be a good merge with:"); | |
| 309 candidate->bounding_box().print(); | |
| 310 } | |
| 311 return false; | |
| 312 } | |
| 313 } | |
| 314 } | |
| 315 } | |
| 316 return true; | |
| 317 } | |
| 318 | |
| 319 // Computes and returns the total overlap of all partitions in the grid. | |
| 320 // If overlap_grid is non-null, it is filled with a grid that holds empty | |
| 321 // partitions representing the union of all overlapped partitions. | |
| 322 int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid **overlap_grid) { | |
| 323 int total_overlap = 0; | |
| 324 // Iterate the ColPartitions in the grid. | |
| 325 ColPartitionGridSearch gsearch(this); | |
| 326 gsearch.StartFullSearch(); | |
| 327 ColPartition *part; | |
| 328 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 329 ColPartition_CLIST neighbors; | |
| 330 const TBOX &part_box = part->bounding_box(); | |
| 331 FindOverlappingPartitions(part_box, part, &neighbors); | |
| 332 ColPartition_C_IT n_it(&neighbors); | |
| 333 bool any_part_overlap = false; | |
| 334 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { | |
| 335 const TBOX &n_box = n_it.data()->bounding_box(); | |
| 336 int overlap = n_box.intersection(part_box).area(); | |
| 337 if (overlap > 0 && overlap_grid != nullptr) { | |
| 338 if (*overlap_grid == nullptr) { | |
| 339 *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright()); | |
| 340 } | |
| 341 (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy()); | |
| 342 if (!any_part_overlap) { | |
| 343 (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy()); | |
| 344 } | |
| 345 } | |
| 346 any_part_overlap = true; | |
| 347 total_overlap += overlap; | |
| 348 } | |
| 349 } | |
| 350 return total_overlap; | |
| 351 } | |
| 352 | |
| 353 // Finds all the ColPartitions in the grid that overlap with the given | |
| 354 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. | |
| 355 // Any partition equal to not_this (may be nullptr) is excluded. | |
| 356 void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, | |
| 357 const ColPartition *not_this, | |
| 358 ColPartition_CLIST *parts) { | |
| 359 ColPartitionGridSearch rsearch(this); | |
| 360 rsearch.StartRectSearch(box); | |
| 361 ColPartition *part; | |
| 362 while ((part = rsearch.NextRectSearch()) != nullptr) { | |
| 363 if (part != not_this) { | |
| 364 parts->add_sorted(SortByBoxLeft<ColPartition>, true, part); | |
| 365 } | |
| 366 } | |
| 367 } | |
| 368 | |
| 369 // Finds and returns the best candidate ColPartition to merge with part, | |
| 370 // selected from the candidates list, based on the minimum increase in | |
| 371 // pairwise overlap among all the partitions overlapped by the combined box. | |
| 372 // If overlap_increase is not nullptr then it returns the increase in overlap | |
| 373 // that would result from the merge. | |
| 374 // confirm_cb is a permanent callback that (if non-null) will be used to | |
| 375 // confirm the validity of a proposed merge candidate before selecting it. | |
| 376 // | |
| 377 // ======HOW MERGING WORKS====== | |
| 378 // The problem: | |
| 379 // We want to merge all the parts of a textline together, but avoid merging | |
| 380 // separate textlines. Diacritics, i dots, punctuation, and broken characters | |
| 381 // are examples of small bits that need merging with the main textline. | |
| 382 // Drop-caps and descenders in one line that touch ascenders in the one below | |
| 383 // are examples of cases where we don't want to merge. | |
| 384 // | |
| 385 // The solution: | |
| 386 // Merges that increase overlap among other partitions are generally bad. | |
| 387 // Those that don't increase overlap (much) and minimize the total area | |
| 388 // seem to be good. | |
| 389 // | |
| 390 // Ascii art example: | |
| 391 // The text: | |
| 392 // groggy descenders | |
| 393 // minimum ascenders | |
| 394 // The boxes: The === represents a small box near or overlapping the lower box. | |
| 395 // ----------------- | |
| 396 // | | | |
| 397 // ----------------- | |
| 398 // -===------------- | |
| 399 // | | | |
| 400 // ----------------- | |
| 401 // In considering what to do with the small === box, we find the 2 larger | |
| 402 // boxes as neighbours and possible merge candidates, but merging with the | |
| 403 // upper box increases overlap with the lower box, whereas merging with the | |
| 404 // lower box does not increase overlap. | |
| 405 // If the small === box didn't overlap either to start with, total area | |
| 406 // would be minimized by merging with the nearer (lower) box. | |
| 407 // | |
| 408 // This is a simple example. In reality, we have to allow some increase | |
| 409 // in overlap, or tightly spaced text would end up in bits. | |
| 410 ColPartition *ColPartitionGrid::BestMergeCandidate( | |
| 411 const ColPartition *part, ColPartition_CLIST *candidates, bool debug, | |
| 412 const std::function<bool(const ColPartition *, const ColPartition *)> | |
| 413 &confirm_cb, | |
| 414 int *overlap_increase) { | |
| 415 if (overlap_increase != nullptr) { | |
| 416 *overlap_increase = 0; | |
| 417 } | |
| 418 if (candidates->empty()) { | |
| 419 return nullptr; | |
| 420 } | |
| 421 int ok_overlap = | |
| 422 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); | |
| 423 // The best neighbour to merge with is the one that causes least | |
| 424 // total pairwise overlap among all the neighbours. | |
| 425 // If more than one offers the same total overlap, choose the one | |
| 426 // with the least total area. | |
| 427 const TBOX &part_box = part->bounding_box(); | |
| 428 ColPartition_C_IT it(candidates); | |
| 429 ColPartition *best_candidate = nullptr; | |
| 430 // Find the total combined box of all candidates and the original. | |
| 431 TBOX full_box(part_box); | |
| 432 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 433 ColPartition *candidate = it.data(); | |
| 434 full_box += candidate->bounding_box(); | |
| 435 } | |
| 436 // Keep valid neighbours in a list. | |
| 437 ColPartition_CLIST neighbours; | |
| 438 // Now run a rect search of the merged box for overlapping neighbours, as | |
| 439 // we need anything that might be overlapped by the merged box. | |
| 440 FindOverlappingPartitions(full_box, part, &neighbours); | |
| 441 if (debug) { | |
| 442 tprintf("Finding best merge candidate from %d, %d neighbours for box:", | |
| 443 candidates->length(), neighbours.length()); | |
| 444 part_box.print(); | |
| 445 } | |
| 446 // If the best increase in overlap is positive, then we also check the | |
| 447 // worst non-candidate overlap. This catches the case of multiple good | |
| 448 // candidates that overlap each other when merged. If the worst | |
| 449 // non-candidate overlap is better than the best overlap, then return | |
| 450 // the worst non-candidate overlap instead. | |
| 451 ColPartition_CLIST non_candidate_neighbours; | |
| 452 non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, | |
| 453 &neighbours, candidates); | |
| 454 int worst_nc_increase = 0; | |
| 455 int best_increase = INT32_MAX; | |
| 456 int best_area = 0; | |
| 457 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 458 ColPartition *candidate = it.data(); | |
| 459 if (confirm_cb != nullptr && !confirm_cb(part, candidate)) { | |
| 460 if (debug) { | |
| 461 tprintf("Candidate not confirmed:"); | |
| 462 candidate->bounding_box().print(); | |
| 463 } | |
| 464 continue; | |
| 465 } | |
| 466 int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); | |
| 467 const TBOX &cand_box = candidate->bounding_box(); | |
| 468 if (best_candidate == nullptr || increase < best_increase) { | |
| 469 best_candidate = candidate; | |
| 470 best_increase = increase; | |
| 471 best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); | |
| 472 if (debug) { | |
| 473 tprintf("New best merge candidate has increase %d, area %d, over box:", | |
| 474 increase, best_area); | |
| 475 full_box.print(); | |
| 476 candidate->Print(); | |
| 477 } | |
| 478 } else if (increase == best_increase) { | |
| 479 int area = cand_box.bounding_union(part_box).area() - cand_box.area(); | |
| 480 if (area < best_area) { | |
| 481 best_area = area; | |
| 482 best_candidate = candidate; | |
| 483 } | |
| 484 } | |
| 485 increase = IncreaseInOverlap(part, candidate, ok_overlap, | |
| 486 &non_candidate_neighbours); | |
| 487 if (increase > worst_nc_increase) { | |
| 488 worst_nc_increase = increase; | |
| 489 } | |
| 490 } | |
| 491 if (best_increase > 0) { | |
| 492 // If the worst non-candidate increase is less than the best increase | |
| 493 // including the candidates, then all the candidates can merge together | |
| 494 // and the increase in outside overlap would be less, so use that result, | |
| 495 // but only if each candidate is either a good diacritic merge with part, | |
| 496 // or an ok merge candidate with all the others. | |
| 497 // See TestCompatibleCandidates for more explanation and a picture. | |
| 498 if (worst_nc_increase < best_increase && | |
| 499 TestCompatibleCandidates(*part, debug, candidates)) { | |
| 500 best_increase = worst_nc_increase; | |
| 501 } | |
| 502 } | |
| 503 if (overlap_increase != nullptr) { | |
| 504 *overlap_increase = best_increase; | |
| 505 } | |
| 506 return best_candidate; | |
| 507 } | |
| 508 | |
| 509 // Helper to remove the given box from the given partition, put it in its | |
| 510 // own partition, and add to the partition list. | |
| 511 static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, | |
| 512 ColPartition_LIST *part_list) { | |
| 513 part->RemoveBox(box); | |
| 514 ColPartition::MakeBigPartition(box, part_list); | |
| 515 } | |
| 516 | |
| 517 // Split partitions where it reduces overlap between their bounding boxes. | |
| 518 // ColPartitions are after all supposed to be a partitioning of the blobs | |
| 519 // AND of the space on the page! | |
| 520 // Blobs that cause overlaps get removed, put in individual partitions | |
| 521 // and added to the big_parts list. They are most likely characters on | |
| 522 // 2 textlines that touch, or something big like a dropcap. | |
| 523 void ColPartitionGrid::SplitOverlappingPartitions( | |
| 524 ColPartition_LIST *big_parts) { | |
| 525 int ok_overlap = | |
| 526 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); | |
| 527 // Iterate the ColPartitions in the grid. | |
| 528 ColPartitionGridSearch gsearch(this); | |
| 529 gsearch.StartFullSearch(); | |
| 530 ColPartition *part; | |
| 531 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 532 // Set up a rectangle search bounded by the part. | |
| 533 const TBOX &box = part->bounding_box(); | |
| 534 ColPartitionGridSearch rsearch(this); | |
| 535 rsearch.SetUniqueMode(true); | |
| 536 rsearch.StartRectSearch(box); | |
| 537 int unresolved_overlaps = 0; | |
| 538 | |
| 539 ColPartition *neighbour; | |
| 540 while ((neighbour = rsearch.NextRectSearch()) != nullptr) { | |
| 541 if (neighbour == part) { | |
| 542 continue; | |
| 543 } | |
| 544 const TBOX &neighbour_box = neighbour->bounding_box(); | |
| 545 if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) && | |
| 546 part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) { | |
| 547 continue; // The overlap is OK both ways. | |
| 548 } | |
| 549 | |
| 550 // If removal of the biggest box from either partition eliminates the | |
| 551 // overlap, and it is much bigger than the box left behind, then | |
| 552 // it is either a drop-cap, an inter-line join, or some junk that | |
| 553 // we don't want anyway, so put it in the big_parts list. | |
| 554 if (!part->IsSingleton()) { | |
| 555 BLOBNBOX *excluded = part->BiggestBox(); | |
| 556 TBOX shrunken = part->BoundsWithoutBox(excluded); | |
| 557 if (!shrunken.overlap(neighbour_box) && | |
| 558 excluded->bounding_box().height() > | |
| 559 kBigPartSizeRatio * shrunken.height()) { | |
| 560 // Removing the biggest box fixes the overlap, so do it! | |
| 561 gsearch.RemoveBBox(); | |
| 562 RemoveBadBox(excluded, part, big_parts); | |
| 563 InsertBBox(true, true, part); | |
| 564 gsearch.RepositionIterator(); | |
| 565 break; | |
| 566 } | |
| 567 } else if (box.contains(neighbour_box)) { | |
| 568 ++unresolved_overlaps; | |
| 569 continue; // No amount of splitting will fix it. | |
| 570 } | |
| 571 if (!neighbour->IsSingleton()) { | |
| 572 BLOBNBOX *excluded = neighbour->BiggestBox(); | |
| 573 TBOX shrunken = neighbour->BoundsWithoutBox(excluded); | |
| 574 if (!shrunken.overlap(box) && | |
| 575 excluded->bounding_box().height() > | |
| 576 kBigPartSizeRatio * shrunken.height()) { | |
| 577 // Removing the biggest box fixes the overlap, so do it! | |
| 578 rsearch.RemoveBBox(); | |
| 579 RemoveBadBox(excluded, neighbour, big_parts); | |
| 580 InsertBBox(true, true, neighbour); | |
| 581 gsearch.RepositionIterator(); | |
| 582 break; | |
| 583 } | |
| 584 } | |
| 585 int part_overlap_count = part->CountOverlappingBoxes(neighbour_box); | |
| 586 int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box); | |
| 587 ColPartition *right_part = nullptr; | |
| 588 if (neighbour_overlap_count <= part_overlap_count || | |
| 589 part->IsSingleton()) { | |
| 590 // Try to split the neighbour to reduce overlap. | |
| 591 BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box); | |
| 592 if (split_blob != nullptr) { | |
| 593 rsearch.RemoveBBox(); | |
| 594 right_part = neighbour->SplitAtBlob(split_blob); | |
| 595 InsertBBox(true, true, neighbour); | |
| 596 ASSERT_HOST(right_part != nullptr); | |
| 597 } | |
| 598 } else { | |
| 599 // Try to split part to reduce overlap. | |
| 600 BLOBNBOX *split_blob = part->OverlapSplitBlob(neighbour_box); | |
| 601 if (split_blob != nullptr) { | |
| 602 gsearch.RemoveBBox(); | |
| 603 right_part = part->SplitAtBlob(split_blob); | |
| 604 InsertBBox(true, true, part); | |
| 605 ASSERT_HOST(right_part != nullptr); | |
| 606 } | |
| 607 } | |
| 608 if (right_part != nullptr) { | |
| 609 InsertBBox(true, true, right_part); | |
| 610 gsearch.RepositionIterator(); | |
| 611 rsearch.RepositionIterator(); | |
| 612 break; | |
| 613 } | |
| 614 } | |
| 615 if (unresolved_overlaps > 2 && part->IsSingleton()) { | |
| 616 // This part is no good so just add to big_parts. | |
| 617 RemoveBBox(part); | |
| 618 ColPartition_IT big_it(big_parts); | |
| 619 part->set_block_owned(true); | |
| 620 big_it.add_to_end(part); | |
| 621 gsearch.RepositionIterator(); | |
| 622 } | |
| 623 } | |
| 624 } | |
| 625 | |
| 626 // Filters partitions of source_type by looking at local neighbours. | |
| 627 // Where a majority of neighbours have a text type, the partitions are | |
| 628 // changed to text, where the neighbours have image type, they are changed | |
| 629 // to image, and partitions that have no definite neighbourhood type are | |
| 630 // left unchanged. | |
| 631 // im_box and rerotation are used to map blob coordinates onto the | |
| 632 // nontext_map, which is used to prevent the spread of text neighbourhoods | |
| 633 // into images. | |
| 634 // Returns true if anything was changed. | |
| 635 bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, | |
| 636 Image nontext_map, | |
| 637 const TBOX &im_box, | |
| 638 const FCOORD &rotation) { | |
| 639 // Iterate the ColPartitions in the grid. | |
| 640 ColPartitionGridSearch gsearch(this); | |
| 641 gsearch.StartFullSearch(); | |
| 642 ColPartition *part; | |
| 643 bool any_changed = false; | |
| 644 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 645 if (part->flow() != source_type || | |
| 646 BLOBNBOX::IsLineType(part->blob_type())) { | |
| 647 continue; | |
| 648 } | |
| 649 const TBOX &box = part->bounding_box(); | |
| 650 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); | |
| 651 if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) { | |
| 652 any_changed = true; | |
| 653 } | |
| 654 } | |
| 655 return any_changed; | |
| 656 } | |
| 657 | |
| 658 // Reflects the grid and its colpartitions in the y-axis, assuming that | |
| 659 // all blob boxes have already been done. | |
| 660 void ColPartitionGrid::ReflectInYAxis() { | |
| 661 ColPartition_LIST parts; | |
| 662 ColPartition_IT part_it(&parts); | |
| 663 // Iterate the ColPartitions in the grid to extract them. | |
| 664 ColPartitionGridSearch gsearch(this); | |
| 665 gsearch.StartFullSearch(); | |
| 666 ColPartition *part; | |
| 667 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 668 part_it.add_after_then_move(part); | |
| 669 } | |
| 670 ICOORD bot_left(-tright().x(), bleft().y()); | |
| 671 ICOORD top_right(-bleft().x(), tright().y()); | |
| 672 // Reinitializing the grid with reflected coords also clears all the | |
| 673 // pointers, so parts will now own the ColPartitions. (Briefly). | |
| 674 Init(gridsize(), bot_left, top_right); | |
| 675 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { | |
| 676 part = part_it.extract(); | |
| 677 part->ReflectInYAxis(); | |
| 678 InsertBBox(true, true, part); | |
| 679 } | |
| 680 } | |
| 681 | |
| 682 // Transforms the grid of partitions to the output blocks, putting each | |
| 683 // partition into a separate block. We don't really care about the order, | |
| 684 // as we just want to get as much text as possible without trying to organize | |
| 685 // it into proper blocks or columns. | |
| 686 // TODO(rays) some kind of sort function would be useful and probably better | |
| 687 // than the default here, which is to sort by order of the grid search. | |
| 688 void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, | |
| 689 TO_BLOCK_LIST *to_blocks) { | |
| 690 TO_BLOCK_IT to_block_it(to_blocks); | |
| 691 BLOCK_IT block_it(blocks); | |
| 692 // All partitions will be put on this list and deleted on return. | |
| 693 ColPartition_LIST parts; | |
| 694 ColPartition_IT part_it(&parts); | |
| 695 // Iterate the ColPartitions in the grid to extract them. | |
| 696 ColPartitionGridSearch gsearch(this); | |
| 697 gsearch.StartFullSearch(); | |
| 698 ColPartition *part; | |
| 699 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 700 part_it.add_after_then_move(part); | |
| 701 // The partition has to be at least vaguely like text. | |
| 702 BlobRegionType blob_type = part->blob_type(); | |
| 703 if (BLOBNBOX::IsTextType(blob_type) || | |
| 704 (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { | |
| 705 PolyBlockType type = | |
| 706 blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT; | |
| 707 // Get metrics from the row that will be used for the block. | |
| 708 TBOX box = part->bounding_box(); | |
| 709 int median_width = part->median_width(); | |
| 710 int median_height = part->median_height(); | |
| 711 // Turn the partition into a TO_ROW. | |
| 712 TO_ROW *row = part->MakeToRow(); | |
| 713 if (row == nullptr) { | |
| 714 // This partition is dead. | |
| 715 part->DeleteBoxes(); | |
| 716 continue; | |
| 717 } | |
| 718 auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), | |
| 719 box.right(), box.top()); | |
| 720 block->pdblk.set_poly_block(new POLY_BLOCK(box, type)); | |
| 721 auto *to_block = new TO_BLOCK(block); | |
| 722 TO_ROW_IT row_it(to_block->get_rows()); | |
| 723 row_it.add_after_then_move(row); | |
| 724 // We haven't differentially rotated vertical and horizontal text at | |
| 725 // this point, so use width or height as appropriate. | |
| 726 if (blob_type == BRT_VERT_TEXT) { | |
| 727 to_block->line_size = static_cast<float>(median_width); | |
| 728 to_block->line_spacing = static_cast<float>(box.width()); | |
| 729 to_block->max_blob_size = static_cast<float>(box.width() + 1); | |
| 730 } else { | |
| 731 to_block->line_size = static_cast<float>(median_height); | |
| 732 to_block->line_spacing = static_cast<float>(box.height()); | |
| 733 to_block->max_blob_size = static_cast<float>(box.height() + 1); | |
| 734 } | |
| 735 if (to_block->line_size == 0) { | |
| 736 to_block->line_size = 1; | |
| 737 } | |
| 738 block_it.add_to_end(block); | |
| 739 to_block_it.add_to_end(to_block); | |
| 740 } else { | |
| 741 // This partition is dead. | |
| 742 part->DeleteBoxes(); | |
| 743 } | |
| 744 } | |
| 745 Clear(); | |
| 746 // Now it is safe to delete the ColPartitions as parts goes out of scope. | |
| 747 } | |
| 748 | |
| 749 // Rotates the grid and its colpartitions by the given angle, assuming that | |
| 750 // all blob boxes have already been done. | |
| 751 void ColPartitionGrid::Deskew(const FCOORD &deskew) { | |
| 752 ColPartition_LIST parts; | |
| 753 ColPartition_IT part_it(&parts); | |
| 754 // Iterate the ColPartitions in the grid to extract them. | |
| 755 ColPartitionGridSearch gsearch(this); | |
| 756 gsearch.StartFullSearch(); | |
| 757 ColPartition *part; | |
| 758 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 759 part_it.add_after_then_move(part); | |
| 760 } | |
| 761 // Rebuild the grid to the new size. | |
| 762 TBOX grid_box(bleft_, tright_); | |
| 763 grid_box.rotate_large(deskew); | |
| 764 Init(gridsize(), grid_box.botleft(), grid_box.topright()); | |
| 765 // Reinitializing the grid with rotated coords also clears all the | |
| 766 // pointers, so parts will now own the ColPartitions. (Briefly). | |
| 767 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { | |
| 768 part = part_it.extract(); | |
| 769 part->ComputeLimits(); | |
| 770 InsertBBox(true, true, part); | |
| 771 } | |
| 772 } | |
| 773 | |
| 774 // Sets the left and right tabs of the partitions in the grid. | |
| 775 void ColPartitionGrid::SetTabStops(TabFind *tabgrid) { | |
| 776 // Iterate the ColPartitions in the grid. | |
| 777 ColPartitionGridSearch gsearch(this); | |
| 778 gsearch.StartFullSearch(); | |
| 779 ColPartition *part; | |
| 780 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 781 const TBOX &part_box = part->bounding_box(); | |
| 782 TabVector *left_line = tabgrid->LeftTabForBox(part_box, true, false); | |
| 783 // If the overlapping line is not a left tab, try for non-overlapping. | |
| 784 if (left_line != nullptr && !left_line->IsLeftTab()) { | |
| 785 left_line = tabgrid->LeftTabForBox(part_box, false, false); | |
| 786 } | |
| 787 if (left_line != nullptr && left_line->IsLeftTab()) { | |
| 788 part->SetLeftTab(left_line); | |
| 789 } | |
| 790 TabVector *right_line = tabgrid->RightTabForBox(part_box, true, false); | |
| 791 if (right_line != nullptr && !right_line->IsRightTab()) { | |
| 792 right_line = tabgrid->RightTabForBox(part_box, false, false); | |
| 793 } | |
| 794 if (right_line != nullptr && right_line->IsRightTab()) { | |
| 795 part->SetRightTab(right_line); | |
| 796 } | |
| 797 part->SetColumnGoodness(tabgrid->WidthCB()); | |
| 798 } | |
| 799 } | |
| 800 | |
| 801 // Makes the ColPartSets and puts them in the PartSetVector ready | |
| 802 // for finding column bounds. Returns false if no partitions were found. | |
| 803 bool ColPartitionGrid::MakeColPartSets(PartSetVector *part_sets) { | |
| 804 auto *part_lists = new ColPartition_LIST[gridheight()]; | |
| 805 part_sets->reserve(gridheight()); | |
| 806 // Iterate the ColPartitions in the grid to get parts onto lists for the | |
| 807 // y bottom of each. | |
| 808 ColPartitionGridSearch gsearch(this); | |
| 809 gsearch.StartFullSearch(); | |
| 810 ColPartition *part; | |
| 811 bool any_parts_found = false; | |
| 812 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 813 BlobRegionType blob_type = part->blob_type(); | |
| 814 if (blob_type != BRT_NOISE && | |
| 815 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { | |
| 816 int grid_x, grid_y; | |
| 817 const TBOX &part_box = part->bounding_box(); | |
| 818 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); | |
| 819 ColPartition_IT part_it(&part_lists[grid_y]); | |
| 820 part_it.add_to_end(part); | |
| 821 any_parts_found = true; | |
| 822 } | |
| 823 } | |
| 824 if (any_parts_found) { | |
| 825 for (int grid_y = 0; grid_y < gridheight(); ++grid_y) { | |
| 826 ColPartitionSet *line_set = nullptr; | |
| 827 if (!part_lists[grid_y].empty()) { | |
| 828 line_set = new ColPartitionSet(&part_lists[grid_y]); | |
| 829 } | |
| 830 part_sets->push_back(line_set); | |
| 831 } | |
| 832 } | |
| 833 delete[] part_lists; | |
| 834 return any_parts_found; | |
| 835 } | |
| 836 | |
| 837 // Makes a single ColPartitionSet consisting of a single ColPartition that | |
| 838 // represents the total horizontal extent of the significant content on the | |
| 839 // page. Used for the single column setting in place of automatic detection. | |
| 840 // Returns nullptr if the page is empty of significant content. | |
| 841 ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) { | |
| 842 ColPartition *single_column_part = nullptr; | |
| 843 // Iterate the ColPartitions in the grid to get parts onto lists for the | |
| 844 // y bottom of each. | |
| 845 ColPartitionGridSearch gsearch(this); | |
| 846 gsearch.StartFullSearch(); | |
| 847 ColPartition *part; | |
| 848 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 849 BlobRegionType blob_type = part->blob_type(); | |
| 850 if (blob_type != BRT_NOISE && | |
| 851 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { | |
| 852 // Consider for single column. | |
| 853 BlobTextFlowType flow = part->flow(); | |
| 854 if ((blob_type == BRT_TEXT && | |
| 855 (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || | |
| 856 flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || | |
| 857 blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { | |
| 858 if (single_column_part == nullptr) { | |
| 859 single_column_part = part->ShallowCopy(); | |
| 860 single_column_part->set_blob_type(BRT_TEXT); | |
| 861 // Copy the tabs from itself to properly setup the margins. | |
| 862 single_column_part->CopyLeftTab(*single_column_part, false); | |
| 863 single_column_part->CopyRightTab(*single_column_part, false); | |
| 864 } else { | |
| 865 if (part->left_key() < single_column_part->left_key()) { | |
| 866 single_column_part->CopyLeftTab(*part, false); | |
| 867 } | |
| 868 if (part->right_key() > single_column_part->right_key()) { | |
| 869 single_column_part->CopyRightTab(*part, false); | |
| 870 } | |
| 871 } | |
| 872 } | |
| 873 } | |
| 874 } | |
| 875 if (single_column_part != nullptr) { | |
| 876 // Make a ColPartitionSet out of the single_column_part as a candidate | |
| 877 // for the single column case. | |
| 878 single_column_part->SetColumnGoodness(cb); | |
| 879 return new ColPartitionSet(single_column_part); | |
| 880 } | |
| 881 return nullptr; | |
| 882 } | |
| 883 | |
| 884 // Mark the BLOBNBOXes in each partition as being owned by that partition. | |
| 885 void ColPartitionGrid::ClaimBoxes() { | |
| 886 // Iterate the ColPartitions in the grid. | |
| 887 ColPartitionGridSearch gsearch(this); | |
| 888 gsearch.StartFullSearch(); | |
| 889 ColPartition *part; | |
| 890 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 891 part->ClaimBoxes(); | |
| 892 } | |
| 893 } | |
| 894 | |
| 895 // Retypes all the blobs referenced by the partitions in the grid. | |
| 896 // Image blobs are found and returned in the im_blobs list, as they are not | |
| 897 // owned by the block. | |
| 898 void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST *im_blobs) { | |
| 899 BLOBNBOX_IT im_blob_it(im_blobs); | |
| 900 ColPartition_LIST dead_parts; | |
| 901 ColPartition_IT dead_part_it(&dead_parts); | |
| 902 // Iterate the ColPartitions in the grid. | |
| 903 ColPartitionGridSearch gsearch(this); | |
| 904 gsearch.StartFullSearch(); | |
| 905 ColPartition *part; | |
| 906 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 907 BlobRegionType blob_type = part->blob_type(); | |
| 908 BlobTextFlowType flow = part->flow(); | |
| 909 bool any_blobs_moved = false; | |
| 910 if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) { | |
| 911 BLOBNBOX_C_IT blob_it(part->boxes()); | |
| 912 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 913 BLOBNBOX *blob = blob_it.data(); | |
| 914 im_blob_it.add_after_then_move(blob); | |
| 915 } | |
| 916 } else if (blob_type != BRT_NOISE) { | |
| 917 // Make sure the blobs are marked with the correct type and flow. | |
| 918 BLOBNBOX_C_IT blob_it(part->boxes()); | |
| 919 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 920 BLOBNBOX *blob = blob_it.data(); | |
| 921 if (blob->region_type() == BRT_NOISE) { | |
| 922 // TODO(rays) Deprecated. Change this section to an assert to verify | |
| 923 // and then delete. | |
| 924 ASSERT_HOST(blob->cblob()->area() != 0); | |
| 925 blob->set_owner(nullptr); | |
| 926 blob_it.extract(); | |
| 927 any_blobs_moved = true; | |
| 928 } else { | |
| 929 blob->set_region_type(blob_type); | |
| 930 if (blob->flow() != BTFT_LEADER) { | |
| 931 blob->set_flow(flow); | |
| 932 } | |
| 933 } | |
| 934 } | |
| 935 } | |
| 936 if (blob_type == BRT_NOISE || part->boxes()->empty()) { | |
| 937 BLOBNBOX_C_IT blob_it(part->boxes()); | |
| 938 part->DisownBoxes(); | |
| 939 dead_part_it.add_to_end(part); | |
| 940 gsearch.RemoveBBox(); | |
| 941 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 942 BLOBNBOX *blob = blob_it.data(); | |
| 943 if (blob->cblob()->area() == 0) { | |
| 944 // Any blob with zero area is a fake image blob and should be deleted. | |
| 945 delete blob->cblob(); | |
| 946 delete blob; | |
| 947 } | |
| 948 } | |
| 949 } else if (any_blobs_moved) { | |
| 950 gsearch.RemoveBBox(); | |
| 951 part->ComputeLimits(); | |
| 952 InsertBBox(true, true, part); | |
| 953 gsearch.RepositionIterator(); | |
| 954 } | |
| 955 } | |
| 956 } | |
| 957 | |
| 958 // The boxes within the partitions have changed (by deskew) so recompute | |
| 959 // the bounds of all the partitions and reinsert them into the grid. | |
| 960 void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft, | |
| 961 const ICOORD &tright, | |
| 962 const ICOORD &vertical) { | |
| 963 ColPartition_LIST saved_parts; | |
| 964 ColPartition_IT part_it(&saved_parts); | |
| 965 // Iterate the ColPartitions in the grid to get parts onto a list. | |
| 966 ColPartitionGridSearch gsearch(this); | |
| 967 gsearch.StartFullSearch(); | |
| 968 ColPartition *part; | |
| 969 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 970 part_it.add_to_end(part); | |
| 971 } | |
| 972 // Reinitialize grid to the new size. | |
| 973 Init(gridsize, bleft, tright); | |
| 974 // Recompute the bounds of the parts and put them back in the new grid. | |
| 975 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { | |
| 976 part = part_it.extract(); | |
| 977 part->set_vertical(vertical); | |
| 978 part->ComputeLimits(); | |
| 979 InsertBBox(true, true, part); | |
| 980 } | |
| 981 } | |
| 982 | |
| 983 // Improves the margins of the ColPartitions in the grid by calling | |
| 984 // FindPartitionMargins on each. | |
| 985 // best_columns, which may be nullptr, is an array of pointers indicating the | |
| 986 // column set at each y-coordinate in the grid. | |
| 987 // best_columns is usually the best_columns_ member of ColumnFinder. | |
| 988 void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) { | |
| 989 // Iterate the ColPartitions in the grid. | |
| 990 ColPartitionGridSearch gsearch(this); | |
| 991 gsearch.StartFullSearch(); | |
| 992 ColPartition *part; | |
| 993 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 994 // Set up a rectangle search x-bounded by the column and y by the part. | |
| 995 ColPartitionSet *columns = | |
| 996 best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr; | |
| 997 FindPartitionMargins(columns, part); | |
| 998 const TBOX &box = part->bounding_box(); | |
| 999 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { | |
| 1000 tprintf("Computed margins for part:"); | |
| 1001 part->Print(); | |
| 1002 } | |
| 1003 } | |
| 1004 } | |
| 1005 | |
| 1006 // Improves the margins of the ColPartitions in the list by calling | |
| 1007 // FindPartitionMargins on each. | |
| 1008 // best_columns, which may be nullptr, is an array of pointers indicating the | |
| 1009 // column set at each y-coordinate in the grid. | |
| 1010 // best_columns is usually the best_columns_ member of ColumnFinder. | |
| 1011 void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns, | |
| 1012 ColPartition_LIST *parts) { | |
| 1013 ColPartition_IT part_it(parts); | |
| 1014 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { | |
| 1015 ColPartition *part = part_it.data(); | |
| 1016 ColPartitionSet *columns = nullptr; | |
| 1017 if (best_columns != nullptr) { | |
| 1018 const TBOX &part_box = part->bounding_box(); | |
| 1019 // Get the columns from the y grid coord. | |
| 1020 int grid_x, grid_y; | |
| 1021 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); | |
| 1022 columns = best_columns[grid_y]; | |
| 1023 } | |
| 1024 FindPartitionMargins(columns, part); | |
| 1025 } | |
| 1026 } | |
| 1027 | |
| 1028 // Deletes all the partitions in the grid after disowning all the blobs. | |
| 1029 void ColPartitionGrid::DeleteParts() { | |
| 1030 ColPartition_LIST dead_parts; | |
| 1031 ColPartition_IT dead_it(&dead_parts); | |
| 1032 ColPartitionGridSearch gsearch(this); | |
| 1033 gsearch.StartFullSearch(); | |
| 1034 ColPartition *part; | |
| 1035 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1036 part->DisownBoxes(); | |
| 1037 dead_it.add_to_end(part); // Parts will be deleted on return. | |
| 1038 } | |
| 1039 Clear(); | |
| 1040 } | |
| 1041 | |
| 1042 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and | |
| 1043 // all the blobs in them. | |
| 1044 void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK *block) { | |
| 1045 ColPartitionGridSearch gsearch(this); | |
| 1046 gsearch.StartFullSearch(); | |
| 1047 ColPartition *part; | |
| 1048 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1049 if (part->blob_type() == BRT_UNKNOWN) { | |
| 1050 gsearch.RemoveBBox(); | |
| 1051 // Once marked, the blobs will be swept up by DeleteUnownedNoise. | |
| 1052 part->set_flow(BTFT_NONTEXT); | |
| 1053 part->set_blob_type(BRT_NOISE); | |
| 1054 part->SetBlobTypes(); | |
| 1055 part->DisownBoxes(); | |
| 1056 delete part; | |
| 1057 } | |
| 1058 } | |
| 1059 block->DeleteUnownedNoise(); | |
| 1060 } | |
| 1061 | |
| 1062 // Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER. | |
| 1063 void ColPartitionGrid::DeleteNonLeaderParts() { | |
| 1064 ColPartitionGridSearch gsearch(this); | |
| 1065 gsearch.StartFullSearch(); | |
| 1066 ColPartition *part; | |
| 1067 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1068 if (part->flow() != BTFT_LEADER) { | |
| 1069 gsearch.RemoveBBox(); | |
| 1070 if (part->ReleaseNonLeaderBoxes()) { | |
| 1071 InsertBBox(true, true, part); | |
| 1072 gsearch.RepositionIterator(); | |
| 1073 } else { | |
| 1074 delete part; | |
| 1075 } | |
| 1076 } | |
| 1077 } | |
| 1078 } | |
| 1079 | |
| 1080 // Finds and marks text partitions that represent figure captions. | |
| 1081 void ColPartitionGrid::FindFigureCaptions() { | |
| 1082 // For each image region find its best candidate text caption region, | |
| 1083 // if any and mark it as such. | |
| 1084 ColPartitionGridSearch gsearch(this); | |
| 1085 gsearch.StartFullSearch(); | |
| 1086 ColPartition *part; | |
| 1087 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1088 if (part->IsImageType()) { | |
| 1089 const TBOX &part_box = part->bounding_box(); | |
| 1090 bool debug = | |
| 1091 AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom()); | |
| 1092 ColPartition *best_caption = nullptr; | |
| 1093 int best_dist = 0; // Distance to best_caption. | |
| 1094 int best_upper = 0; // Direction of best_caption. | |
| 1095 // Handle both lower and upper directions. | |
| 1096 for (int upper = 0; upper < 2; ++upper) { | |
| 1097 ColPartition_C_IT partner_it(upper ? part->upper_partners() | |
| 1098 : part->lower_partners()); | |
| 1099 // If there are no image partners, then this direction is ok. | |
| 1100 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); | |
| 1101 partner_it.forward()) { | |
| 1102 ColPartition *partner = partner_it.data(); | |
| 1103 if (partner->IsImageType()) { | |
| 1104 break; | |
| 1105 } | |
| 1106 } | |
| 1107 if (!partner_it.cycled_list()) { | |
| 1108 continue; | |
| 1109 } | |
| 1110 // Find the nearest totally overlapping text partner. | |
| 1111 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); | |
| 1112 partner_it.forward()) { | |
| 1113 ColPartition *partner = partner_it.data(); | |
| 1114 if (!partner->IsTextType() || partner->type() == PT_TABLE) { | |
| 1115 continue; | |
| 1116 } | |
| 1117 const TBOX &partner_box = partner->bounding_box(); | |
| 1118 if (debug) { | |
| 1119 tprintf("Finding figure captions for image part:"); | |
| 1120 part_box.print(); | |
| 1121 tprintf("Considering partner:"); | |
| 1122 partner_box.print(); | |
| 1123 } | |
| 1124 if (partner_box.left() >= part_box.left() && | |
| 1125 partner_box.right() <= part_box.right()) { | |
| 1126 int dist = partner_box.y_gap(part_box); | |
| 1127 if (best_caption == nullptr || dist < best_dist) { | |
| 1128 best_dist = dist; | |
| 1129 best_caption = partner; | |
| 1130 best_upper = upper; | |
| 1131 } | |
| 1132 } | |
| 1133 } | |
| 1134 } | |
| 1135 if (best_caption != nullptr) { | |
| 1136 if (debug) { | |
| 1137 tprintf("Best caption candidate:"); | |
| 1138 best_caption->bounding_box().print(); | |
| 1139 } | |
| 1140 // We have a candidate caption. Qualify it as being separable from | |
| 1141 // any body text. We are looking for either a small number of lines | |
| 1142 // or a big gap that indicates a separation from the body text. | |
| 1143 int line_count = 0; | |
| 1144 int biggest_gap = 0; | |
| 1145 int smallest_gap = INT16_MAX; | |
| 1146 int total_height = 0; | |
| 1147 int mean_height = 0; | |
| 1148 ColPartition *end_partner = nullptr; | |
| 1149 ColPartition *next_partner = nullptr; | |
| 1150 for (ColPartition *partner = best_caption; | |
| 1151 partner != nullptr && line_count <= kMaxCaptionLines; | |
| 1152 partner = next_partner) { | |
| 1153 if (!partner->IsTextType()) { | |
| 1154 end_partner = partner; | |
| 1155 break; | |
| 1156 } | |
| 1157 ++line_count; | |
| 1158 total_height += partner->bounding_box().height(); | |
| 1159 next_partner = partner->SingletonPartner(best_upper); | |
| 1160 if (next_partner != nullptr) { | |
| 1161 int gap = | |
| 1162 partner->bounding_box().y_gap(next_partner->bounding_box()); | |
| 1163 if (gap > biggest_gap) { | |
| 1164 biggest_gap = gap; | |
| 1165 end_partner = next_partner; | |
| 1166 mean_height = total_height / line_count; | |
| 1167 } else if (gap < smallest_gap) { | |
| 1168 smallest_gap = gap; | |
| 1169 } | |
| 1170 // If the gap looks big compared to the text size and the smallest | |
| 1171 // gap seen so far, then we can stop. | |
| 1172 if (biggest_gap > mean_height * kMinCaptionGapHeightRatio && | |
| 1173 biggest_gap > smallest_gap * kMinCaptionGapRatio) { | |
| 1174 break; | |
| 1175 } | |
| 1176 } | |
| 1177 } | |
| 1178 if (debug) { | |
| 1179 tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", | |
| 1180 line_count, biggest_gap, smallest_gap, mean_height); | |
| 1181 if (end_partner != nullptr) { | |
| 1182 tprintf("End partner:"); | |
| 1183 end_partner->bounding_box().print(); | |
| 1184 } | |
| 1185 } | |
| 1186 if (next_partner == nullptr && line_count <= kMaxCaptionLines) { | |
| 1187 end_partner = nullptr; // No gap, but line count is small. | |
| 1188 } | |
| 1189 if (line_count <= kMaxCaptionLines) { | |
| 1190 // This is a qualified caption. Mark the text as caption. | |
| 1191 for (ColPartition *partner = best_caption; | |
| 1192 partner != nullptr && partner != end_partner; | |
| 1193 partner = next_partner) { | |
| 1194 partner->set_type(PT_CAPTION_TEXT); | |
| 1195 partner->SetBlobTypes(); | |
| 1196 if (debug) { | |
| 1197 tprintf("Set caption type for partition:"); | |
| 1198 partner->bounding_box().print(); | |
| 1199 } | |
| 1200 next_partner = partner->SingletonPartner(best_upper); | |
| 1201 } | |
| 1202 } | |
| 1203 } | |
| 1204 } | |
| 1205 } | |
| 1206 } | |
| 1207 | |
| 1208 //////// Functions that manipulate ColPartitions in the part_grid_ ///// | |
| 1209 //////// to find chains of partner partitions of the same type. /////// | |
| 1210 | |
| 1211 // For every ColPartition in the grid, finds its upper and lower neighbours. | |
| 1212 void ColPartitionGrid::FindPartitionPartners() { | |
| 1213 ColPartitionGridSearch gsearch(this); | |
| 1214 gsearch.StartFullSearch(); | |
| 1215 ColPartition *part; | |
| 1216 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1217 if (part->IsVerticalType()) { | |
| 1218 FindVPartitionPartners(true, part); | |
| 1219 FindVPartitionPartners(false, part); | |
| 1220 } else { | |
| 1221 FindPartitionPartners(true, part); | |
| 1222 FindPartitionPartners(false, part); | |
| 1223 } | |
| 1224 } | |
| 1225 } | |
| 1226 | |
| 1227 // Finds the best partner in the given direction for the given partition. | |
| 1228 // Stores the result with AddPartner. | |
| 1229 void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition *part) { | |
| 1230 if (part->type() == PT_NOISE) { | |
| 1231 return; // Noise is not allowed to partner anything. | |
| 1232 } | |
| 1233 const TBOX &box = part->bounding_box(); | |
| 1234 int top = part->median_top(); | |
| 1235 int bottom = part->median_bottom(); | |
| 1236 int height = top - bottom; | |
| 1237 int mid_y = (bottom + top) / 2; | |
| 1238 ColPartitionGridSearch vsearch(this); | |
| 1239 // Search down for neighbour below | |
| 1240 vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); | |
| 1241 ColPartition *neighbour; | |
| 1242 ColPartition *best_neighbour = nullptr; | |
| 1243 int best_dist = INT32_MAX; | |
| 1244 while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) { | |
| 1245 if (neighbour == part || neighbour->type() == PT_NOISE) { | |
| 1246 continue; // Noise is not allowed to partner anything. | |
| 1247 } | |
| 1248 int neighbour_bottom = neighbour->median_bottom(); | |
| 1249 int neighbour_top = neighbour->median_top(); | |
| 1250 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; | |
| 1251 if (upper != (neighbour_y > mid_y)) { | |
| 1252 continue; | |
| 1253 } | |
| 1254 if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) { | |
| 1255 continue; | |
| 1256 } | |
| 1257 if (!part->TypesMatch(*neighbour)) { | |
| 1258 if (best_neighbour == nullptr) { | |
| 1259 best_neighbour = neighbour; | |
| 1260 } | |
| 1261 continue; | |
| 1262 } | |
| 1263 int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; | |
| 1264 if (dist <= kMaxPartitionSpacing * height) { | |
| 1265 if (dist < best_dist) { | |
| 1266 best_dist = dist; | |
| 1267 best_neighbour = neighbour; | |
| 1268 } | |
| 1269 } else { | |
| 1270 break; | |
| 1271 } | |
| 1272 } | |
| 1273 if (best_neighbour != nullptr) { | |
| 1274 part->AddPartner(upper, best_neighbour); | |
| 1275 } | |
| 1276 } | |
| 1277 | |
| 1278 // Finds the best partner in the given direction for the given partition. | |
| 1279 // Stores the result with AddPartner. | |
| 1280 void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, | |
| 1281 ColPartition *part) { | |
| 1282 if (part->type() == PT_NOISE) { | |
| 1283 return; // Noise is not allowed to partner anything. | |
| 1284 } | |
| 1285 const TBOX &box = part->bounding_box(); | |
| 1286 int left = part->median_left(); | |
| 1287 int right = part->median_right(); | |
| 1288 int width = right >= left ? right - left : -1; | |
| 1289 int mid_x = (left + right) / 2; | |
| 1290 ColPartitionGridSearch hsearch(this); | |
| 1291 // Search left for neighbour to_the_left | |
| 1292 hsearch.StartSideSearch(mid_x, box.bottom(), box.top()); | |
| 1293 ColPartition *neighbour; | |
| 1294 ColPartition *best_neighbour = nullptr; | |
| 1295 int best_dist = INT32_MAX; | |
| 1296 while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) { | |
| 1297 if (neighbour == part || neighbour->type() == PT_NOISE) { | |
| 1298 continue; // Noise is not allowed to partner anything. | |
| 1299 } | |
| 1300 int neighbour_left = neighbour->median_left(); | |
| 1301 int neighbour_right = neighbour->median_right(); | |
| 1302 int neighbour_x = (neighbour_left + neighbour_right) / 2; | |
| 1303 if (to_the_left != (neighbour_x < mid_x)) { | |
| 1304 continue; | |
| 1305 } | |
| 1306 if (!part->VOverlaps(*neighbour)) { | |
| 1307 continue; | |
| 1308 } | |
| 1309 if (!part->TypesMatch(*neighbour)) { | |
| 1310 continue; // Only match to other vertical text. | |
| 1311 } | |
| 1312 int dist = to_the_left ? left - neighbour_right : neighbour_left - right; | |
| 1313 if (dist <= kMaxPartitionSpacing * width) { | |
| 1314 if (dist < best_dist || best_neighbour == nullptr) { | |
| 1315 best_dist = dist; | |
| 1316 best_neighbour = neighbour; | |
| 1317 } | |
| 1318 } else { | |
| 1319 break; | |
| 1320 } | |
| 1321 } | |
| 1322 // For vertical partitions, the upper partner is to the left, and lower is | |
| 1323 // to the right. | |
| 1324 if (best_neighbour != nullptr) { | |
| 1325 part->AddPartner(to_the_left, best_neighbour); | |
| 1326 } | |
| 1327 } | |
| 1328 | |
| 1329 // For every ColPartition with multiple partners in the grid, reduces the | |
| 1330 // number of partners to 0 or 1. If get_desperate is true, goes to more | |
| 1331 // desperate merge methods to merge flowing text before breaking partnerships. | |
| 1332 void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { | |
| 1333 ColPartitionGridSearch gsearch(this); | |
| 1334 // Refine in type order so that chasing multiple partners can be done | |
| 1335 // before eliminating type mis-matching partners. | |
| 1336 for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { | |
| 1337 // Iterate the ColPartitions in the grid. | |
| 1338 gsearch.StartFullSearch(); | |
| 1339 ColPartition *part; | |
| 1340 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1341 part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate, | |
| 1342 this); | |
| 1343 // Iterator may have been messed up by a merge. | |
| 1344 gsearch.RepositionIterator(); | |
| 1345 } | |
| 1346 } | |
| 1347 } | |
| 1348 | |
| 1349 // ========================== PRIVATE CODE ======================== | |
| 1350 | |
| 1351 // Finds and returns a list of candidate ColPartitions to merge with part. | |
| 1352 // The candidates must overlap search_box, and when merged must not | |
| 1353 // overlap any other partitions that are not overlapped by each individually. | |
| 1354 void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, | |
| 1355 const TBOX &search_box, bool debug, | |
| 1356 ColPartition_CLIST *candidates) { | |
| 1357 int ok_overlap = | |
| 1358 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); | |
| 1359 const TBOX &part_box = part->bounding_box(); | |
| 1360 // Now run the rect search. | |
| 1361 ColPartitionGridSearch rsearch(this); | |
| 1362 rsearch.SetUniqueMode(true); | |
| 1363 rsearch.StartRectSearch(search_box); | |
| 1364 ColPartition *candidate; | |
| 1365 while ((candidate = rsearch.NextRectSearch()) != nullptr) { | |
| 1366 if (!OKMergeCandidate(part, candidate, debug)) { | |
| 1367 continue; | |
| 1368 } | |
| 1369 const TBOX &c_box = candidate->bounding_box(); | |
| 1370 // Candidate seems to be a potential merge with part. If one contains | |
| 1371 // the other, then the merge is a no-brainer. Otherwise, search the | |
| 1372 // combined box to see if anything else is inappropriately overlapped. | |
| 1373 if (!part_box.contains(c_box) && !c_box.contains(part_box)) { | |
| 1374 // Search the combined rectangle to see if anything new is overlapped. | |
| 1375 // This is a preliminary test designed to quickly weed-out poor | |
| 1376 // merge candidates that would create a big list of overlapped objects | |
| 1377 // for the squared-order overlap analysis. Eg. vertical and horizontal | |
| 1378 // line-like objects that overlap real text when merged: | |
| 1379 // || ========================== | |
| 1380 // || | |
| 1381 // || r e a l t e x t | |
| 1382 // || | |
| 1383 // || | |
| 1384 TBOX merged_box(part_box); | |
| 1385 merged_box += c_box; | |
| 1386 ColPartitionGridSearch msearch(this); | |
| 1387 msearch.SetUniqueMode(true); | |
| 1388 msearch.StartRectSearch(merged_box); | |
| 1389 ColPartition *neighbour; | |
| 1390 while ((neighbour = msearch.NextRectSearch()) != nullptr) { | |
| 1391 if (neighbour == part || neighbour == candidate) { | |
| 1392 continue; // Ignore itself. | |
| 1393 } | |
| 1394 if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false)) { | |
| 1395 continue; // This kind of merge overlap is OK. | |
| 1396 } | |
| 1397 TBOX n_box = neighbour->bounding_box(); | |
| 1398 // The overlap is OK if: | |
| 1399 // * the n_box already overlapped the part or the candidate OR | |
| 1400 // * the n_box is a suitable merge with either part or candidate | |
| 1401 if (!n_box.overlap(part_box) && !n_box.overlap(c_box) && | |
| 1402 !OKMergeCandidate(part, neighbour, false) && | |
| 1403 !OKMergeCandidate(candidate, neighbour, false)) { | |
| 1404 break; | |
| 1405 } | |
| 1406 } | |
| 1407 if (neighbour != nullptr) { | |
| 1408 if (debug) { | |
| 1409 tprintf( | |
| 1410 "Combined box overlaps another that is not OK despite" | |
| 1411 " allowance of %d:", | |
| 1412 ok_overlap); | |
| 1413 neighbour->bounding_box().print(); | |
| 1414 tprintf("Reason:"); | |
| 1415 OKMergeCandidate(part, neighbour, true); | |
| 1416 tprintf("...and:"); | |
| 1417 OKMergeCandidate(candidate, neighbour, true); | |
| 1418 tprintf("Overlap:"); | |
| 1419 neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true); | |
| 1420 } | |
| 1421 continue; | |
| 1422 } | |
| 1423 } | |
| 1424 if (debug) { | |
| 1425 tprintf("Adding candidate:"); | |
| 1426 candidate->bounding_box().print(); | |
| 1427 } | |
| 1428 // Unique elements as they arrive. | |
| 1429 candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate); | |
| 1430 } | |
| 1431 } | |
| 1432 | |
| 1433 // Smoothes the region type/flow type of the given part by looking at local | |
| 1434 // neighbours and the given image mask. Searches a padded rectangle with the | |
| 1435 // padding truncated on one size of the part's box in turn for each side, | |
| 1436 // using the result (if any) that has the least distance to all neighbours | |
| 1437 // that contribute to the decision. This biases in favor of rectangular | |
| 1438 // regions without completely enforcing them. | |
| 1439 // If a good decision cannot be reached, the part is left unchanged. | |
| 1440 // im_box and rerotation are used to map blob coordinates onto the | |
| 1441 // nontext_map, which is used to prevent the spread of text neighbourhoods | |
| 1442 // into images. | |
| 1443 // Returns true if the partition was changed. | |
| 1444 bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box, | |
| 1445 const FCOORD &rerotation, bool debug, | |
| 1446 ColPartition *part) { | |
| 1447 const TBOX &part_box = part->bounding_box(); | |
| 1448 if (debug) { | |
| 1449 tprintf("Smooothing part at:"); | |
| 1450 part_box.print(); | |
| 1451 } | |
| 1452 BlobRegionType best_type = BRT_UNKNOWN; | |
| 1453 int best_dist = INT32_MAX; | |
| 1454 int max_dist = std::min(part_box.width(), part_box.height()); | |
| 1455 max_dist = std::max(max_dist * kMaxNeighbourDistFactor, gridsize() * 2); | |
| 1456 // Search with the pad truncated on each side of the box in turn. | |
| 1457 bool any_image = false; | |
| 1458 bool all_image = true; | |
| 1459 for (int d = 0; d < BND_COUNT; ++d) { | |
| 1460 int dist; | |
| 1461 auto dir = static_cast<BlobNeighbourDir>(d); | |
| 1462 BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, | |
| 1463 rerotation, debug, *part, &dist); | |
| 1464 if (debug) { | |
| 1465 tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); | |
| 1466 } | |
| 1467 if (type != BRT_UNKNOWN && dist < best_dist) { | |
| 1468 best_dist = dist; | |
| 1469 best_type = type; | |
| 1470 } | |
| 1471 if (type == BRT_POLYIMAGE) { | |
| 1472 any_image = true; | |
| 1473 } else { | |
| 1474 all_image = false; | |
| 1475 } | |
| 1476 } | |
| 1477 if (best_dist > max_dist) { | |
| 1478 return false; // Too far away to set the type with it. | |
| 1479 } | |
| 1480 if (part->flow() == BTFT_STRONG_CHAIN && !all_image) { | |
| 1481 return false; // We are not modifying it. | |
| 1482 } | |
| 1483 BlobRegionType new_type = part->blob_type(); | |
| 1484 BlobTextFlowType new_flow = part->flow(); | |
| 1485 if (best_type == BRT_TEXT && !any_image) { | |
| 1486 new_flow = BTFT_STRONG_CHAIN; | |
| 1487 new_type = BRT_TEXT; | |
| 1488 } else if (best_type == BRT_VERT_TEXT && !any_image) { | |
| 1489 new_flow = BTFT_STRONG_CHAIN; | |
| 1490 new_type = BRT_VERT_TEXT; | |
| 1491 } else if (best_type == BRT_POLYIMAGE) { | |
| 1492 new_flow = BTFT_NONTEXT; | |
| 1493 new_type = BRT_UNKNOWN; | |
| 1494 } | |
| 1495 if (new_type != part->blob_type() || new_flow != part->flow()) { | |
| 1496 part->set_flow(new_flow); | |
| 1497 part->set_blob_type(new_type); | |
| 1498 part->SetBlobTypes(); | |
| 1499 if (debug) { | |
| 1500 tprintf("Modified part:"); | |
| 1501 part->Print(); | |
| 1502 } | |
| 1503 return true; | |
| 1504 } else { | |
| 1505 return false; | |
| 1506 } | |
| 1507 } | |
| 1508 | |
| 1509 // Sets up a search box based on the part_box, padded in all directions | |
| 1510 // except direction. Also setup dist_scaling to weight x,y distances according | |
| 1511 // to the given direction. | |
| 1512 static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, | |
| 1513 const TBOX &part_box, int min_padding, | |
| 1514 TBOX *search_box, ICOORD *dist_scaling) { | |
| 1515 *search_box = part_box; | |
| 1516 // Generate a pad value based on the min dimension of part_box, but at least | |
| 1517 // min_padding and then scaled by kMaxPadFactor. | |
| 1518 int padding = std::min(part_box.height(), part_box.width()); | |
| 1519 padding = std::max(padding, min_padding); | |
| 1520 padding *= kMaxPadFactor; | |
| 1521 search_box->pad(padding, padding); | |
| 1522 // Truncate the box in the appropriate direction and make the distance | |
| 1523 // metric slightly biased in the truncated direction. | |
| 1524 switch (direction) { | |
| 1525 case BND_LEFT: | |
| 1526 search_box->set_left(part_box.left()); | |
| 1527 *dist_scaling = ICOORD(2, 1); | |
| 1528 break; | |
| 1529 case BND_BELOW: | |
| 1530 search_box->set_bottom(part_box.bottom()); | |
| 1531 *dist_scaling = ICOORD(1, 2); | |
| 1532 break; | |
| 1533 case BND_RIGHT: | |
| 1534 search_box->set_right(part_box.right()); | |
| 1535 *dist_scaling = ICOORD(2, 1); | |
| 1536 break; | |
| 1537 case BND_ABOVE: | |
| 1538 search_box->set_top(part_box.top()); | |
| 1539 *dist_scaling = ICOORD(1, 2); | |
| 1540 break; | |
| 1541 default: | |
| 1542 ASSERT_HOST(false); | |
| 1543 } | |
| 1544 } | |
| 1545 | |
| 1546 // Local enum used by SmoothInOneDirection and AccumulatePartDistances | |
| 1547 // for the different types of partition neighbour. | |
| 1548 enum NeighbourPartitionType { | |
| 1549 NPT_HTEXT, // Definite horizontal text. | |
| 1550 NPT_VTEXT, // Definite vertical text. | |
| 1551 NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but | |
| 1552 // image for image and VTEXT. | |
| 1553 NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but | |
| 1554 // image for image and HTEXT. | |
| 1555 NPT_IMAGE, // Defininte non-text. | |
| 1556 NPT_COUNT // Number of array elements. | |
| 1557 }; | |
| 1558 | |
| 1559 // Executes the search for SmoothRegionType in a single direction. | |
| 1560 // Creates a bounding box that is padded in all directions except direction, | |
| 1561 // and searches it for other partitions. Finds the nearest collection of | |
| 1562 // partitions that makes a decisive result (if any) and returns the type | |
| 1563 // and the distance of the collection. If there are any pixels in the | |
| 1564 // nontext_map, then the decision is biased towards image. | |
| 1565 BlobRegionType ColPartitionGrid::SmoothInOneDirection( | |
| 1566 BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box, | |
| 1567 const FCOORD &rerotation, bool debug, const ColPartition &part, | |
| 1568 int *best_distance) { | |
| 1569 // Set up a rectangle search bounded by the part. | |
| 1570 const TBOX &part_box = part.bounding_box(); | |
| 1571 TBOX search_box; | |
| 1572 ICOORD dist_scaling; | |
| 1573 ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, | |
| 1574 &dist_scaling); | |
| 1575 bool image_region = ImageFind::CountPixelsInRotatedBox( | |
| 1576 search_box, im_box, rerotation, nontext_map) > 0; | |
| 1577 std::vector<int> dists[NPT_COUNT]; | |
| 1578 AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, | |
| 1579 rerotation, debug, dists); | |
| 1580 // By iteratively including the next smallest distance across the vectors, | |
| 1581 // (as in a merge sort) we can use the vector indices as counts of each type | |
| 1582 // and find the nearest set of objects that give us a definite decision. | |
| 1583 unsigned counts[NPT_COUNT]; | |
| 1584 memset(counts, 0, sizeof(counts)); | |
| 1585 // If there is image in the search box, tip the balance in image's favor. | |
| 1586 int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0; | |
| 1587 BlobRegionType text_dir = part.blob_type(); | |
| 1588 BlobTextFlowType flow_type = part.flow(); | |
| 1589 int min_dist = 0; | |
| 1590 do { | |
| 1591 // Find the minimum new entry across the vectors | |
| 1592 min_dist = INT32_MAX; | |
| 1593 for (int i = 0; i < NPT_COUNT; ++i) { | |
| 1594 if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist) { | |
| 1595 min_dist = dists[i][counts[i]]; | |
| 1596 } | |
| 1597 } | |
| 1598 // Step all the indices/counts forward to include min_dist. | |
| 1599 for (int i = 0; i < NPT_COUNT; ++i) { | |
| 1600 while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist) { | |
| 1601 ++counts[i]; | |
| 1602 } | |
| 1603 } | |
| 1604 *best_distance = min_dist; | |
| 1605 if (debug) { | |
| 1606 tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", | |
| 1607 counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], | |
| 1608 counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], image_bias, min_dist); | |
| 1609 } | |
| 1610 // See if we have a decision yet. | |
| 1611 auto image_count = counts[NPT_IMAGE]; | |
| 1612 int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - | |
| 1613 (image_count + counts[NPT_WEAK_VTEXT]); | |
| 1614 int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - | |
| 1615 (image_count + counts[NPT_WEAK_HTEXT]); | |
| 1616 if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin && | |
| 1617 image_bias - vtext_score >= kSmoothDecisionMargin) { | |
| 1618 *best_distance = dists[NPT_IMAGE][0]; | |
| 1619 if (!dists[NPT_WEAK_VTEXT].empty() && | |
| 1620 *best_distance > dists[NPT_WEAK_VTEXT][0]) { | |
| 1621 *best_distance = dists[NPT_WEAK_VTEXT][0]; | |
| 1622 } | |
| 1623 if (!dists[NPT_WEAK_HTEXT].empty() && | |
| 1624 *best_distance > dists[NPT_WEAK_HTEXT][0]) { | |
| 1625 *best_distance = dists[NPT_WEAK_HTEXT][0]; | |
| 1626 } | |
| 1627 return BRT_POLYIMAGE; | |
| 1628 } | |
| 1629 if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && | |
| 1630 counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { | |
| 1631 *best_distance = dists[NPT_HTEXT][0]; | |
| 1632 return BRT_TEXT; | |
| 1633 } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && | |
| 1634 counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { | |
| 1635 *best_distance = dists[NPT_VTEXT][0]; | |
| 1636 return BRT_VERT_TEXT; | |
| 1637 } | |
| 1638 } while (min_dist < INT32_MAX); | |
| 1639 return BRT_UNKNOWN; | |
| 1640 } | |
| 1641 | |
| 1642 // Counts the partitions in the given search_box by appending the gap | |
| 1643 // distance (scaled by dist_scaling) of the part from the base_part to the | |
| 1644 // vector of the appropriate type for the partition. Prior to return, the | |
| 1645 // vectors in the dists array are sorted in increasing order. | |
| 1646 // The nontext_map (+im_box, rerotation) is used to make text invisible if | |
| 1647 // there is non-text in between. | |
| 1648 // dists must be an array of vectors of size NPT_COUNT. | |
| 1649 void ColPartitionGrid::AccumulatePartDistances( | |
| 1650 const ColPartition &base_part, const ICOORD &dist_scaling, | |
| 1651 const TBOX &search_box, Image nontext_map, const TBOX &im_box, | |
| 1652 const FCOORD &rerotation, bool debug, std::vector<int> *dists) { | |
| 1653 const TBOX &part_box = base_part.bounding_box(); | |
| 1654 ColPartitionGridSearch rsearch(this); | |
| 1655 rsearch.SetUniqueMode(true); | |
| 1656 rsearch.StartRectSearch(search_box); | |
| 1657 ColPartition *neighbour; | |
| 1658 // Search for compatible neighbours with a similar strokewidth, but not | |
| 1659 // on the other side of a tab vector. | |
| 1660 while ((neighbour = rsearch.NextRectSearch()) != nullptr) { | |
| 1661 if (neighbour->IsUnMergeableType() || | |
| 1662 !base_part.ConfirmNoTabViolation(*neighbour) || | |
| 1663 neighbour == &base_part) { | |
| 1664 continue; | |
| 1665 } | |
| 1666 TBOX nbox = neighbour->bounding_box(); | |
| 1667 BlobRegionType n_type = neighbour->blob_type(); | |
| 1668 if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && | |
| 1669 !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, | |
| 1670 nontext_map)) { | |
| 1671 continue; // Text not visible the other side of image. | |
| 1672 } | |
| 1673 if (BLOBNBOX::IsLineType(n_type)) { | |
| 1674 continue; // Don't use horizontal lines as neighbours. | |
| 1675 } | |
| 1676 int x_gap = std::max(part_box.x_gap(nbox), 0); | |
| 1677 int y_gap = std::max(part_box.y_gap(nbox), 0); | |
| 1678 int n_dist = x_gap * dist_scaling.x() + y_gap * dist_scaling.y(); | |
| 1679 if (debug) { | |
| 1680 tprintf("Part has x-gap=%d, y=%d, dist=%d at:", x_gap, y_gap, n_dist); | |
| 1681 nbox.print(); | |
| 1682 } | |
| 1683 // Truncate the number of boxes, so text doesn't get too much advantage. | |
| 1684 int n_boxes = std::min(neighbour->boxes_count(), kSmoothDecisionMargin); | |
| 1685 BlobTextFlowType n_flow = neighbour->flow(); | |
| 1686 std::vector<int> *count_vector = nullptr; | |
| 1687 if (n_flow == BTFT_STRONG_CHAIN) { | |
| 1688 if (n_type == BRT_TEXT) { | |
| 1689 count_vector = &dists[NPT_HTEXT]; | |
| 1690 } else { | |
| 1691 count_vector = &dists[NPT_VTEXT]; | |
| 1692 } | |
| 1693 if (debug) { | |
| 1694 tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes); | |
| 1695 } | |
| 1696 } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && | |
| 1697 (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) { | |
| 1698 // Medium text counts as weak, and all else counts as image. | |
| 1699 if (n_type == BRT_TEXT) { | |
| 1700 count_vector = &dists[NPT_WEAK_HTEXT]; | |
| 1701 } else { | |
| 1702 count_vector = &dists[NPT_WEAK_VTEXT]; | |
| 1703 } | |
| 1704 if (debug) { | |
| 1705 tprintf("Weak %d\n", n_boxes); | |
| 1706 } | |
| 1707 } else { | |
| 1708 count_vector = &dists[NPT_IMAGE]; | |
| 1709 if (debug) { | |
| 1710 tprintf("Image %d\n", n_boxes); | |
| 1711 } | |
| 1712 } | |
| 1713 if (count_vector != nullptr) { | |
| 1714 for (int i = 0; i < n_boxes; ++i) { | |
| 1715 count_vector->push_back(n_dist); | |
| 1716 } | |
| 1717 } | |
| 1718 if (debug) { | |
| 1719 neighbour->Print(); | |
| 1720 } | |
| 1721 } | |
| 1722 for (int i = 0; i < NPT_COUNT; ++i) { | |
| 1723 std::sort(dists[i].begin(), dists[i].end()); | |
| 1724 } | |
| 1725 } | |
| 1726 | |
| 1727 // Improves the margins of the part ColPartition by searching for | |
| 1728 // neighbours that vertically overlap significantly. | |
| 1729 // columns may be nullptr, and indicates the assigned column structure this | |
| 1730 // is applicable to part. | |
| 1731 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, | |
| 1732 ColPartition *part) { | |
| 1733 // Set up a rectangle search x-bounded by the column and y by the part. | |
| 1734 TBOX box = part->bounding_box(); | |
| 1735 int y = part->MidY(); | |
| 1736 // Initial left margin is based on the column, if there is one. | |
| 1737 int left_margin = bleft().x(); | |
| 1738 int right_margin = tright().x(); | |
| 1739 if (columns != nullptr) { | |
| 1740 ColPartition *column = columns->ColumnContaining(box.left(), y); | |
| 1741 if (column != nullptr) { | |
| 1742 left_margin = column->LeftAtY(y); | |
| 1743 } | |
| 1744 column = columns->ColumnContaining(box.right(), y); | |
| 1745 if (column != nullptr) { | |
| 1746 right_margin = column->RightAtY(y); | |
| 1747 } | |
| 1748 } | |
| 1749 left_margin -= kColumnWidthFactor; | |
| 1750 right_margin += kColumnWidthFactor; | |
| 1751 // Search for ColPartitions that reduce the margin. | |
| 1752 left_margin = FindMargin(box.left() + box.height(), true, left_margin, | |
| 1753 box.bottom(), box.top(), part); | |
| 1754 part->set_left_margin(left_margin); | |
| 1755 // Search for ColPartitions that reduce the margin. | |
| 1756 right_margin = FindMargin(box.right() - box.height(), false, right_margin, | |
| 1757 box.bottom(), box.top(), part); | |
| 1758 part->set_right_margin(right_margin); | |
| 1759 } | |
| 1760 | |
| 1761 // Starting at x, and going in the specified direction, up to x_limit, finds | |
| 1762 // the margin for the given y range by searching sideways, | |
| 1763 // and ignoring not_this. | |
| 1764 int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, | |
| 1765 int y_bottom, int y_top, | |
| 1766 const ColPartition *not_this) { | |
| 1767 int height = y_top - y_bottom; | |
| 1768 // Iterate the ColPartitions in the grid. | |
| 1769 ColPartitionGridSearch side_search(this); | |
| 1770 side_search.SetUniqueMode(true); | |
| 1771 side_search.StartSideSearch(x, y_bottom, y_top); | |
| 1772 ColPartition *part; | |
| 1773 while ((part = side_search.NextSideSearch(right_to_left)) != nullptr) { | |
| 1774 // Ignore itself. | |
| 1775 if (part == not_this) { // || part->IsLineType()) | |
| 1776 continue; | |
| 1777 } | |
| 1778 // Must overlap by enough, based on the min of the heights, so | |
| 1779 // large partitions can't smash through small ones. | |
| 1780 TBOX box = part->bounding_box(); | |
| 1781 int min_overlap = std::min(height, static_cast<int>(box.height())); | |
| 1782 min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5); | |
| 1783 int y_overlap = std::min(y_top, static_cast<int>(box.top())) - | |
| 1784 std::max(y_bottom, static_cast<int>(box.bottom())); | |
| 1785 if (y_overlap < min_overlap) { | |
| 1786 continue; | |
| 1787 } | |
| 1788 // Must be going the right way. | |
| 1789 int x_edge = right_to_left ? box.right() : box.left(); | |
| 1790 if ((x_edge < x) != right_to_left) { | |
| 1791 continue; | |
| 1792 } | |
| 1793 // If we have gone past x_limit, then x_limit will do. | |
| 1794 if ((x_edge < x_limit) == right_to_left) { | |
| 1795 break; | |
| 1796 } | |
| 1797 // It reduces x limit, so save the new one. | |
| 1798 x_limit = x_edge; | |
| 1799 } | |
| 1800 return x_limit; | |
| 1801 } | |
| 1802 | |
| 1803 } // namespace tesseract. |
