Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/colfind.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: colfind.cpp | |
| 3 // Description: Class to hold BLOBNBOXs in a grid for fast access | |
| 4 // to neighbours. | |
| 5 // Author: Ray Smith | |
| 6 // | |
| 7 // (C) Copyright 2007, Google Inc. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 // Include automatically generated configuration file if running autoconf. | |
| 21 #ifdef HAVE_CONFIG_H | |
| 22 # include "config_auto.h" | |
| 23 #endif | |
| 24 | |
| 25 #include "colfind.h" | |
| 26 | |
| 27 #include "ccnontextdetect.h" | |
| 28 #include "colpartition.h" | |
| 29 #include "colpartitionset.h" | |
| 30 #ifndef DISABLED_LEGACY_ENGINE | |
| 31 # include "equationdetectbase.h" | |
| 32 #endif | |
| 33 #include "blobbox.h" | |
| 34 #include "linefind.h" | |
| 35 #include "normalis.h" | |
| 36 #include "params.h" | |
| 37 #include "scrollview.h" | |
| 38 #include "strokewidth.h" | |
| 39 #include "tablefind.h" | |
| 40 #include "workingpartset.h" | |
| 41 | |
| 42 #include <algorithm> | |
| 43 | |
| 44 namespace tesseract { | |
| 45 | |
| 46 // When assigning columns, the max number of misfit grid rows/ColPartitionSets | |
| 47 // that can be ignored. | |
| 48 const int kMaxIncompatibleColumnCount = 2; | |
| 49 // Max fraction of mean_column_gap_ for the gap between two partitions within a | |
| 50 // column to allow them to merge. | |
| 51 const double kHorizontalGapMergeFraction = 0.5; | |
| 52 // Minimum gutter width as a fraction of gridsize | |
| 53 const double kMinGutterWidthGrid = 0.5; | |
| 54 // Max multiple of a partition's median size as a distance threshold for | |
| 55 // adding noise blobs. | |
| 56 const double kMaxDistToPartSizeRatio = 1.5; | |
| 57 | |
| 58 #ifndef GRAPHICS_DISABLED | |
| 59 static BOOL_VAR(textord_tabfind_show_initial_partitions, false, "Show partition bounds"); | |
| 60 static BOOL_VAR(textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise"); | |
| 61 static INT_VAR(textord_tabfind_show_partitions, 0, | |
| 62 "Show partition bounds, waiting if >1 (ScrollView)"); | |
| 63 static BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds (ScrollView)"); | |
| 64 static BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds (ScrollView)"); | |
| 65 #endif | |
| 66 static BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); | |
| 67 | |
| 68 #ifndef GRAPHICS_DISABLED | |
| 69 ScrollView *ColumnFinder::blocks_win_ = nullptr; | |
| 70 #endif | |
| 71 | |
| 72 // Gridsize is an estimate of the text size in the image. A suitable value | |
| 73 // is in TO_BLOCK::line_size after find_components has been used to make | |
| 74 // the blobs. | |
| 75 // bleft and tright are the bounds of the image (or rectangle) being processed. | |
| 76 // vlines is a (possibly empty) list of TabVector and vertical_x and y are | |
| 77 // the sum logical vertical vector produced by LineFinder::FindVerticalLines. | |
| 78 ColumnFinder::ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, | |
| 79 bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, | |
| 80 TabVector_LIST *hlines, int vertical_x, int vertical_y) | |
| 81 : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, resolution) | |
| 82 , cjk_script_(cjk_script) | |
| 83 , min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)) | |
| 84 , mean_column_gap_(tright.x() - bleft.x()) | |
| 85 , tabfind_aligned_gap_fraction_(aligned_gap_fraction) | |
| 86 , deskew_(0.0f, 0.0f) | |
| 87 , reskew_(1.0f, 0.0f) | |
| 88 , rotation_(1.0f, 0.0f) | |
| 89 , rerotate_(1.0f, 0.0f) | |
| 90 , text_rotation_(0.0f, 0.0f) | |
| 91 , best_columns_(nullptr) | |
| 92 , stroke_width_(nullptr) | |
| 93 , part_grid_(gridsize, bleft, tright) | |
| 94 , nontext_map_(nullptr) | |
| 95 , projection_(resolution) | |
| 96 , denorm_(nullptr) | |
| 97 , equation_detect_(nullptr) { | |
| 98 TabVector_IT h_it(&horizontal_lines_); | |
| 99 h_it.add_list_after(hlines); | |
| 100 } | |
| 101 | |
| 102 ColumnFinder::~ColumnFinder() { | |
| 103 for (auto set : column_sets_) { | |
| 104 delete set; | |
| 105 } | |
| 106 delete[] best_columns_; | |
| 107 delete stroke_width_; | |
| 108 #ifndef GRAPHICS_DISABLED | |
| 109 delete input_blobs_win_; | |
| 110 #endif | |
| 111 nontext_map_.destroy(); | |
| 112 while (denorm_ != nullptr) { | |
| 113 DENORM *dead_denorm = denorm_; | |
| 114 denorm_ = const_cast<DENORM *>(denorm_->predecessor()); | |
| 115 delete dead_denorm; | |
| 116 } | |
| 117 | |
| 118 // The ColPartitions are destroyed automatically, but any boxes in | |
| 119 // the noise_parts_ list are owned and need to be deleted explicitly. | |
| 120 ColPartition_IT part_it(&noise_parts_); | |
| 121 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { | |
| 122 ColPartition *part = part_it.data(); | |
| 123 part->DeleteBoxes(); | |
| 124 } | |
| 125 // Likewise any boxes in the good_parts_ list need to be deleted. | |
| 126 // These are just the image parts. Text parts have already given their | |
| 127 // boxes on to the TO_BLOCK, and have empty lists. | |
| 128 part_it.set_to_list(&good_parts_); | |
| 129 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { | |
| 130 ColPartition *part = part_it.data(); | |
| 131 part->DeleteBoxes(); | |
| 132 } | |
| 133 // Also, any blobs on the image_bblobs_ list need to have their cblobs | |
| 134 // deleted. This only happens if there has been an early return from | |
| 135 // FindColumns, as in a normal return, the blobs go into the grid and | |
| 136 // end up in noise_parts_, good_parts_ or the output blocks. | |
| 137 BLOBNBOX_IT bb_it(&image_bblobs_); | |
| 138 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { | |
| 139 BLOBNBOX *bblob = bb_it.data(); | |
| 140 delete bblob->cblob(); | |
| 141 } | |
| 142 } | |
| 143 | |
| 144 // Performs initial processing on the blobs in the input_block: | |
| 145 // Setup the part_grid_, stroke_width_, nontext_map. | |
| 146 // Obvious noise blobs are filtered out and used to mark the nontext_map_. | |
| 147 // Initial stroke-width analysis is used to get local text alignment | |
| 148 // direction, so the textline projection_ map can be setup. | |
| 149 // On return, IsVerticallyAlignedText may be called (now optionally) to | |
| 150 // determine the gross textline alignment of the page. | |
| 151 void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, | |
| 152 TO_BLOCK *input_block) { | |
| 153 part_grid_.Init(gridsize(), bleft(), tright()); | |
| 154 delete stroke_width_; | |
| 155 stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); | |
| 156 min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize()); | |
| 157 input_block->ReSetAndReFilterBlobs(); | |
| 158 #ifndef GRAPHICS_DISABLED | |
| 159 if (textord_tabfind_show_blocks) { | |
| 160 input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); | |
| 161 input_block->plot_graded_blobs(input_blobs_win_); | |
| 162 } | |
| 163 #endif // !GRAPHICS_DISABLED | |
| 164 SetBlockRuleEdges(input_block); | |
| 165 nontext_map_.destroy(); | |
| 166 // Run a preliminary strokewidth neighbour detection on the medium blobs. | |
| 167 stroke_width_->SetNeighboursOnMediumBlobs(input_block); | |
| 168 CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); | |
| 169 // Remove obvious noise and make the initial non-text map. | |
| 170 nontext_map_ = | |
| 171 nontext_detect.ComputeNonTextMask(textord_debug_tabfind, photo_mask_pix, input_block); | |
| 172 stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, input_block); | |
| 173 // Clear the strokewidth grid ready for rotation or leader finding. | |
| 174 stroke_width_->Clear(); | |
| 175 } | |
| 176 | |
| 177 // Tests for vertical alignment of text (returning true if so), and generates | |
| 178 // a list of blobs of moderate aspect ratio, in the most frequent writing | |
| 179 // direction (in osd_blobs) for orientation and script detection to test | |
| 180 // the character orientation. | |
| 181 // block is the single block for the whole page or rectangle to be OCRed. | |
| 182 // Note that the vertical alignment may be due to text whose writing direction | |
| 183 // is vertical, like say Japanese, or due to text whose writing direction is | |
| 184 // horizontal but whose text appears vertically aligned because the image is | |
| 185 // not the right way up. | |
| 186 bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, | |
| 187 BLOBNBOX_CLIST *osd_blobs) { | |
| 188 return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, block, osd_blobs); | |
| 189 } | |
| 190 | |
| 191 // Rotates the blobs and the TabVectors so that the gross writing direction | |
| 192 // (text lines) are horizontal and lines are read down the page. | |
| 193 // Applied rotation stored in rotation_. | |
| 194 // A second rotation is calculated for application during recognition to | |
| 195 // make the rotated blobs upright for recognition. | |
| 196 // Subsequent rotation stored in text_rotation_. | |
| 197 // | |
| 198 // Arguments: | |
| 199 // vertical_text_lines true if the text lines are vertical. | |
| 200 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree | |
| 201 // rotations from osd required for the text to be upright and readable. | |
| 202 void ColumnFinder::CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, | |
| 203 int recognition_rotation) { | |
| 204 const FCOORD anticlockwise90(0.0f, 1.0f); | |
| 205 const FCOORD clockwise90(0.0f, -1.0f); | |
| 206 const FCOORD rotation180(-1.0f, 0.0f); | |
| 207 const FCOORD norotation(1.0f, 0.0f); | |
| 208 | |
| 209 text_rotation_ = norotation; | |
| 210 // Rotate the page to make the text upright, as implied by | |
| 211 // recognition_rotation. | |
| 212 rotation_ = norotation; | |
| 213 if (recognition_rotation == 1) { | |
| 214 rotation_ = anticlockwise90; | |
| 215 } else if (recognition_rotation == 2) { | |
| 216 rotation_ = rotation180; | |
| 217 } else if (recognition_rotation == 3) { | |
| 218 rotation_ = clockwise90; | |
| 219 } | |
| 220 // We infer text writing direction to be vertical if there are several | |
| 221 // vertical text lines detected, and horizontal if not. But if the page | |
| 222 // orientation was determined to be 90 or 270 degrees, the true writing | |
| 223 // direction is the opposite of what we inferred. | |
| 224 if (recognition_rotation & 1) { | |
| 225 vertical_text_lines = !vertical_text_lines; | |
| 226 } | |
| 227 // If we still believe the writing direction is vertical, we use the | |
| 228 // convention of rotating the page ccw 90 degrees to make the text lines | |
| 229 // horizontal, and mark the blobs for rotation cw 90 degrees for | |
| 230 // classification so that the text order is correct after recognition. | |
| 231 if (vertical_text_lines) { | |
| 232 rotation_.rotate(anticlockwise90); | |
| 233 text_rotation_.rotate(clockwise90); | |
| 234 } | |
| 235 // Set rerotate_ to the inverse of rotation_. | |
| 236 rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); | |
| 237 if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { | |
| 238 // Rotate all the blobs and tab vectors. | |
| 239 RotateBlobList(rotation_, &block->large_blobs); | |
| 240 RotateBlobList(rotation_, &block->blobs); | |
| 241 RotateBlobList(rotation_, &block->small_blobs); | |
| 242 RotateBlobList(rotation_, &block->noise_blobs); | |
| 243 TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, &min_gutter_width_); | |
| 244 part_grid_.Init(gridsize(), bleft(), tright()); | |
| 245 // Reset all blobs to initial state and filter by size. | |
| 246 // Since they have rotated, the list they belong on could have changed. | |
| 247 block->ReSetAndReFilterBlobs(); | |
| 248 SetBlockRuleEdges(block); | |
| 249 stroke_width_->CorrectForRotation(rerotate_, &part_grid_); | |
| 250 } | |
| 251 if (textord_debug_tabfind) { | |
| 252 tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", vertical_text_lines, | |
| 253 recognition_rotation, rotation_.x(), rotation_.y(), text_rotation_.x(), | |
| 254 text_rotation_.y()); | |
| 255 } | |
| 256 // Setup the denormalization. | |
| 257 ASSERT_HOST(denorm_ == nullptr); | |
| 258 denorm_ = new DENORM; | |
| 259 denorm_->SetupNormalization(nullptr, &rotation_, nullptr, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); | |
| 260 } | |
| 261 | |
| 262 // Finds blocks of text, image, rule line, table etc, returning them in the | |
| 263 // blocks and to_blocks | |
| 264 // (Each TO_BLOCK points to the basic BLOCK and adds more information.) | |
| 265 // Image blocks are generated by a combination of photo_mask_pix (which may | |
| 266 // NOT be nullptr) and the rejected text found during preliminary textline | |
| 267 // finding. | |
| 268 // The input_block is the result of a call to find_components, and contains | |
| 269 // the blobs found in the image or rectangle to be OCRed. These blobs will be | |
| 270 // removed and placed in the output blocks, while unused ones will be deleted. | |
| 271 // If single_column is true, the input is treated as single column, but | |
| 272 // it is still divided into blocks of equal line spacing/text size. | |
| 273 // scaled_color is scaled down by scaled_factor from the input color image, | |
| 274 // and may be nullptr if the input was not color. | |
| 275 // grey_pix is optional, but if present must match the photo_mask_pix in size, | |
| 276 // and must be a *real* grey image instead of binary_pix * 255. | |
| 277 // thresholds_pix is expected to be present iff grey_pix is present and | |
| 278 // can be an integer factor reduction of the grey_pix. It represents the | |
| 279 // thresholds that were used to create the binary_pix from the grey_pix. | |
| 280 // If diacritic_blobs is non-null, then diacritics/noise blobs, that would | |
| 281 // confuse layout analysis by causing textline overlap, are placed there, | |
| 282 // with the expectation that they will be reassigned to words later and | |
| 283 // noise/diacriticness determined via classification. | |
| 284 // Returns -1 if the user hits the 'd' key in the blocks window while running | |
| 285 // in debug mode, which requests a retry with more debug info. | |
| 286 int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, | |
| 287 TO_BLOCK *input_block, Image photo_mask_pix, Image thresholds_pix, | |
| 288 Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, | |
| 289 BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) { | |
| 290 photo_mask_pix |= nontext_map_; | |
| 291 stroke_width_->FindLeaderPartitions(input_block, &part_grid_); | |
| 292 stroke_width_->RemoveLineResidue(&big_parts_); | |
| 293 FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, input_block); | |
| 294 SetBlockRuleEdges(input_block); | |
| 295 stroke_width_->GradeBlobsIntoPartitions(pageseg_mode, rerotate_, input_block, nontext_map_, | |
| 296 denorm_, cjk_script_, &projection_, diacritic_blobs, | |
| 297 &part_grid_, &big_parts_); | |
| 298 if (!PSM_SPARSE(pageseg_mode)) { | |
| 299 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, | |
| 300 pixa_debug, &part_grid_, &big_parts_); | |
| 301 ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, photo_mask_pix); | |
| 302 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, | |
| 303 pixa_debug, &part_grid_, &big_parts_); | |
| 304 } | |
| 305 part_grid_.ReTypeBlobs(&image_bblobs_); | |
| 306 TidyBlobs(input_block); | |
| 307 Reset(); | |
| 308 // TODO(rays) need to properly handle big_parts_. | |
| 309 ColPartition_IT p_it(&big_parts_); | |
| 310 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { | |
| 311 p_it.data()->DisownBoxesNoAssert(); | |
| 312 } | |
| 313 big_parts_.clear(); | |
| 314 delete stroke_width_; | |
| 315 stroke_width_ = nullptr; | |
| 316 // Compute the edge offsets whether or not there is a grey_pix. It is done | |
| 317 // here as the c_blobs haven't been touched by rotation or anything yet, | |
| 318 // so no denorm is required, yet the text has been separated from image, so | |
| 319 // no time is wasted running it on image blobs. | |
| 320 input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); | |
| 321 | |
| 322 // A note about handling right-to-left scripts (Hebrew/Arabic): | |
| 323 // The columns must be reversed and come out in right-to-left instead of | |
| 324 // the normal left-to-right order. Because the left-to-right ordering | |
| 325 // is implicit in many data structures, it is simpler to fool the algorithms | |
| 326 // into thinking they are dealing with left-to-right text. | |
| 327 // To do this, we reflect the needed data in the y-axis and then reflect | |
| 328 // the blocks back after they have been created. This is a temporary | |
| 329 // arrangement that is confined to this function only, so the reflection | |
| 330 // is completely invisible in the output blocks. | |
| 331 // The only objects reflected are: | |
| 332 // The vertical separator lines that have already been found; | |
| 333 // The bounding boxes of all BLOBNBOXES on all lists on the input_block | |
| 334 // plus the image_bblobs. The outlines are not touched, since they are | |
| 335 // not looked at. | |
| 336 bool input_is_rtl = input_block->block->right_to_left(); | |
| 337 if (input_is_rtl) { | |
| 338 // Reflect the vertical separator lines (member of TabFind). | |
| 339 ReflectInYAxis(); | |
| 340 // Reflect the blob boxes. | |
| 341 ReflectForRtl(input_block, &image_bblobs_); | |
| 342 part_grid_.ReflectInYAxis(); | |
| 343 } | |
| 344 | |
| 345 if (!PSM_SPARSE(pageseg_mode)) { | |
| 346 if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { | |
| 347 // No tab stops needed. Just the grid that FindTabVectors makes. | |
| 348 DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); | |
| 349 } else { | |
| 350 SetBlockRuleEdges(input_block); | |
| 351 // Find the tab stops, estimate skew, and deskew the tabs, blobs and | |
| 352 // part_grid_. | |
| 353 FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, min_gutter_width_, | |
| 354 tabfind_aligned_gap_fraction_, &part_grid_, &deskew_, &reskew_); | |
| 355 // Add the deskew to the denorm_. | |
| 356 auto *new_denorm = new DENORM; | |
| 357 new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, | |
| 358 0.0f); | |
| 359 denorm_ = new_denorm; | |
| 360 } | |
| 361 SetBlockRuleEdges(input_block); | |
| 362 part_grid_.SetTabStops(this); | |
| 363 | |
| 364 // Make the column_sets_. | |
| 365 if (!MakeColumns(false)) { | |
| 366 tprintf("Empty page!!\n"); | |
| 367 part_grid_.DeleteParts(); | |
| 368 return 0; // This is an empty page. | |
| 369 } | |
| 370 | |
| 371 // Refill the grid using rectangular spreading, and get the benefit | |
| 372 // of the completed tab vectors marking the rule edges of each blob. | |
| 373 Clear(); | |
| 374 #ifndef GRAPHICS_DISABLED | |
| 375 if (textord_tabfind_show_reject_blobs) { | |
| 376 ScrollView *rej_win = MakeWindow(500, 300, "Rejected blobs"); | |
| 377 input_block->plot_graded_blobs(rej_win); | |
| 378 } | |
| 379 #endif // !GRAPHICS_DISABLED | |
| 380 InsertBlobsToGrid(false, false, &image_bblobs_, this); | |
| 381 InsertBlobsToGrid(true, true, &input_block->blobs, this); | |
| 382 | |
| 383 part_grid_.GridFindMargins(best_columns_); | |
| 384 // Split and merge the partitions by looking at local neighbours. | |
| 385 GridSplitPartitions(); | |
| 386 // Resolve unknown partitions by adding to an existing partition, fixing | |
| 387 // the type, or declaring them noise. | |
| 388 part_grid_.GridFindMargins(best_columns_); | |
| 389 GridMergePartitions(); | |
| 390 // Insert any unused noise blobs that are close enough to an appropriate | |
| 391 // partition. | |
| 392 InsertRemainingNoise(input_block); | |
| 393 // Add horizontal line separators as partitions. | |
| 394 GridInsertHLinePartitions(); | |
| 395 GridInsertVLinePartitions(); | |
| 396 // Recompute margins based on a local neighbourhood search. | |
| 397 part_grid_.GridFindMargins(best_columns_); | |
| 398 SetPartitionTypes(); | |
| 399 } | |
| 400 #ifndef GRAPHICS_DISABLED | |
| 401 if (textord_tabfind_show_initial_partitions) { | |
| 402 ScrollView *part_win = MakeWindow(100, 300, "InitialPartitions"); | |
| 403 part_grid_.DisplayBoxes(part_win); | |
| 404 DisplayTabVectors(part_win); | |
| 405 } | |
| 406 #endif | |
| 407 if (!PSM_SPARSE(pageseg_mode)) { | |
| 408 #ifndef DISABLED_LEGACY_ENGINE | |
| 409 if (equation_detect_) { | |
| 410 equation_detect_->FindEquationParts(&part_grid_, best_columns_); | |
| 411 } | |
| 412 #endif | |
| 413 if (textord_tabfind_find_tables) { | |
| 414 TableFinder table_finder; | |
| 415 table_finder.Init(gridsize(), bleft(), tright()); | |
| 416 table_finder.set_resolution(resolution_); | |
| 417 table_finder.set_left_to_right_language(!input_block->block->right_to_left()); | |
| 418 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and | |
| 419 // insert dot-like noise into period_grid_ | |
| 420 table_finder.InsertCleanPartitions(&part_grid_, input_block); | |
| 421 // Get Table Regions | |
| 422 table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); | |
| 423 } | |
| 424 GridRemoveUnderlinePartitions(); | |
| 425 part_grid_.DeleteUnknownParts(input_block); | |
| 426 | |
| 427 // Build the partitions into chains that belong in the same block and | |
| 428 // refine into one-to-one links, then smooth the types within each chain. | |
| 429 part_grid_.FindPartitionPartners(); | |
| 430 part_grid_.FindFigureCaptions(); | |
| 431 part_grid_.RefinePartitionPartners(true); | |
| 432 SmoothPartnerRuns(); | |
| 433 | |
| 434 #ifndef GRAPHICS_DISABLED | |
| 435 if (textord_tabfind_show_partitions) { | |
| 436 ScrollView *window = MakeWindow(400, 300, "Partitions"); | |
| 437 if (window != nullptr) { | |
| 438 part_grid_.DisplayBoxes(window); | |
| 439 if (!textord_debug_printable) { | |
| 440 DisplayTabVectors(window); | |
| 441 } | |
| 442 if (window != nullptr && textord_tabfind_show_partitions > 1) { | |
| 443 window->AwaitEvent(SVET_DESTROY); | |
| 444 } | |
| 445 } | |
| 446 } | |
| 447 #endif // !GRAPHICS_DISABLED | |
| 448 part_grid_.AssertNoDuplicates(); | |
| 449 } | |
| 450 // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, | |
| 451 // and ownership of the BLOBNBOXes moves to the ColPartitions. | |
| 452 // (They were previously owned by the block or the image_bblobs list.) | |
| 453 ReleaseBlobsAndCleanupUnused(input_block); | |
| 454 // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and | |
| 455 // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves | |
| 456 // from the ColPartitions to the output TO_BLOCK. In non-text, the | |
| 457 // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. | |
| 458 if (PSM_SPARSE(pageseg_mode)) { | |
| 459 part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); | |
| 460 } else { | |
| 461 TransformToBlocks(blocks, to_blocks); | |
| 462 } | |
| 463 if (textord_debug_tabfind) { | |
| 464 tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), to_blocks->length()); | |
| 465 } | |
| 466 | |
| 467 #ifndef GRAPHICS_DISABLED | |
| 468 if (textord_tabfind_show_blocks) { | |
| 469 DisplayBlocks(blocks); | |
| 470 } | |
| 471 #endif | |
| 472 RotateAndReskewBlocks(input_is_rtl, to_blocks); | |
| 473 int result = 0; | |
| 474 #ifndef GRAPHICS_DISABLED | |
| 475 if (blocks_win_ != nullptr) { | |
| 476 bool waiting = false; | |
| 477 do { | |
| 478 waiting = false; | |
| 479 auto event = blocks_win_->AwaitEvent(SVET_ANY); | |
| 480 if (event->type == SVET_INPUT && event->parameter != nullptr) { | |
| 481 if (*event->parameter == 'd') { | |
| 482 result = -1; | |
| 483 } else { | |
| 484 blocks->clear(); | |
| 485 } | |
| 486 } else if (event->type == SVET_DESTROY) { | |
| 487 blocks_win_ = nullptr; | |
| 488 } else { | |
| 489 waiting = true; | |
| 490 } | |
| 491 } while (waiting); | |
| 492 } | |
| 493 #endif // !GRAPHICS_DISABLED | |
| 494 return result; | |
| 495 } | |
| 496 | |
| 497 // Get the rotation required to deskew, and its inverse rotation. | |
| 498 void ColumnFinder::GetDeskewVectors(FCOORD *deskew, FCOORD *reskew) { | |
| 499 *reskew = reskew_; | |
| 500 *deskew = reskew_; | |
| 501 deskew->set_y(-deskew->y()); | |
| 502 } | |
| 503 | |
| 504 #ifndef DISABLED_LEGACY_ENGINE | |
| 505 void ColumnFinder::SetEquationDetect(EquationDetectBase *detect) { | |
| 506 equation_detect_ = detect; | |
| 507 } | |
| 508 #endif | |
| 509 | |
| 510 //////////////// PRIVATE CODE ///////////////////////// | |
| 511 | |
| 512 #ifndef GRAPHICS_DISABLED | |
| 513 | |
| 514 // Displays the blob and block bounding boxes in a window called Blocks. | |
| 515 void ColumnFinder::DisplayBlocks(BLOCK_LIST *blocks) { | |
| 516 if (blocks_win_ == nullptr) { | |
| 517 blocks_win_ = MakeWindow(700, 300, "Blocks"); | |
| 518 } else { | |
| 519 blocks_win_->Clear(); | |
| 520 } | |
| 521 DisplayBoxes(blocks_win_); | |
| 522 BLOCK_IT block_it(blocks); | |
| 523 int serial = 1; | |
| 524 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { | |
| 525 BLOCK *block = block_it.data(); | |
| 526 block->pdblk.plot(blocks_win_, serial++, | |
| 527 textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); | |
| 528 } | |
| 529 blocks_win_->Update(); | |
| 530 } | |
| 531 | |
| 532 // Displays the column edges at each grid y coordinate defined by | |
| 533 // best_columns_. | |
| 534 void ColumnFinder::DisplayColumnBounds(PartSetVector *sets) { | |
| 535 ScrollView *col_win = MakeWindow(50, 300, "Columns"); | |
| 536 DisplayBoxes(col_win); | |
| 537 col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); | |
| 538 for (int i = 0; i < gridheight_; ++i) { | |
| 539 ColPartitionSet *columns = best_columns_[i]; | |
| 540 if (columns != nullptr) { | |
| 541 columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); | |
| 542 } | |
| 543 } | |
| 544 } | |
| 545 | |
| 546 #endif // !GRAPHICS_DISABLED | |
| 547 | |
| 548 // Sets up column_sets_ (the determined column layout at each horizontal | |
| 549 // slice). Returns false if the page is empty. | |
| 550 bool ColumnFinder::MakeColumns(bool single_column) { | |
| 551 // The part_sets_ are a temporary structure used during column creation, | |
| 552 // and is a vector of ColPartitionSets, representing ColPartitions found | |
| 553 // at horizontal slices through the page. | |
| 554 PartSetVector part_sets; | |
| 555 if (!single_column) { | |
| 556 if (!part_grid_.MakeColPartSets(&part_sets)) { | |
| 557 return false; // Empty page. | |
| 558 } | |
| 559 ASSERT_HOST(part_grid_.gridheight() == gridheight_); | |
| 560 // Try using only the good parts first. | |
| 561 bool good_only = true; | |
| 562 do { | |
| 563 for (int i = 0; i < gridheight_; ++i) { | |
| 564 ColPartitionSet *line_set = part_sets.at(i); | |
| 565 if (line_set != nullptr && line_set->LegalColumnCandidate()) { | |
| 566 ColPartitionSet *column_candidate = line_set->Copy(good_only); | |
| 567 if (column_candidate != nullptr) { | |
| 568 column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); | |
| 569 } | |
| 570 } | |
| 571 } | |
| 572 good_only = !good_only; | |
| 573 } while (column_sets_.empty() && !good_only); | |
| 574 if (textord_debug_tabfind) { | |
| 575 PrintColumnCandidates("Column candidates"); | |
| 576 } | |
| 577 // Improve the column candidates against themselves. | |
| 578 ImproveColumnCandidates(&column_sets_, &column_sets_); | |
| 579 if (textord_debug_tabfind) { | |
| 580 PrintColumnCandidates("Improved columns"); | |
| 581 } | |
| 582 // Improve the column candidates using the part_sets_. | |
| 583 ImproveColumnCandidates(&part_sets, &column_sets_); | |
| 584 } | |
| 585 ColPartitionSet *single_column_set = part_grid_.MakeSingleColumnSet(WidthCB()); | |
| 586 if (single_column_set != nullptr) { | |
| 587 // Always add the single column set as a backup even if not in | |
| 588 // single column mode. | |
| 589 single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); | |
| 590 } | |
| 591 if (textord_debug_tabfind) { | |
| 592 PrintColumnCandidates("Final Columns"); | |
| 593 } | |
| 594 bool has_columns = !column_sets_.empty(); | |
| 595 if (has_columns) { | |
| 596 // Divide the page into sections of uniform column layout. | |
| 597 bool any_multi_column = AssignColumns(part_sets); | |
| 598 #ifndef GRAPHICS_DISABLED | |
| 599 if (textord_tabfind_show_columns) { | |
| 600 DisplayColumnBounds(&part_sets); | |
| 601 } | |
| 602 #endif | |
| 603 ComputeMeanColumnGap(any_multi_column); | |
| 604 } | |
| 605 for (auto line_set : part_sets) { | |
| 606 if (line_set != nullptr) { | |
| 607 line_set->RelinquishParts(); | |
| 608 delete line_set; | |
| 609 } | |
| 610 } | |
| 611 return has_columns; | |
| 612 } | |
| 613 | |
| 614 // Attempt to improve the column_candidates by expanding the columns | |
| 615 // and adding new partitions from the partition sets in src_sets. | |
| 616 // Src_sets may be equal to column_candidates, in which case it will | |
| 617 // use them as a source to improve themselves. | |
| 618 void ColumnFinder::ImproveColumnCandidates(PartSetVector *src_sets, PartSetVector *column_sets) { | |
| 619 // TODO: optimize. | |
| 620 PartSetVector temp_cols = *column_sets; | |
| 621 column_sets->clear(); | |
| 622 if (src_sets == column_sets) { | |
| 623 src_sets = &temp_cols; | |
| 624 } | |
| 625 int set_size = temp_cols.size(); | |
| 626 // Try using only the good parts first. | |
| 627 bool good_only = true; | |
| 628 do { | |
| 629 for (int i = 0; i < set_size; ++i) { | |
| 630 ColPartitionSet *column_candidate = temp_cols.at(i); | |
| 631 ASSERT_HOST(column_candidate != nullptr); | |
| 632 ColPartitionSet *improved = column_candidate->Copy(good_only); | |
| 633 if (improved != nullptr) { | |
| 634 improved->ImproveColumnCandidate(WidthCB(), src_sets); | |
| 635 improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); | |
| 636 } | |
| 637 } | |
| 638 good_only = !good_only; | |
| 639 } while (column_sets->empty() && !good_only); | |
| 640 if (column_sets->empty()) { | |
| 641 // TODO: optimize. | |
| 642 *column_sets = temp_cols; | |
| 643 temp_cols.clear(); | |
| 644 } else { | |
| 645 for (auto data : temp_cols) { | |
| 646 delete data; | |
| 647 } | |
| 648 } | |
| 649 } | |
| 650 | |
| 651 // Prints debug information on the column candidates. | |
| 652 void ColumnFinder::PrintColumnCandidates(const char *title) { | |
| 653 int set_size = column_sets_.size(); | |
| 654 tprintf("Found %d %s:\n", set_size, title); | |
| 655 if (textord_debug_tabfind >= 3) { | |
| 656 for (int i = 0; i < set_size; ++i) { | |
| 657 ColPartitionSet *column_set = column_sets_.at(i); | |
| 658 column_set->Print(); | |
| 659 } | |
| 660 } | |
| 661 } | |
| 662 | |
| 663 // Finds the optimal set of columns that cover the entire image with as | |
| 664 // few changes in column partition as possible. | |
| 665 // NOTE: this could be thought of as an optimization problem, but a simple | |
| 666 // greedy algorithm is used instead. The algorithm repeatedly finds the modal | |
| 667 // compatible column in an unassigned region and uses that with the extra | |
| 668 // tweak of extending the modal region over small breaks in compatibility. | |
| 669 // Where modal regions overlap, the boundary is chosen so as to minimize | |
| 670 // the cost in terms of ColPartitions not fitting an approved column. | |
| 671 // Returns true if any part of the page is multi-column. | |
| 672 bool ColumnFinder::AssignColumns(const PartSetVector &part_sets) { | |
| 673 int set_count = part_sets.size(); | |
| 674 ASSERT_HOST(set_count == gridheight()); | |
| 675 // Allocate and init the best_columns_. | |
| 676 best_columns_ = new ColPartitionSet *[set_count]; | |
| 677 for (int y = 0; y < set_count; ++y) { | |
| 678 best_columns_[y] = nullptr; | |
| 679 } | |
| 680 int column_count = column_sets_.size(); | |
| 681 // column_set_costs[part_sets_ index][column_sets_ index] is | |
| 682 // < INT32_MAX if the partition set is compatible with the column set, | |
| 683 // in which case its value is the cost for that set used in deciding | |
| 684 // which competing set to assign. | |
| 685 // any_columns_possible[part_sets_ index] is true if any of | |
| 686 // possible_column_sets[part_sets_ index][*] is < INT32_MAX. | |
| 687 // assigned_costs[part_sets_ index] is set to the column_set_costs | |
| 688 // of the assigned column_sets_ index or INT32_MAX if none is set. | |
| 689 // On return the best_columns_ member is set. | |
| 690 bool *any_columns_possible = new bool[set_count]; | |
| 691 int *assigned_costs = new int[set_count]; | |
| 692 int **column_set_costs = new int *[set_count]; | |
| 693 // Set possible column_sets to indicate whether each set is compatible | |
| 694 // with each column. | |
| 695 for (int part_i = 0; part_i < set_count; ++part_i) { | |
| 696 ColPartitionSet *line_set = part_sets.at(part_i); | |
| 697 bool debug = line_set != nullptr && WithinTestRegion(2, line_set->bounding_box().left(), | |
| 698 line_set->bounding_box().bottom()); | |
| 699 column_set_costs[part_i] = new int[column_count]; | |
| 700 any_columns_possible[part_i] = false; | |
| 701 assigned_costs[part_i] = INT32_MAX; | |
| 702 for (int col_i = 0; col_i < column_count; ++col_i) { | |
| 703 if (line_set != nullptr && | |
| 704 column_sets_.at(col_i)->CompatibleColumns(debug, line_set, WidthCB())) { | |
| 705 column_set_costs[part_i][col_i] = column_sets_.at(col_i)->UnmatchedWidth(line_set); | |
| 706 any_columns_possible[part_i] = true; | |
| 707 } else { | |
| 708 column_set_costs[part_i][col_i] = INT32_MAX; | |
| 709 if (debug) { | |
| 710 tprintf("Set id %d did not match at y=%d, lineset =%p\n", | |
| 711 col_i, part_i, static_cast<void *>(line_set)); | |
| 712 } | |
| 713 } | |
| 714 } | |
| 715 } | |
| 716 bool any_multi_column = false; | |
| 717 // Assign a column set to each vertical grid position. | |
| 718 // While there is an unassigned range, find its mode. | |
| 719 int start, end; | |
| 720 while (BiggestUnassignedRange(set_count, any_columns_possible, &start, &end)) { | |
| 721 if (textord_debug_tabfind >= 2) { | |
| 722 tprintf("Biggest unassigned range = %d- %d\n", start, end); | |
| 723 } | |
| 724 // Find the modal column_set_id in the range. | |
| 725 int column_set_id = RangeModalColumnSet(column_set_costs, assigned_costs, start, end); | |
| 726 if (textord_debug_tabfind >= 2) { | |
| 727 tprintf("Range modal column id = %d\n", column_set_id); | |
| 728 column_sets_.at(column_set_id)->Print(); | |
| 729 } | |
| 730 // Now find the longest run of the column_set_id in the range. | |
| 731 ShrinkRangeToLongestRun(column_set_costs, assigned_costs, any_columns_possible, column_set_id, | |
| 732 &start, &end); | |
| 733 if (textord_debug_tabfind >= 2) { | |
| 734 tprintf("Shrunk range = %d- %d\n", start, end); | |
| 735 } | |
| 736 // Extend the start and end past the longest run, while there are | |
| 737 // only small gaps in compatibility that can be overcome by larger | |
| 738 // regions of compatibility beyond. | |
| 739 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, | |
| 740 -1, -1, &start); | |
| 741 --end; | |
| 742 ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, | |
| 743 1, set_count, &end); | |
| 744 ++end; | |
| 745 if (textord_debug_tabfind) { | |
| 746 tprintf("Column id %d applies to range = %d - %d\n", column_set_id, start, end); | |
| 747 } | |
| 748 // Assign the column to the range, which now may overlap with other ranges. | |
| 749 AssignColumnToRange(column_set_id, start, end, column_set_costs, assigned_costs); | |
| 750 if (column_sets_.at(column_set_id)->GoodColumnCount() > 1) { | |
| 751 any_multi_column = true; | |
| 752 } | |
| 753 } | |
| 754 // If anything remains unassigned, the whole lot is unassigned, so | |
| 755 // arbitrarily assign id 0. | |
| 756 if (best_columns_[0] == nullptr) { | |
| 757 AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); | |
| 758 } | |
| 759 // Free memory. | |
| 760 for (int i = 0; i < set_count; ++i) { | |
| 761 delete[] column_set_costs[i]; | |
| 762 } | |
| 763 delete[] assigned_costs; | |
| 764 delete[] any_columns_possible; | |
| 765 delete[] column_set_costs; | |
| 766 return any_multi_column; | |
| 767 } | |
| 768 | |
| 769 // Finds the biggest range in part_sets_ that has no assigned column, but | |
| 770 // column assignment is possible. | |
| 771 bool ColumnFinder::BiggestUnassignedRange(int set_count, const bool *any_columns_possible, | |
| 772 int *best_start, int *best_end) { | |
| 773 int best_range_size = 0; | |
| 774 *best_start = set_count; | |
| 775 *best_end = set_count; | |
| 776 int end = set_count; | |
| 777 for (int start = 0; start < gridheight_; start = end) { | |
| 778 // Find the first unassigned index in start. | |
| 779 while (start < set_count) { | |
| 780 if (best_columns_[start] == nullptr && any_columns_possible[start]) { | |
| 781 break; | |
| 782 } | |
| 783 ++start; | |
| 784 } | |
| 785 // Find the first past the end and count the good ones in between. | |
| 786 int range_size = 1; // Number of non-null, but unassigned line sets. | |
| 787 end = start + 1; | |
| 788 while (end < set_count) { | |
| 789 if (best_columns_[end] != nullptr) { | |
| 790 break; | |
| 791 } | |
| 792 if (any_columns_possible[end]) { | |
| 793 ++range_size; | |
| 794 } | |
| 795 ++end; | |
| 796 } | |
| 797 if (start < set_count && range_size > best_range_size) { | |
| 798 best_range_size = range_size; | |
| 799 *best_start = start; | |
| 800 *best_end = end; | |
| 801 } | |
| 802 } | |
| 803 return *best_start < *best_end; | |
| 804 } | |
| 805 | |
| 806 // Finds the modal compatible column_set_ index within the given range. | |
| 807 int ColumnFinder::RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start, | |
| 808 int end) { | |
| 809 int column_count = column_sets_.size(); | |
| 810 STATS column_stats(0, column_count - 1); | |
| 811 for (int part_i = start; part_i < end; ++part_i) { | |
| 812 for (int col_j = 0; col_j < column_count; ++col_j) { | |
| 813 if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) { | |
| 814 column_stats.add(col_j, 1); | |
| 815 } | |
| 816 } | |
| 817 } | |
| 818 ASSERT_HOST(column_stats.get_total() > 0); | |
| 819 return column_stats.mode(); | |
| 820 } | |
| 821 | |
| 822 // Given that there are many column_set_id compatible columns in the range, | |
| 823 // shrinks the range to the longest contiguous run of compatibility, allowing | |
| 824 // gaps where no columns are possible, but not where competing columns are | |
| 825 // possible. | |
| 826 void ColumnFinder::ShrinkRangeToLongestRun(int **column_set_costs, const int *assigned_costs, | |
| 827 const bool *any_columns_possible, int column_set_id, | |
| 828 int *best_start, int *best_end) { | |
| 829 // orig_start and orig_end are the maximum range we will look at. | |
| 830 int orig_start = *best_start; | |
| 831 int orig_end = *best_end; | |
| 832 int best_range_size = 0; | |
| 833 *best_start = orig_end; | |
| 834 *best_end = orig_end; | |
| 835 int end = orig_end; | |
| 836 for (int start = orig_start; start < orig_end; start = end) { | |
| 837 // Find the first possible | |
| 838 while (start < orig_end) { | |
| 839 if (column_set_costs[start][column_set_id] < assigned_costs[start] || | |
| 840 !any_columns_possible[start]) { | |
| 841 break; | |
| 842 } | |
| 843 ++start; | |
| 844 } | |
| 845 // Find the first past the end. | |
| 846 end = start + 1; | |
| 847 while (end < orig_end) { | |
| 848 if (column_set_costs[end][column_set_id] >= assigned_costs[start] && | |
| 849 any_columns_possible[end]) { | |
| 850 break; | |
| 851 } | |
| 852 ++end; | |
| 853 } | |
| 854 if (start < orig_end && end - start > best_range_size) { | |
| 855 best_range_size = end - start; | |
| 856 *best_start = start; | |
| 857 *best_end = end; | |
| 858 } | |
| 859 } | |
| 860 } | |
| 861 | |
| 862 // Moves start in the direction of step, up to, but not including end while | |
| 863 // the only incompatible regions are no more than kMaxIncompatibleColumnCount | |
| 864 // in size, and the compatible regions beyond are bigger. | |
| 865 void ColumnFinder::ExtendRangePastSmallGaps(int **column_set_costs, const int *assigned_costs, | |
| 866 const bool *any_columns_possible, int column_set_id, | |
| 867 int step, int end, int *start) { | |
| 868 if (textord_debug_tabfind > 2) { | |
| 869 tprintf("Starting expansion at %d, step=%d, limit=%d\n", *start, step, end); | |
| 870 } | |
| 871 if (*start == end) { | |
| 872 return; // Cannot be expanded. | |
| 873 } | |
| 874 | |
| 875 int barrier_size = 0; | |
| 876 int good_size = 0; | |
| 877 do { | |
| 878 // Find the size of the incompatible barrier. | |
| 879 barrier_size = 0; | |
| 880 int i; | |
| 881 for (i = *start + step; i != end; i += step) { | |
| 882 if (column_set_costs[i][column_set_id] < assigned_costs[i]) { | |
| 883 break; // We are back on. | |
| 884 } | |
| 885 // Locations where none are possible don't count. | |
| 886 if (any_columns_possible[i]) { | |
| 887 ++barrier_size; | |
| 888 } | |
| 889 } | |
| 890 if (textord_debug_tabfind > 2) { | |
| 891 tprintf("At %d, Barrier size=%d\n", i, barrier_size); | |
| 892 } | |
| 893 if (barrier_size > kMaxIncompatibleColumnCount) { | |
| 894 return; // Barrier too big. | |
| 895 } | |
| 896 if (i == end) { | |
| 897 // We can't go any further, but the barrier was small, so go to the end. | |
| 898 *start = i - step; | |
| 899 return; | |
| 900 } | |
| 901 // Now find the size of the good region on the other side. | |
| 902 good_size = 1; | |
| 903 for (i += step; i != end; i += step) { | |
| 904 if (column_set_costs[i][column_set_id] < assigned_costs[i]) { | |
| 905 ++good_size; | |
| 906 } else if (any_columns_possible[i]) { | |
| 907 break; | |
| 908 } | |
| 909 } | |
| 910 if (textord_debug_tabfind > 2) { | |
| 911 tprintf("At %d, good size = %d\n", i, good_size); | |
| 912 } | |
| 913 // If we had enough good ones we can extend the start and keep looking. | |
| 914 if (good_size >= barrier_size) { | |
| 915 *start = i - step; | |
| 916 } | |
| 917 } while (good_size >= barrier_size); | |
| 918 } | |
| 919 | |
| 920 // Assigns the given column_set_id to the given range. | |
| 921 void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, | |
| 922 int **column_set_costs, int *assigned_costs) { | |
| 923 ColPartitionSet *column_set = column_sets_.at(column_set_id); | |
| 924 for (int i = start; i < end; ++i) { | |
| 925 assigned_costs[i] = column_set_costs[i][column_set_id]; | |
| 926 best_columns_[i] = column_set; | |
| 927 } | |
| 928 } | |
| 929 | |
| 930 // Computes the mean_column_gap_. | |
| 931 void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { | |
| 932 int total_gap = 0; | |
| 933 int total_width = 0; | |
| 934 int gap_samples = 0; | |
| 935 int width_samples = 0; | |
| 936 for (int i = 0; i < gridheight_; ++i) { | |
| 937 ASSERT_HOST(best_columns_[i] != nullptr); | |
| 938 best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, &width_samples, &total_gap, | |
| 939 &gap_samples); | |
| 940 } | |
| 941 mean_column_gap_ = any_multi_column && gap_samples > 0 | |
| 942 ? total_gap / gap_samples | |
| 943 : width_samples > 0 ? total_width / width_samples : 0; | |
| 944 } | |
| 945 | |
| 946 //////// Functions that manipulate ColPartitions in the part_grid_ ///// | |
| 947 //////// to split, merge, find margins, and find types. ////////////// | |
| 948 | |
| 949 // Helper to delete all the deletable blobs on the list. Owned blobs are | |
| 950 // extracted from the list, but not deleted, leaving them owned by the owner(). | |
| 951 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST *blobs) { | |
| 952 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { | |
| 953 BLOBNBOX *blob = blob_it.extract(); | |
| 954 if (blob->owner() == nullptr) { | |
| 955 delete blob; | |
| 956 } | |
| 957 } | |
| 958 } | |
| 959 | |
| 960 // Hoovers up all un-owned blobs and deletes them. | |
| 961 // The rest get released from the block so the ColPartitions can pass | |
| 962 // ownership to the output blocks. | |
| 963 void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK *block) { | |
| 964 ReleaseAllBlobsAndDeleteUnused(&block->blobs); | |
| 965 ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); | |
| 966 ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); | |
| 967 ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); | |
| 968 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); | |
| 969 } | |
| 970 | |
| 971 // Splits partitions that cross columns where they have nothing in the gap. | |
| 972 void ColumnFinder::GridSplitPartitions() { | |
| 973 // Iterate the ColPartitions in the grid. | |
| 974 ColPartitionGridSearch gsearch(&part_grid_); | |
| 975 gsearch.StartFullSearch(); | |
| 976 ColPartition *dont_repeat = nullptr; | |
| 977 ColPartition *part; | |
| 978 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 979 if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) { | |
| 980 continue; // Only applies to text partitions. | |
| 981 } | |
| 982 ColPartitionSet *column_set = best_columns_[gsearch.GridY()]; | |
| 983 int first_col = -1; | |
| 984 int last_col = -1; | |
| 985 // Find which columns the partition spans. | |
| 986 part->ColumnRange(resolution_, column_set, &first_col, &last_col); | |
| 987 if (first_col > 0) { | |
| 988 --first_col; | |
| 989 } | |
| 990 // Convert output column indices to physical column indices. | |
| 991 first_col /= 2; | |
| 992 last_col /= 2; | |
| 993 // We will only consider cases where a partition spans two columns, | |
| 994 // since a heading that spans more columns than that is most likely | |
| 995 // genuine. | |
| 996 if (last_col != first_col + 1) { | |
| 997 continue; | |
| 998 } | |
| 999 // Set up a rectangle search x-bounded by the column gap and y by the part. | |
| 1000 int y = part->MidY(); | |
| 1001 TBOX margin_box = part->bounding_box(); | |
| 1002 bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), margin_box.bottom()); | |
| 1003 if (debug) { | |
| 1004 tprintf("Considering partition for GridSplit:"); | |
| 1005 part->Print(); | |
| 1006 } | |
| 1007 ColPartition *column = column_set->GetColumnByIndex(first_col); | |
| 1008 if (column == nullptr) { | |
| 1009 continue; | |
| 1010 } | |
| 1011 margin_box.set_left(column->RightAtY(y) + 2); | |
| 1012 column = column_set->GetColumnByIndex(last_col); | |
| 1013 if (column == nullptr) { | |
| 1014 continue; | |
| 1015 } | |
| 1016 margin_box.set_right(column->LeftAtY(y) - 2); | |
| 1017 // TODO(rays) Decide whether to keep rectangular filling or not in the | |
| 1018 // main grid and therefore whether we need a fancier search here. | |
| 1019 // Now run the rect search on the main blob grid. | |
| 1020 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this); | |
| 1021 if (debug) { | |
| 1022 tprintf("Searching box (%d,%d)->(%d,%d)\n", margin_box.left(), margin_box.bottom(), | |
| 1023 margin_box.right(), margin_box.top()); | |
| 1024 part->Print(); | |
| 1025 } | |
| 1026 rectsearch.StartRectSearch(margin_box); | |
| 1027 BLOBNBOX *bbox; | |
| 1028 while ((bbox = rectsearch.NextRectSearch()) != nullptr) { | |
| 1029 if (bbox->bounding_box().overlap(margin_box)) { | |
| 1030 break; | |
| 1031 } | |
| 1032 } | |
| 1033 if (bbox == nullptr) { | |
| 1034 // There seems to be nothing in the hole, so split the partition. | |
| 1035 gsearch.RemoveBBox(); | |
| 1036 int x_middle = (margin_box.left() + margin_box.right()) / 2; | |
| 1037 if (debug) { | |
| 1038 tprintf("Splitting part at %d:", x_middle); | |
| 1039 part->Print(); | |
| 1040 } | |
| 1041 ColPartition *split_part = part->SplitAt(x_middle); | |
| 1042 if (split_part != nullptr) { | |
| 1043 if (debug) { | |
| 1044 tprintf("Split result:"); | |
| 1045 part->Print(); | |
| 1046 split_part->Print(); | |
| 1047 } | |
| 1048 part_grid_.InsertBBox(true, true, split_part); | |
| 1049 } else { | |
| 1050 // Split had no effect | |
| 1051 if (debug) { | |
| 1052 tprintf("Split had no effect\n"); | |
| 1053 } | |
| 1054 dont_repeat = part; | |
| 1055 } | |
| 1056 part_grid_.InsertBBox(true, true, part); | |
| 1057 gsearch.RepositionIterator(); | |
| 1058 } else if (debug) { | |
| 1059 tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", | |
| 1060 bbox->bounding_box().left(), bbox->bounding_box().bottom(), | |
| 1061 bbox->bounding_box().right(), bbox->bounding_box().top()); | |
| 1062 } | |
| 1063 } | |
| 1064 } | |
| 1065 | |
| 1066 // Merges partitions where there is vertical overlap, within a single column, | |
| 1067 // and the horizontal gap is small enough. | |
| 1068 void ColumnFinder::GridMergePartitions() { | |
| 1069 // Iterate the ColPartitions in the grid. | |
| 1070 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); | |
| 1071 gsearch.StartFullSearch(); | |
| 1072 ColPartition *part; | |
| 1073 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1074 if (part->IsUnMergeableType()) { | |
| 1075 continue; | |
| 1076 } | |
| 1077 // Set up a rectangle search x-bounded by the column and y by the part. | |
| 1078 ColPartitionSet *columns = best_columns_[gsearch.GridY()]; | |
| 1079 TBOX box = part->bounding_box(); | |
| 1080 bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); | |
| 1081 if (debug) { | |
| 1082 tprintf("Considering part for merge at:"); | |
| 1083 part->Print(); | |
| 1084 } | |
| 1085 int y = part->MidY(); | |
| 1086 ColPartition *left_column = columns->ColumnContaining(box.left(), y); | |
| 1087 ColPartition *right_column = columns->ColumnContaining(box.right(), y); | |
| 1088 if (left_column == nullptr || right_column != left_column) { | |
| 1089 if (debug) { | |
| 1090 tprintf("In different columns\n"); | |
| 1091 } | |
| 1092 continue; | |
| 1093 } | |
| 1094 box.set_left(left_column->LeftAtY(y)); | |
| 1095 box.set_right(right_column->RightAtY(y)); | |
| 1096 // Now run the rect search. | |
| 1097 bool modified_box = false; | |
| 1098 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(&part_grid_); | |
| 1099 rsearch.SetUniqueMode(true); | |
| 1100 rsearch.StartRectSearch(box); | |
| 1101 ColPartition *neighbour; | |
| 1102 | |
| 1103 while ((neighbour = rsearch.NextRectSearch()) != nullptr) { | |
| 1104 if (neighbour == part || neighbour->IsUnMergeableType()) { | |
| 1105 continue; | |
| 1106 } | |
| 1107 const TBOX &neighbour_box = neighbour->bounding_box(); | |
| 1108 if (debug) { | |
| 1109 tprintf("Considering merge with neighbour at:"); | |
| 1110 neighbour->Print(); | |
| 1111 } | |
| 1112 if (neighbour_box.right() < box.left() || neighbour_box.left() > box.right()) { | |
| 1113 continue; // Not within the same column. | |
| 1114 } | |
| 1115 if (part->VSignificantCoreOverlap(*neighbour) && part->TypesMatch(*neighbour)) { | |
| 1116 // There is vertical overlap and the gross types match, but only | |
| 1117 // merge if the horizontal gap is small enough, as one of the | |
| 1118 // partitions may be a figure caption within a column. | |
| 1119 // If there is only one column, then the mean_column_gap_ is large | |
| 1120 // enough to allow almost any merge, by being the mean column width. | |
| 1121 const TBOX &part_box = part->bounding_box(); | |
| 1122 // Don't merge if there is something else in the way. Use the margin | |
| 1123 // to decide, and check both to allow a bit of overlap. | |
| 1124 if (neighbour_box.left() > part->right_margin() && | |
| 1125 part_box.right() < neighbour->left_margin()) { | |
| 1126 continue; // Neighbour is too far to the right. | |
| 1127 } | |
| 1128 if (neighbour_box.right() < part->left_margin() && | |
| 1129 part_box.left() > neighbour->right_margin()) { | |
| 1130 continue; // Neighbour is too far to the left. | |
| 1131 } | |
| 1132 int h_gap = std::max(part_box.left(), neighbour_box.left()) - | |
| 1133 std::min(part_box.right(), neighbour_box.right()); | |
| 1134 if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || | |
| 1135 part_box.width() < mean_column_gap_ || neighbour_box.width() < mean_column_gap_) { | |
| 1136 if (debug) { | |
| 1137 tprintf("Running grid-based merge between:\n"); | |
| 1138 part->Print(); | |
| 1139 neighbour->Print(); | |
| 1140 } | |
| 1141 rsearch.RemoveBBox(); | |
| 1142 if (!modified_box) { | |
| 1143 // We are going to modify part, so remove it and re-insert it after. | |
| 1144 gsearch.RemoveBBox(); | |
| 1145 rsearch.RepositionIterator(); | |
| 1146 modified_box = true; | |
| 1147 } | |
| 1148 part->Absorb(neighbour, WidthCB()); | |
| 1149 } else if (debug) { | |
| 1150 tprintf("Neighbour failed hgap test\n"); | |
| 1151 } | |
| 1152 } else if (debug) { | |
| 1153 tprintf("Neighbour failed overlap or typesmatch test\n"); | |
| 1154 } | |
| 1155 } | |
| 1156 if (modified_box) { | |
| 1157 // We modified the box of part, so re-insert it into the grid. | |
| 1158 // This does no harm in the current cell, as it already exists there, | |
| 1159 // but it needs to exist in all the cells covered by its bounding box, | |
| 1160 // or it will never be found by a full search. | |
| 1161 // Because the box has changed, it has to be removed first, otherwise | |
| 1162 // add_sorted may fail to keep a single copy of the pointer. | |
| 1163 part_grid_.InsertBBox(true, true, part); | |
| 1164 gsearch.RepositionIterator(); | |
| 1165 } | |
| 1166 } | |
| 1167 } | |
| 1168 | |
| 1169 // Inserts remaining noise blobs into the most applicable partition if any. | |
| 1170 // If there is no applicable partition, then the blobs are deleted. | |
| 1171 void ColumnFinder::InsertRemainingNoise(TO_BLOCK *block) { | |
| 1172 BLOBNBOX_IT blob_it(&block->noise_blobs); | |
| 1173 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 1174 BLOBNBOX *blob = blob_it.data(); | |
| 1175 if (blob->owner() != nullptr) { | |
| 1176 continue; | |
| 1177 } | |
| 1178 TBOX search_box(blob->bounding_box()); | |
| 1179 bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); | |
| 1180 search_box.pad(gridsize(), gridsize()); | |
| 1181 // Setup a rectangle search to find the best partition to merge with. | |
| 1182 ColPartitionGridSearch rsearch(&part_grid_); | |
| 1183 rsearch.SetUniqueMode(true); | |
| 1184 rsearch.StartRectSearch(search_box); | |
| 1185 ColPartition *part; | |
| 1186 ColPartition *best_part = nullptr; | |
| 1187 int best_distance = 0; | |
| 1188 while ((part = rsearch.NextRectSearch()) != nullptr) { | |
| 1189 if (part->IsUnMergeableType()) { | |
| 1190 continue; | |
| 1191 } | |
| 1192 int distance = | |
| 1193 projection_.DistanceOfBoxFromPartition(blob->bounding_box(), *part, denorm_, debug); | |
| 1194 if (best_part == nullptr || distance < best_distance) { | |
| 1195 best_part = part; | |
| 1196 best_distance = distance; | |
| 1197 } | |
| 1198 } | |
| 1199 if (best_part != nullptr && | |
| 1200 best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) { | |
| 1201 // Close enough to merge. | |
| 1202 if (debug) { | |
| 1203 tprintf("Adding noise blob with distance %d, thr=%g:box:", best_distance, | |
| 1204 kMaxDistToPartSizeRatio * best_part->median_height()); | |
| 1205 blob->bounding_box().print(); | |
| 1206 tprintf("To partition:"); | |
| 1207 best_part->Print(); | |
| 1208 } | |
| 1209 part_grid_.RemoveBBox(best_part); | |
| 1210 best_part->AddBox(blob); | |
| 1211 part_grid_.InsertBBox(true, true, best_part); | |
| 1212 blob->set_owner(best_part); | |
| 1213 blob->set_flow(best_part->flow()); | |
| 1214 blob->set_region_type(best_part->blob_type()); | |
| 1215 } else { | |
| 1216 // Mark the blob for deletion. | |
| 1217 blob->set_region_type(BRT_NOISE); | |
| 1218 } | |
| 1219 } | |
| 1220 // Delete the marked blobs, clearing neighbour references. | |
| 1221 block->DeleteUnownedNoise(); | |
| 1222 } | |
| 1223 | |
| 1224 // Helper makes a box from a horizontal line. | |
| 1225 static TBOX BoxFromHLine(const TabVector *hline) { | |
| 1226 int top = std::max(hline->startpt().y(), hline->endpt().y()); | |
| 1227 int bottom = std::min(hline->startpt().y(), hline->endpt().y()); | |
| 1228 top += hline->mean_width(); | |
| 1229 if (top == bottom) { | |
| 1230 if (bottom > 0) { | |
| 1231 --bottom; | |
| 1232 } else { | |
| 1233 ++top; | |
| 1234 } | |
| 1235 } | |
| 1236 return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); | |
| 1237 } | |
| 1238 | |
| 1239 // Remove partitions that come from horizontal lines that look like | |
| 1240 // underlines, but are not part of a table. | |
| 1241 void ColumnFinder::GridRemoveUnderlinePartitions() { | |
| 1242 TabVector_IT hline_it(&horizontal_lines_); | |
| 1243 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { | |
| 1244 TabVector *hline = hline_it.data(); | |
| 1245 if (hline->intersects_other_lines()) { | |
| 1246 continue; | |
| 1247 } | |
| 1248 TBOX line_box = BoxFromHLine(hline); | |
| 1249 TBOX search_box = line_box; | |
| 1250 search_box.pad(0, line_box.height()); | |
| 1251 ColPartitionGridSearch part_search(&part_grid_); | |
| 1252 part_search.SetUniqueMode(true); | |
| 1253 part_search.StartRectSearch(search_box); | |
| 1254 ColPartition *covered; | |
| 1255 bool touched_table = false; | |
| 1256 bool touched_text = false; | |
| 1257 ColPartition *line_part = nullptr; | |
| 1258 while ((covered = part_search.NextRectSearch()) != nullptr) { | |
| 1259 if (covered->type() == PT_TABLE) { | |
| 1260 touched_table = true; | |
| 1261 break; | |
| 1262 } else if (covered->IsTextType()) { | |
| 1263 // TODO(rays) Add a list of underline sections to ColPartition. | |
| 1264 int text_bottom = covered->median_bottom(); | |
| 1265 if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) { | |
| 1266 touched_text = true; | |
| 1267 } | |
| 1268 } else if (covered->blob_type() == BRT_HLINE && line_box.contains(covered->bounding_box()) && | |
| 1269 // not if same instance (identical to hline) | |
| 1270 !TBOX(covered->bounding_box()).contains(line_box)) { | |
| 1271 line_part = covered; | |
| 1272 } | |
| 1273 } | |
| 1274 if (line_part != nullptr && !touched_table && touched_text) { | |
| 1275 part_grid_.RemoveBBox(line_part); | |
| 1276 delete line_part; | |
| 1277 } | |
| 1278 } | |
| 1279 } | |
| 1280 | |
| 1281 // Add horizontal line separators as partitions. | |
| 1282 void ColumnFinder::GridInsertHLinePartitions() { | |
| 1283 TabVector_IT hline_it(&horizontal_lines_); | |
| 1284 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { | |
| 1285 TabVector *hline = hline_it.data(); | |
| 1286 TBOX line_box = BoxFromHLine(hline); | |
| 1287 ColPartition *part = | |
| 1288 ColPartition::MakeLinePartition(BRT_HLINE, vertical_skew_, line_box.left(), | |
| 1289 line_box.bottom(), line_box.right(), line_box.top()); | |
| 1290 part->set_type(PT_HORZ_LINE); | |
| 1291 bool any_image = false; | |
| 1292 ColPartitionGridSearch part_search(&part_grid_); | |
| 1293 part_search.SetUniqueMode(true); | |
| 1294 part_search.StartRectSearch(line_box); | |
| 1295 ColPartition *covered; | |
| 1296 while ((covered = part_search.NextRectSearch()) != nullptr) { | |
| 1297 if (covered->IsImageType()) { | |
| 1298 any_image = true; | |
| 1299 break; | |
| 1300 } | |
| 1301 } | |
| 1302 if (!any_image) { | |
| 1303 part_grid_.InsertBBox(true, true, part); | |
| 1304 } else { | |
| 1305 delete part; | |
| 1306 } | |
| 1307 } | |
| 1308 } | |
| 1309 | |
| 1310 // Add horizontal line separators as partitions. | |
| 1311 void ColumnFinder::GridInsertVLinePartitions() { | |
| 1312 TabVector_IT vline_it(dead_vectors()); | |
| 1313 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { | |
| 1314 TabVector *vline = vline_it.data(); | |
| 1315 if (!vline->IsSeparator()) { | |
| 1316 continue; | |
| 1317 } | |
| 1318 int left = std::min(vline->startpt().x(), vline->endpt().x()); | |
| 1319 int right = std::max(vline->startpt().x(), vline->endpt().x()); | |
| 1320 right += vline->mean_width(); | |
| 1321 if (left == right) { | |
| 1322 if (left > 0) { | |
| 1323 --left; | |
| 1324 } else { | |
| 1325 ++right; | |
| 1326 } | |
| 1327 } | |
| 1328 ColPartition *part = ColPartition::MakeLinePartition( | |
| 1329 BRT_VLINE, vertical_skew_, left, vline->startpt().y(), right, vline->endpt().y()); | |
| 1330 part->set_type(PT_VERT_LINE); | |
| 1331 bool any_image = false; | |
| 1332 ColPartitionGridSearch part_search(&part_grid_); | |
| 1333 part_search.SetUniqueMode(true); | |
| 1334 part_search.StartRectSearch(part->bounding_box()); | |
| 1335 ColPartition *covered; | |
| 1336 while ((covered = part_search.NextRectSearch()) != nullptr) { | |
| 1337 if (covered->IsImageType()) { | |
| 1338 any_image = true; | |
| 1339 break; | |
| 1340 } | |
| 1341 } | |
| 1342 if (!any_image) { | |
| 1343 part_grid_.InsertBBox(true, true, part); | |
| 1344 } else { | |
| 1345 delete part; | |
| 1346 } | |
| 1347 } | |
| 1348 } | |
| 1349 | |
| 1350 // For every ColPartition in the grid, sets its type based on position | |
| 1351 // in the columns. | |
| 1352 void ColumnFinder::SetPartitionTypes() { | |
| 1353 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); | |
| 1354 gsearch.StartFullSearch(); | |
| 1355 ColPartition *part; | |
| 1356 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1357 part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); | |
| 1358 } | |
| 1359 } | |
| 1360 | |
| 1361 // Only images remain with multiple types in a run of partners. | |
| 1362 // Sets the type of all in the group to the maximum of the group. | |
| 1363 void ColumnFinder::SmoothPartnerRuns() { | |
| 1364 // Iterate the ColPartitions in the grid. | |
| 1365 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); | |
| 1366 gsearch.StartFullSearch(); | |
| 1367 ColPartition *part; | |
| 1368 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1369 ColPartition *partner = part->SingletonPartner(true); | |
| 1370 if (partner != nullptr) { | |
| 1371 if (partner->SingletonPartner(false) != part) { | |
| 1372 tprintf("Ooops! Partition:(%d partners)", part->upper_partners()->length()); | |
| 1373 part->Print(); | |
| 1374 tprintf("has singleton partner:(%d partners", partner->lower_partners()->length()); | |
| 1375 partner->Print(); | |
| 1376 tprintf("but its singleton partner is:"); | |
| 1377 if (partner->SingletonPartner(false) == nullptr) { | |
| 1378 tprintf("NULL\n"); | |
| 1379 } else { | |
| 1380 partner->SingletonPartner(false)->Print(); | |
| 1381 } | |
| 1382 } | |
| 1383 ASSERT_HOST(partner->SingletonPartner(false) == part); | |
| 1384 } else if (part->SingletonPartner(false) != nullptr) { | |
| 1385 ColPartitionSet *column_set = best_columns_[gsearch.GridY()]; | |
| 1386 int column_count = column_set->ColumnCount(); | |
| 1387 part->SmoothPartnerRun(column_count * 2 + 1); | |
| 1388 } | |
| 1389 } | |
| 1390 } | |
| 1391 | |
| 1392 // Helper functions for TransformToBlocks. | |
| 1393 // Add the part to the temp list in the correct order. | |
| 1394 void ColumnFinder::AddToTempPartList(ColPartition *part, ColPartition_CLIST *temp_list) { | |
| 1395 int mid_y = part->MidY(); | |
| 1396 ColPartition_C_IT it(temp_list); | |
| 1397 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 1398 ColPartition *test_part = it.data(); | |
| 1399 if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) { | |
| 1400 continue; // Noise stays in sequence. | |
| 1401 } | |
| 1402 if (test_part == part->SingletonPartner(false)) { | |
| 1403 break; // Insert before its lower partner. | |
| 1404 } | |
| 1405 int neighbour_bottom = test_part->median_bottom(); | |
| 1406 int neighbour_top = test_part->median_top(); | |
| 1407 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; | |
| 1408 if (neighbour_y < mid_y) { | |
| 1409 break; // part is above test_part so insert it. | |
| 1410 } | |
| 1411 if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) { | |
| 1412 continue; // Incompatibles stay in order | |
| 1413 } | |
| 1414 } | |
| 1415 if (it.cycled_list()) { | |
| 1416 it.add_to_end(part); | |
| 1417 } else { | |
| 1418 it.add_before_stay_put(part); | |
| 1419 } | |
| 1420 } | |
| 1421 | |
| 1422 // Add everything from the temp list to the work_set assuming correct order. | |
| 1423 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST *temp_list, WorkingPartSet_LIST *work_set) { | |
| 1424 ColPartition_C_IT it(temp_list); | |
| 1425 while (!it.empty()) { | |
| 1426 it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, &good_parts_, work_set); | |
| 1427 it.forward(); | |
| 1428 } | |
| 1429 } | |
| 1430 | |
| 1431 // Transform the grid of partitions to the output blocks. | |
| 1432 void ColumnFinder::TransformToBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { | |
| 1433 WorkingPartSet_LIST work_set; | |
| 1434 ColPartitionSet *column_set = nullptr; | |
| 1435 ColPartition_IT noise_it(&noise_parts_); | |
| 1436 // The temp_part_list holds a list of parts at the same grid y coord | |
| 1437 // so they can be added in the correct order. This prevents thin objects | |
| 1438 // like horizontal lines going before the text lines above them. | |
| 1439 ColPartition_CLIST temp_part_list; | |
| 1440 // Iterate the ColPartitions in the grid. It starts at the top | |
| 1441 ColPartitionGridSearch gsearch(&part_grid_); | |
| 1442 gsearch.StartFullSearch(); | |
| 1443 int prev_grid_y = -1; | |
| 1444 ColPartition *part; | |
| 1445 while ((part = gsearch.NextFullSearch()) != nullptr) { | |
| 1446 int grid_y = gsearch.GridY(); | |
| 1447 if (grid_y != prev_grid_y) { | |
| 1448 EmptyTempPartList(&temp_part_list, &work_set); | |
| 1449 prev_grid_y = grid_y; | |
| 1450 } | |
| 1451 if (best_columns_[grid_y] != column_set) { | |
| 1452 column_set = best_columns_[grid_y]; | |
| 1453 // Every line should have a non-null best column. | |
| 1454 ASSERT_HOST(column_set != nullptr); | |
| 1455 column_set->ChangeWorkColumns(bleft_, tright_, resolution_, &good_parts_, &work_set); | |
| 1456 if (textord_debug_tabfind) { | |
| 1457 tprintf("Changed column groups at grid index %d, y=%d\n", gsearch.GridY(), | |
| 1458 gsearch.GridY() * gridsize()); | |
| 1459 } | |
| 1460 } | |
| 1461 if (part->type() == PT_NOISE) { | |
| 1462 noise_it.add_to_end(part); | |
| 1463 } else { | |
| 1464 AddToTempPartList(part, &temp_part_list); | |
| 1465 } | |
| 1466 } | |
| 1467 EmptyTempPartList(&temp_part_list, &work_set); | |
| 1468 // Now finish all working sets and transfer ColPartitionSets to block_sets. | |
| 1469 WorkingPartSet_IT work_it(&work_set); | |
| 1470 while (!work_it.empty()) { | |
| 1471 WorkingPartSet *working_set = work_it.extract(); | |
| 1472 working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, &good_parts_, blocks, | |
| 1473 to_blocks); | |
| 1474 delete working_set; | |
| 1475 work_it.forward(); | |
| 1476 } | |
| 1477 } | |
| 1478 | |
| 1479 // Helper reflects a list of blobs in the y-axis. | |
| 1480 // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. | |
| 1481 static void ReflectBlobList(BLOBNBOX_LIST *bblobs) { | |
| 1482 BLOBNBOX_IT it(bblobs); | |
| 1483 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 1484 it.data()->reflect_box_in_y_axis(); | |
| 1485 } | |
| 1486 } | |
| 1487 | |
| 1488 // Reflect the blob boxes (but not the outlines) in the y-axis so that | |
| 1489 // the blocks get created in the correct RTL order. Reflects the blobs | |
| 1490 // in the input_block and the bblobs list. | |
| 1491 // The reflection is undone in RotateAndReskewBlocks by | |
| 1492 // reflecting the blocks themselves, and then recomputing the blob bounding | |
| 1493 // boxes. | |
| 1494 void ColumnFinder::ReflectForRtl(TO_BLOCK *input_block, BLOBNBOX_LIST *bblobs) { | |
| 1495 ReflectBlobList(bblobs); | |
| 1496 ReflectBlobList(&input_block->blobs); | |
| 1497 ReflectBlobList(&input_block->small_blobs); | |
| 1498 ReflectBlobList(&input_block->noise_blobs); | |
| 1499 ReflectBlobList(&input_block->large_blobs); | |
| 1500 // Update the denorm with the reflection. | |
| 1501 auto *new_denorm = new DENORM; | |
| 1502 new_denorm->SetupNormalization(nullptr, nullptr, denorm_, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); | |
| 1503 denorm_ = new_denorm; | |
| 1504 } | |
| 1505 | |
| 1506 // Helper fixes up blobs and cblobs to match the desired rotation, | |
| 1507 // exploding multi-outline blobs back to single blobs and accumulating | |
| 1508 // the bounding box widths and heights. | |
| 1509 static void RotateAndExplodeBlobList(const FCOORD &blob_rotation, BLOBNBOX_LIST *bblobs, | |
| 1510 STATS *widths, STATS *heights) { | |
| 1511 BLOBNBOX_IT it(bblobs); | |
| 1512 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 1513 BLOBNBOX *blob = it.data(); | |
| 1514 C_BLOB *cblob = blob->cblob(); | |
| 1515 C_OUTLINE_LIST *outlines = cblob->out_list(); | |
| 1516 C_OUTLINE_IT ol_it(outlines); | |
| 1517 if (!outlines->singleton()) { | |
| 1518 // This blob has multiple outlines from CJK repair. | |
| 1519 // Explode the blob back into individual outlines. | |
| 1520 for (; !ol_it.empty(); ol_it.forward()) { | |
| 1521 C_OUTLINE *outline = ol_it.extract(); | |
| 1522 BLOBNBOX *new_blob = BLOBNBOX::RealBlob(outline); | |
| 1523 // This blob will be revisited later since we add_after_stay_put here. | |
| 1524 // This means it will get rotated and have its width/height added to | |
| 1525 // the stats below. | |
| 1526 it.add_after_stay_put(new_blob); | |
| 1527 } | |
| 1528 it.extract(); | |
| 1529 delete blob; | |
| 1530 } else { | |
| 1531 if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { | |
| 1532 cblob->rotate(blob_rotation); | |
| 1533 } | |
| 1534 blob->compute_bounding_box(); | |
| 1535 widths->add(blob->bounding_box().width(), 1); | |
| 1536 heights->add(blob->bounding_box().height(), 1); | |
| 1537 } | |
| 1538 } | |
| 1539 } | |
| 1540 | |
| 1541 // Undo the deskew that was done in FindTabVectors, as recognition is done | |
| 1542 // without correcting blobs or blob outlines for skew. | |
| 1543 // Reskew the completed blocks to put them back to the original rotated coords | |
| 1544 // that were created by CorrectOrientation. | |
| 1545 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the | |
| 1546 // reflection that was done before FindTabVectors. | |
| 1547 // Blocks that were identified as vertical text (relative to the rotated | |
| 1548 // coordinates) are further rotated so the text lines are horizontal. | |
| 1549 // blob polygonal outlines are rotated to match the position of the blocks | |
| 1550 // that they are in, and their bounding boxes are recalculated to be accurate. | |
| 1551 // Record appropriate inverse transformations and required | |
| 1552 // classifier transformation in the blocks. | |
| 1553 void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *blocks) { | |
| 1554 if (input_is_rtl) { | |
| 1555 // The skew is backwards because of the reflection. | |
| 1556 FCOORD tmp = deskew_; | |
| 1557 deskew_ = reskew_; | |
| 1558 reskew_ = tmp; | |
| 1559 } | |
| 1560 TO_BLOCK_IT it(blocks); | |
| 1561 int block_index = 1; | |
| 1562 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 1563 TO_BLOCK *to_block = it.data(); | |
| 1564 BLOCK *block = to_block->block; | |
| 1565 // Blocks are created on the deskewed blob outlines in TransformToBlocks() | |
| 1566 // so we need to reskew them back to page coordinates. | |
| 1567 if (input_is_rtl) { | |
| 1568 block->reflect_polygon_in_y_axis(); | |
| 1569 } | |
| 1570 block->rotate(reskew_); | |
| 1571 // Copy the right_to_left flag to the created block. | |
| 1572 block->set_right_to_left(input_is_rtl); | |
| 1573 // Save the skew angle in the block for baseline computations. | |
| 1574 block->set_skew(reskew_); | |
| 1575 block->pdblk.set_index(block_index++); | |
| 1576 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); | |
| 1577 // Rotate all the blobs if needed and recompute the bounding boxes. | |
| 1578 // Compute the block median blob width and height as we go. | |
| 1579 STATS widths(0, block->pdblk.bounding_box().width() - 1); | |
| 1580 STATS heights(0, block->pdblk.bounding_box().height() - 1); | |
| 1581 RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights); | |
| 1582 TO_ROW_IT row_it(to_block->get_rows()); | |
| 1583 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 1584 TO_ROW *row = row_it.data(); | |
| 1585 RotateAndExplodeBlobList(blob_rotation, row->blob_list(), &widths, &heights); | |
| 1586 } | |
| 1587 block->set_median_size(static_cast<int>(widths.median() + 0.5), | |
| 1588 static_cast<int>(heights.median() + 0.5)); | |
| 1589 if (textord_debug_tabfind >= 2) { | |
| 1590 tprintf("Block median size = (%d, %d)\n", block->median_size().x(), block->median_size().y()); | |
| 1591 } | |
| 1592 } | |
| 1593 } | |
| 1594 | |
| 1595 // Computes the rotations for the block (to make textlines horizontal) and | |
| 1596 // for the blobs (for classification) and sets the appropriate members | |
| 1597 // of the given block. | |
| 1598 // Returns the rotation that needs to be applied to the blobs to make | |
| 1599 // them sit in the rotated block. | |
| 1600 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK *block) { | |
| 1601 // The text_rotation_ tells us the gross page text rotation that needs | |
| 1602 // to be applied for classification | |
| 1603 // TODO(rays) find block-level classify rotation by orientation detection. | |
| 1604 // In the mean time, assume that "up" for text printed in the minority | |
| 1605 // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. | |
| 1606 // Accomplish this by zero-ing out the text rotation. This covers the | |
| 1607 // common cases of image credits in documents written in Latin scripts | |
| 1608 // and page headings for predominantly vertically written CJK books. | |
| 1609 FCOORD classify_rotation(text_rotation_); | |
| 1610 FCOORD block_rotation(1.0f, 0.0f); | |
| 1611 if (block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT) { | |
| 1612 // Vertical text needs to be 90 degrees rotated relative to the rest. | |
| 1613 // If the rest has a 90 degree rotation already, use the inverse, making | |
| 1614 // the vertical text the original way up. Otherwise use 90 degrees | |
| 1615 // clockwise. | |
| 1616 if (rerotate_.x() == 0.0f) { | |
| 1617 block_rotation = rerotate_; | |
| 1618 } else { | |
| 1619 block_rotation = FCOORD(0.0f, -1.0f); | |
| 1620 } | |
| 1621 block->rotate(block_rotation); | |
| 1622 classify_rotation = FCOORD(1.0f, 0.0f); | |
| 1623 } | |
| 1624 block_rotation.rotate(rotation_); | |
| 1625 // block_rotation is now what we have done to the blocks. Now do the same | |
| 1626 // thing to the blobs, but save the inverse rotation in the block, as that | |
| 1627 // is what we need to DENORM back to the image coordinates. | |
| 1628 FCOORD blob_rotation(block_rotation); | |
| 1629 block_rotation.set_y(-block_rotation.y()); | |
| 1630 block->set_re_rotation(block_rotation); | |
| 1631 block->set_classify_rotation(classify_rotation); | |
| 1632 if (textord_debug_tabfind) { | |
| 1633 tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", block->pdblk.index(), | |
| 1634 block->pdblk.poly_block()->isA(), block->re_rotation().x(), block->re_rotation().y(), | |
| 1635 classify_rotation.x(), classify_rotation.y()); | |
| 1636 block->pdblk.bounding_box().print(); | |
| 1637 } | |
| 1638 return blob_rotation; | |
| 1639 } | |
| 1640 | |
| 1641 } // namespace tesseract. |
