Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/baselinedetect.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: baselinedetect.cpp | |
| 3 // Description: Initial Baseline Determination. | |
| 4 // Copyright 2012 Google Inc. All Rights Reserved. | |
| 5 // Author: rays@google.com (Ray Smith) | |
| 6 // | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #define _USE_MATH_DEFINES // for M_PI | |
| 20 | |
| 21 #ifdef HAVE_CONFIG_H | |
| 22 # include "config_auto.h" | |
| 23 #endif | |
| 24 | |
| 25 #include "baselinedetect.h" | |
| 26 | |
| 27 #include <allheaders.h> | |
| 28 #include <algorithm> | |
| 29 #include <cfloat> // for FLT_MAX | |
| 30 #include <cmath> // for M_PI | |
| 31 #include "blobbox.h" | |
| 32 #include "detlinefit.h" | |
| 33 #include "drawtord.h" | |
| 34 #include "helpers.h" | |
| 35 #include "linlsq.h" | |
| 36 #include "makerow.h" | |
| 37 #include "tesserrstream.h" // for tesserr | |
| 38 #include "textord.h" | |
| 39 #include "tprintf.h" | |
| 40 #include "underlin.h" | |
| 41 | |
| 42 // Number of displacement modes kept in displacement_modes_; | |
| 43 const int kMaxDisplacementsModes = 3; | |
| 44 // Number of points to skip when retrying initial fit. | |
| 45 const int kNumSkipPoints = 3; | |
| 46 // Max angle deviation (in radians) allowed to keep the independent baseline. | |
| 47 const double kMaxSkewDeviation = 1.0 / 64; | |
| 48 // Fraction of line spacing estimate for quantization of blob displacements. | |
| 49 const double kOffsetQuantizationFactor = 3.0 / 64; | |
| 50 // Fraction of line spacing estimate for computing blob fit error. | |
| 51 const double kFitHalfrangeFactor = 6.0 / 64; | |
| 52 // Max fraction of line spacing allowed before a baseline counts as badly | |
| 53 // fitting. | |
| 54 const double kMaxBaselineError = 3.0 / 64; | |
| 55 // Multiple of linespacing that sets max_blob_size in TO_BLOCK. | |
| 56 // Copied from textord_excess_blobsize. | |
| 57 const double kMaxBlobSizeMultiple = 1.3; | |
| 58 // Min fraction of linespacing gaps that should be close to the model before | |
| 59 // we will force the linespacing model on all the lines. | |
| 60 const double kMinFittingLinespacings = 0.25; | |
| 61 // A y-coordinate within a textline that is to be debugged. | |
| 62 //#define kDebugYCoord 1525 | |
| 63 | |
| 64 namespace tesseract { | |
| 65 | |
| 66 BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row) | |
| 67 : blobs_(to_row->blob_list()), | |
| 68 baseline_pt1_(0.0f, 0.0f), | |
| 69 baseline_pt2_(0.0f, 0.0f), | |
| 70 baseline_error_(0.0), | |
| 71 good_baseline_(false) { | |
| 72 ComputeBoundingBox(); | |
| 73 // Compute a scale factor for rounding to ints. | |
| 74 disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; | |
| 75 fit_halfrange_ = kFitHalfrangeFactor * line_spacing; | |
| 76 max_baseline_error_ = kMaxBaselineError * line_spacing; | |
| 77 } | |
| 78 | |
| 79 // Sets the TO_ROW with the output straight line. | |
| 80 void BaselineRow::SetupOldLineParameters(TO_ROW *row) const { | |
| 81 // TODO(rays) get rid of this when m and c are no longer used. | |
| 82 double gradient = tan(BaselineAngle()); | |
| 83 // para_c is the actual intercept of the baseline on the y-axis. | |
| 84 float para_c = StraightYAtX(0.0); | |
| 85 row->set_line(gradient, para_c, baseline_error_); | |
| 86 row->set_parallel_line(gradient, para_c, baseline_error_); | |
| 87 } | |
| 88 | |
| 89 // Outputs diagnostic information. | |
| 90 void BaselineRow::Print() const { | |
| 91 tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", | |
| 92 baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(), | |
| 93 baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0)); | |
| 94 tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, | |
| 95 baseline_error_, good_baseline_); | |
| 96 bounding_box_.print(); | |
| 97 } | |
| 98 | |
| 99 // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. | |
| 100 double BaselineRow::BaselineAngle() const { | |
| 101 FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_); | |
| 102 double angle = baseline_dir.angle(); | |
| 103 // Baseline directions are only unique in a range of pi so constrain to | |
| 104 // [-pi/2, pi/2]. | |
| 105 return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5; | |
| 106 } | |
| 107 | |
| 108 // Computes and returns the linespacing at the middle of the overlap | |
| 109 // between this and other. | |
| 110 double BaselineRow::SpaceBetween(const BaselineRow &other) const { | |
| 111 // Find the x-centre of overlap of the lines. | |
| 112 float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) + | |
| 113 std::min(bounding_box_.right(), other.bounding_box_.right())) / | |
| 114 2.0f; | |
| 115 // Find the vertical centre between them. | |
| 116 float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f; | |
| 117 // Find the perpendicular distance of (x,y) from each line. | |
| 118 FCOORD pt(x, y); | |
| 119 return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt); | |
| 120 } | |
| 121 | |
| 122 // Computes and returns the displacement of the center of the line | |
| 123 // perpendicular to the given direction. | |
| 124 double BaselineRow::PerpDisp(const FCOORD &direction) const { | |
| 125 float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f; | |
| 126 FCOORD middle_pos(middle_x, StraightYAtX(middle_x)); | |
| 127 return direction * middle_pos / direction.length(); | |
| 128 } | |
| 129 | |
| 130 // Computes the y coordinate at the given x using the straight baseline | |
| 131 // defined by baseline_pt1_ and baseline_pt2__. | |
| 132 double BaselineRow::StraightYAtX(double x) const { | |
| 133 double denominator = baseline_pt2_.x() - baseline_pt1_.x(); | |
| 134 if (denominator == 0.0) { | |
| 135 return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; | |
| 136 } | |
| 137 return baseline_pt1_.y() + (x - baseline_pt1_.x()) * | |
| 138 (baseline_pt2_.y() - baseline_pt1_.y()) / | |
| 139 denominator; | |
| 140 } | |
| 141 | |
| 142 // Fits a straight baseline to the points. Returns true if it had enough | |
| 143 // points to be reasonably sure of the fitted baseline. | |
| 144 // If use_box_bottoms is false, baselines positions are formed by | |
| 145 // considering the outlines of the blobs. | |
| 146 bool BaselineRow::FitBaseline(bool use_box_bottoms) { | |
| 147 // Deterministic fitting is used wherever possible. | |
| 148 fitter_.Clear(); | |
| 149 // Linear least squares is a backup if the DetLineFit produces a bad line. | |
| 150 LLSQ llsq; | |
| 151 BLOBNBOX_IT blob_it(blobs_); | |
| 152 | |
| 153 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 154 BLOBNBOX *blob = blob_it.data(); | |
| 155 if (!use_box_bottoms) { | |
| 156 blob->EstimateBaselinePosition(); | |
| 157 } | |
| 158 const TBOX &box = blob->bounding_box(); | |
| 159 int x_middle = (box.left() + box.right()) / 2; | |
| 160 #ifdef kDebugYCoord | |
| 161 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { | |
| 162 tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(), | |
| 163 blob->baseline_position()); | |
| 164 box.print(); | |
| 165 } | |
| 166 #endif | |
| 167 fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); | |
| 168 llsq.add(x_middle, blob->baseline_position()); | |
| 169 } | |
| 170 // Fit the line. | |
| 171 ICOORD pt1, pt2; | |
| 172 baseline_error_ = fitter_.Fit(&pt1, &pt2); | |
| 173 baseline_pt1_ = pt1; | |
| 174 baseline_pt2_ = pt2; | |
| 175 if (baseline_error_ > max_baseline_error_ && | |
| 176 fitter_.SufficientPointsForIndependentFit()) { | |
| 177 // The fit was bad but there were plenty of points, so try skipping | |
| 178 // the first and last few, and use the new line if it dramatically improves | |
| 179 // the error of fit. | |
| 180 double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); | |
| 181 if (error < baseline_error_ / 2.0) { | |
| 182 baseline_error_ = error; | |
| 183 baseline_pt1_ = pt1; | |
| 184 baseline_pt2_ = pt2; | |
| 185 } | |
| 186 } | |
| 187 int debug = 0; | |
| 188 #ifdef kDebugYCoord | |
| 189 Print(); | |
| 190 debug = bounding_box_.bottom() < kDebugYCoord && | |
| 191 bounding_box_.top() > kDebugYCoord | |
| 192 ? 3 | |
| 193 : 2; | |
| 194 #endif | |
| 195 // Now we obtained a direction from that fit, see if we can improve the | |
| 196 // fit using the same direction and some other start point. | |
| 197 FCOORD direction(pt2 - pt1); | |
| 198 double target_offset = direction * pt1; | |
| 199 good_baseline_ = false; | |
| 200 FitConstrainedIfBetter(debug, direction, 0.0, target_offset); | |
| 201 // Wild lines can be produced because DetLineFit allows vertical lines, but | |
| 202 // vertical text has been rotated so angles over pi/4 should be disallowed. | |
| 203 // Near vertical lines can still be produced by vertically aligned components | |
| 204 // on very short lines. | |
| 205 double angle = BaselineAngle(); | |
| 206 if (fabs(angle) > M_PI * 0.25) { | |
| 207 // Use the llsq fit as a backup. | |
| 208 baseline_pt1_ = llsq.mean_point(); | |
| 209 baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); | |
| 210 // TODO(rays) get rid of this when m and c are no longer used. | |
| 211 double m = llsq.m(); | |
| 212 double c = llsq.c(m); | |
| 213 baseline_error_ = llsq.rms(m, c); | |
| 214 good_baseline_ = false; | |
| 215 } | |
| 216 return good_baseline_; | |
| 217 } | |
| 218 | |
| 219 // Modifies an existing result of FitBaseline to be parallel to the given | |
| 220 // direction vector if that produces a better result. | |
| 221 void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) { | |
| 222 SetupBlobDisplacements(direction); | |
| 223 if (displacement_modes_.empty()) { | |
| 224 return; | |
| 225 } | |
| 226 #ifdef kDebugYCoord | |
| 227 if (bounding_box_.bottom() < kDebugYCoord && | |
| 228 bounding_box_.top() > kDebugYCoord && debug < 3) | |
| 229 debug = 3; | |
| 230 #endif | |
| 231 FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]); | |
| 232 } | |
| 233 | |
| 234 // Modifies the baseline to snap to the textline grid if the existing | |
| 235 // result is not good enough. | |
| 236 double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, | |
| 237 double line_spacing, | |
| 238 double line_offset) { | |
| 239 if (blobs_->empty()) { | |
| 240 if (debug > 1) { | |
| 241 tprintf("Row empty at:"); | |
| 242 bounding_box_.print(); | |
| 243 } | |
| 244 return line_offset; | |
| 245 } | |
| 246 // Find the displacement_modes_ entry nearest to the grid. | |
| 247 double best_error = 0.0; | |
| 248 int best_index = -1; | |
| 249 for (unsigned i = 0; i < displacement_modes_.size(); ++i) { | |
| 250 double blob_y = displacement_modes_[i]; | |
| 251 double error = | |
| 252 BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset); | |
| 253 if (debug > 1) { | |
| 254 tprintf("Mode at %g has error %g from model \n", blob_y, error); | |
| 255 } | |
| 256 if (best_index < 0 || error < best_error) { | |
| 257 best_error = error; | |
| 258 best_index = i; | |
| 259 } | |
| 260 } | |
| 261 // We will move the baseline only if the chosen mode is close enough to the | |
| 262 // model. | |
| 263 double model_margin = max_baseline_error_ - best_error; | |
| 264 if (best_index >= 0 && model_margin > 0.0) { | |
| 265 // But if the current baseline is already close to the mode there is no | |
| 266 // point, and only the potential to damage accuracy by changing its angle. | |
| 267 double perp_disp = PerpDisp(direction); | |
| 268 double shift = displacement_modes_[best_index] - perp_disp; | |
| 269 if (fabs(shift) > max_baseline_error_) { | |
| 270 if (debug > 1) { | |
| 271 tprintf("Attempting linespacing model fit with mode %g to row at:", | |
| 272 displacement_modes_[best_index]); | |
| 273 bounding_box_.print(); | |
| 274 } | |
| 275 FitConstrainedIfBetter(debug, direction, model_margin, | |
| 276 displacement_modes_[best_index]); | |
| 277 } else if (debug > 1) { | |
| 278 tprintf("Linespacing model only moves current line by %g for row at:", | |
| 279 shift); | |
| 280 bounding_box_.print(); | |
| 281 } | |
| 282 } else if (debug > 1) { | |
| 283 tprintf("Linespacing model not close enough to any mode for row at:"); | |
| 284 bounding_box_.print(); | |
| 285 } | |
| 286 return fmod(PerpDisp(direction), line_spacing); | |
| 287 } | |
| 288 | |
| 289 // Sets up displacement_modes_ with the top few modes of the perpendicular | |
| 290 // distance of each blob from the given direction vector, after rounding. | |
| 291 void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { | |
| 292 // Set of perpendicular displacements of the blob bottoms from the required | |
| 293 // baseline direction. | |
| 294 std::vector<double> perp_blob_dists; | |
| 295 displacement_modes_.clear(); | |
| 296 // Gather the skew-corrected position of every blob. | |
| 297 double min_dist = FLT_MAX; | |
| 298 double max_dist = -FLT_MAX; | |
| 299 BLOBNBOX_IT blob_it(blobs_); | |
| 300 #ifdef kDebugYCoord | |
| 301 bool debug = false; | |
| 302 #endif | |
| 303 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { | |
| 304 BLOBNBOX *blob = blob_it.data(); | |
| 305 const TBOX &box = blob->bounding_box(); | |
| 306 #ifdef kDebugYCoord | |
| 307 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) | |
| 308 debug = true; | |
| 309 #endif | |
| 310 FCOORD blob_pos((box.left() + box.right()) / 2.0f, | |
| 311 blob->baseline_position()); | |
| 312 double offset = direction * blob_pos; | |
| 313 perp_blob_dists.push_back(offset); | |
| 314 #ifdef kDebugYCoord | |
| 315 if (debug) { | |
| 316 tprintf("Displacement %g for blob at:", offset); | |
| 317 box.print(); | |
| 318 } | |
| 319 #endif | |
| 320 UpdateRange(offset, &min_dist, &max_dist); | |
| 321 } | |
| 322 // Set up a histogram using disp_quant_factor_ as the bucket size. | |
| 323 STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), | |
| 324 IntCastRounded(max_dist / disp_quant_factor_)); | |
| 325 for (double perp_blob_dist : perp_blob_dists) { | |
| 326 dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1); | |
| 327 } | |
| 328 std::vector<KDPairInc<float, int>> scaled_modes; | |
| 329 dist_stats.top_n_modes(kMaxDisplacementsModes, scaled_modes); | |
| 330 #ifdef kDebugYCoord | |
| 331 if (debug) { | |
| 332 for (int i = 0; i < scaled_modes.size(); ++i) { | |
| 333 tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_, | |
| 334 scaled_modes[i].data()); | |
| 335 } | |
| 336 } | |
| 337 #endif | |
| 338 for (auto &scaled_mode : scaled_modes) { | |
| 339 displacement_modes_.push_back(disp_quant_factor_ * scaled_mode.key()); | |
| 340 } | |
| 341 } | |
| 342 | |
| 343 // Fits a line in the given direction to blobs that are close to the given | |
| 344 // target_offset perpendicular displacement from the direction. The fit | |
| 345 // error is allowed to be cheat_allowance worse than the existing fit, and | |
| 346 // will still be used. | |
| 347 // If cheat_allowance > 0, the new fit will be good and replace the current | |
| 348 // fit if it has better fit (with cheat) OR its error is below | |
| 349 // max_baseline_error_ and the old fit is marked bad. | |
| 350 // Otherwise the new fit will only replace the old if it is really better, | |
| 351 // or the old fit is marked bad and the new fit has sufficient points, as | |
| 352 // well as being within the max_baseline_error_. | |
| 353 void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, | |
| 354 double cheat_allowance, | |
| 355 double target_offset) { | |
| 356 double halfrange = fit_halfrange_ * direction.length(); | |
| 357 double min_dist = target_offset - halfrange; | |
| 358 double max_dist = target_offset + halfrange; | |
| 359 ICOORD line_pt; | |
| 360 double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, | |
| 361 debug > 2, &line_pt); | |
| 362 // Allow cheat_allowance off the new error | |
| 363 new_error -= cheat_allowance; | |
| 364 double old_angle = BaselineAngle(); | |
| 365 double new_angle = direction.angle(); | |
| 366 if (debug > 1) { | |
| 367 tprintf("Constrained error = %g, original = %g", new_error, | |
| 368 baseline_error_); | |
| 369 tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, | |
| 370 new_angle, new_angle - old_angle, kMaxSkewDeviation); | |
| 371 } | |
| 372 bool new_good_baseline = | |
| 373 new_error <= max_baseline_error_ && | |
| 374 (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); | |
| 375 // The new will replace the old if any are true: | |
| 376 // 1. the new error is better | |
| 377 // 2. the old is NOT good, but the new is | |
| 378 // 3. there is a wild angular difference between them (assuming that the new | |
| 379 // is a better guess at the angle.) | |
| 380 if (new_error <= baseline_error_ || (!good_baseline_ && new_good_baseline) || | |
| 381 fabs(new_angle - old_angle) > kMaxSkewDeviation) { | |
| 382 baseline_error_ = new_error; | |
| 383 baseline_pt1_ = line_pt; | |
| 384 baseline_pt2_ = baseline_pt1_ + direction; | |
| 385 good_baseline_ = new_good_baseline; | |
| 386 if (debug > 1) { | |
| 387 tprintf("Replacing with constrained baseline, good = %d\n", | |
| 388 good_baseline_); | |
| 389 } | |
| 390 } else if (debug > 1) { | |
| 391 tprintf("Keeping old baseline\n"); | |
| 392 } | |
| 393 } | |
| 394 | |
| 395 // Returns the perpendicular distance of the point from the straight | |
| 396 // baseline. | |
| 397 float BaselineRow::PerpDistanceFromBaseline(const FCOORD &pt) const { | |
| 398 FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_); | |
| 399 FCOORD offset_vector(pt - baseline_pt1_); | |
| 400 float distance = baseline_vector * offset_vector; | |
| 401 float sqlength = baseline_vector.sqlength(); | |
| 402 if (sqlength == 0.0f) { | |
| 403 tprintf("unexpected baseline vector (0,0)\n"); | |
| 404 return 0.0f; | |
| 405 } | |
| 406 return std::sqrt(distance * distance / sqlength); | |
| 407 } | |
| 408 | |
| 409 // Computes the bounding box of the row. | |
| 410 void BaselineRow::ComputeBoundingBox() { | |
| 411 BLOBNBOX_IT it(blobs_); | |
| 412 TBOX box; | |
| 413 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 414 box += it.data()->bounding_box(); | |
| 415 } | |
| 416 bounding_box_ = box; | |
| 417 } | |
| 418 | |
| 419 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block) | |
| 420 : block_(block), | |
| 421 debug_level_(debug_level), | |
| 422 non_text_block_(non_text), | |
| 423 good_skew_angle_(false), | |
| 424 skew_angle_(0.0), | |
| 425 line_spacing_(block->line_spacing), | |
| 426 line_offset_(0.0), | |
| 427 model_error_(0.0) { | |
| 428 TO_ROW_IT row_it(block_->get_rows()); | |
| 429 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 430 // Sort the blobs on the rows. | |
| 431 row_it.data()->blob_list()->sort(blob_x_order); | |
| 432 rows_.push_back(new BaselineRow(block->line_spacing, row_it.data())); | |
| 433 } | |
| 434 } | |
| 435 | |
| 436 // Computes and returns the absolute error of the given perp_disp from the | |
| 437 // given linespacing model. | |
| 438 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, | |
| 439 double line_offset) { | |
| 440 // Round to the nearest multiple of line_spacing + line offset. | |
| 441 int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing); | |
| 442 double model_y = line_spacing * multiple + line_offset; | |
| 443 return fabs(perp_disp - model_y); | |
| 444 } | |
| 445 | |
| 446 // Fits straight line baselines and computes the skew angle from the | |
| 447 // median angle. Returns true if a good angle is found. | |
| 448 // If use_box_bottoms is false, baseline positions are formed by | |
| 449 // considering the outlines of the blobs. | |
| 450 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { | |
| 451 if (non_text_block_) { | |
| 452 return false; | |
| 453 } | |
| 454 std::vector<double> angles; | |
| 455 for (auto row : rows_) { | |
| 456 if (row->FitBaseline(use_box_bottoms)) { | |
| 457 double angle = row->BaselineAngle(); | |
| 458 angles.push_back(angle); | |
| 459 } | |
| 460 if (debug_level_ > 1) { | |
| 461 row->Print(); | |
| 462 } | |
| 463 } | |
| 464 | |
| 465 if (!angles.empty()) { | |
| 466 skew_angle_ = MedianOfCircularValues(M_PI, angles); | |
| 467 good_skew_angle_ = true; | |
| 468 } else { | |
| 469 skew_angle_ = 0.0f; | |
| 470 good_skew_angle_ = false; | |
| 471 } | |
| 472 if (debug_level_ > 0) { | |
| 473 tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, | |
| 474 good_skew_angle_); | |
| 475 } | |
| 476 return good_skew_angle_; | |
| 477 } | |
| 478 | |
| 479 // Refits the baseline to a constrained angle, using the stored block | |
| 480 // skew if good enough, otherwise the supplied default skew. | |
| 481 void BaselineBlock::ParallelizeBaselines(double default_block_skew) { | |
| 482 if (non_text_block_) { | |
| 483 return; | |
| 484 } | |
| 485 if (!good_skew_angle_) { | |
| 486 skew_angle_ = default_block_skew; | |
| 487 } | |
| 488 if (debug_level_ > 0) { | |
| 489 tprintf("Adjusting block to skew angle %g\n", skew_angle_); | |
| 490 } | |
| 491 FCOORD direction(cos(skew_angle_), sin(skew_angle_)); | |
| 492 for (auto row : rows_) { | |
| 493 row->AdjustBaselineToParallel(debug_level_, direction); | |
| 494 if (debug_level_ > 1) { | |
| 495 row->Print(); | |
| 496 } | |
| 497 } | |
| 498 if (rows_.size() < 3 || !ComputeLineSpacing()) { | |
| 499 return; | |
| 500 } | |
| 501 // Enforce the line spacing model on all lines that don't yet have a good | |
| 502 // baseline. | |
| 503 // Start by finding the row that is best fitted to the model. | |
| 504 unsigned best_row = 0; | |
| 505 double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), | |
| 506 line_spacing_, line_offset_); | |
| 507 for (unsigned r = 1; r < rows_.size(); ++r) { | |
| 508 double error = SpacingModelError(rows_[r]->PerpDisp(direction), | |
| 509 line_spacing_, line_offset_); | |
| 510 if (error < best_error) { | |
| 511 best_error = error; | |
| 512 best_row = r; | |
| 513 } | |
| 514 } | |
| 515 // Starting at the best fitting row, work outwards, syncing the offset. | |
| 516 double offset = line_offset_; | |
| 517 for (auto r = best_row + 1; r < rows_.size(); ++r) { | |
| 518 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, | |
| 519 line_spacing_, offset); | |
| 520 } | |
| 521 offset = line_offset_; | |
| 522 for (int r = best_row - 1; r >= 0; --r) { | |
| 523 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, | |
| 524 line_spacing_, offset); | |
| 525 } | |
| 526 } | |
| 527 | |
| 528 // Sets the parameters in TO_BLOCK that are needed by subsequent processes. | |
| 529 void BaselineBlock::SetupBlockParameters() const { | |
| 530 if (line_spacing_ > 0.0) { | |
| 531 // Where was block_line_spacing set before? | |
| 532 float min_spacing = | |
| 533 std::min(block_->line_spacing, static_cast<float>(line_spacing_)); | |
| 534 if (min_spacing < block_->line_size) { | |
| 535 block_->line_size = min_spacing; | |
| 536 } | |
| 537 block_->line_spacing = line_spacing_; | |
| 538 block_->baseline_offset = line_offset_; | |
| 539 block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple; | |
| 540 } | |
| 541 // Setup the parameters on all the rows. | |
| 542 TO_ROW_IT row_it(block_->get_rows()); | |
| 543 for (unsigned r = 0; r < rows_.size(); ++r, row_it.forward()) { | |
| 544 BaselineRow *row = rows_[r]; | |
| 545 TO_ROW *to_row = row_it.data(); | |
| 546 row->SetupOldLineParameters(to_row); | |
| 547 } | |
| 548 } | |
| 549 | |
| 550 // Processing that is required before fitting baseline splines, but requires | |
| 551 // linear baselines in order to be successful: | |
| 552 // Removes noise if required | |
| 553 // Separates out underlines | |
| 554 // Pre-associates blob fragments. | |
| 555 // TODO(rays/joeliu) This entire section of code is inherited from the past | |
| 556 // and could be improved/eliminated. | |
| 557 // page_tr is used to size a debug window. | |
| 558 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { | |
| 559 if (non_text_block_) { | |
| 560 return; | |
| 561 } | |
| 562 if (remove_noise) { | |
| 563 vigorous_noise_removal(block_); | |
| 564 } | |
| 565 FCOORD rotation(1.0f, 0.0f); | |
| 566 double gradient = tan(skew_angle_); | |
| 567 separate_underlines(block_, gradient, rotation, true); | |
| 568 pre_associate_blobs(page_tr, block_, rotation, true); | |
| 569 } | |
| 570 | |
| 571 // Fits splines to the textlines, or creates fake QSPLINES from the straight | |
| 572 // baselines that are already on the TO_ROWs. | |
| 573 // As a side-effect, computes the xheights of the rows and the block. | |
| 574 // Although x-height estimation is conceptually separate, it is part of | |
| 575 // detecting perspective distortion and therefore baseline fitting. | |
| 576 void BaselineBlock::FitBaselineSplines(bool enable_splines, | |
| 577 bool show_final_rows, Textord *textord) { | |
| 578 double gradient = tan(skew_angle_); | |
| 579 FCOORD rotation(1.0f, 0.0f); | |
| 580 | |
| 581 if (enable_splines) { | |
| 582 textord->make_spline_rows(block_, gradient, show_final_rows); | |
| 583 } else { | |
| 584 // Make a fake spline from the existing line. | |
| 585 TBOX block_box = block_->block->pdblk.bounding_box(); | |
| 586 TO_ROW_IT row_it = block_->get_rows(); | |
| 587 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 588 TO_ROW *row = row_it.data(); | |
| 589 int32_t xstarts[2] = {block_box.left(), block_box.right()}; | |
| 590 double coeffs[3] = {0.0, row->line_m(), row->line_c()}; | |
| 591 row->baseline = QSPLINE(1, xstarts, coeffs); | |
| 592 textord->compute_row_xheight(row, block_->block->classify_rotation(), | |
| 593 row->line_m(), block_->line_size); | |
| 594 } | |
| 595 } | |
| 596 textord->compute_block_xheight(block_, gradient); | |
| 597 block_->block->set_xheight(block_->xheight); | |
| 598 if (textord_restore_underlines) { // fix underlines | |
| 599 restore_underlined_blobs(block_); | |
| 600 } | |
| 601 } | |
| 602 | |
| 603 #ifndef GRAPHICS_DISABLED | |
| 604 | |
| 605 // Draws the (straight) baselines and final blobs colored according to | |
| 606 // what was discarded as noise and what is associated with each row. | |
| 607 void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) { | |
| 608 if (non_text_block_) { | |
| 609 return; | |
| 610 } | |
| 611 double gradient = tan(skew_angle_); | |
| 612 FCOORD rotation(1.0f, 0.0f); | |
| 613 int left_edge = block_->block->pdblk.bounding_box().left(); | |
| 614 ScrollView *win = create_to_win(page_tr); | |
| 615 ScrollView::Color colour = ScrollView::RED; | |
| 616 TO_ROW_IT row_it = block_->get_rows(); | |
| 617 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 618 plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation); | |
| 619 colour = static_cast<ScrollView::Color>(colour + 1); | |
| 620 if (colour > ScrollView::MAGENTA) { | |
| 621 colour = ScrollView::RED; | |
| 622 } | |
| 623 } | |
| 624 plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); | |
| 625 // Show discarded blobs. | |
| 626 plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, | |
| 627 ScrollView::CORAL); | |
| 628 if (block_->blobs.length() > 0) { | |
| 629 tprintf("%d blobs discarded as noise\n", block_->blobs.length()); | |
| 630 } | |
| 631 draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation); | |
| 632 } | |
| 633 | |
| 634 #endif // !GRAPHICS_DISABLED | |
| 635 | |
| 636 void BaselineBlock::DrawPixSpline(Image pix_in) { | |
| 637 if (non_text_block_) { | |
| 638 return; | |
| 639 } | |
| 640 TO_ROW_IT row_it = block_->get_rows(); | |
| 641 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { | |
| 642 row_it.data()->baseline.plot(pix_in); | |
| 643 } | |
| 644 } | |
| 645 | |
| 646 // Top-level line-spacing calculation. Computes an estimate of the line- | |
| 647 // spacing, using the current baselines in the TO_ROWS of the block, and | |
| 648 // then refines it by fitting a regression line to the baseline positions | |
| 649 // as a function of their integer index. | |
| 650 // Returns true if it seems that the model is a reasonable fit to the | |
| 651 // observations. | |
| 652 bool BaselineBlock::ComputeLineSpacing() { | |
| 653 FCOORD direction(cos(skew_angle_), sin(skew_angle_)); | |
| 654 std::vector<double> row_positions; | |
| 655 ComputeBaselinePositions(direction, &row_positions); | |
| 656 if (row_positions.size() < 2) { | |
| 657 return false; | |
| 658 } | |
| 659 EstimateLineSpacing(); | |
| 660 RefineLineSpacing(row_positions); | |
| 661 // Verify that the model is reasonable. | |
| 662 double max_baseline_error = kMaxBaselineError * line_spacing_; | |
| 663 int non_trivial_gaps = 0; | |
| 664 int fitting_gaps = 0; | |
| 665 for (unsigned i = 1; i < row_positions.size(); ++i) { | |
| 666 double row_gap = fabs(row_positions[i - 1] - row_positions[i]); | |
| 667 if (row_gap > max_baseline_error) { | |
| 668 ++non_trivial_gaps; | |
| 669 if (fabs(row_gap - line_spacing_) <= max_baseline_error) { | |
| 670 ++fitting_gaps; | |
| 671 } | |
| 672 } | |
| 673 } | |
| 674 if (debug_level_ > 0) { | |
| 675 tesserr << "Spacing " << line_spacing_ << ", in " | |
| 676 << row_positions.size() << " rows, " | |
| 677 << fitting_gaps << " gaps fitted out of " | |
| 678 << non_trivial_gaps << " non-trivial\n"; | |
| 679 } | |
| 680 return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; | |
| 681 } | |
| 682 | |
| 683 // Computes the deskewed vertical position of each baseline in the block and | |
| 684 // stores them in the given vector. | |
| 685 // This is calculated as the perpendicular distance of the middle of each | |
| 686 // baseline (in case it has a different skew angle) from the line passing | |
| 687 // through the origin parallel to the block baseline angle. | |
| 688 // NOTE that "distance" above is a signed quantity so we can tell which side | |
| 689 // of the block baseline a line sits, hence the function and argument name | |
| 690 // positions not distances. | |
| 691 void BaselineBlock::ComputeBaselinePositions(const FCOORD &direction, | |
| 692 std::vector<double> *positions) { | |
| 693 positions->clear(); | |
| 694 for (auto row : rows_) { | |
| 695 const TBOX &row_box = row->bounding_box(); | |
| 696 float x_middle = (row_box.left() + row_box.right()) / 2.0f; | |
| 697 FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle))); | |
| 698 float offset = direction * row_pos; | |
| 699 positions->push_back(offset); | |
| 700 } | |
| 701 } | |
| 702 | |
| 703 // Computes an estimate of the line spacing of the block from the median | |
| 704 // of the spacings between adjacent overlapping textlines. | |
| 705 void BaselineBlock::EstimateLineSpacing() { | |
| 706 std::vector<float> spacings; | |
| 707 for (unsigned r = 0; r < rows_.size(); ++r) { | |
| 708 BaselineRow *row = rows_[r]; | |
| 709 // Exclude silly lines. | |
| 710 if (fabs(row->BaselineAngle()) > M_PI * 0.25) { | |
| 711 continue; | |
| 712 } | |
| 713 // Find the first row after row that overlaps it significantly. | |
| 714 const TBOX &row_box = row->bounding_box(); | |
| 715 unsigned r2; | |
| 716 for (r2 = r + 1; r2 < rows_.size() && | |
| 717 !row_box.major_x_overlap(rows_[r2]->bounding_box()); | |
| 718 ++r2) { | |
| 719 ; | |
| 720 } | |
| 721 if (r2 < rows_.size()) { | |
| 722 BaselineRow *row2 = rows_[r2]; | |
| 723 // Exclude silly lines. | |
| 724 if (fabs(row2->BaselineAngle()) > M_PI * 0.25) { | |
| 725 continue; | |
| 726 } | |
| 727 float spacing = row->SpaceBetween(*row2); | |
| 728 spacings.push_back(spacing); | |
| 729 } | |
| 730 } | |
| 731 // If we have at least one value, use it, otherwise leave the previous | |
| 732 // value unchanged. | |
| 733 if (!spacings.empty()) { | |
| 734 std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2, | |
| 735 spacings.end()); | |
| 736 line_spacing_ = spacings[spacings.size() / 2]; | |
| 737 if (debug_level_ > 1) { | |
| 738 tprintf("Estimate of linespacing = %g\n", line_spacing_); | |
| 739 } | |
| 740 } | |
| 741 } | |
| 742 | |
| 743 // Refines the line spacing of the block by fitting a regression | |
| 744 // line to the deskewed y-position of each baseline as a function of its | |
| 745 // estimated line index, allowing for a small error in the initial linespacing | |
| 746 // and choosing the best available model. | |
| 747 void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) { | |
| 748 double spacings[3], offsets[3], errors[3]; | |
| 749 int index_range; | |
| 750 errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0], | |
| 751 &offsets[0], &index_range); | |
| 752 if (index_range > 1) { | |
| 753 double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); | |
| 754 // Try the hypotheses that there might be index_range +/- 1 line spaces. | |
| 755 errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], | |
| 756 &offsets[1], nullptr); | |
| 757 double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); | |
| 758 errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], | |
| 759 &offsets[2], nullptr); | |
| 760 for (int i = 1; i <= 2; ++i) { | |
| 761 if (errors[i] < errors[0]) { | |
| 762 spacings[0] = spacings[i]; | |
| 763 offsets[0] = offsets[i]; | |
| 764 errors[0] = errors[i]; | |
| 765 } | |
| 766 } | |
| 767 } | |
| 768 if (spacings[0] > 0.0) { | |
| 769 line_spacing_ = spacings[0]; | |
| 770 line_offset_ = offsets[0]; | |
| 771 model_error_ = errors[0]; | |
| 772 if (debug_level_ > 0) { | |
| 773 tprintf("Final linespacing model = %g + offset %g, error %g\n", | |
| 774 line_spacing_, line_offset_, model_error_); | |
| 775 } | |
| 776 } | |
| 777 } | |
| 778 | |
| 779 // Given an initial estimate of line spacing (m_in) and the positions of each | |
| 780 // baseline, computes the line spacing of the block more accurately in m_out, | |
| 781 // and the corresponding intercept in c_out, and the number of spacings seen | |
| 782 // in index_delta. Returns the error of fit to the line spacing model. | |
| 783 // Uses a simple linear regression, but optimized the offset using the median. | |
| 784 double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions, | |
| 785 double m_in, double *m_out, | |
| 786 double *c_out, int *index_delta) { | |
| 787 if (m_in == 0.0f || positions.size() < 2) { | |
| 788 *m_out = m_in; | |
| 789 *c_out = 0.0; | |
| 790 if (index_delta != nullptr) { | |
| 791 *index_delta = 0; | |
| 792 } | |
| 793 return 0.0; | |
| 794 } | |
| 795 std::vector<double> offsets; | |
| 796 // Get the offset (remainder) linespacing for each line and choose the median. | |
| 797 offsets.reserve(positions.size()); | |
| 798 for (double position : positions) { | |
| 799 offsets.push_back(fmod(position, m_in)); | |
| 800 } | |
| 801 // Get the median offset. | |
| 802 double median_offset = MedianOfCircularValues(m_in, offsets); | |
| 803 // Now fit a line to quantized line number and offset. | |
| 804 LLSQ llsq; | |
| 805 int min_index = INT32_MAX; | |
| 806 int max_index = -INT32_MAX; | |
| 807 for (double y_pos : positions) { | |
| 808 int row_index = IntCastRounded((y_pos - median_offset) / m_in); | |
| 809 UpdateRange(row_index, &min_index, &max_index); | |
| 810 llsq.add(row_index, y_pos); | |
| 811 } | |
| 812 // Get the refined line spacing. | |
| 813 *m_out = llsq.m(); | |
| 814 // Use the median offset rather than the mean. | |
| 815 offsets.clear(); | |
| 816 if (*m_out != 0.0) { | |
| 817 for (double position : positions) { | |
| 818 offsets.push_back(fmod(position, *m_out)); | |
| 819 } | |
| 820 // Get the median offset. | |
| 821 if (debug_level_ > 2) { | |
| 822 for (unsigned i = 0; i < offsets.size(); ++i) { | |
| 823 tprintf("%u: %g\n", i, offsets[i]); | |
| 824 } | |
| 825 } | |
| 826 *c_out = MedianOfCircularValues(*m_out, offsets); | |
| 827 } else { | |
| 828 *c_out = 0.0; | |
| 829 } | |
| 830 if (debug_level_ > 1) { | |
| 831 tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, | |
| 832 llsq.c(*m_out)); | |
| 833 } | |
| 834 // Index_delta is the number of hypothesized line gaps present. | |
| 835 if (index_delta != nullptr) { | |
| 836 *index_delta = max_index - min_index; | |
| 837 } | |
| 838 // Use the regression model's intercept to compute the error, as it may be | |
| 839 // a full line-spacing in disagreement with the median. | |
| 840 double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); | |
| 841 if (debug_level_ > 1) { | |
| 842 tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, | |
| 843 median_offset, *m_out, *c_out, rms_error); | |
| 844 } | |
| 845 return rms_error; | |
| 846 } | |
| 847 | |
| 848 BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew, | |
| 849 TO_BLOCK_LIST *blocks) | |
| 850 : page_skew_(page_skew), debug_level_(debug_level) { | |
| 851 TO_BLOCK_IT it(blocks); | |
| 852 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | |
| 853 TO_BLOCK *to_block = it.data(); | |
| 854 BLOCK *block = to_block->block; | |
| 855 POLY_BLOCK *pb = block->pdblk.poly_block(); | |
| 856 // A note about non-text blocks. | |
| 857 // On output, non-text blocks are supposed to contain a single empty word | |
| 858 // in each incoming text line. These mark out the polygonal bounds of the | |
| 859 // block. Ideally no baselines should be required, but currently | |
| 860 // make_words crashes if a baseline and xheight are not provided, so we | |
| 861 // include non-text blocks here, but flag them for special treatment. | |
| 862 bool non_text = pb != nullptr && !pb->IsText(); | |
| 863 blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block)); | |
| 864 } | |
| 865 } | |
| 866 | |
| 867 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers | |
| 868 // block-wise and page-wise data to smooth small blocks/rows, and applies | |
| 869 // smoothing based on block/page-level skew and block-level linespacing. | |
| 870 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { | |
| 871 std::vector<double> block_skew_angles; | |
| 872 for (auto bl_block : blocks_) { | |
| 873 if (debug_level_ > 0) { | |
| 874 tprintf("Fitting initial baselines...\n"); | |
| 875 } | |
| 876 if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) { | |
| 877 block_skew_angles.push_back(bl_block->skew_angle()); | |
| 878 } | |
| 879 } | |
| 880 // Compute a page-wide default skew for blocks with too little information. | |
| 881 double default_block_skew = page_skew_.angle(); | |
| 882 if (!block_skew_angles.empty()) { | |
| 883 default_block_skew = MedianOfCircularValues(M_PI, block_skew_angles); | |
| 884 } | |
| 885 if (debug_level_ > 0) { | |
| 886 tprintf("Page skew angle = %g\n", default_block_skew); | |
| 887 } | |
| 888 // Set bad lines in each block to the default block skew and then force fit | |
| 889 // a linespacing model where it makes sense to do so. | |
| 890 for (auto bl_block : blocks_) { | |
| 891 bl_block->ParallelizeBaselines(default_block_skew); | |
| 892 bl_block->SetupBlockParameters(); // This replaced compute_row_stats. | |
| 893 } | |
| 894 } | |
| 895 | |
| 896 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and | |
| 897 // other associated side-effects, including pre-associating blobs, computing | |
| 898 // x-heights and displaying debug information. | |
| 899 // NOTE that ComputeStraightBaselines must have been called first as this | |
| 900 // sets up data in the TO_ROWs upon which this function depends. | |
| 901 void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, | |
| 902 bool enable_splines, | |
| 903 bool remove_noise, | |
| 904 bool show_final_rows, | |
| 905 Textord *textord) { | |
| 906 for (auto bl_block : blocks_) { | |
| 907 if (enable_splines) { | |
| 908 bl_block->PrepareForSplineFitting(page_tr, remove_noise); | |
| 909 } | |
| 910 bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); | |
| 911 #ifndef GRAPHICS_DISABLED | |
| 912 if (show_final_rows) { | |
| 913 bl_block->DrawFinalRows(page_tr); | |
| 914 } | |
| 915 #endif | |
| 916 } | |
| 917 } | |
| 918 | |
| 919 } // namespace tesseract. |
