comparison mupdf-source/thirdparty/tesseract/src/textord/baselinedetect.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: baselinedetect.cpp
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #define _USE_MATH_DEFINES // for M_PI
20
21 #ifdef HAVE_CONFIG_H
22 # include "config_auto.h"
23 #endif
24
25 #include "baselinedetect.h"
26
27 #include <allheaders.h>
28 #include <algorithm>
29 #include <cfloat> // for FLT_MAX
30 #include <cmath> // for M_PI
31 #include "blobbox.h"
32 #include "detlinefit.h"
33 #include "drawtord.h"
34 #include "helpers.h"
35 #include "linlsq.h"
36 #include "makerow.h"
37 #include "tesserrstream.h" // for tesserr
38 #include "textord.h"
39 #include "tprintf.h"
40 #include "underlin.h"
41
42 // Number of displacement modes kept in displacement_modes_;
43 const int kMaxDisplacementsModes = 3;
44 // Number of points to skip when retrying initial fit.
45 const int kNumSkipPoints = 3;
46 // Max angle deviation (in radians) allowed to keep the independent baseline.
47 const double kMaxSkewDeviation = 1.0 / 64;
48 // Fraction of line spacing estimate for quantization of blob displacements.
49 const double kOffsetQuantizationFactor = 3.0 / 64;
50 // Fraction of line spacing estimate for computing blob fit error.
51 const double kFitHalfrangeFactor = 6.0 / 64;
52 // Max fraction of line spacing allowed before a baseline counts as badly
53 // fitting.
54 const double kMaxBaselineError = 3.0 / 64;
55 // Multiple of linespacing that sets max_blob_size in TO_BLOCK.
56 // Copied from textord_excess_blobsize.
57 const double kMaxBlobSizeMultiple = 1.3;
58 // Min fraction of linespacing gaps that should be close to the model before
59 // we will force the linespacing model on all the lines.
60 const double kMinFittingLinespacings = 0.25;
61 // A y-coordinate within a textline that is to be debugged.
62 //#define kDebugYCoord 1525
63
64 namespace tesseract {
65
66 BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row)
67 : blobs_(to_row->blob_list()),
68 baseline_pt1_(0.0f, 0.0f),
69 baseline_pt2_(0.0f, 0.0f),
70 baseline_error_(0.0),
71 good_baseline_(false) {
72 ComputeBoundingBox();
73 // Compute a scale factor for rounding to ints.
74 disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
75 fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
76 max_baseline_error_ = kMaxBaselineError * line_spacing;
77 }
78
79 // Sets the TO_ROW with the output straight line.
80 void BaselineRow::SetupOldLineParameters(TO_ROW *row) const {
81 // TODO(rays) get rid of this when m and c are no longer used.
82 double gradient = tan(BaselineAngle());
83 // para_c is the actual intercept of the baseline on the y-axis.
84 float para_c = StraightYAtX(0.0);
85 row->set_line(gradient, para_c, baseline_error_);
86 row->set_parallel_line(gradient, para_c, baseline_error_);
87 }
88
89 // Outputs diagnostic information.
90 void BaselineRow::Print() const {
91 tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
92 baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(),
93 baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0));
94 tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_,
95 baseline_error_, good_baseline_);
96 bounding_box_.print();
97 }
98
99 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
100 double BaselineRow::BaselineAngle() const {
101 FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
102 double angle = baseline_dir.angle();
103 // Baseline directions are only unique in a range of pi so constrain to
104 // [-pi/2, pi/2].
105 return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
106 }
107
108 // Computes and returns the linespacing at the middle of the overlap
109 // between this and other.
110 double BaselineRow::SpaceBetween(const BaselineRow &other) const {
111 // Find the x-centre of overlap of the lines.
112 float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
113 std::min(bounding_box_.right(), other.bounding_box_.right())) /
114 2.0f;
115 // Find the vertical centre between them.
116 float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
117 // Find the perpendicular distance of (x,y) from each line.
118 FCOORD pt(x, y);
119 return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
120 }
121
122 // Computes and returns the displacement of the center of the line
123 // perpendicular to the given direction.
124 double BaselineRow::PerpDisp(const FCOORD &direction) const {
125 float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
126 FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
127 return direction * middle_pos / direction.length();
128 }
129
130 // Computes the y coordinate at the given x using the straight baseline
131 // defined by baseline_pt1_ and baseline_pt2__.
132 double BaselineRow::StraightYAtX(double x) const {
133 double denominator = baseline_pt2_.x() - baseline_pt1_.x();
134 if (denominator == 0.0) {
135 return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
136 }
137 return baseline_pt1_.y() + (x - baseline_pt1_.x()) *
138 (baseline_pt2_.y() - baseline_pt1_.y()) /
139 denominator;
140 }
141
142 // Fits a straight baseline to the points. Returns true if it had enough
143 // points to be reasonably sure of the fitted baseline.
144 // If use_box_bottoms is false, baselines positions are formed by
145 // considering the outlines of the blobs.
146 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
147 // Deterministic fitting is used wherever possible.
148 fitter_.Clear();
149 // Linear least squares is a backup if the DetLineFit produces a bad line.
150 LLSQ llsq;
151 BLOBNBOX_IT blob_it(blobs_);
152
153 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
154 BLOBNBOX *blob = blob_it.data();
155 if (!use_box_bottoms) {
156 blob->EstimateBaselinePosition();
157 }
158 const TBOX &box = blob->bounding_box();
159 int x_middle = (box.left() + box.right()) / 2;
160 #ifdef kDebugYCoord
161 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
162 tprintf("Box bottom = %d, baseline pos=%d for box at:", box.bottom(),
163 blob->baseline_position());
164 box.print();
165 }
166 #endif
167 fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
168 llsq.add(x_middle, blob->baseline_position());
169 }
170 // Fit the line.
171 ICOORD pt1, pt2;
172 baseline_error_ = fitter_.Fit(&pt1, &pt2);
173 baseline_pt1_ = pt1;
174 baseline_pt2_ = pt2;
175 if (baseline_error_ > max_baseline_error_ &&
176 fitter_.SufficientPointsForIndependentFit()) {
177 // The fit was bad but there were plenty of points, so try skipping
178 // the first and last few, and use the new line if it dramatically improves
179 // the error of fit.
180 double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
181 if (error < baseline_error_ / 2.0) {
182 baseline_error_ = error;
183 baseline_pt1_ = pt1;
184 baseline_pt2_ = pt2;
185 }
186 }
187 int debug = 0;
188 #ifdef kDebugYCoord
189 Print();
190 debug = bounding_box_.bottom() < kDebugYCoord &&
191 bounding_box_.top() > kDebugYCoord
192 ? 3
193 : 2;
194 #endif
195 // Now we obtained a direction from that fit, see if we can improve the
196 // fit using the same direction and some other start point.
197 FCOORD direction(pt2 - pt1);
198 double target_offset = direction * pt1;
199 good_baseline_ = false;
200 FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
201 // Wild lines can be produced because DetLineFit allows vertical lines, but
202 // vertical text has been rotated so angles over pi/4 should be disallowed.
203 // Near vertical lines can still be produced by vertically aligned components
204 // on very short lines.
205 double angle = BaselineAngle();
206 if (fabs(angle) > M_PI * 0.25) {
207 // Use the llsq fit as a backup.
208 baseline_pt1_ = llsq.mean_point();
209 baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
210 // TODO(rays) get rid of this when m and c are no longer used.
211 double m = llsq.m();
212 double c = llsq.c(m);
213 baseline_error_ = llsq.rms(m, c);
214 good_baseline_ = false;
215 }
216 return good_baseline_;
217 }
218
219 // Modifies an existing result of FitBaseline to be parallel to the given
220 // direction vector if that produces a better result.
221 void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
222 SetupBlobDisplacements(direction);
223 if (displacement_modes_.empty()) {
224 return;
225 }
226 #ifdef kDebugYCoord
227 if (bounding_box_.bottom() < kDebugYCoord &&
228 bounding_box_.top() > kDebugYCoord && debug < 3)
229 debug = 3;
230 #endif
231 FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
232 }
233
234 // Modifies the baseline to snap to the textline grid if the existing
235 // result is not good enough.
236 double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction,
237 double line_spacing,
238 double line_offset) {
239 if (blobs_->empty()) {
240 if (debug > 1) {
241 tprintf("Row empty at:");
242 bounding_box_.print();
243 }
244 return line_offset;
245 }
246 // Find the displacement_modes_ entry nearest to the grid.
247 double best_error = 0.0;
248 int best_index = -1;
249 for (unsigned i = 0; i < displacement_modes_.size(); ++i) {
250 double blob_y = displacement_modes_[i];
251 double error =
252 BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
253 if (debug > 1) {
254 tprintf("Mode at %g has error %g from model \n", blob_y, error);
255 }
256 if (best_index < 0 || error < best_error) {
257 best_error = error;
258 best_index = i;
259 }
260 }
261 // We will move the baseline only if the chosen mode is close enough to the
262 // model.
263 double model_margin = max_baseline_error_ - best_error;
264 if (best_index >= 0 && model_margin > 0.0) {
265 // But if the current baseline is already close to the mode there is no
266 // point, and only the potential to damage accuracy by changing its angle.
267 double perp_disp = PerpDisp(direction);
268 double shift = displacement_modes_[best_index] - perp_disp;
269 if (fabs(shift) > max_baseline_error_) {
270 if (debug > 1) {
271 tprintf("Attempting linespacing model fit with mode %g to row at:",
272 displacement_modes_[best_index]);
273 bounding_box_.print();
274 }
275 FitConstrainedIfBetter(debug, direction, model_margin,
276 displacement_modes_[best_index]);
277 } else if (debug > 1) {
278 tprintf("Linespacing model only moves current line by %g for row at:",
279 shift);
280 bounding_box_.print();
281 }
282 } else if (debug > 1) {
283 tprintf("Linespacing model not close enough to any mode for row at:");
284 bounding_box_.print();
285 }
286 return fmod(PerpDisp(direction), line_spacing);
287 }
288
289 // Sets up displacement_modes_ with the top few modes of the perpendicular
290 // distance of each blob from the given direction vector, after rounding.
291 void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
292 // Set of perpendicular displacements of the blob bottoms from the required
293 // baseline direction.
294 std::vector<double> perp_blob_dists;
295 displacement_modes_.clear();
296 // Gather the skew-corrected position of every blob.
297 double min_dist = FLT_MAX;
298 double max_dist = -FLT_MAX;
299 BLOBNBOX_IT blob_it(blobs_);
300 #ifdef kDebugYCoord
301 bool debug = false;
302 #endif
303 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
304 BLOBNBOX *blob = blob_it.data();
305 const TBOX &box = blob->bounding_box();
306 #ifdef kDebugYCoord
307 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord)
308 debug = true;
309 #endif
310 FCOORD blob_pos((box.left() + box.right()) / 2.0f,
311 blob->baseline_position());
312 double offset = direction * blob_pos;
313 perp_blob_dists.push_back(offset);
314 #ifdef kDebugYCoord
315 if (debug) {
316 tprintf("Displacement %g for blob at:", offset);
317 box.print();
318 }
319 #endif
320 UpdateRange(offset, &min_dist, &max_dist);
321 }
322 // Set up a histogram using disp_quant_factor_ as the bucket size.
323 STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
324 IntCastRounded(max_dist / disp_quant_factor_));
325 for (double perp_blob_dist : perp_blob_dists) {
326 dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1);
327 }
328 std::vector<KDPairInc<float, int>> scaled_modes;
329 dist_stats.top_n_modes(kMaxDisplacementsModes, scaled_modes);
330 #ifdef kDebugYCoord
331 if (debug) {
332 for (int i = 0; i < scaled_modes.size(); ++i) {
333 tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_,
334 scaled_modes[i].data());
335 }
336 }
337 #endif
338 for (auto &scaled_mode : scaled_modes) {
339 displacement_modes_.push_back(disp_quant_factor_ * scaled_mode.key());
340 }
341 }
342
343 // Fits a line in the given direction to blobs that are close to the given
344 // target_offset perpendicular displacement from the direction. The fit
345 // error is allowed to be cheat_allowance worse than the existing fit, and
346 // will still be used.
347 // If cheat_allowance > 0, the new fit will be good and replace the current
348 // fit if it has better fit (with cheat) OR its error is below
349 // max_baseline_error_ and the old fit is marked bad.
350 // Otherwise the new fit will only replace the old if it is really better,
351 // or the old fit is marked bad and the new fit has sufficient points, as
352 // well as being within the max_baseline_error_.
353 void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction,
354 double cheat_allowance,
355 double target_offset) {
356 double halfrange = fit_halfrange_ * direction.length();
357 double min_dist = target_offset - halfrange;
358 double max_dist = target_offset + halfrange;
359 ICOORD line_pt;
360 double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
361 debug > 2, &line_pt);
362 // Allow cheat_allowance off the new error
363 new_error -= cheat_allowance;
364 double old_angle = BaselineAngle();
365 double new_angle = direction.angle();
366 if (debug > 1) {
367 tprintf("Constrained error = %g, original = %g", new_error,
368 baseline_error_);
369 tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle,
370 new_angle, new_angle - old_angle, kMaxSkewDeviation);
371 }
372 bool new_good_baseline =
373 new_error <= max_baseline_error_ &&
374 (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
375 // The new will replace the old if any are true:
376 // 1. the new error is better
377 // 2. the old is NOT good, but the new is
378 // 3. there is a wild angular difference between them (assuming that the new
379 // is a better guess at the angle.)
380 if (new_error <= baseline_error_ || (!good_baseline_ && new_good_baseline) ||
381 fabs(new_angle - old_angle) > kMaxSkewDeviation) {
382 baseline_error_ = new_error;
383 baseline_pt1_ = line_pt;
384 baseline_pt2_ = baseline_pt1_ + direction;
385 good_baseline_ = new_good_baseline;
386 if (debug > 1) {
387 tprintf("Replacing with constrained baseline, good = %d\n",
388 good_baseline_);
389 }
390 } else if (debug > 1) {
391 tprintf("Keeping old baseline\n");
392 }
393 }
394
395 // Returns the perpendicular distance of the point from the straight
396 // baseline.
397 float BaselineRow::PerpDistanceFromBaseline(const FCOORD &pt) const {
398 FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
399 FCOORD offset_vector(pt - baseline_pt1_);
400 float distance = baseline_vector * offset_vector;
401 float sqlength = baseline_vector.sqlength();
402 if (sqlength == 0.0f) {
403 tprintf("unexpected baseline vector (0,0)\n");
404 return 0.0f;
405 }
406 return std::sqrt(distance * distance / sqlength);
407 }
408
409 // Computes the bounding box of the row.
410 void BaselineRow::ComputeBoundingBox() {
411 BLOBNBOX_IT it(blobs_);
412 TBOX box;
413 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
414 box += it.data()->bounding_box();
415 }
416 bounding_box_ = box;
417 }
418
419 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
420 : block_(block),
421 debug_level_(debug_level),
422 non_text_block_(non_text),
423 good_skew_angle_(false),
424 skew_angle_(0.0),
425 line_spacing_(block->line_spacing),
426 line_offset_(0.0),
427 model_error_(0.0) {
428 TO_ROW_IT row_it(block_->get_rows());
429 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
430 // Sort the blobs on the rows.
431 row_it.data()->blob_list()->sort(blob_x_order);
432 rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
433 }
434 }
435
436 // Computes and returns the absolute error of the given perp_disp from the
437 // given linespacing model.
438 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
439 double line_offset) {
440 // Round to the nearest multiple of line_spacing + line offset.
441 int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
442 double model_y = line_spacing * multiple + line_offset;
443 return fabs(perp_disp - model_y);
444 }
445
446 // Fits straight line baselines and computes the skew angle from the
447 // median angle. Returns true if a good angle is found.
448 // If use_box_bottoms is false, baseline positions are formed by
449 // considering the outlines of the blobs.
450 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
451 if (non_text_block_) {
452 return false;
453 }
454 std::vector<double> angles;
455 for (auto row : rows_) {
456 if (row->FitBaseline(use_box_bottoms)) {
457 double angle = row->BaselineAngle();
458 angles.push_back(angle);
459 }
460 if (debug_level_ > 1) {
461 row->Print();
462 }
463 }
464
465 if (!angles.empty()) {
466 skew_angle_ = MedianOfCircularValues(M_PI, angles);
467 good_skew_angle_ = true;
468 } else {
469 skew_angle_ = 0.0f;
470 good_skew_angle_ = false;
471 }
472 if (debug_level_ > 0) {
473 tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_,
474 good_skew_angle_);
475 }
476 return good_skew_angle_;
477 }
478
479 // Refits the baseline to a constrained angle, using the stored block
480 // skew if good enough, otherwise the supplied default skew.
481 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
482 if (non_text_block_) {
483 return;
484 }
485 if (!good_skew_angle_) {
486 skew_angle_ = default_block_skew;
487 }
488 if (debug_level_ > 0) {
489 tprintf("Adjusting block to skew angle %g\n", skew_angle_);
490 }
491 FCOORD direction(cos(skew_angle_), sin(skew_angle_));
492 for (auto row : rows_) {
493 row->AdjustBaselineToParallel(debug_level_, direction);
494 if (debug_level_ > 1) {
495 row->Print();
496 }
497 }
498 if (rows_.size() < 3 || !ComputeLineSpacing()) {
499 return;
500 }
501 // Enforce the line spacing model on all lines that don't yet have a good
502 // baseline.
503 // Start by finding the row that is best fitted to the model.
504 unsigned best_row = 0;
505 double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
506 line_spacing_, line_offset_);
507 for (unsigned r = 1; r < rows_.size(); ++r) {
508 double error = SpacingModelError(rows_[r]->PerpDisp(direction),
509 line_spacing_, line_offset_);
510 if (error < best_error) {
511 best_error = error;
512 best_row = r;
513 }
514 }
515 // Starting at the best fitting row, work outwards, syncing the offset.
516 double offset = line_offset_;
517 for (auto r = best_row + 1; r < rows_.size(); ++r) {
518 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
519 line_spacing_, offset);
520 }
521 offset = line_offset_;
522 for (int r = best_row - 1; r >= 0; --r) {
523 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
524 line_spacing_, offset);
525 }
526 }
527
528 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
529 void BaselineBlock::SetupBlockParameters() const {
530 if (line_spacing_ > 0.0) {
531 // Where was block_line_spacing set before?
532 float min_spacing =
533 std::min(block_->line_spacing, static_cast<float>(line_spacing_));
534 if (min_spacing < block_->line_size) {
535 block_->line_size = min_spacing;
536 }
537 block_->line_spacing = line_spacing_;
538 block_->baseline_offset = line_offset_;
539 block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
540 }
541 // Setup the parameters on all the rows.
542 TO_ROW_IT row_it(block_->get_rows());
543 for (unsigned r = 0; r < rows_.size(); ++r, row_it.forward()) {
544 BaselineRow *row = rows_[r];
545 TO_ROW *to_row = row_it.data();
546 row->SetupOldLineParameters(to_row);
547 }
548 }
549
550 // Processing that is required before fitting baseline splines, but requires
551 // linear baselines in order to be successful:
552 // Removes noise if required
553 // Separates out underlines
554 // Pre-associates blob fragments.
555 // TODO(rays/joeliu) This entire section of code is inherited from the past
556 // and could be improved/eliminated.
557 // page_tr is used to size a debug window.
558 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
559 if (non_text_block_) {
560 return;
561 }
562 if (remove_noise) {
563 vigorous_noise_removal(block_);
564 }
565 FCOORD rotation(1.0f, 0.0f);
566 double gradient = tan(skew_angle_);
567 separate_underlines(block_, gradient, rotation, true);
568 pre_associate_blobs(page_tr, block_, rotation, true);
569 }
570
571 // Fits splines to the textlines, or creates fake QSPLINES from the straight
572 // baselines that are already on the TO_ROWs.
573 // As a side-effect, computes the xheights of the rows and the block.
574 // Although x-height estimation is conceptually separate, it is part of
575 // detecting perspective distortion and therefore baseline fitting.
576 void BaselineBlock::FitBaselineSplines(bool enable_splines,
577 bool show_final_rows, Textord *textord) {
578 double gradient = tan(skew_angle_);
579 FCOORD rotation(1.0f, 0.0f);
580
581 if (enable_splines) {
582 textord->make_spline_rows(block_, gradient, show_final_rows);
583 } else {
584 // Make a fake spline from the existing line.
585 TBOX block_box = block_->block->pdblk.bounding_box();
586 TO_ROW_IT row_it = block_->get_rows();
587 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
588 TO_ROW *row = row_it.data();
589 int32_t xstarts[2] = {block_box.left(), block_box.right()};
590 double coeffs[3] = {0.0, row->line_m(), row->line_c()};
591 row->baseline = QSPLINE(1, xstarts, coeffs);
592 textord->compute_row_xheight(row, block_->block->classify_rotation(),
593 row->line_m(), block_->line_size);
594 }
595 }
596 textord->compute_block_xheight(block_, gradient);
597 block_->block->set_xheight(block_->xheight);
598 if (textord_restore_underlines) { // fix underlines
599 restore_underlined_blobs(block_);
600 }
601 }
602
603 #ifndef GRAPHICS_DISABLED
604
605 // Draws the (straight) baselines and final blobs colored according to
606 // what was discarded as noise and what is associated with each row.
607 void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
608 if (non_text_block_) {
609 return;
610 }
611 double gradient = tan(skew_angle_);
612 FCOORD rotation(1.0f, 0.0f);
613 int left_edge = block_->block->pdblk.bounding_box().left();
614 ScrollView *win = create_to_win(page_tr);
615 ScrollView::Color colour = ScrollView::RED;
616 TO_ROW_IT row_it = block_->get_rows();
617 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
618 plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
619 colour = static_cast<ScrollView::Color>(colour + 1);
620 if (colour > ScrollView::MAGENTA) {
621 colour = ScrollView::RED;
622 }
623 }
624 plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
625 // Show discarded blobs.
626 plot_blob_list(win, &block_->underlines, ScrollView::YELLOW,
627 ScrollView::CORAL);
628 if (block_->blobs.length() > 0) {
629 tprintf("%d blobs discarded as noise\n", block_->blobs.length());
630 }
631 draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
632 }
633
634 #endif // !GRAPHICS_DISABLED
635
636 void BaselineBlock::DrawPixSpline(Image pix_in) {
637 if (non_text_block_) {
638 return;
639 }
640 TO_ROW_IT row_it = block_->get_rows();
641 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
642 row_it.data()->baseline.plot(pix_in);
643 }
644 }
645
646 // Top-level line-spacing calculation. Computes an estimate of the line-
647 // spacing, using the current baselines in the TO_ROWS of the block, and
648 // then refines it by fitting a regression line to the baseline positions
649 // as a function of their integer index.
650 // Returns true if it seems that the model is a reasonable fit to the
651 // observations.
652 bool BaselineBlock::ComputeLineSpacing() {
653 FCOORD direction(cos(skew_angle_), sin(skew_angle_));
654 std::vector<double> row_positions;
655 ComputeBaselinePositions(direction, &row_positions);
656 if (row_positions.size() < 2) {
657 return false;
658 }
659 EstimateLineSpacing();
660 RefineLineSpacing(row_positions);
661 // Verify that the model is reasonable.
662 double max_baseline_error = kMaxBaselineError * line_spacing_;
663 int non_trivial_gaps = 0;
664 int fitting_gaps = 0;
665 for (unsigned i = 1; i < row_positions.size(); ++i) {
666 double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
667 if (row_gap > max_baseline_error) {
668 ++non_trivial_gaps;
669 if (fabs(row_gap - line_spacing_) <= max_baseline_error) {
670 ++fitting_gaps;
671 }
672 }
673 }
674 if (debug_level_ > 0) {
675 tesserr << "Spacing " << line_spacing_ << ", in "
676 << row_positions.size() << " rows, "
677 << fitting_gaps << " gaps fitted out of "
678 << non_trivial_gaps << " non-trivial\n";
679 }
680 return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
681 }
682
683 // Computes the deskewed vertical position of each baseline in the block and
684 // stores them in the given vector.
685 // This is calculated as the perpendicular distance of the middle of each
686 // baseline (in case it has a different skew angle) from the line passing
687 // through the origin parallel to the block baseline angle.
688 // NOTE that "distance" above is a signed quantity so we can tell which side
689 // of the block baseline a line sits, hence the function and argument name
690 // positions not distances.
691 void BaselineBlock::ComputeBaselinePositions(const FCOORD &direction,
692 std::vector<double> *positions) {
693 positions->clear();
694 for (auto row : rows_) {
695 const TBOX &row_box = row->bounding_box();
696 float x_middle = (row_box.left() + row_box.right()) / 2.0f;
697 FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
698 float offset = direction * row_pos;
699 positions->push_back(offset);
700 }
701 }
702
703 // Computes an estimate of the line spacing of the block from the median
704 // of the spacings between adjacent overlapping textlines.
705 void BaselineBlock::EstimateLineSpacing() {
706 std::vector<float> spacings;
707 for (unsigned r = 0; r < rows_.size(); ++r) {
708 BaselineRow *row = rows_[r];
709 // Exclude silly lines.
710 if (fabs(row->BaselineAngle()) > M_PI * 0.25) {
711 continue;
712 }
713 // Find the first row after row that overlaps it significantly.
714 const TBOX &row_box = row->bounding_box();
715 unsigned r2;
716 for (r2 = r + 1; r2 < rows_.size() &&
717 !row_box.major_x_overlap(rows_[r2]->bounding_box());
718 ++r2) {
719 ;
720 }
721 if (r2 < rows_.size()) {
722 BaselineRow *row2 = rows_[r2];
723 // Exclude silly lines.
724 if (fabs(row2->BaselineAngle()) > M_PI * 0.25) {
725 continue;
726 }
727 float spacing = row->SpaceBetween(*row2);
728 spacings.push_back(spacing);
729 }
730 }
731 // If we have at least one value, use it, otherwise leave the previous
732 // value unchanged.
733 if (!spacings.empty()) {
734 std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2,
735 spacings.end());
736 line_spacing_ = spacings[spacings.size() / 2];
737 if (debug_level_ > 1) {
738 tprintf("Estimate of linespacing = %g\n", line_spacing_);
739 }
740 }
741 }
742
743 // Refines the line spacing of the block by fitting a regression
744 // line to the deskewed y-position of each baseline as a function of its
745 // estimated line index, allowing for a small error in the initial linespacing
746 // and choosing the best available model.
747 void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
748 double spacings[3], offsets[3], errors[3];
749 int index_range;
750 errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0],
751 &offsets[0], &index_range);
752 if (index_range > 1) {
753 double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
754 // Try the hypotheses that there might be index_range +/- 1 line spaces.
755 errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1],
756 &offsets[1], nullptr);
757 double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
758 errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2],
759 &offsets[2], nullptr);
760 for (int i = 1; i <= 2; ++i) {
761 if (errors[i] < errors[0]) {
762 spacings[0] = spacings[i];
763 offsets[0] = offsets[i];
764 errors[0] = errors[i];
765 }
766 }
767 }
768 if (spacings[0] > 0.0) {
769 line_spacing_ = spacings[0];
770 line_offset_ = offsets[0];
771 model_error_ = errors[0];
772 if (debug_level_ > 0) {
773 tprintf("Final linespacing model = %g + offset %g, error %g\n",
774 line_spacing_, line_offset_, model_error_);
775 }
776 }
777 }
778
779 // Given an initial estimate of line spacing (m_in) and the positions of each
780 // baseline, computes the line spacing of the block more accurately in m_out,
781 // and the corresponding intercept in c_out, and the number of spacings seen
782 // in index_delta. Returns the error of fit to the line spacing model.
783 // Uses a simple linear regression, but optimized the offset using the median.
784 double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
785 double m_in, double *m_out,
786 double *c_out, int *index_delta) {
787 if (m_in == 0.0f || positions.size() < 2) {
788 *m_out = m_in;
789 *c_out = 0.0;
790 if (index_delta != nullptr) {
791 *index_delta = 0;
792 }
793 return 0.0;
794 }
795 std::vector<double> offsets;
796 // Get the offset (remainder) linespacing for each line and choose the median.
797 offsets.reserve(positions.size());
798 for (double position : positions) {
799 offsets.push_back(fmod(position, m_in));
800 }
801 // Get the median offset.
802 double median_offset = MedianOfCircularValues(m_in, offsets);
803 // Now fit a line to quantized line number and offset.
804 LLSQ llsq;
805 int min_index = INT32_MAX;
806 int max_index = -INT32_MAX;
807 for (double y_pos : positions) {
808 int row_index = IntCastRounded((y_pos - median_offset) / m_in);
809 UpdateRange(row_index, &min_index, &max_index);
810 llsq.add(row_index, y_pos);
811 }
812 // Get the refined line spacing.
813 *m_out = llsq.m();
814 // Use the median offset rather than the mean.
815 offsets.clear();
816 if (*m_out != 0.0) {
817 for (double position : positions) {
818 offsets.push_back(fmod(position, *m_out));
819 }
820 // Get the median offset.
821 if (debug_level_ > 2) {
822 for (unsigned i = 0; i < offsets.size(); ++i) {
823 tprintf("%u: %g\n", i, offsets[i]);
824 }
825 }
826 *c_out = MedianOfCircularValues(*m_out, offsets);
827 } else {
828 *c_out = 0.0;
829 }
830 if (debug_level_ > 1) {
831 tprintf("Median offset = %g, compared to mean of %g.\n", *c_out,
832 llsq.c(*m_out));
833 }
834 // Index_delta is the number of hypothesized line gaps present.
835 if (index_delta != nullptr) {
836 *index_delta = max_index - min_index;
837 }
838 // Use the regression model's intercept to compute the error, as it may be
839 // a full line-spacing in disagreement with the median.
840 double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
841 if (debug_level_ > 1) {
842 tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in,
843 median_offset, *m_out, *c_out, rms_error);
844 }
845 return rms_error;
846 }
847
848 BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew,
849 TO_BLOCK_LIST *blocks)
850 : page_skew_(page_skew), debug_level_(debug_level) {
851 TO_BLOCK_IT it(blocks);
852 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
853 TO_BLOCK *to_block = it.data();
854 BLOCK *block = to_block->block;
855 POLY_BLOCK *pb = block->pdblk.poly_block();
856 // A note about non-text blocks.
857 // On output, non-text blocks are supposed to contain a single empty word
858 // in each incoming text line. These mark out the polygonal bounds of the
859 // block. Ideally no baselines should be required, but currently
860 // make_words crashes if a baseline and xheight are not provided, so we
861 // include non-text blocks here, but flag them for special treatment.
862 bool non_text = pb != nullptr && !pb->IsText();
863 blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
864 }
865 }
866
867 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
868 // block-wise and page-wise data to smooth small blocks/rows, and applies
869 // smoothing based on block/page-level skew and block-level linespacing.
870 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
871 std::vector<double> block_skew_angles;
872 for (auto bl_block : blocks_) {
873 if (debug_level_ > 0) {
874 tprintf("Fitting initial baselines...\n");
875 }
876 if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
877 block_skew_angles.push_back(bl_block->skew_angle());
878 }
879 }
880 // Compute a page-wide default skew for blocks with too little information.
881 double default_block_skew = page_skew_.angle();
882 if (!block_skew_angles.empty()) {
883 default_block_skew = MedianOfCircularValues(M_PI, block_skew_angles);
884 }
885 if (debug_level_ > 0) {
886 tprintf("Page skew angle = %g\n", default_block_skew);
887 }
888 // Set bad lines in each block to the default block skew and then force fit
889 // a linespacing model where it makes sense to do so.
890 for (auto bl_block : blocks_) {
891 bl_block->ParallelizeBaselines(default_block_skew);
892 bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
893 }
894 }
895
896 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
897 // other associated side-effects, including pre-associating blobs, computing
898 // x-heights and displaying debug information.
899 // NOTE that ComputeStraightBaselines must have been called first as this
900 // sets up data in the TO_ROWs upon which this function depends.
901 void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr,
902 bool enable_splines,
903 bool remove_noise,
904 bool show_final_rows,
905 Textord *textord) {
906 for (auto bl_block : blocks_) {
907 if (enable_splines) {
908 bl_block->PrepareForSplineFitting(page_tr, remove_noise);
909 }
910 bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
911 #ifndef GRAPHICS_DISABLED
912 if (show_final_rows) {
913 bl_block->DrawFinalRows(page_tr);
914 }
915 #endif
916 }
917 }
918
919 } // namespace tesseract.