Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/baselinedetect.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/baselinedetect.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,274 @@ +/////////////////////////////////////////////////////////////////////// +// File: baselinedetect.h +// Description: Initial Baseline Determination. +// Copyright 2012 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ +#define TESSERACT_TEXTORD_BASELINEDETECT_H_ + +#include "detlinefit.h" +#include "points.h" +#include "rect.h" + +struct Pix; + +namespace tesseract { + +class Textord; +class BLOBNBOX_LIST; +class TO_BLOCK; +class TO_BLOCK_LIST; +class TO_ROW; + +// Class to compute and hold baseline data for a TO_ROW. +class BaselineRow { +public: + BaselineRow(double line_size, TO_ROW *to_row); + + const TBOX &bounding_box() const { + return bounding_box_; + } + // Sets the TO_ROW with the output straight line. + void SetupOldLineParameters(TO_ROW *row) const; + + // Outputs diagnostic information. + void Print() const; + + // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. + double BaselineAngle() const; + // Computes and returns the linespacing at the middle of the overlap + // between this and other. + double SpaceBetween(const BaselineRow &other) const; + // Computes and returns the displacement of the center of the line + // perpendicular to the given direction. + double PerpDisp(const FCOORD &direction) const; + // Computes the y coordinate at the given x using the straight baseline + // defined by baseline1_ and baseline2_. + double StraightYAtX(double x) const; + + // Fits a straight baseline to the points. Returns true if it had enough + // points to be reasonably sure of the fitted baseline. + // If use_box_bottoms is false, baselines positions are formed by + // considering the outlines of the blobs. + bool FitBaseline(bool use_box_bottoms); + // Modifies an existing result of FitBaseline to be parallel to the given + // vector if that produces a better result. + void AdjustBaselineToParallel(int debug, const FCOORD &direction); + // Modifies the baseline to snap to the textline grid if the existing + // result is not good enough. + double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, + double line_offset); + +private: + // Sets up displacement_modes_ with the top few modes of the perpendicular + // distance of each blob from the given direction vector, after rounding. + void SetupBlobDisplacements(const FCOORD &direction); + + // Fits a line in the given direction to blobs that are close to the given + // target_offset perpendicular displacement from the direction. The fit + // error is allowed to be cheat_allowance worse than the existing fit, and + // will still be used. + // If cheat_allowance > 0, the new fit will be good and replace the current + // fit if it has better fit (with cheat) OR its error is below + // max_baseline_error_ and the old fit is marked bad. + // Otherwise the new fit will only replace the old if it is really better, + // or the old fit is marked bad and the new fit has sufficient points, as + // well as being within the max_baseline_error_. + void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance, + double target_offset); + // Returns the perpendicular distance of the point from the straight + // baseline. + float PerpDistanceFromBaseline(const FCOORD &pt) const; + // Computes the bounding box of the row. + void ComputeBoundingBox(); + + // The blobs of the row to which this BaselineRow adds extra information + // during baseline fitting. Note that blobs_ could easily come from either + // a TO_ROW or a ColPartition. + BLOBNBOX_LIST *blobs_; + // Bounding box of all the blobs. + TBOX bounding_box_; + // Fitter used to fit lines to the blobs. + DetLineFit fitter_; + // 2 points on the straight baseline. + FCOORD baseline_pt1_; + FCOORD baseline_pt2_; + // Set of modes of displacements. They indicate preferable baseline positions. + std::vector<double> displacement_modes_; + // Quantization factor used for displacement_modes_. + double disp_quant_factor_; + // Half the acceptance range of blob displacements for computing the + // error during a constrained fit. + double fit_halfrange_; + // Max baseline error before a line is regarded as fitting badly. + double max_baseline_error_; + // The error of fit of the baseline. + double baseline_error_; + // True if this row seems to have a good baseline. + bool good_baseline_; +}; + +// Class to compute and hold baseline data for a TO_BLOCK. +class BaselineBlock { +public: + BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block); + + ~BaselineBlock() { + for (auto row : rows_) { + delete row; + } + } + + TO_BLOCK *block() const { + return block_; + } + double skew_angle() const { + return skew_angle_; + } + + // Computes and returns the absolute error of the given perp_disp from the + // given linespacing model. + static double SpacingModelError(double perp_disp, double line_spacing, double line_offset); + + // Fits straight line baselines and computes the skew angle from the + // median angle. Returns true if a good angle is found. + // If use_box_bottoms is false, baseline positions are formed by + // considering the outlines of the blobs. + bool FitBaselinesAndFindSkew(bool use_box_bottoms); + + // Refits the baseline to a constrained angle, using the stored block + // skew if good enough, otherwise the supplied default skew. + void ParallelizeBaselines(double default_block_skew); + + // Sets the parameters in TO_BLOCK that are needed by subsequent processes. + void SetupBlockParameters() const; + + // Processing that is required before fitting baseline splines, but requires + // linear baselines in order to be successful: + // Removes noise if required + // Separates out underlines + // Pre-associates blob fragments. + // TODO(rays/joeliu) This entire section of code is inherited from the past + // and could be improved/eliminated. + // page_tr is used to size a debug window. + void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); + + // Fits splines to the textlines, or creates fake QSPLINES from the straight + // baselines that are already on the TO_ROWs. + // As a side-effect, computes the xheights of the rows and the block. + // Although x-height estimation is conceptually separate, it is part of + // detecting perspective distortion and therefore baseline fitting. + void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord); + + // Draws the (straight) baselines and final blobs colored according to + // what was discarded as noise and what is associated with each row. + void DrawFinalRows(const ICOORD &page_tr); + + // Render the generated spline baselines for this block on pix_in. + void DrawPixSpline(Image pix_in); + +private: + // Top-level line-spacing calculation. Computes an estimate of the line- + // spacing, using the current baselines in the TO_ROWS of the block, and + // then refines it by fitting a regression line to the baseline positions + // as a function of their integer index. + // Returns true if it seems that the model is a reasonable fit to the + // observations. + bool ComputeLineSpacing(); + + // Computes the deskewed vertical position of each baseline in the block and + // stores them in the given vector. + void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions); + + // Computes an estimate of the line spacing of the block from the median + // of the spacings between adjacent overlapping textlines. + void EstimateLineSpacing(); + + // Refines the line spacing of the block by fitting a regression + // line to the deskewed y-position of each baseline as a function of its + // estimated line index, allowing for a small error in the initial linespacing + // and choosing the best available model. + void RefineLineSpacing(const std::vector<double> &positions); + + // Given an initial estimate of line spacing (m_in) and the positions of each + // baseline, computes the line spacing of the block more accurately in m_out, + // and the corresponding intercept in c_out, and the number of spacings seen + // in index_delta. Returns the error of fit to the line spacing model. + double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out, + double *c_out, int *index_delta); + + // The block to which this class adds extra information used during baseline + // calculation. + TO_BLOCK *block_; + // The rows in the block that we will be working with. + std::vector<BaselineRow *> rows_; + // Amount of debugging output to provide. + int debug_level_; + // True if the block is non-text (graphic). + bool non_text_block_; + // True if the block has at least one good enough baseline to compute the + // skew angle and therefore skew_angle_ is valid. + bool good_skew_angle_; + // Angle of skew in radians using the conventional anticlockwise from x-axis. + double skew_angle_; + // Current best estimate line spacing in pixels perpendicular to skew_angle_. + double line_spacing_; + // Offset for baseline positions, in pixels. Each baseline is at + // line_spacing_ * n + line_offset_ for integer n, which represents + // [textline] line number in a line numbering system that has line 0 on or + // at least near the x-axis. Not equal to the actual line number of a line + // within a block as most blocks are not near the x-axis. + double line_offset_; + // The error of the line spacing model. + double model_error_; +}; + +class BaselineDetect { +public: + BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks); + + ~BaselineDetect() { + for (auto block : blocks_) { + delete block; + } + } + + // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers + // block-wise and page-wise data to smooth small blocks/rows, and applies + // smoothing based on block/page-level skew and block-level linespacing. + void ComputeStraightBaselines(bool use_box_bottoms); + + // Computes the baseline splines for each TO_ROW in each TO_BLOCK and + // other associated side-effects, including pre-associating blobs, computing + // x-heights and displaying debug information. + // NOTE that ComputeStraightBaselines must have been called first as this + // sets up data in the TO_ROWs upon which this function depends. + void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, + bool remove_noise, bool show_final_rows, Textord *textord); + +private: + // Average (median) skew of the blocks on the page among those that have + // a good angle of their own. + FCOORD page_skew_; + // Amount of debug output to produce. + int debug_level_; + // The blocks that we are working with. + std::vector<BaselineBlock *> blocks_; +}; + +} // namespace tesseract + +#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
