Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/baselinedetect.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: baselinedetect.h | |
| 3 // Description: Initial Baseline Determination. | |
| 4 // Copyright 2012 Google Inc. All Rights Reserved. | |
| 5 // Author: rays@google.com (Ray Smith) | |
| 6 // | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ | |
| 20 #define TESSERACT_TEXTORD_BASELINEDETECT_H_ | |
| 21 | |
| 22 #include "detlinefit.h" | |
| 23 #include "points.h" | |
| 24 #include "rect.h" | |
| 25 | |
| 26 struct Pix; | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 class Textord; | |
| 31 class BLOBNBOX_LIST; | |
| 32 class TO_BLOCK; | |
| 33 class TO_BLOCK_LIST; | |
| 34 class TO_ROW; | |
| 35 | |
| 36 // Class to compute and hold baseline data for a TO_ROW. | |
| 37 class BaselineRow { | |
| 38 public: | |
| 39 BaselineRow(double line_size, TO_ROW *to_row); | |
| 40 | |
| 41 const TBOX &bounding_box() const { | |
| 42 return bounding_box_; | |
| 43 } | |
| 44 // Sets the TO_ROW with the output straight line. | |
| 45 void SetupOldLineParameters(TO_ROW *row) const; | |
| 46 | |
| 47 // Outputs diagnostic information. | |
| 48 void Print() const; | |
| 49 | |
| 50 // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. | |
| 51 double BaselineAngle() const; | |
| 52 // Computes and returns the linespacing at the middle of the overlap | |
| 53 // between this and other. | |
| 54 double SpaceBetween(const BaselineRow &other) const; | |
| 55 // Computes and returns the displacement of the center of the line | |
| 56 // perpendicular to the given direction. | |
| 57 double PerpDisp(const FCOORD &direction) const; | |
| 58 // Computes the y coordinate at the given x using the straight baseline | |
| 59 // defined by baseline1_ and baseline2_. | |
| 60 double StraightYAtX(double x) const; | |
| 61 | |
| 62 // Fits a straight baseline to the points. Returns true if it had enough | |
| 63 // points to be reasonably sure of the fitted baseline. | |
| 64 // If use_box_bottoms is false, baselines positions are formed by | |
| 65 // considering the outlines of the blobs. | |
| 66 bool FitBaseline(bool use_box_bottoms); | |
| 67 // Modifies an existing result of FitBaseline to be parallel to the given | |
| 68 // vector if that produces a better result. | |
| 69 void AdjustBaselineToParallel(int debug, const FCOORD &direction); | |
| 70 // Modifies the baseline to snap to the textline grid if the existing | |
| 71 // result is not good enough. | |
| 72 double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, | |
| 73 double line_offset); | |
| 74 | |
| 75 private: | |
| 76 // Sets up displacement_modes_ with the top few modes of the perpendicular | |
| 77 // distance of each blob from the given direction vector, after rounding. | |
| 78 void SetupBlobDisplacements(const FCOORD &direction); | |
| 79 | |
| 80 // Fits a line in the given direction to blobs that are close to the given | |
| 81 // target_offset perpendicular displacement from the direction. The fit | |
| 82 // error is allowed to be cheat_allowance worse than the existing fit, and | |
| 83 // will still be used. | |
| 84 // If cheat_allowance > 0, the new fit will be good and replace the current | |
| 85 // fit if it has better fit (with cheat) OR its error is below | |
| 86 // max_baseline_error_ and the old fit is marked bad. | |
| 87 // Otherwise the new fit will only replace the old if it is really better, | |
| 88 // or the old fit is marked bad and the new fit has sufficient points, as | |
| 89 // well as being within the max_baseline_error_. | |
| 90 void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance, | |
| 91 double target_offset); | |
| 92 // Returns the perpendicular distance of the point from the straight | |
| 93 // baseline. | |
| 94 float PerpDistanceFromBaseline(const FCOORD &pt) const; | |
| 95 // Computes the bounding box of the row. | |
| 96 void ComputeBoundingBox(); | |
| 97 | |
| 98 // The blobs of the row to which this BaselineRow adds extra information | |
| 99 // during baseline fitting. Note that blobs_ could easily come from either | |
| 100 // a TO_ROW or a ColPartition. | |
| 101 BLOBNBOX_LIST *blobs_; | |
| 102 // Bounding box of all the blobs. | |
| 103 TBOX bounding_box_; | |
| 104 // Fitter used to fit lines to the blobs. | |
| 105 DetLineFit fitter_; | |
| 106 // 2 points on the straight baseline. | |
| 107 FCOORD baseline_pt1_; | |
| 108 FCOORD baseline_pt2_; | |
| 109 // Set of modes of displacements. They indicate preferable baseline positions. | |
| 110 std::vector<double> displacement_modes_; | |
| 111 // Quantization factor used for displacement_modes_. | |
| 112 double disp_quant_factor_; | |
| 113 // Half the acceptance range of blob displacements for computing the | |
| 114 // error during a constrained fit. | |
| 115 double fit_halfrange_; | |
| 116 // Max baseline error before a line is regarded as fitting badly. | |
| 117 double max_baseline_error_; | |
| 118 // The error of fit of the baseline. | |
| 119 double baseline_error_; | |
| 120 // True if this row seems to have a good baseline. | |
| 121 bool good_baseline_; | |
| 122 }; | |
| 123 | |
| 124 // Class to compute and hold baseline data for a TO_BLOCK. | |
| 125 class BaselineBlock { | |
| 126 public: | |
| 127 BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block); | |
| 128 | |
| 129 ~BaselineBlock() { | |
| 130 for (auto row : rows_) { | |
| 131 delete row; | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 TO_BLOCK *block() const { | |
| 136 return block_; | |
| 137 } | |
| 138 double skew_angle() const { | |
| 139 return skew_angle_; | |
| 140 } | |
| 141 | |
| 142 // Computes and returns the absolute error of the given perp_disp from the | |
| 143 // given linespacing model. | |
| 144 static double SpacingModelError(double perp_disp, double line_spacing, double line_offset); | |
| 145 | |
| 146 // Fits straight line baselines and computes the skew angle from the | |
| 147 // median angle. Returns true if a good angle is found. | |
| 148 // If use_box_bottoms is false, baseline positions are formed by | |
| 149 // considering the outlines of the blobs. | |
| 150 bool FitBaselinesAndFindSkew(bool use_box_bottoms); | |
| 151 | |
| 152 // Refits the baseline to a constrained angle, using the stored block | |
| 153 // skew if good enough, otherwise the supplied default skew. | |
| 154 void ParallelizeBaselines(double default_block_skew); | |
| 155 | |
| 156 // Sets the parameters in TO_BLOCK that are needed by subsequent processes. | |
| 157 void SetupBlockParameters() const; | |
| 158 | |
| 159 // Processing that is required before fitting baseline splines, but requires | |
| 160 // linear baselines in order to be successful: | |
| 161 // Removes noise if required | |
| 162 // Separates out underlines | |
| 163 // Pre-associates blob fragments. | |
| 164 // TODO(rays/joeliu) This entire section of code is inherited from the past | |
| 165 // and could be improved/eliminated. | |
| 166 // page_tr is used to size a debug window. | |
| 167 void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); | |
| 168 | |
| 169 // Fits splines to the textlines, or creates fake QSPLINES from the straight | |
| 170 // baselines that are already on the TO_ROWs. | |
| 171 // As a side-effect, computes the xheights of the rows and the block. | |
| 172 // Although x-height estimation is conceptually separate, it is part of | |
| 173 // detecting perspective distortion and therefore baseline fitting. | |
| 174 void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord); | |
| 175 | |
| 176 // Draws the (straight) baselines and final blobs colored according to | |
| 177 // what was discarded as noise and what is associated with each row. | |
| 178 void DrawFinalRows(const ICOORD &page_tr); | |
| 179 | |
| 180 // Render the generated spline baselines for this block on pix_in. | |
| 181 void DrawPixSpline(Image pix_in); | |
| 182 | |
| 183 private: | |
| 184 // Top-level line-spacing calculation. Computes an estimate of the line- | |
| 185 // spacing, using the current baselines in the TO_ROWS of the block, and | |
| 186 // then refines it by fitting a regression line to the baseline positions | |
| 187 // as a function of their integer index. | |
| 188 // Returns true if it seems that the model is a reasonable fit to the | |
| 189 // observations. | |
| 190 bool ComputeLineSpacing(); | |
| 191 | |
| 192 // Computes the deskewed vertical position of each baseline in the block and | |
| 193 // stores them in the given vector. | |
| 194 void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions); | |
| 195 | |
| 196 // Computes an estimate of the line spacing of the block from the median | |
| 197 // of the spacings between adjacent overlapping textlines. | |
| 198 void EstimateLineSpacing(); | |
| 199 | |
| 200 // Refines the line spacing of the block by fitting a regression | |
| 201 // line to the deskewed y-position of each baseline as a function of its | |
| 202 // estimated line index, allowing for a small error in the initial linespacing | |
| 203 // and choosing the best available model. | |
| 204 void RefineLineSpacing(const std::vector<double> &positions); | |
| 205 | |
| 206 // Given an initial estimate of line spacing (m_in) and the positions of each | |
| 207 // baseline, computes the line spacing of the block more accurately in m_out, | |
| 208 // and the corresponding intercept in c_out, and the number of spacings seen | |
| 209 // in index_delta. Returns the error of fit to the line spacing model. | |
| 210 double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out, | |
| 211 double *c_out, int *index_delta); | |
| 212 | |
| 213 // The block to which this class adds extra information used during baseline | |
| 214 // calculation. | |
| 215 TO_BLOCK *block_; | |
| 216 // The rows in the block that we will be working with. | |
| 217 std::vector<BaselineRow *> rows_; | |
| 218 // Amount of debugging output to provide. | |
| 219 int debug_level_; | |
| 220 // True if the block is non-text (graphic). | |
| 221 bool non_text_block_; | |
| 222 // True if the block has at least one good enough baseline to compute the | |
| 223 // skew angle and therefore skew_angle_ is valid. | |
| 224 bool good_skew_angle_; | |
| 225 // Angle of skew in radians using the conventional anticlockwise from x-axis. | |
| 226 double skew_angle_; | |
| 227 // Current best estimate line spacing in pixels perpendicular to skew_angle_. | |
| 228 double line_spacing_; | |
| 229 // Offset for baseline positions, in pixels. Each baseline is at | |
| 230 // line_spacing_ * n + line_offset_ for integer n, which represents | |
| 231 // [textline] line number in a line numbering system that has line 0 on or | |
| 232 // at least near the x-axis. Not equal to the actual line number of a line | |
| 233 // within a block as most blocks are not near the x-axis. | |
| 234 double line_offset_; | |
| 235 // The error of the line spacing model. | |
| 236 double model_error_; | |
| 237 }; | |
| 238 | |
| 239 class BaselineDetect { | |
| 240 public: | |
| 241 BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks); | |
| 242 | |
| 243 ~BaselineDetect() { | |
| 244 for (auto block : blocks_) { | |
| 245 delete block; | |
| 246 } | |
| 247 } | |
| 248 | |
| 249 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers | |
| 250 // block-wise and page-wise data to smooth small blocks/rows, and applies | |
| 251 // smoothing based on block/page-level skew and block-level linespacing. | |
| 252 void ComputeStraightBaselines(bool use_box_bottoms); | |
| 253 | |
| 254 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and | |
| 255 // other associated side-effects, including pre-associating blobs, computing | |
| 256 // x-heights and displaying debug information. | |
| 257 // NOTE that ComputeStraightBaselines must have been called first as this | |
| 258 // sets up data in the TO_ROWs upon which this function depends. | |
| 259 void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, | |
| 260 bool remove_noise, bool show_final_rows, Textord *textord); | |
| 261 | |
| 262 private: | |
| 263 // Average (median) skew of the blocks on the page among those that have | |
| 264 // a good angle of their own. | |
| 265 FCOORD page_skew_; | |
| 266 // Amount of debug output to produce. | |
| 267 int debug_level_; | |
| 268 // The blocks that we are working with. | |
| 269 std::vector<BaselineBlock *> blocks_; | |
| 270 }; | |
| 271 | |
| 272 } // namespace tesseract | |
| 273 | |
| 274 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_ |
