Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/textlineprojection.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright 2011 Google Inc. All Rights Reserved. | |
| 2 // Author: rays@google.com (Ray Smith) | |
| 3 // | |
| 4 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 // you may not use this file except in compliance with the License. | |
| 6 // You may obtain a copy of the License at | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // Unless required by applicable law or agreed to in writing, software | |
| 9 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 11 // See the License for the specific language governing permissions and | |
| 12 // limitations under the License. | |
| 13 | |
| 14 #ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ | |
| 15 #define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ | |
| 16 | |
| 17 #include "blobgrid.h" // For BlobGrid | |
| 18 | |
| 19 struct Pix; | |
| 20 | |
| 21 namespace tesseract { | |
| 22 | |
| 23 class DENORM; | |
| 24 struct TPOINT; | |
| 25 class ColPartition; | |
| 26 | |
| 27 // Simple class to encapsulate the computation of an image representing | |
| 28 // local textline density, and the function(s) that make use of it. | |
| 29 // The underlying principle is that if you smear connected components | |
| 30 // horizontally (vertically for components on a vertically written textline) | |
| 31 // and count the number of smeared components in an image, then the resulting | |
| 32 // image shows the density of the textlines at each image position. | |
| 33 class TESS_API TextlineProjection { | |
| 34 public: | |
| 35 // The down-scaling factor is chosen so that the projection has a resolution | |
| 36 // of about 100 dpi, whatever the resolution of the input. | |
| 37 explicit TextlineProjection(int resolution); | |
| 38 ~TextlineProjection(); | |
| 39 | |
| 40 // Builds the projection profile given the input_block containing lists of | |
| 41 // blobs, a rotation to convert to image coords, | |
| 42 // and a full-resolution nontext_map, marking out areas to avoid. | |
| 43 // During construction, the following assumptions are made: | |
| 44 // The rotation is a multiple of 90 degrees, i.e. no deskew yet. | |
| 45 // The blobs have had their left and right rules set to also limit | |
| 46 // the range of projection. | |
| 47 void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map); | |
| 48 | |
| 49 // Displays the blobs in the window, colored according to textline quality. | |
| 50 void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win); | |
| 51 | |
| 52 // Moves blobs that look like they don't sit well on a textline from the | |
| 53 // input blobs list to the output small_blobs list. | |
| 54 // This gets them away from initial textline finding to stop diacritics | |
| 55 // from forming incorrect textlines. (Introduced mainly to fix Thai.) | |
| 56 void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const; | |
| 57 | |
| 58 // Creates a window and displays the projection in it. | |
| 59 void DisplayProjection() const; | |
| 60 | |
| 61 // Computes the distance of the box from the partition using curved projection | |
| 62 // space. As DistanceOfBoxFromBox, except that the direction is taken from | |
| 63 // the ColPartition and the median bounds of the ColPartition are used as | |
| 64 // the to_box. | |
| 65 int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, | |
| 66 bool debug) const; | |
| 67 | |
| 68 // Computes the distance from the from_box to the to_box using curved | |
| 69 // projection space. Separation that involves a decrease in projection | |
| 70 // density (moving from the from_box to the to_box) is weighted more heavily | |
| 71 // than constant density, and an increase is weighted less. | |
| 72 // If horizontal_textline is true, then curved space is used vertically, | |
| 73 // as for a diacritic on the edge of a textline. | |
| 74 // The projection uses original image coords, so denorm is used to get | |
| 75 // back to the image coords from box/part space. | |
| 76 int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, | |
| 77 const DENORM *denorm, bool debug) const; | |
| 78 | |
| 79 // Computes the distance between (x, y1) and (x, y2) using the rule that | |
| 80 // a decrease in textline density is weighted more heavily than an increase. | |
| 81 // The coordinates are in source image space, i.e. processed by any denorm | |
| 82 // already, but not yet scaled by scale_factor_. | |
| 83 // Going from the outside of a textline to the inside should measure much | |
| 84 // less distance than going from the inside of a textline to the outside. | |
| 85 int VerticalDistance(bool debug, int x, int y1, int y2) const; | |
| 86 | |
| 87 // Computes the distance between (x1, y) and (x2, y) using the rule that | |
| 88 // a decrease in textline density is weighted more heavily than an increase. | |
| 89 int HorizontalDistance(bool debug, int x1, int x2, int y) const; | |
| 90 | |
| 91 // Returns true if the blob appears to be outside of a horizontal textline. | |
| 92 // Such blobs are potentially diacritics (even if large in Thai) and should | |
| 93 // be kept away from initial textline finding. | |
| 94 bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const; | |
| 95 | |
| 96 // Evaluates how textline-like a ColPartition is. Uses EvaluateBox below, | |
| 97 // but uses the median top/bottom for horizontal and median left/right for | |
| 98 // vertical instead of the bounding box edges. | |
| 99 // Evaluates for both horizontal and vertical and returns the best result, | |
| 100 // with a positive value for horizontal and a negative value for vertical. | |
| 101 int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const; | |
| 102 | |
| 103 // Computes the mean projection gradients over the horizontal and vertical | |
| 104 // edges of the box: | |
| 105 // -h-h-h-h-h-h | |
| 106 // |------------| mean=htop -v|+v--------+v|-v | |
| 107 // |+h+h+h+h+h+h| -v|+v +v|-v | |
| 108 // | | -v|+v +v|-v | |
| 109 // | box | -v|+v box +v|-v | |
| 110 // | | -v|+v +v|-v | |
| 111 // |+h+h+h+h+h+h| -v|+v +v|-v | |
| 112 // |------------| mean=hbot -v|+v--------+v|-v | |
| 113 // -h-h-h-h-h-h | |
| 114 // mean=vleft mean=vright | |
| 115 // | |
| 116 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number | |
| 117 // for a horizontal textline, a negative number for a vertical textline, | |
| 118 // and near zero for undecided. Undecided is most likely non-text. | |
| 119 int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const; | |
| 120 | |
| 121 private: | |
| 122 // Internal version of EvaluateBox that returns the unclipped gradients as | |
| 123 // well as the result of EvaluateBox. | |
| 124 // hgrad1 and hgrad2 are the gradients for the horizontal textline; vgrad1 and vgrad2 are presumably the vertical counterparts (confirm in the .cpp). | |
| 125 int EvaluateBoxInternal(const TBOX &box, const DENORM *denorm, bool debug, int *hgrad1, | |
| 126 int *hgrad2, int *vgrad1, int *vgrad2) const; | |
| 127 | |
| 128 // Helper returns the mean gradient value for the horizontal row at the given | |
| 129 // y (in the external coordinates), by subtracting the mean of the transformed | |
| 130 // row 2 pixels above from the mean of the transformed row 2 pixels below. | |
| 131 // This gives a positive value for a good top edge and negative for bottom. | |
| 132 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge; best_is_max presumably selects whether "best" means largest or smallest (confirm in the .cpp). | |
| 133 int BestMeanGradientInRow(const DENORM *denorm, int16_t min_x, int16_t max_x, int16_t y, | |
| 134 bool best_is_max) const; | |
| 135 | |
| 136 // Helper returns the mean gradient value for the vertical column at the | |
| 137 // given x (in the external coordinates), by subtracting the mean of the | |
| 138 // transformed column 2 pixels left from the mean of the transformed column | |
| 139 // 2 pixels to the right. | |
| 140 // This gives a positive value for a good left edge and negative for right. | |
| 141 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge; best_is_max presumably selects whether "best" means largest or smallest (confirm in the .cpp). | |
| 142 int BestMeanGradientInColumn(const DENORM *denorm, int16_t x, int16_t min_y, int16_t max_y, | |
| 143 bool best_is_max) const; | |
| 144 | |
| 145 // Helper returns the mean pixel value over the line between the start_pt and | |
| 146 // end_pt (inclusive), but shifted perpendicular to the line in the projection | |
| 147 // image by offset pixels. For simplicity, it is assumed that the vector is | |
| 148 // either nearly horizontal or nearly vertical. It works on skewed textlines! | |
| 149 // The end points are in external coordinates, and will be denormalized with | |
| 150 // the denorm if not nullptr before further conversion to pix coordinates. | |
| 151 // After all the conversions, the offset is added to the direction | |
| 152 // perpendicular to the line direction. The offset is thus in projection image | |
| 153 // coordinates, which allows the caller to get a guaranteed displacement | |
| 154 // between pixels used to calculate gradients. | |
| 155 int MeanPixelsInLineSegment(const DENORM *denorm, int offset, TPOINT start_pt, | |
| 156 TPOINT end_pt) const; | |
| 157 | |
| 158 // Helper function to add 1 to the pixels of the internal projection pix_ | |
| 159 // that are covered by the given rectangle (in source image coords). | |
| 160 void IncrementRectangle8Bit(const TBOX &box); | |
| 161 // Inserts a list of blobs into the projection. | |
| 162 // Rotation is a multiple of 90 degrees to get from blob coords to | |
| 163 // nontext_map coords, image_box is the bounds of the nontext_map. | |
| 164 // Blobs are spread horizontally or vertically according to their internal | |
| 165 // flags, but the spreading is truncated by set pixels in the nontext_map | |
| 166 // and also by the horizontal rule line limits on the blobs. | |
| 167 void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box, | |
| 168 Image nontext_map); | |
| 169 // Pads the bounding box of the given blob according to whether it is on | |
| 170 // a horizontal or vertical text line, taking into account tab-stops near | |
| 171 // the blob. Returns true if padding was in the horizontal direction. | |
| 172 bool PadBlobBox(BLOBNBOX *blob, TBOX *bbox); | |
| 173 | |
| 174 // Helper denormalizes the TPOINT with the denorm if not nullptr, then | |
| 175 // converts to pix_ coordinates. | |
| 176 void TransformToPixCoords(const DENORM *denorm, TPOINT *pt) const; | |
| 177 | |
| 178 // Helper truncates the TPOINT to be within the pix_. | |
| 179 void TruncateToImageBounds(TPOINT *pt) const; | |
| 180 | |
| 181 // Transform tesseract coordinates to coordinates used in the pix. | |
| 182 int ImageXToProjectionX(int x) const; | |
| 183 int ImageYToProjectionY(int y) const; | |
| 184 | |
| 185 // The down-sampling scale factor used in building the image. | |
| 186 int scale_factor_; | |
| 187 // The position of the top-left corner (the origin of the pix_) in tesseract | |
| 188 // coordinates. Used to transform the bottom-up tesseract coordinates to | |
| 189 // the top-down coordinates of the pix. | |
| 190 int x_origin_; | |
| 191 int y_origin_; | |
| 192 // The image of horizontally smeared blob boxes summed to provide a | |
| 193 // textline density map. As with a horizontal projection, the map has | |
| 194 // dips in the gaps between textlines. | |
| 195 Image pix_; | |
| 196 }; | |
| 197 | |
| 198 } // namespace tesseract. | |
| 199 | |
| 200 #endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ |
