Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/strokewidth.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: strokewidth.h | |
| 3 // Description: Subclass of BBGrid to find uniformity of strokewidth. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2008, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_ | |
| 20 #define TESSERACT_TEXTORD_STROKEWIDTH_H_ | |
| 21 | |
| 22 #include "blobbox.h" // BlobNeighbourDir. | |
| 23 #include "blobgrid.h" // Base class. | |
| 24 #include "colpartitiongrid.h" | |
| 25 #include "textlineprojection.h" | |
| 26 | |
| 27 class DENORM; | |
| 28 class ScrollView; | |
| 29 class TO_BLOCK; | |
| 30 | |
| 31 namespace tesseract { | |
| 32 | |
| 33 class ColPartition_LIST; | |
| 34 class TabFind; | |
| 35 class TextlineProjection; | |
| 36 | |
| 37 // Names a horizontal side, so a side-selecting argument need not be a bare bool. | |
| 38 enum LeftOrRight { LR_LEFT = 0, LR_RIGHT = 1 }; | |
| 39 | |
| 40 // Outcome reported by FindInitialPartitions: either success, or the kind of | |
| 41 // severe problem (skew or noise) that was detected. | |
| 42 enum PartitionFindResult { | |
| 43 PFR_OK = 0, // Everything is OK. | |
| 44 PFR_SKEW = 1, // Skew was detected and rotated. | |
| 45 PFR_NOISE = 2 // Noise was detected and removed. | |
| 46 }; | |
| 47 | |
| 48 /** | |
| 49 * The StrokeWidth class holds all the normal and large blobs. | |
| 50 * It is used to find good large blobs and move them to the normal blobs | |
| 51 * by virtue of having a reasonable strokewidth compatible neighbour. | |
| 52 */ | |
| 53 class StrokeWidth : public BlobGrid { | |
| 54 public: | |
| 55 StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright); | |
| 56 ~StrokeWidth() override; | |
| 57 | |
| 58 // Sets the neighbours member of the medium-sized blobs in the block. | |
| 59 // Searches on 4 sides of each blob for similar-sized, similar-strokewidth | |
| 60 // blobs and sets pointers to the good neighbours. | |
| 61 void SetNeighboursOnMediumBlobs(TO_BLOCK *block); | |
| 62 | |
| 63 // Sets the neighbour/textline writing direction members of the medium | |
| 64 // and large blobs with optional repair of broken CJK characters first. | |
| 65 // Repair of broken CJK is needed here because broken CJK characters | |
| 66 // can fool the textline direction detection algorithm. | |
| 67 void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, | |
| 68 TO_BLOCK *input_block); | |
| 69 | |
| 70 // To save computation, the process of generating partitions is broken | |
| 71 // into the following 4 steps: | |
| 72 // TestVerticalTextDirection | |
| 73 // CorrectForRotation (used only if a rotation is to be applied) | |
| 74 // FindLeaderPartitions | |
| 75 // GradeBlobsIntoPartitions. | |
| 76 // These functions are all required, in sequence, except for | |
| 77 // CorrectForRotation, which is not needed if no rotation is applied. | |
| 78 | |
| 79 // Types all the blobs as vertical or horizontal text or unknown and | |
| 80 // returns true if the majority are vertical. | |
| 81 // If the blobs are rotated, it is necessary to call CorrectForRotation | |
| 82 // after rotating everything, otherwise the work done here will be enough. | |
| 83 // If osd_blobs is not null, a list of blobs from the dominant textline | |
| 84 // direction is returned for use in orientation and script detection. | |
| 85 // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. | |
| 86 bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, | |
| 87 BLOBNBOX_CLIST *osd_blobs); | |
| 88 | |
| 89 // Corrects the data structures for the given rotation. | |
| 90 void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid); | |
| 91 | |
| 92 // Finds leader partitions and inserts them into the given grid. | |
| 93 void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid); | |
| 94 | |
| 95 // Finds those blobs that look like bits of vertical lines and marks them as | |
| 96 // noise, since they would otherwise screw up layout analysis. | |
| 97 void RemoveLineResidue(ColPartition_LIST *big_part_list); | |
| 98 | |
| 99 // Types all the blobs as vertical text or horizontal text or unknown and | |
| 100 // puts them into initial ColPartitions in the supplied part_grid. | |
| 101 // rerotation determines how to get back to the image coordinates from the | |
| 102 // blob coordinates (since they may have been rotated for vertical text). | |
| 103 // block is the single block for the whole page or rectangle to be OCRed. | |
| 104 // nontext_pix (full-size), is a binary mask used to prevent merges across | |
| 105 // photo/text boundaries. It is not kept beyond this function. | |
| 106 // denorm provides a mapping back to the image from the current blob | |
| 107 // coordinate space. | |
| 108 // projection provides a measure of textline density over the image and | |
| 109 // provides functions to assist with diacritic detection. It should be a | |
| 110 // pointer to a new TextlineProjection, and will be setup here. | |
| 111 // part_grid is the output grid of textline partitions. | |
| 112 // Large blobs that cause overlap are put in separate partitions and added | |
| 113 // to the big_parts list. | |
| 114 void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, | |
| 115 Image nontext_pix, const DENORM *denorm, bool cjk_script, | |
| 116 TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, | |
| 117 ColPartitionGrid *part_grid, ColPartition_LIST *big_parts); | |
| 118 | |
| 119 // Handles a click event in a display window. | |
| 120 void HandleClick(int x, int y) override; | |
| 121 | |
| 122 private: | |
| 123 // Computes the noise_density_ by summing the number of elements in a | |
| 124 // neighbourhood of each grid cell. | |
| 125 void ComputeNoiseDensity(TO_BLOCK *block, TabFind *line_grid); | |
| 126 | |
| 127 // Detects and marks leader dots/dashes. | |
| 128 // Leaders are horizontal chains of small or noise blobs that look | |
| 129 // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). | |
| 130 // Detected leaders become the only occupants of the block->small_blobs list. | |
| 131 // Non-leader small blobs get moved to the blobs list. | |
| 132 // Non-leader noise blobs remain singletons in the noise list. | |
| 133 // All small and noise blobs in high density regions are marked BTFT_NONTEXT. | |
| 134 // block is the single block for the whole page or rectangle to be OCRed. | |
| 135 // leader_parts is the output. | |
| 136 void FindLeadersAndMarkNoise(TO_BLOCK *block, ColPartition_LIST *leader_parts); | |
| 137 | |
| 138 /** Inserts the block blobs (normal and large) into this grid. | |
| 139 * Blobs remain owned by the block. */ | |
| 140 void InsertBlobs(TO_BLOCK *block); | |
| 141 | |
| 142 // Fix broken CJK characters, using the fake joined blobs mechanism. | |
| 143 // Blobs are really merged, i.e. the master takes all the outlines and the | |
| 144 // others are deleted. | |
| 145 // Returns true if sufficient blobs are merged that it may be worth running | |
| 146 // again, due to a better estimate of character size. | |
| 147 bool FixBrokenCJK(TO_BLOCK *block); | |
| 148 | |
| 149 // Collect blobs that overlap or are within max_dist of the input bbox. | |
| 150 // Return them in the list of blobs and expand the bbox to be the union | |
| 151 // of all the boxes. not_this is excluded from the search, as are blobs | |
| 152 // that cause the merged box to exceed max_size in either dimension. | |
| 153 void AccumulateOverlaps(const BLOBNBOX *not_this, bool debug, int max_size, int max_dist, | |
| 154 TBOX *bbox, BLOBNBOX_CLIST *blobs); | |
| 155 | |
| 156 // For each blob in this grid, finds the textline direction to be horizontal | |
| 157 // or vertical according to distance to neighbours and 1st and 2nd order | |
| 158 // neighbours. Non-text tends to end up without a definite direction. | |
| 159 // Result is setting of the neighbours and vert_possible/horz_possible | |
| 160 // flags in the BLOBNBOXes currently in this grid. | |
| 161 // This function is called more than once if page orientation is uncertain, | |
| 162 // so display_if_debugging is true on the final call to display the results. | |
| 163 void FindTextlineFlowDirection(PageSegMode pageseg_mode, bool display_if_debugging); | |
| 164 | |
| 165 // Sets the neighbours and good_stroke_neighbours members of the blob by | |
| 166 // searching close on all 4 sides. | |
| 167 // When finding leader dots/dashes, there is a slightly different rule for | |
| 168 // what makes a good neighbour. | |
| 169 // If activate_line_trap, then line-like objects are found and isolated. | |
| 170 void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX *blob); | |
| 171 | |
| 172 // Sets the good_stroke_neighbours member of the blob if it has a | |
| 173 // GoodNeighbour on the given side. | |
| 174 // Also sets the neighbour in the blob, whether or not a good one is found. | |
| 175 // Return value is the number of neighbours in the line trap size range. | |
| 176 // Leaders get extra special lenient treatment. | |
| 177 int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX *blob); | |
| 178 | |
| 179 // Restricts the blob to being only horizontal or only vertical where the | |
| 180 // evidence is clear, based on gaps of 2nd order neighbours. | |
| 181 void SetNeighbourFlows(BLOBNBOX *blob); | |
| 182 | |
| 183 // Nullify the neighbours in the wrong directions where the direction | |
| 184 // is clear-cut based on a distance margin. Good for isolating vertical | |
| 185 // text from neighbouring horizontal text. | |
| 186 void SimplifyObviousNeighbours(BLOBNBOX *blob); | |
| 187 | |
| 188 // Smoothes the vertical/horizontal type of the blob based on the | |
| 189 // 2nd-order neighbours. If desperate (a.k.a. reset_all) is true, then all | |
| 190 // blobs are changed. Otherwise, only ambiguous blobs are processed. | |
| 191 void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, BLOBNBOX *blob); | |
| 192 | |
| 193 // Checks the left or right side of the given leader partition and sets the | |
| 194 // (opposite) leader_on_right or leader_on_left flags for blobs | |
| 195 // that are next to the given side of the given leader partition. | |
| 196 void MarkLeaderNeighbours(const ColPartition *part, LeftOrRight side); | |
| 197 | |
| 198 // Partition creation. Accumulates vertical and horizontal text chains, | |
| 199 // puts the remaining blobs in as unknowns, and then merges/splits to | |
| 200 // minimize overlap and smoothes the types with neighbours and the color | |
| 201 // image if provided. rerotation is used to rotate the coordinate space | |
| 202 // back to the nontext_map_ image. | |
| 203 // If find_problems is true, detects possible noise pollution by the amount | |
| 204 // of partition overlap that is created by the diacritics. If excessive, the | |
| 205 // noise is separated out into diacritic blobs, and PFR_NOISE is returned. | |
| 206 // [TODO(rays): if the partition overlap is caused by heavy skew, deskews | |
| 207 // the components, saves the skew_angle and returns PFR_SKEW.] If the return | |
| 208 // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be | |
| 209 // called again after cleaning up the partly done work. | |
| 210 PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, | |
| 211 bool find_problems, TO_BLOCK *block, | |
| 212 BLOBNBOX_LIST *diacritic_blobs, | |
| 213 ColPartitionGrid *part_grid, | |
| 214 ColPartition_LIST *big_parts, FCOORD *skew_angle); | |
| 215 // Detects noise by a significant increase in partition overlap from | |
| 216 // pre_overlap to now, and removes noise from the union of all the overlapping | |
| 217 // partitions, placing the blobs in diacritic_blobs. Returns true if any noise | |
| 218 // was found and removed. | |
| 219 bool DetectAndRemoveNoise(int pre_overlap, const TBOX &grid_box, TO_BLOCK *block, | |
| 220 ColPartitionGrid *part_grid, BLOBNBOX_LIST *diacritic_blobs); | |
| 221 // Finds vertical chains of text-like blobs and puts them in ColPartitions. | |
| 222 void FindVerticalTextChains(ColPartitionGrid *part_grid); | |
| 223 // Finds horizontal chains of text-like blobs and puts them in ColPartitions. | |
| 224 void FindHorizontalTextChains(ColPartitionGrid *part_grid); | |
| 225 // Finds diacritics and saves their base character in the blob. | |
| 226 void TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block); | |
| 227 // Searches this grid for an appropriately close and sized neighbour of the | |
| 228 // given [small] blob. If such a blob is found, the diacritic base is saved | |
| 229 // in the blob and true is returned. | |
| 230 // The small_grid is a secondary grid that contains the small/noise objects | |
| 231 // that are not in this grid, but may be useful for determining a connection | |
| 232 // between blob and its potential base character. (See DiacriticXGapFilled.) | |
| 233 bool DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob); | |
| 234 // Returns true if there is no gap between the base char and the diacritic | |
| 235 // bigger than a fraction of the height of the base char: | |
| 236 // Eg: line end.....' | |
| 237 // The quote is a long way from the end of the line, yet it needs to be a | |
| 238 // diacritic. To determine that the quote is not part of an image, or | |
| 239 // a different text block, we check for other marks in the gap between | |
| 240 // the base char and the diacritic. | |
| 241 // '<--Diacritic | |
| 242 // |---------| | |
| 243 // | |<-toobig-gap-> | |
| 244 // | Base |<ok gap> | |
| 245 // |---------| x<-----Dot occupying gap | |
| 246 // The grid is const really. | |
| 247 bool DiacriticXGapFilled(BlobGrid *grid, const TBOX &diacritic_box, const TBOX &base_box); | |
| 248 // Merges diacritics with the ColPartition of the base character blob. | |
| 249 void MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid); | |
| 250 // Any blobs on the large_blobs list of block that are still unowned by a | |
| 251 // ColPartition, are probably drop-cap or vertically touching so the blobs | |
| 252 // are removed to the big_parts list and treated separately. | |
| 253 void RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid, | |
| 254 ColPartition_LIST *big_parts); | |
| 255 | |
| 256 // All remaining unused blobs are put in individual ColPartitions. | |
| 257 void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid *part_grid); | |
| 258 | |
| 259 // If combine, put all blobs in the cell_list into a single partition, | |
| 260 // otherwise put each one into its own partition. | |
| 261 void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, | |
| 262 ColPartitionGrid *part_grid, BLOBNBOX_CLIST *cell_list); | |
| 263 | |
| 264 // Helper function to finish setting up a ColPartition and insert into | |
| 265 // part_grid. | |
| 266 void CompletePartition(PageSegMode pageseg_mode, ColPartition *part, ColPartitionGrid *part_grid); | |
| 267 | |
| 268 // Reports whether only vertical textlines are being sought. The answer is | |
| 269 // flipped when rerotation_.y() is non-zero, to account for rotation already done. | |
| 270 bool FindingVerticalOnly(PageSegMode pageseg_mode) const { | |
| 271 const bool vert_block_mode = pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; | |
| 272 const bool rotated = rerotation_.y() != 0.0f; | |
| 273 return rotated ? !PSM_ORIENTATION_ENABLED(pageseg_mode) && !vert_block_mode | |
| 274 : vert_block_mode; | |
| 275 } | |
| 276 // Reports whether only horizontal textlines are being sought. The answer is | |
| 277 // flipped when rerotation_.y() is non-zero, to account for rotation already done. | |
| 278 bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { | |
| 279 const bool vert_block_mode = pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; | |
| 280 const bool rotated = rerotation_.y() != 0.0f; | |
| 281 return rotated ? vert_block_mode | |
| 282 : !PSM_ORIENTATION_ENABLED(pageseg_mode) && !vert_block_mode; | |
| 283 } | |
| 284 | |
| 285 // Merge partitions where the merge appears harmless. | |
| 286 void EasyMerges(ColPartitionGrid *part_grid); | |
| 287 | |
| 288 // Compute a search box based on the orientation of the partition. | |
| 289 // Returns true if a suitable box can be calculated. | |
| 290 // Callback for EasyMerges. | |
| 291 bool OrientationSearchBox(ColPartition *part, TBOX *box); | |
| 292 | |
| 293 // Merge confirmation callback for EasyMerges. | |
| 294 bool ConfirmEasyMerge(const ColPartition *p1, const ColPartition *p2); | |
| 295 | |
| 296 // Returns true if there is no significant noise in between the boxes. | |
| 297 bool NoNoiseInBetween(const TBOX &box1, const TBOX &box2) const; | |
| 298 | |
| 299 #ifndef GRAPHICS_DISABLED | |
| 300 // Displays the blobs colored according to the number of good neighbours | |
| 301 // and the vertical/horizontal flow. | |
| 302 ScrollView *DisplayGoodBlobs(const char *window_name, int x, int y); | |
| 303 | |
| 304 // Displays blobs colored according to whether or not they are diacritics. | |
| 305 ScrollView *DisplayDiacritics(const char *window_name, int x, int y, TO_BLOCK *block); | |
| 306 #endif | |
| 307 | |
| 308 private: | |
| 309 // Image map of photo/noise areas on the page. Borrowed pointer (not owned). | |
| 310 Image nontext_map_; | |
| 311 // Textline projection map. Borrowed pointer. | |
| 312 TextlineProjection *projection_; | |
| 313 // DENORM used by projection_ to get back to image coords. Borrowed pointer. | |
| 314 const DENORM *denorm_; | |
| 315 // Bounding box of the grid. | |
| 316 TBOX grid_box_; | |
| 317 // Rerotation to get back to the original image. | |
| 318 FCOORD rerotation_; | |
| 319 #ifndef GRAPHICS_DISABLED | |
| 320 // Windows for debug display. | |
| 321 ScrollView *leaders_win_ = nullptr; | |
| 322 ScrollView *initial_widths_win_ = nullptr; | |
| 323 ScrollView *widths_win_ = nullptr; | |
| 324 ScrollView *chains_win_ = nullptr; | |
| 325 ScrollView *diacritics_win_ = nullptr; | |
| 326 ScrollView *textlines_win_ = nullptr; | |
| 327 ScrollView *smoothed_win_ = nullptr; | |
| 328 #endif | |
| 329 }; | |
| 330 | |
| 331 } // namespace tesseract. | |
| 332 | |
| 333 #endif // TESSERACT_TEXTORD_STROKEWIDTH_H_ |
