Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/colpartitionset.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: colpartitionset.h | |
| 3 // Description: Class to hold a list of ColPartitions of the page that | |
| 4 // correspond roughly to columns. | |
| 5 // Author: Ray Smith | |
| 6 // | |
| 7 // (C) Copyright 2008, Google Inc. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_ | |
| 21 #define TESSERACT_TEXTORD_COLPARTITIONSET_H_ | |
| 22 | |
| 23 #include "colpartition.h" // For ColPartition_LIST. | |
| 24 #include "rect.h" // For TBOX. | |
| 25 #include "tabvector.h" // For BLOBNBOX_CLIST. | |
| 26 | |
| 27 namespace tesseract { | |
| 28 | |
| 29 class WorkingPartSet_LIST; /* Forward declaration: this header only uses it via pointer (ChangeWorkColumns). */ | |
| 30 class ColSegment_LIST; /* Forward declaration: this header only uses it via pointer (GetColumnBoxes). */ | |
| 31 class ColPartitionSet; /* Declared early so the PartSetVector alias below can name it. */ | |
| 32 using PartSetVector = std::vector<ColPartitionSet *>; /* Vector of raw ColPartitionSet pointers. */ | |
| 33 | |
| 34 // ColPartitionSet is a class that holds a list of ColPartitions. | |
| 35 // Its main use is in holding a candidate partitioning of the width of the | |
| 36 // image into columns, where each member ColPartition is a single column. | |
| 37 // ColPartitionSets are used in building the column layout of a page. | |
| 38 class ColPartitionSet : public ELIST_LINK { | |
| 39 public: | |
| 40 ColPartitionSet() = default; | |
| 41 explicit ColPartitionSet(ColPartition_LIST *partitions); | |
| 42 explicit ColPartitionSet(ColPartition *partition); | |
| 43 | |
| 44 ~ColPartitionSet() = default; | |
| 45 | |
| 46 // Simple accessors. | |
| 47 const TBOX &bounding_box() const { | |
| 48 return bounding_box_; | |
| 49 } | |
| 50 bool Empty() const { | |
| 51 return parts_.empty(); | |
| 52 } | |
| 53 int ColumnCount() const { | |
| 54 return parts_.length(); | |
| 55 } | |
| 56 | |
| 57 // Returns the number of columns of good width. | |
| 58 int GoodColumnCount() const; | |
| 59 | |
| 60 // Return an element of the parts_ list from its index. | |
| 61 ColPartition *GetColumnByIndex(int index); | |
| 62 | |
| 63 // Return the ColPartition that contains the given coords, if any, else | |
| 64 // nullptr. | |
| 65 ColPartition *ColumnContaining(int x, int y); | |
| 66 | |
| 67 // Return the bounding boxes of columns at the given y-range. | |
| 68 void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments); | |
| 69 | |
| 70 // Extract all the parts from the list, relinquishing ownership. | |
| 71 void RelinquishParts(); | |
| 72 | |
| 73 // Attempt to improve this by adding partitions or expanding partitions. | |
| 74 void ImproveColumnCandidate(const WidthCallback &cb, PartSetVector *src_sets); | |
| 75 | |
| 76 // If this set is good enough to represent a new partitioning into columns, | |
| 77 // add it to the vector of sets, otherwise delete it. | |
| 78 void AddToColumnSetsIfUnique(PartSetVector *column_sets, | |
| 79 const WidthCallback &cb); | |
| 80 | |
| 81 // Return true if the partitions in other are all compatible with the columns | |
| 82 // in this. | |
| 83 bool CompatibleColumns(bool debug, ColPartitionSet *other, | |
| 84 const WidthCallback &cb); | |
| 85 | |
| 86 // Returns the total width of all blobs in the part_set that do not lie | |
| 87 // within an approved column. Used as a cost measure for using this | |
| 88 // column set over another that might be compatible. | |
| 89 int UnmatchedWidth(ColPartitionSet *part_set); | |
| 90 | |
| 91 // Return true if this ColPartitionSet makes a legal column candidate by | |
| 92 // having legal individual partitions and non-overlapping adjacent pairs. | |
| 93 bool LegalColumnCandidate(); | |
| 94 | |
| 95 // Return a copy of this. If good_only is set, only the Good ColPartitions are copied. | |
| 96 ColPartitionSet *Copy(bool good_only); | |
| 97 | |
| 98 // Display the edges of the columns at the given y coords. | |
| 99 void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win); | |
| 100 | |
| 101 // Return the ColumnSpanningType that best explains the columns overlapped | |
| 102 // by the given coords(left,right,y), with the given margins. | |
| 103 // Also return the first and last column index touched by the coords and | |
| 104 // the leftmost spanned column. | |
| 105 // Column indices are 2n + 1 for real columns (0 based) and even values | |
| 106 // represent the gaps in between columns, with 0 being left of the leftmost. | |
| 107 // resolution refers to the ppi resolution of the image. It may be 0 if only | |
| 108 // the first_col and last_col are required. | |
| 109 ColumnSpanningType SpanningType(int resolution, int left, int right, | |
| 110 int height, int y, int left_margin, | |
| 111 int right_margin, int *first_col, | |
| 112 int *last_col, int *first_spanned_col); | |
| 113 | |
| 114 // The column_set has changed. Close down all in-progress WorkingPartSets in | |
| 115 // columns that do not match and start new ones for the new columns in this. | |
| 116 // As ColPartitions are turned into BLOCKs, the used ones are put in | |
| 117 // used_parts, as they still need to be referenced in the grid. | |
| 118 void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, | |
| 119 int resolution, ColPartition_LIST *used_parts, | |
| 120 WorkingPartSet_LIST *working_set); | |
| 121 | |
| 122 // Accumulate the widths and gaps into the given variables. | |
| 123 void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, | |
| 124 int *total_gap, int *gap_samples); | |
| 125 | |
| 126 // Provide debug output for this ColPartitionSet and all the ColPartitions. | |
| 127 void Print(); | |
| 128 | |
| 129 private: | |
| 130 // Add the given partition to the list in the appropriate place. | |
| 131 void AddPartition(ColPartition *new_part, ColPartition_IT *it); | |
| 132 | |
| 133 // Compute the coverage and good column count. Coverage is the amount of the | |
| 134 // width of the page (in pixels) that is covered by ColPartitions, which are | |
| 135 // used to provide candidate column layouts. | |
| 136 // Coverage is split into good and bad. Good coverage is provided by | |
| 137 // ColPartitions of a frequent width (according to the callback function | |
| 138 // provided by TabFinder::WidthCB, which accesses stored statistics on the | |
| 139 // widths of ColPartitions) and bad coverage is provided by all other | |
| 140 // ColPartitions, even if they have tab vectors at both sides. Thus: | |
| 141 // |-----------------------------------------------------------------| | |
| 142 // | Double width heading | | |
| 143 // |-----------------------------------------------------------------| | |
| 144 // |-------------------------------| |-------------------------------| | |
| 145 // | Common width ColPartition | | Common width ColPartition | | |
| 146 // |-------------------------------| |-------------------------------| | |
| 147 // the layout with two common-width columns has better coverage than the | |
| 148 // double width heading, because the coverage is "good," even though less in | |
| 149 // total coverage than the heading, because the heading coverage is "bad." | |
| 150 void ComputeCoverage(); | |
| 151 | |
| 152 // Adds the coverage, column count and box for a single partition, | |
| 153 // without adding it to the list. (Helper factored from ComputeCoverage.) | |
| 154 void AddPartitionCoverageAndBox(const ColPartition &part); | |
| 155 | |
| 156 // The partitions in this column candidate. | |
| 157 ColPartition_LIST parts_; | |
| 158 // The number of partitions that have a frequent column width. Starts at 0 so a default-constructed set holds no indeterminate value. | |
| 159 int good_column_count_ = 0; | |
| 160 // Total width of all the good ColPartitions. Starts at 0 for the same reason. | |
| 161 int good_coverage_ = 0; | |
| 162 // Total width of all the bad ColPartitions. Starts at 0 for the same reason. | |
| 163 int bad_coverage_ = 0; | |
| 164 // Bounding box of all partitions in the set. | |
| 165 TBOX bounding_box_; | |
| 166 }; | |
| 167 | |
| 168 ELISTIZEH(ColPartitionSet) | |
| 169 | |
| 170 } // namespace tesseract. | |
| 171 | |
| 172 #endif // TESSERACT_TEXTORD_COLPARTITIONSET_H_ |
