comparison mupdf-source/thirdparty/tesseract/src/textord/devanagari_processing.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15
16 #include <allheaders.h>
17 #include "ocrblock.h"
18 #include "params.h"
19
20 struct Pix;
21 struct Box;
22 struct Boxa;
23
24 namespace tesseract {
25
26 extern INT_VAR_H(devanagari_split_debuglevel);
27
28 extern BOOL_VAR_H(devanagari_split_debugimage);
29
30 class TBOX;
31 class DebugPixa;
32
33 class PixelHistogram {
34 public:
35 PixelHistogram() {
36 hist_ = nullptr;
37 length_ = 0;
38 }
39
40 ~PixelHistogram() {
41 Clear();
42 }
43
44 void Clear() {
45 delete[] hist_;
46 length_ = 0;
47 }
48
49 int *hist() const {
50 return hist_;
51 }
52
53 int length() const {
54 return length_;
55 }
56
57 // Methods to construct histograms from images. These clear any existing data.
58 void ConstructVerticalCountHist(Image pix);
59 void ConstructHorizontalCountHist(Image pix);
60
61 // This method returns the global-maxima for the histogram. The frequency of
62 // the global maxima is returned in count, if specified.
63 int GetHistogramMaximum(int *count) const;
64
65 private:
66 int *hist_;
67 int length_;
68 };
69
70 class ShiroRekhaSplitter {
71 public:
72 enum SplitStrategy {
73 NO_SPLIT = 0, // No splitting is performed for the phase.
74 MINIMAL_SPLIT, // Blobs are split minimally.
75 MAXIMAL_SPLIT // Blobs are split maximally.
76 };
77
78 ShiroRekhaSplitter();
79 virtual ~ShiroRekhaSplitter();
80
81 // Top-level method to perform splitting based on current settings.
82 // Returns true if a split was actually performed.
83 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
84 // splitting. If false, the ocr_split_strategy_ is used.
85 bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);
86
87 // Clears the memory held by this object.
88 void Clear();
89
90 // Refreshes the words in the segmentation block list by using blobs in the
91 // input blob list.
92 // The segmentation block list must be set.
93 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);
94
95 // Returns true if the split strategies for pageseg and ocr are different.
96 bool HasDifferentSplitStrategies() const {
97 return pageseg_split_strategy_ != ocr_split_strategy_;
98 }
99
100 // This only keeps a copy of the block list pointer. At split call, the list
101 // object should still be alive. This block list is used as a golden
102 // segmentation when performing splitting.
103 void set_segmentation_block_list(BLOCK_LIST *block_list) {
104 segmentation_block_list_ = block_list;
105 }
106
107 static const int kUnspecifiedXheight = -1;
108
109 void set_global_xheight(int xheight) {
110 global_xheight_ = xheight;
111 }
112
113 void set_perform_close(bool perform) {
114 perform_close_ = perform;
115 }
116
117 // Returns the image obtained from shiro-rekha splitting. The returned object
118 // is owned by this class. Callers may want to clone the returned pix to keep
119 // it alive beyond the life of ShiroRekhaSplitter object.
120 Image splitted_image() {
121 return splitted_image_;
122 }
123
124 // On setting the input image, a clone of it is owned by this class.
125 void set_orig_pix(Image pix);
126
127 // Returns the input image provided to the object. This object is owned by
128 // this class. Callers may want to clone the returned pix to work with it.
129 Image orig_pix() {
130 return orig_pix_;
131 }
132
133 SplitStrategy ocr_split_strategy() const {
134 return ocr_split_strategy_;
135 }
136
137 void set_ocr_split_strategy(SplitStrategy strategy) {
138 ocr_split_strategy_ = strategy;
139 }
140
141 SplitStrategy pageseg_split_strategy() const {
142 return pageseg_split_strategy_;
143 }
144
145 void set_pageseg_split_strategy(SplitStrategy strategy) {
146 pageseg_split_strategy_ = strategy;
147 }
148
149 BLOCK_LIST *segmentation_block_list() {
150 return segmentation_block_list_;
151 }
152
153 // This method returns the computed mode-height of blobs in the pix.
154 // It also prunes very small blobs from calculation. Could be used to provide
155 // a global xheight estimate for images which have the same point-size text.
156 static int GetModeHeight(Image pix);
157
158 private:
159 // Method to perform a close operation on the input image. The xheight
160 // estimate decides the size of sel used.
161 static void PerformClose(Image pix, int xheight_estimate);
162
163 // This method resolves the cc bbox to a particular row and returns the row's
164 // xheight. This uses block_list_ if available, else just returns the
165 // global_xheight_ estimate currently set in the object.
166 int GetXheightForCC(Box *cc_bbox);
167
168 // Returns a list of regions (boxes) which should be cleared in the original
169 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
170 // (or less) word only. Xheight measure could be the global estimate, the row
171 // estimate, or unspecified. If unspecified, over splitting may occur, since a
172 // conservative estimate of stroke width along with an associated multiplier
173 // is used in its place. It is advisable to have a specified xheight when
174 // splitting for classification/training.
175 void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
176 int word_top, Boxa *regions_to_clear);
177
178 // Returns a new box object for the corresponding TBOX, based on the original
179 // image's coordinate system.
180 Box *GetBoxForTBOX(const TBOX &tbox) const;
181
182 // This method returns y-extents of the shiro-rekha computed from the input
183 // word image.
184 static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
185 int *shirorekha_ylevel);
186
187 Image orig_pix_; // Just a clone of the input image passed.
188 Image splitted_image_; // Image produced after the last splitting round. The
189 // object is owned by this class.
190 SplitStrategy pageseg_split_strategy_;
191 SplitStrategy ocr_split_strategy_;
192 Image debug_image_;
193 // This block list is used as a golden segmentation when performing splitting.
194 BLOCK_LIST *segmentation_block_list_;
195 int global_xheight_;
196 bool perform_close_; // Whether a morphological close operation should be
197 // performed before CCs are run through splitting.
198 };
199
200 } // namespace tesseract.
201
202 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_