comparison mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: thresholder.h
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 #ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
20 #define TESSERACT_CCMAIN_THRESHOLDER_H_
21
22 #include <tesseract/export.h>
23
24 #include <vector> // for std::vector
25
26 struct Pix; // Forward declaration. NOTE(review): no declaration visible in this header uses Pix by name (they use Image) - confirm it is still needed.
27
28 namespace tesseract {
29
/// Thresholding algorithms that can be requested through
/// ImageThresholder::Threshold().
30 enum class ThresholdMethod {
31 Otsu, // Tesseract's legacy Otsu
32 LeptonicaOtsu, // Leptonica's Otsu
33 Sauvola, // Leptonica's Sauvola
34 Max, // Number of thresholding methods (a count, not a method); must stay last
35 };
36
37 class TessBaseAPI; // Forward declaration; only used as a pointer parameter of ImageThresholder::Threshold().
38
39 /// Base class for all tesseract image thresholding classes.
40 /// Specific classes can add new thresholding methods by
41 /// overriding ThresholdToPix.
42 /// Each instance deals with a single image, but the design is intended to
43 /// be useful for multiple calls to SetRectangle and ThresholdTo* if
44 /// desired.
45 class TESS_API ImageThresholder {
46 public:
47 ImageThresholder();
48 virtual ~ImageThresholder();
49
50 /// Destroy the Pix if there is one, freeing memory.
51 virtual void Clear();
52
53 /// Return true if no image has been set.
54 bool IsEmpty() const;
55
56 /// SetImage makes a copy of all the image data, so it may be deleted
57 /// immediately after this call.
58 /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
59 /// Palette color images will not work properly and must be converted to
60 /// 24 bit.
61 /// Binary images of 1 bit per pixel may also be given but they must be
62 /// byte packed with the MSB of the first byte being the first pixel, and a
63 /// one pixel is WHITE. For binary images set bytes_per_pixel=0.
64 void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel,
65 int bytes_per_line);
66
67 /// Store the coordinates of the rectangle to process for later use.
68 /// Doesn't actually do any thresholding.
69 void SetRectangle(int left, int top, int width, int height);
70
71 /// Get enough parameters to be able to rebuild bounding boxes in the
72 /// original image (not just within the rectangle).
73 /// Left and top are enough with top-down coordinates, but
74 /// the height of the rectangle and the image are needed for bottom-up.
75 virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
76 int *imageheight);
77
78 /// Return true if the source image is color (three or more 8-bit channels).
79 bool IsColor() const {
80 return pix_channels_ >= 3;
81 }
82
83 /// Returns true if the source image is binary (zero 8-bit channels).
84 bool IsBinary() const {
85 return pix_channels_ == 0;
86 }
87
/// Return the scale factor from the original image (scale_).
88 int GetScaleFactor() const {
89 return scale_;
90 }
91
92 // Set the resolution of the source image in pixels per inch.
93 // This should be called right after SetImage(), and will let us return
94 // appropriate font sizes for the text.
// Note: this also overwrites the estimated resolution with the same value,
// so call SetEstimatedResolution() afterwards if a different estimate is needed.
95 void SetSourceYResolution(int ppi) {
96 yres_ = ppi;
97 estimated_res_ = ppi;
98 }
// Returns the source resolution (yres_) without any scaling applied.
99 int GetSourceYResolution() const {
100 return yres_;
101 }
// Returns the source resolution multiplied by the scale factor.
102 int GetScaledYResolution() const {
103 return scale_ * yres_;
104 }
105 // Set the resolution of the source image in pixels per inch, as estimated
106 // by the thresholder from the text size found during thresholding.
107 // This value will be used to set internal size thresholds during recognition
108 // and will not influence the output "point size." The default value is
109 // the same as the source resolution. (yres_)
110 void SetEstimatedResolution(int ppi) {
111 estimated_res_ = ppi;
112 }
113 // Returns the estimated resolution, including any active scaling.
114 // This value will be used to set internal size thresholds during recognition.
115 int GetScaledEstimatedResolution() const {
116 return scale_ * estimated_res_;
117 }
118
119 /// Pix vs raw, which to use? Pix is the preferred input for efficiency,
120 /// since raw buffers are copied.
121 /// SetImage for Pix clones its input, so the source pix may be pixDestroyed
122 /// immediately after, but may not go away until after the Thresholder has
123 /// finished with it.
124 void SetImage(const Image pix);
125
126 /// Threshold the source image as efficiently as possible to the output Pix.
127 /// Creates a Pix and sets pix to point to the resulting pointer.
128 /// Caller must use pixDestroy to free the created Pix.
129 /// Returns false on error.
130 virtual bool ThresholdToPix(Image *pix);
131
/// Threshold the source image using the requested method.
/// NOTE(review): the meaning of the returned bool and of the three Image
/// values is not documented in this header - confirm against the
/// implementation before relying on their order.
132 virtual std::tuple<bool, Image, Image, Image> Threshold(TessBaseAPI *api,
133 ThresholdMethod method);
134
135 // Gets a pix that contains an 8 bit threshold value at each pixel. The
136 // returned pix may be an integer reduction of the binary image such that
137 // the scale factor may be inferred from the ratio of the sizes, even down
138 // to the extreme of a 1x1 pixel thresholds image.
139 // Ideally the 8 bit threshold should be the exact threshold used to generate
140 // the binary image in ThresholdToPix, but this is not a hard constraint.
141 // Returns nullptr if the input is binary. PixDestroy after use.
142 virtual Image GetPixRectThresholds();
143
144 /// Get a clone/copy of the source image rectangle.
145 /// The returned Pix must be pixDestroyed.
146 /// This function will be used in the future by the page layout analysis, and
147 /// the layout analysis that uses it will only be available with Leptonica,
148 /// so there is no raw equivalent.
149 Image GetPixRect();
150
151 // Get a clone/copy of the source image rectangle, reduced to greyscale,
152 // and at the same resolution as the output binary.
153 // The returned Pix must be pixDestroyed.
154 // Provided to the classifier to extract features from the greyscale image.
155 virtual Image GetPixRectGrey();
156
157 protected:
158 // ----------------------------------------------------------------------
159 // Utility functions that may be useful components for other thresholders.
160
161 /// Common initialization shared between SetImage methods.
162 virtual void Init();
163
164 /// Return true if we are processing the full image, i.e. the rectangle
/// starts at (0, 0) and has the same width and height as the image.
165 bool IsFullImage() const {
166 return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ &&
167 rect_height_ == image_height_;
168 }
169
170 // Otsu thresholds the rectangle, taking the rectangle from *this.
171 void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
172
173 /// Threshold the rectangle, taking everything except the src_pix
174 /// from the class, using thresholds/hi_values to the output pix.
175 /// NOTE that num_channels is the size of the thresholds and hi_values
176 /// arrays and also the bytes per pixel in src_pix.
177 void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
178 const std::vector <int> &hi_values, Image *pix) const;
179
180 protected:
181 /// Clone or other copy of the source Pix.
182 /// The pix will always be PixDestroy()ed on destruction of the class.
183 Image pix_;
184
185 int image_width_; ///< Width of source pix_.
186 int image_height_; ///< Height of source pix_.
187 int pix_channels_; ///< Number of 8-bit channels in pix_.
188 int pix_wpl_; ///< Words per line of pix_.
189 // Scaling and resolution of the source image.
190 int scale_; ///< Scale factor from original image.
191 int yres_; ///< y pixels/inch in source image.
192 int estimated_res_; ///< Resolution estimate from text size.
// Limits of image rectangle to be processed.
193 int rect_left_; ///< Left coordinate of the rectangle to process.
194 int rect_top_; ///< Top coordinate of the rectangle to process.
195 int rect_width_; ///< Width of the rectangle to process.
196 int rect_height_; ///< Height of the rectangle to process.
197 };
198
199 } // namespace tesseract.
200
201 #endif // TESSERACT_CCMAIN_THRESHOLDER_H_