Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: thresholder.h | |
| 3 // Description: Base API for thresholding images in tesseract. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2008, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 #ifndef TESSERACT_CCMAIN_THRESHOLDER_H_ | |
| 20 #define TESSERACT_CCMAIN_THRESHOLDER_H_ | |
| 21 | |
| 22 #include <tesseract/export.h> | |
| 23 | |
| 24 #include <vector> // for std::vector | |
| 25 | |
| 26 struct Pix; | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 enum class ThresholdMethod { | |
| 31 Otsu, // Tesseract's legacy Otsu | |
| 32 LeptonicaOtsu, // Leptonica's Otsu | |
| 33 Sauvola, // Leptonica's Sauvola | |
| 34 Max, // Number of Thresholding methods | |
| 35 }; | |
| 36 | |
| 37 class TessBaseAPI; | |
| 38 | |
| 39 /// Base class for all tesseract image thresholding classes. | |
| 40 /// Specific classes can add new thresholding methods by | |
| 41 /// overriding ThresholdToPix. | |
| 42 /// Each instance deals with a single image, but the design is intended to | |
| 43 /// be useful for multiple calls to SetRectangle and ThresholdTo* if | |
| 44 /// desired. | |
| 45 class TESS_API ImageThresholder { | |
| 46 public: | |
| 47 ImageThresholder(); | |
| 48 virtual ~ImageThresholder(); | |
| 49 | |
| 50 /// Destroy the Pix if there is one, freeing memory. | |
| 51 virtual void Clear(); | |
| 52 | |
| 53 /// Return true if no image has been set. | |
| 54 bool IsEmpty() const; | |
| 55 | |
| 56 /// SetImage makes a copy of all the image data, so it may be deleted | |
| 57 /// immediately after this call. | |
| 58 /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. | |
| 59 /// Palette color images will not work properly and must be converted to | |
| 60 /// 24 bit. | |
| 61 /// Binary images of 1 bit per pixel may also be given but they must be | |
| 62 /// byte packed with the MSB of the first byte being the first pixel, and a | |
| 63 /// pixel value of 1 is WHITE. For binary images set bytes_per_pixel=0. | |
| 64 void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, | |
| 65 int bytes_per_line); | |
| 66 | |
| 67 /// Store the coordinates of the rectangle to process for later use. | |
| 68 /// Doesn't actually do any thresholding. | |
| 69 void SetRectangle(int left, int top, int width, int height); | |
| 70 | |
| 71 /// Get enough parameters to be able to rebuild bounding boxes in the | |
| 72 /// original image (not just within the rectangle). | |
| 73 /// Left and top are enough with top-down coordinates, but | |
| 74 /// the height of the rectangle and the image are needed for bottom-up. | |
| 75 virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, | |
| 76 int *imageheight); | |
| 77 | |
| 78 /// Return true if the source image is color. | |
| 79 bool IsColor() const { | |
| 80 return pix_channels_ >= 3; | |
| 81 } | |
| 82 | |
| 83 /// Returns true if the source image is binary. | |
| 84 bool IsBinary() const { | |
| 85 return pix_channels_ == 0; | |
| 86 } | |
| 87 | |
| 88 int GetScaleFactor() const { | |
| 89 return scale_; | |
| 90 } | |
| 91 | |
| 92 // Set the resolution of the source image in pixels per inch. | |
| 93 // This should be called right after SetImage(), and will let us return | |
| 94 // appropriate font sizes for the text. | |
| 95 void SetSourceYResolution(int ppi) { | |
| 96 yres_ = ppi; | |
| 97 estimated_res_ = ppi; | |
| 98 } | |
| 99 int GetSourceYResolution() const { | |
| 100 return yres_; | |
| 101 } | |
| 102 int GetScaledYResolution() const { | |
| 103 return scale_ * yres_; | |
| 104 } | |
| 105 // Set the resolution of the source image in pixels per inch, as estimated | |
| 106 // by the thresholder from the text size found during thresholding. | |
| 107 // This value will be used to set internal size thresholds during recognition | |
| 108 // and will not influence the output "point size." The default value is | |
| 109 // the same as the source resolution. (yres_) | |
| 110 void SetEstimatedResolution(int ppi) { | |
| 111 estimated_res_ = ppi; | |
| 112 } | |
| 113 // Returns the estimated resolution, including any active scaling. | |
| 114 // This value will be used to set internal size thresholds during recognition. | |
| 115 int GetScaledEstimatedResolution() const { | |
| 116 return scale_ * estimated_res_; | |
| 117 } | |
| 118 | |
| 119 /// Pix vs raw, which to use? Pix is the preferred input for efficiency, | |
| 120 /// since raw buffers are copied. | |
| 121 /// SetImage for Pix clones its input, so the source pix may be pixDestroyed | |
| 122 /// immediately after, but its data may not actually be freed until the | |
| 123 /// Thresholder has finished with it. | |
| 124 void SetImage(const Image pix); | |
| 125 | |
| 126 /// Threshold the source image as efficiently as possible to the output Pix. | |
| 127 /// Creates a Pix and stores the result in *pix. | |
| 128 /// Caller must use pixDestroy to free the created Pix. | |
| 129 /// Returns false on error. | |
| 130 virtual bool ThresholdToPix(Image *pix); | |
| 131 | |
| 132 virtual std::tuple<bool, Image, Image, Image> Threshold(TessBaseAPI *api, | |
| 133 ThresholdMethod method); | |
| 134 | |
| 135 // Gets a pix that contains an 8 bit threshold value at each pixel. The | |
| 136 // returned pix may be an integer reduction of the binary image such that | |
| 137 // the scale factor may be inferred from the ratio of the sizes, even down | |
| 138 // to the extreme of a 1x1 pixel thresholds image. | |
| 139 // Ideally the 8 bit threshold should be the exact threshold used to generate | |
| 140 // the binary image in ThresholdToPix, but this is not a hard constraint. | |
| 141 // Returns nullptr if the input is binary. The returned Pix must be pixDestroyed. | |
| 142 virtual Image GetPixRectThresholds(); | |
| 143 | |
| 144 /// Get a clone/copy of the source image rectangle. | |
| 145 /// The returned Pix must be pixDestroyed. | |
| 146 /// This function will be used in the future by the page layout analysis, and | |
| 147 /// the layout analysis that uses it will only be available with Leptonica, | |
| 148 /// so there is no raw equivalent. | |
| 149 Image GetPixRect(); | |
| 150 | |
| 151 // Get a clone/copy of the source image rectangle, reduced to greyscale, | |
| 152 // and at the same resolution as the output binary. | |
| 153 // The returned Pix must be pixDestroyed. | |
| 154 // Provided to the classifier to extract features from the greyscale image. | |
| 155 virtual Image GetPixRectGrey(); | |
| 156 | |
| 157 protected: | |
| 158 // ---------------------------------------------------------------------- | |
| 159 // Utility functions that may be useful components for other thresholders. | |
| 160 | |
| 161 /// Common initialization shared between SetImage methods. | |
| 162 virtual void Init(); | |
| 163 | |
| 164 /// Return true if we are processing the full image. | |
| 165 bool IsFullImage() const { | |
| 166 return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ && | |
| 167 rect_height_ == image_height_; | |
| 168 } | |
| 169 | |
| 170 // Otsu thresholds the rectangle, taking the rectangle from *this. | |
| 171 void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const; | |
| 172 | |
| 173 /// Threshold the rectangle, taking everything except the src_pix | |
| 174 /// from the class, using thresholds/hi_values to the output pix. | |
| 175 /// NOTE that num_channels is the size of the thresholds and hi_values | |
| 176 /// arrays and also the bytes per pixel in src_pix. | |
| 177 void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds, | |
| 178 const std::vector <int> &hi_values, Image *pix) const; | |
| 179 | |
| 180 protected: | |
| 181 /// Clone or other copy of the source Pix. | |
| 182 /// The pix will always be PixDestroy()ed on destruction of the class. | |
| 183 Image pix_; | |
| 184 | |
| 185 int image_width_; ///< Width of source pix_. | |
| 186 int image_height_; ///< Height of source pix_. | |
| 187 int pix_channels_; ///< Number of 8-bit channels in pix_. | |
| 188 int pix_wpl_; ///< Words per line of pix_. | |
| 189 // Limits of image rectangle to be processed (see the rect_* members below). | |
| 190 int scale_; ///< Scale factor from original image. | |
| 191 int yres_; ///< y pixels/inch in source image. | |
| 192 int estimated_res_; ///< Resolution estimate from text size. | |
| 193 int rect_left_; | |
| 194 int rect_top_; | |
| 195 int rect_width_; | |
| 196 int rect_height_; | |
| 197 }; | |
| 198 | |
| 199 } // namespace tesseract. | |
| 200 | |
| 201 #endif // TESSERACT_CCMAIN_THRESHOLDER_H_ |
