Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: thresholder.cpp | |
| 3 // Description: Base API for thresholding images in tesseract. | |
| 4 // Author: Ray Smith | |
| 5 // | |
| 6 // (C) Copyright 2008, Google Inc. | |
| 7 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 // you may not use this file except in compliance with the License. | |
| 9 // You may obtain a copy of the License at | |
| 10 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 // Unless required by applicable law or agreed to in writing, software | |
| 12 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 // See the License for the specific language governing permissions and | |
| 15 // limitations under the License. | |
| 16 // | |
| 17 /////////////////////////////////////////////////////////////////////// | |
| 18 | |
| 19 // Include automatically generated configuration file | |
| 20 #ifdef HAVE_CONFIG_H | |
| 21 # include "config_auto.h" | |
| 22 #endif | |
| 23 | |
| 24 #include "otsuthr.h" | |
| 25 #include "thresholder.h" | |
| 26 #include "tprintf.h" // for tprintf | |
| 27 | |
| 28 #include <allheaders.h> | |
| 29 #include <tesseract/baseapi.h> // for api->GetIntVariable() | |
| 30 | |
| 31 #include <algorithm> // for std::max, std::min | |
| 32 #include <cstdint> // for uint32_t | |
| 33 #include <cstring> | |
| 34 #include <tuple> | |
| 35 | |
| 36 namespace tesseract { | |
| 37 | |
| 38 ImageThresholder::ImageThresholder() | |
| 39 : pix_(nullptr) | |
| 40 , image_width_(0) | |
| 41 , image_height_(0) | |
| 42 , pix_channels_(0) | |
| 43 , pix_wpl_(0) | |
| 44 , scale_(1) | |
| 45 , yres_(300) | |
| 46 , estimated_res_(300) { | |
| 47 SetRectangle(0, 0, 0, 0); | |
| 48 } | |
| 49 | |
| 50 ImageThresholder::~ImageThresholder() { | |
| 51 Clear(); | |
| 52 } | |
| 53 | |
| 54 // Destroy the Pix if there is one, freeing memory. | |
| 55 void ImageThresholder::Clear() { | |
| 56 pix_.destroy(); | |
| 57 } | |
| 58 | |
| 59 // Return true if no image has been set. | |
| 60 bool ImageThresholder::IsEmpty() const { | |
| 61 return pix_ == nullptr; | |
| 62 } | |
| 63 | |
| 64 // SetImage makes a copy of all the image data, so it may be deleted | |
| 65 // immediately after this call. | |
| 66 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. | |
| 67 // Palette color images will not work properly and must be converted to | |
| 68 // 24 bit. | |
| 69 // Binary images of 1 bit per pixel may also be given but they must be | |
| 70 // byte packed with the MSB of the first byte being the first pixel, and a | |
| 71 // one pixel is WHITE. For binary images set bytes_per_pixel=0. | |
| 72 void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height, | |
| 73 int bytes_per_pixel, int bytes_per_line) { | |
| 74 int bpp = bytes_per_pixel * 8; | |
| 75 if (bpp == 0) { | |
| 76 bpp = 1; | |
| 77 } | |
| 78 Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); | |
| 79 l_uint32 *data = pixGetData(pix); | |
| 80 int wpl = pixGetWpl(pix); | |
| 81 switch (bpp) { | |
| 82 case 1: | |
| 83 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { | |
| 84 for (int x = 0; x < width; ++x) { | |
| 85 if (imagedata[x / 8] & (0x80 >> (x % 8))) { | |
| 86 CLEAR_DATA_BIT(data, x); | |
| 87 } else { | |
| 88 SET_DATA_BIT(data, x); | |
| 89 } | |
| 90 } | |
| 91 } | |
| 92 break; | |
| 93 | |
| 94 case 8: | |
| 95 // Greyscale just copies the bytes in the right order. | |
| 96 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { | |
| 97 for (int x = 0; x < width; ++x) { | |
| 98 SET_DATA_BYTE(data, x, imagedata[x]); | |
| 99 } | |
| 100 } | |
| 101 break; | |
| 102 | |
| 103 case 24: | |
| 104 // Put the colors in the correct places in the line buffer. | |
| 105 for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { | |
| 106 for (int x = 0; x < width; ++x, ++data) { | |
| 107 SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); | |
| 108 SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); | |
| 109 SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); | |
| 110 } | |
| 111 } | |
| 112 break; | |
| 113 | |
| 114 case 32: | |
| 115 // Maintain byte order consistency across different endianness. | |
| 116 for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) { | |
| 117 for (int x = 0; x < width; ++x) { | |
| 118 data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | | |
| 119 (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; | |
| 120 } | |
| 121 } | |
| 122 break; | |
| 123 | |
| 124 default: | |
| 125 tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); | |
| 126 } | |
| 127 SetImage(pix); | |
| 128 pix.destroy(); | |
| 129 } | |
| 130 | |
| 131 // Store the coordinates of the rectangle to process for later use. | |
| 132 // Doesn't actually do any thresholding. | |
| 133 void ImageThresholder::SetRectangle(int left, int top, int width, int height) { | |
| 134 rect_left_ = left; | |
| 135 rect_top_ = top; | |
| 136 rect_width_ = width; | |
| 137 rect_height_ = height; | |
| 138 } | |
| 139 | |
| 140 // Get enough parameters to be able to rebuild bounding boxes in the | |
| 141 // original image (not just within the rectangle). | |
| 142 // Left and top are enough with top-down coordinates, but | |
| 143 // the height of the rectangle and the image are needed for bottom-up. | |
| 144 void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, | |
| 145 int *imageheight) { | |
| 146 *left = rect_left_; | |
| 147 *top = rect_top_; | |
| 148 *width = rect_width_; | |
| 149 *height = rect_height_; | |
| 150 *imagewidth = image_width_; | |
| 151 *imageheight = image_height_; | |
| 152 } | |
| 153 | |
| 154 // Pix vs raw, which to use? Pix is the preferred input for efficiency, | |
| 155 // since raw buffers are copied. | |
| 156 // SetImage for Pix clones its input, so the source pix may be pixDestroyed | |
| 157 // immediately after, but may not go away until after the Thresholder has | |
| 158 // finished with it. | |
| 159 void ImageThresholder::SetImage(const Image pix) { | |
| 160 if (pix_ != nullptr) { | |
| 161 pix_.destroy(); | |
| 162 } | |
| 163 Image src = pix; | |
| 164 int depth; | |
| 165 pixGetDimensions(src, &image_width_, &image_height_, &depth); | |
| 166 // Convert the image as necessary so it is one of binary, plain RGB, or | |
| 167 // 8 bit with no colormap. Guarantee that we always end up with our own copy, | |
| 168 // not just a clone of the input. | |
| 169 if (depth > 1 && depth < 8) { | |
| 170 pix_ = pixConvertTo8(src, false); | |
| 171 } else { | |
| 172 pix_ = src.copy(); | |
| 173 } | |
| 174 depth = pixGetDepth(pix_); | |
| 175 pix_channels_ = depth / 8; | |
| 176 pix_wpl_ = pixGetWpl(pix_); | |
| 177 scale_ = 1; | |
| 178 estimated_res_ = yres_ = pixGetYRes(pix_); | |
| 179 Init(); | |
| 180 } | |
| 181 | |
| 182 std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold( | |
| 183 TessBaseAPI *api, | |
| 184 ThresholdMethod method) { | |
| 185 Image pix_binary = nullptr; | |
| 186 Image pix_thresholds = nullptr; | |
| 187 | |
| 188 if (pix_channels_ == 0) { | |
| 189 // We have a binary image, but it still has to be copied, as this API | |
| 190 // allows the caller to modify the output. | |
| 191 Image original = GetPixRect(); | |
| 192 pix_binary = original.copy(); | |
| 193 original.destroy(); | |
| 194 return std::make_tuple(true, nullptr, pix_binary, nullptr); | |
| 195 } | |
| 196 | |
| 197 auto pix_grey = GetPixRectGrey(); | |
| 198 | |
| 199 int r; | |
| 200 | |
| 201 l_int32 pix_w, pix_h; | |
| 202 pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr); | |
| 203 | |
| 204 bool thresholding_debug; | |
| 205 api->GetBoolVariable("thresholding_debug", &thresholding_debug); | |
| 206 if (thresholding_debug) { | |
| 207 tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_); | |
| 208 } | |
| 209 | |
| 210 if (method == ThresholdMethod::Sauvola) { | |
| 211 int window_size; | |
| 212 double window_size_factor; | |
| 213 api->GetDoubleVariable("thresholding_window_size", &window_size_factor); | |
| 214 window_size = window_size_factor * yres_; | |
| 215 window_size = std::max(7, window_size); | |
| 216 window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size); | |
| 217 int half_window_size = window_size / 2; | |
| 218 | |
| 219 // factor for image division into tiles; >= 1 | |
| 220 l_int32 nx, ny; | |
| 221 // tiles size will be approx. 250 x 250 pixels | |
| 222 nx = std::max(1, (pix_w + 125) / 250); | |
| 223 ny = std::max(1, (pix_h + 125) / 250); | |
| 224 auto xrat = pix_w / nx; | |
| 225 auto yrat = pix_h / ny; | |
| 226 if (xrat < half_window_size + 2) { | |
| 227 nx = pix_w / (half_window_size + 2); | |
| 228 } | |
| 229 if (yrat < half_window_size + 2) { | |
| 230 ny = pix_h / (half_window_size + 2); | |
| 231 } | |
| 232 | |
| 233 double kfactor; | |
| 234 api->GetDoubleVariable("thresholding_kfactor", &kfactor); | |
| 235 kfactor = std::max(0.0, kfactor); | |
| 236 | |
| 237 if (thresholding_debug) { | |
| 238 tprintf("window size: %d kfactor: %.3f nx:%d ny: %d\n", window_size, kfactor, nx, ny); | |
| 239 } | |
| 240 | |
| 241 r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny, | |
| 242 (PIX**)pix_thresholds, | |
| 243 (PIX**)pix_binary); | |
| 244 } else { // if (method == ThresholdMethod::LeptonicaOtsu) | |
| 245 int tile_size; | |
| 246 double tile_size_factor; | |
| 247 api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor); | |
| 248 tile_size = tile_size_factor * yres_; | |
| 249 tile_size = std::max(16, tile_size); | |
| 250 | |
| 251 int smooth_size; | |
| 252 double smooth_size_factor; | |
| 253 api->GetDoubleVariable("thresholding_smooth_kernel_size", | |
| 254 &smooth_size_factor); | |
| 255 smooth_size_factor = std::max(0.0, smooth_size_factor); | |
| 256 smooth_size = smooth_size_factor * yres_; | |
| 257 int half_smooth_size = smooth_size / 2; | |
| 258 | |
| 259 double score_fraction; | |
| 260 api->GetDoubleVariable("thresholding_score_fraction", &score_fraction); | |
| 261 | |
| 262 if (thresholding_debug) { | |
| 263 tprintf("tile size: %d smooth_size: %d score_fraction: %.2f\n", tile_size, smooth_size, score_fraction); | |
| 264 } | |
| 265 | |
| 266 r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size, | |
| 267 half_smooth_size, half_smooth_size, | |
| 268 score_fraction, | |
| 269 (PIX**)pix_thresholds, | |
| 270 (PIX**)pix_binary); | |
| 271 } | |
| 272 | |
| 273 bool ok = (r == 0); | |
| 274 return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds); | |
| 275 } | |
| 276 | |
| 277 // Threshold the source image as efficiently as possible to the output Pix. | |
| 278 // Creates a Pix and sets pix to point to the resulting pointer. | |
| 279 // Caller must use pixDestroy to free the created Pix. | |
| 280 /// Returns false on error. | |
| 281 bool ImageThresholder::ThresholdToPix(Image *pix) { | |
| 282 if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) { | |
| 283 tprintf("Image too large: (%d, %d)\n", image_width_, image_height_); | |
| 284 return false; | |
| 285 } | |
| 286 Image original = GetPixRect(); | |
| 287 if (pix_channels_ == 0) { | |
| 288 // We have a binary image, but it still has to be copied, as this API | |
| 289 // allows the caller to modify the output. | |
| 290 *pix = original.copy(); | |
| 291 } else { | |
| 292 if (pixGetColormap(original)) { | |
| 293 Image tmp; | |
| 294 Image without_cmap = | |
| 295 pixRemoveColormap(original, REMOVE_CMAP_BASED_ON_SRC); | |
| 296 int depth = pixGetDepth(without_cmap); | |
| 297 if (depth > 1 && depth < 8) { | |
| 298 tmp = pixConvertTo8(without_cmap, false); | |
| 299 } else { | |
| 300 tmp = without_cmap.copy(); | |
| 301 } | |
| 302 without_cmap.destroy(); | |
| 303 OtsuThresholdRectToPix(tmp, pix); | |
| 304 tmp.destroy(); | |
| 305 } else { | |
| 306 OtsuThresholdRectToPix(pix_, pix); | |
| 307 } | |
| 308 } | |
| 309 original.destroy(); | |
| 310 return true; | |
| 311 } | |
| 312 | |
| 313 // Gets a pix that contains an 8 bit threshold value at each pixel. The | |
| 314 // returned pix may be an integer reduction of the binary image such that | |
| 315 // the scale factor may be inferred from the ratio of the sizes, even down | |
| 316 // to the extreme of a 1x1 pixel thresholds image. | |
| 317 // Ideally the 8 bit threshold should be the exact threshold used to generate | |
| 318 // the binary image in ThresholdToPix, but this is not a hard constraint. | |
| 319 // Returns nullptr if the input is binary. PixDestroy after use. | |
| 320 Image ImageThresholder::GetPixRectThresholds() { | |
| 321 if (IsBinary()) { | |
| 322 return nullptr; | |
| 323 } | |
| 324 Image pix_grey = GetPixRectGrey(); | |
| 325 int width = pixGetWidth(pix_grey); | |
| 326 int height = pixGetHeight(pix_grey); | |
| 327 std::vector<int> thresholds; | |
| 328 std::vector<int> hi_values; | |
| 329 OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values); | |
| 330 pix_grey.destroy(); | |
| 331 Image pix_thresholds = pixCreate(width, height, 8); | |
| 332 int threshold = thresholds[0] > 0 ? thresholds[0] : 128; | |
| 333 pixSetAllArbitrary(pix_thresholds, threshold); | |
| 334 return pix_thresholds; | |
| 335 } | |
| 336 | |
| 337 // Common initialization shared between SetImage methods. | |
| 338 void ImageThresholder::Init() { | |
| 339 SetRectangle(0, 0, image_width_, image_height_); | |
| 340 } | |
| 341 | |
| 342 // Get a clone/copy of the source image rectangle. | |
| 343 // The returned Pix must be pixDestroyed. | |
| 344 // This function will be used in the future by the page layout analysis, and | |
| 345 // the layout analysis that uses it will only be available with Leptonica, | |
| 346 // so there is no raw equivalent. | |
| 347 Image ImageThresholder::GetPixRect() { | |
| 348 if (IsFullImage()) { | |
| 349 // Just clone the whole thing. | |
| 350 return pix_.clone(); | |
| 351 } else { | |
| 352 // Crop to the given rectangle. | |
| 353 Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); | |
| 354 Image cropped = pixClipRectangle(pix_, box, nullptr); | |
| 355 boxDestroy(&box); | |
| 356 return cropped; | |
| 357 } | |
| 358 } | |
| 359 | |
| 360 // Get a clone/copy of the source image rectangle, reduced to greyscale, | |
| 361 // and at the same resolution as the output binary. | |
| 362 // The returned Pix must be pixDestroyed. | |
| 363 // Provided to the classifier to extract features from the greyscale image. | |
| 364 Image ImageThresholder::GetPixRectGrey() { | |
| 365 auto pix = GetPixRect(); // May have to be reduced to grey. | |
| 366 int depth = pixGetDepth(pix); | |
| 367 if (depth != 8 || pixGetColormap(pix)) { | |
| 368 if (depth == 24) { | |
| 369 auto tmp = pixConvert24To32(pix); | |
| 370 pix.destroy(); | |
| 371 pix = tmp; | |
| 372 } | |
| 373 auto result = pixConvertTo8(pix, false); | |
| 374 pix.destroy(); | |
| 375 return result; | |
| 376 } | |
| 377 return pix; | |
| 378 } | |
| 379 | |
| 380 // Otsu thresholds the rectangle, taking the rectangle from *this. | |
| 381 void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const { | |
| 382 std::vector<int> thresholds; | |
| 383 std::vector<int> hi_values; | |
| 384 | |
| 385 int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_, | |
| 386 thresholds, hi_values); | |
| 387 ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); | |
| 388 } | |
| 389 | |
| 390 /// Threshold the rectangle, taking everything except the src_pix | |
| 391 /// from the class, using thresholds/hi_values to the output pix. | |
| 392 /// NOTE that num_channels is the size of the thresholds and hi_values | |
| 393 // arrays and also the bytes per pixel in src_pix. | |
| 394 void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds, | |
| 395 const std::vector<int> &hi_values, Image *pix) const { | |
| 396 *pix = pixCreate(rect_width_, rect_height_, 1); | |
| 397 uint32_t *pixdata = pixGetData(*pix); | |
| 398 int wpl = pixGetWpl(*pix); | |
| 399 int src_wpl = pixGetWpl(src_pix); | |
| 400 uint32_t *srcdata = pixGetData(src_pix); | |
| 401 pixSetXRes(*pix, pixGetXRes(src_pix)); | |
| 402 pixSetYRes(*pix, pixGetYRes(src_pix)); | |
| 403 for (int y = 0; y < rect_height_; ++y) { | |
| 404 const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl; | |
| 405 uint32_t *pixline = pixdata + y * wpl; | |
| 406 for (int x = 0; x < rect_width_; ++x) { | |
| 407 bool white_result = true; | |
| 408 for (int ch = 0; ch < num_channels; ++ch) { | |
| 409 int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch); | |
| 410 if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) { | |
| 411 white_result = false; | |
| 412 break; | |
| 413 } | |
| 414 } | |
| 415 if (white_result) { | |
| 416 CLEAR_DATA_BIT(pixline, x); | |
| 417 } else { | |
| 418 SET_DATA_BIT(pixline, x); | |
| 419 } | |
| 420 } | |
| 421 } | |
| 422 } | |
| 423 | |
| 424 } // namespace tesseract. |
