comparison mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 ///////////////////////////////////////////////////////////////////////
2 // File: thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18
19 // Include automatically generated configuration file
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include "otsuthr.h"
25 #include "thresholder.h"
26 #include "tprintf.h" // for tprintf
27
28 #include <allheaders.h>
29 #include <tesseract/baseapi.h> // for api->GetIntVariable()
30
31 #include <algorithm> // for std::max, std::min
32 #include <cstdint> // for uint32_t
33 #include <cstring>
34 #include <tuple>
35
36 namespace tesseract {
37
38 ImageThresholder::ImageThresholder()
39 : pix_(nullptr)
40 , image_width_(0)
41 , image_height_(0)
42 , pix_channels_(0)
43 , pix_wpl_(0)
44 , scale_(1)
45 , yres_(300)
46 , estimated_res_(300) {
47 SetRectangle(0, 0, 0, 0);
48 }
49
50 ImageThresholder::~ImageThresholder() {
51 Clear();
52 }
53
54 // Destroy the Pix if there is one, freeing memory.
55 void ImageThresholder::Clear() {
56 pix_.destroy();
57 }
58
59 // Return true if no image has been set.
60 bool ImageThresholder::IsEmpty() const {
61 return pix_ == nullptr;
62 }
63
64 // SetImage makes a copy of all the image data, so it may be deleted
65 // immediately after this call.
66 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
67 // Palette color images will not work properly and must be converted to
68 // 24 bit.
69 // Binary images of 1 bit per pixel may also be given but they must be
70 // byte packed with the MSB of the first byte being the first pixel, and a
71 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
72 void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
73 int bytes_per_pixel, int bytes_per_line) {
74 int bpp = bytes_per_pixel * 8;
75 if (bpp == 0) {
76 bpp = 1;
77 }
78 Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
79 l_uint32 *data = pixGetData(pix);
80 int wpl = pixGetWpl(pix);
81 switch (bpp) {
82 case 1:
83 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
84 for (int x = 0; x < width; ++x) {
85 if (imagedata[x / 8] & (0x80 >> (x % 8))) {
86 CLEAR_DATA_BIT(data, x);
87 } else {
88 SET_DATA_BIT(data, x);
89 }
90 }
91 }
92 break;
93
94 case 8:
95 // Greyscale just copies the bytes in the right order.
96 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
97 for (int x = 0; x < width; ++x) {
98 SET_DATA_BYTE(data, x, imagedata[x]);
99 }
100 }
101 break;
102
103 case 24:
104 // Put the colors in the correct places in the line buffer.
105 for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
106 for (int x = 0; x < width; ++x, ++data) {
107 SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
108 SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
109 SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
110 }
111 }
112 break;
113
114 case 32:
115 // Maintain byte order consistency across different endianness.
116 for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
117 for (int x = 0; x < width; ++x) {
118 data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
119 (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
120 }
121 }
122 break;
123
124 default:
125 tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
126 }
127 SetImage(pix);
128 pix.destroy();
129 }
130
131 // Store the coordinates of the rectangle to process for later use.
132 // Doesn't actually do any thresholding.
133 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
134 rect_left_ = left;
135 rect_top_ = top;
136 rect_width_ = width;
137 rect_height_ = height;
138 }
139
140 // Get enough parameters to be able to rebuild bounding boxes in the
141 // original image (not just within the rectangle).
142 // Left and top are enough with top-down coordinates, but
143 // the height of the rectangle and the image are needed for bottom-up.
144 void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
145 int *imageheight) {
146 *left = rect_left_;
147 *top = rect_top_;
148 *width = rect_width_;
149 *height = rect_height_;
150 *imagewidth = image_width_;
151 *imageheight = image_height_;
152 }
153
154 // Pix vs raw, which to use? Pix is the preferred input for efficiency,
155 // since raw buffers are copied.
156 // SetImage for Pix clones its input, so the source pix may be pixDestroyed
157 // immediately after, but may not go away until after the Thresholder has
158 // finished with it.
159 void ImageThresholder::SetImage(const Image pix) {
160 if (pix_ != nullptr) {
161 pix_.destroy();
162 }
163 Image src = pix;
164 int depth;
165 pixGetDimensions(src, &image_width_, &image_height_, &depth);
166 // Convert the image as necessary so it is one of binary, plain RGB, or
167 // 8 bit with no colormap. Guarantee that we always end up with our own copy,
168 // not just a clone of the input.
169 if (depth > 1 && depth < 8) {
170 pix_ = pixConvertTo8(src, false);
171 } else {
172 pix_ = src.copy();
173 }
174 depth = pixGetDepth(pix_);
175 pix_channels_ = depth / 8;
176 pix_wpl_ = pixGetWpl(pix_);
177 scale_ = 1;
178 estimated_res_ = yres_ = pixGetYRes(pix_);
179 Init();
180 }
181
182 std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
183 TessBaseAPI *api,
184 ThresholdMethod method) {
185 Image pix_binary = nullptr;
186 Image pix_thresholds = nullptr;
187
188 if (pix_channels_ == 0) {
189 // We have a binary image, but it still has to be copied, as this API
190 // allows the caller to modify the output.
191 Image original = GetPixRect();
192 pix_binary = original.copy();
193 original.destroy();
194 return std::make_tuple(true, nullptr, pix_binary, nullptr);
195 }
196
197 auto pix_grey = GetPixRectGrey();
198
199 int r;
200
201 l_int32 pix_w, pix_h;
202 pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
203
204 bool thresholding_debug;
205 api->GetBoolVariable("thresholding_debug", &thresholding_debug);
206 if (thresholding_debug) {
207 tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_);
208 }
209
210 if (method == ThresholdMethod::Sauvola) {
211 int window_size;
212 double window_size_factor;
213 api->GetDoubleVariable("thresholding_window_size", &window_size_factor);
214 window_size = window_size_factor * yres_;
215 window_size = std::max(7, window_size);
216 window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size);
217 int half_window_size = window_size / 2;
218
219 // factor for image division into tiles; >= 1
220 l_int32 nx, ny;
221 // tiles size will be approx. 250 x 250 pixels
222 nx = std::max(1, (pix_w + 125) / 250);
223 ny = std::max(1, (pix_h + 125) / 250);
224 auto xrat = pix_w / nx;
225 auto yrat = pix_h / ny;
226 if (xrat < half_window_size + 2) {
227 nx = pix_w / (half_window_size + 2);
228 }
229 if (yrat < half_window_size + 2) {
230 ny = pix_h / (half_window_size + 2);
231 }
232
233 double kfactor;
234 api->GetDoubleVariable("thresholding_kfactor", &kfactor);
235 kfactor = std::max(0.0, kfactor);
236
237 if (thresholding_debug) {
238 tprintf("window size: %d kfactor: %.3f nx:%d ny: %d\n", window_size, kfactor, nx, ny);
239 }
240
241 r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
242 (PIX**)pix_thresholds,
243 (PIX**)pix_binary);
244 } else { // if (method == ThresholdMethod::LeptonicaOtsu)
245 int tile_size;
246 double tile_size_factor;
247 api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor);
248 tile_size = tile_size_factor * yres_;
249 tile_size = std::max(16, tile_size);
250
251 int smooth_size;
252 double smooth_size_factor;
253 api->GetDoubleVariable("thresholding_smooth_kernel_size",
254 &smooth_size_factor);
255 smooth_size_factor = std::max(0.0, smooth_size_factor);
256 smooth_size = smooth_size_factor * yres_;
257 int half_smooth_size = smooth_size / 2;
258
259 double score_fraction;
260 api->GetDoubleVariable("thresholding_score_fraction", &score_fraction);
261
262 if (thresholding_debug) {
263 tprintf("tile size: %d smooth_size: %d score_fraction: %.2f\n", tile_size, smooth_size, score_fraction);
264 }
265
266 r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
267 half_smooth_size, half_smooth_size,
268 score_fraction,
269 (PIX**)pix_thresholds,
270 (PIX**)pix_binary);
271 }
272
273 bool ok = (r == 0);
274 return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
275 }
276
277 // Threshold the source image as efficiently as possible to the output Pix.
278 // Creates a Pix and sets pix to point to the resulting pointer.
279 // Caller must use pixDestroy to free the created Pix.
280 /// Returns false on error.
281 bool ImageThresholder::ThresholdToPix(Image *pix) {
282 if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
283 tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
284 return false;
285 }
286 Image original = GetPixRect();
287 if (pix_channels_ == 0) {
288 // We have a binary image, but it still has to be copied, as this API
289 // allows the caller to modify the output.
290 *pix = original.copy();
291 } else {
292 if (pixGetColormap(original)) {
293 Image tmp;
294 Image without_cmap =
295 pixRemoveColormap(original, REMOVE_CMAP_BASED_ON_SRC);
296 int depth = pixGetDepth(without_cmap);
297 if (depth > 1 && depth < 8) {
298 tmp = pixConvertTo8(without_cmap, false);
299 } else {
300 tmp = without_cmap.copy();
301 }
302 without_cmap.destroy();
303 OtsuThresholdRectToPix(tmp, pix);
304 tmp.destroy();
305 } else {
306 OtsuThresholdRectToPix(pix_, pix);
307 }
308 }
309 original.destroy();
310 return true;
311 }
312
313 // Gets a pix that contains an 8 bit threshold value at each pixel. The
314 // returned pix may be an integer reduction of the binary image such that
315 // the scale factor may be inferred from the ratio of the sizes, even down
316 // to the extreme of a 1x1 pixel thresholds image.
317 // Ideally the 8 bit threshold should be the exact threshold used to generate
318 // the binary image in ThresholdToPix, but this is not a hard constraint.
319 // Returns nullptr if the input is binary. PixDestroy after use.
320 Image ImageThresholder::GetPixRectThresholds() {
321 if (IsBinary()) {
322 return nullptr;
323 }
324 Image pix_grey = GetPixRectGrey();
325 int width = pixGetWidth(pix_grey);
326 int height = pixGetHeight(pix_grey);
327 std::vector<int> thresholds;
328 std::vector<int> hi_values;
329 OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
330 pix_grey.destroy();
331 Image pix_thresholds = pixCreate(width, height, 8);
332 int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
333 pixSetAllArbitrary(pix_thresholds, threshold);
334 return pix_thresholds;
335 }
336
337 // Common initialization shared between SetImage methods.
338 void ImageThresholder::Init() {
339 SetRectangle(0, 0, image_width_, image_height_);
340 }
341
342 // Get a clone/copy of the source image rectangle.
343 // The returned Pix must be pixDestroyed.
344 // This function will be used in the future by the page layout analysis, and
345 // the layout analysis that uses it will only be available with Leptonica,
346 // so there is no raw equivalent.
347 Image ImageThresholder::GetPixRect() {
348 if (IsFullImage()) {
349 // Just clone the whole thing.
350 return pix_.clone();
351 } else {
352 // Crop to the given rectangle.
353 Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
354 Image cropped = pixClipRectangle(pix_, box, nullptr);
355 boxDestroy(&box);
356 return cropped;
357 }
358 }
359
360 // Get a clone/copy of the source image rectangle, reduced to greyscale,
361 // and at the same resolution as the output binary.
362 // The returned Pix must be pixDestroyed.
363 // Provided to the classifier to extract features from the greyscale image.
364 Image ImageThresholder::GetPixRectGrey() {
365 auto pix = GetPixRect(); // May have to be reduced to grey.
366 int depth = pixGetDepth(pix);
367 if (depth != 8 || pixGetColormap(pix)) {
368 if (depth == 24) {
369 auto tmp = pixConvert24To32(pix);
370 pix.destroy();
371 pix = tmp;
372 }
373 auto result = pixConvertTo8(pix, false);
374 pix.destroy();
375 return result;
376 }
377 return pix;
378 }
379
380 // Otsu thresholds the rectangle, taking the rectangle from *this.
381 void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
382 std::vector<int> thresholds;
383 std::vector<int> hi_values;
384
385 int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
386 thresholds, hi_values);
387 ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
388 }
389
390 /// Threshold the rectangle, taking everything except the src_pix
391 /// from the class, using thresholds/hi_values to the output pix.
392 /// NOTE that num_channels is the size of the thresholds and hi_values
393 // arrays and also the bytes per pixel in src_pix.
394 void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
395 const std::vector<int> &hi_values, Image *pix) const {
396 *pix = pixCreate(rect_width_, rect_height_, 1);
397 uint32_t *pixdata = pixGetData(*pix);
398 int wpl = pixGetWpl(*pix);
399 int src_wpl = pixGetWpl(src_pix);
400 uint32_t *srcdata = pixGetData(src_pix);
401 pixSetXRes(*pix, pixGetXRes(src_pix));
402 pixSetYRes(*pix, pixGetYRes(src_pix));
403 for (int y = 0; y < rect_height_; ++y) {
404 const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
405 uint32_t *pixline = pixdata + y * wpl;
406 for (int x = 0; x < rect_width_; ++x) {
407 bool white_result = true;
408 for (int ch = 0; ch < num_channels; ++ch) {
409 int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
410 if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
411 white_result = false;
412 break;
413 }
414 }
415 if (white_result) {
416 CLEAR_DATA_BIT(pixline, x);
417 } else {
418 SET_DATA_BIT(pixline, x);
419 }
420 }
421 }
422 }
423
424 } // namespace tesseract.