Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccmain/thresholder.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,424 @@ +/////////////////////////////////////////////////////////////////////// +// File: thresholder.cpp +// Description: Base API for thresholding images in tesseract. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +// Include automatically generated configuration file +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif + +#include "otsuthr.h" +#include "thresholder.h" +#include "tprintf.h" // for tprintf + +#include <allheaders.h> +#include <tesseract/baseapi.h> // for api->GetIntVariable() + +#include <algorithm> // for std::max, std::min +#include <cstdint> // for uint32_t +#include <cstring> +#include <tuple> + +namespace tesseract { + +ImageThresholder::ImageThresholder() + : pix_(nullptr) + , image_width_(0) + , image_height_(0) + , pix_channels_(0) + , pix_wpl_(0) + , scale_(1) + , yres_(300) + , estimated_res_(300) { + SetRectangle(0, 0, 0, 0); +} + +ImageThresholder::~ImageThresholder() { + Clear(); +} + +// Destroy the Pix if there is one, freeing memory. +void ImageThresholder::Clear() { + pix_.destroy(); +} + +// Return true if no image has been set. +bool ImageThresholder::IsEmpty() const { + return pix_ == nullptr; +} + +// SetImage makes a copy of all the image data, so it may be deleted +// immediately after this call. +// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. +// Palette color images will not work properly and must be converted to +// 24 bit. +// Binary images of 1 bit per pixel may also be given but they must be +// byte packed with the MSB of the first byte being the first pixel, and a +// one pixel is WHITE. For binary images set bytes_per_pixel=0. +void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line) { + int bpp = bytes_per_pixel * 8; + if (bpp == 0) { + bpp = 1; + } + Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); + l_uint32 *data = pixGetData(pix); + int wpl = pixGetWpl(pix); + switch (bpp) { + case 1: + for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x) { + if (imagedata[x / 8] & (0x80 >> (x % 8))) { + CLEAR_DATA_BIT(data, x); + } else { + SET_DATA_BIT(data, x); + } + } + } + break; + + case 8: + // Greyscale just copies the bytes in the right order. + for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x) { + SET_DATA_BYTE(data, x, imagedata[x]); + } + } + break; + + case 24: + // Put the colors in the correct places in the line buffer. + for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { + for (int x = 0; x < width; ++x, ++data) { + SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); + SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); + SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); + } + } + break; + + case 32: + // Maintain byte order consistency across different endianness. + for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) { + for (int x = 0; x < width; ++x) { + data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | + (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; + } + } + break; + + default: + tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); + } + SetImage(pix); + pix.destroy(); +} + +// Store the coordinates of the rectangle to process for later use. +// Doesn't actually do any thresholding. +void ImageThresholder::SetRectangle(int left, int top, int width, int height) { + rect_left_ = left; + rect_top_ = top; + rect_width_ = width; + rect_height_ = height; +} + +// Get enough parameters to be able to rebuild bounding boxes in the +// original image (not just within the rectangle). +// Left and top are enough with top-down coordinates, but +// the height of the rectangle and the image are needed for bottom-up. +void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, + int *imageheight) { + *left = rect_left_; + *top = rect_top_; + *width = rect_width_; + *height = rect_height_; + *imagewidth = image_width_; + *imageheight = image_height_; +} + +// Pix vs raw, which to use? Pix is the preferred input for efficiency, +// since raw buffers are copied. +// SetImage for Pix clones its input, so the source pix may be pixDestroyed +// immediately after, but may not go away until after the Thresholder has +// finished with it. +void ImageThresholder::SetImage(const Image pix) { + if (pix_ != nullptr) { + pix_.destroy(); + } + Image src = pix; + int depth; + pixGetDimensions(src, &image_width_, &image_height_, &depth); + // Convert the image as necessary so it is one of binary, plain RGB, or + // 8 bit with no colormap. Guarantee that we always end up with our own copy, + // not just a clone of the input. + if (depth > 1 && depth < 8) { + pix_ = pixConvertTo8(src, false); + } else { + pix_ = src.copy(); + } + depth = pixGetDepth(pix_); + pix_channels_ = depth / 8; + pix_wpl_ = pixGetWpl(pix_); + scale_ = 1; + estimated_res_ = yres_ = pixGetYRes(pix_); + Init(); +} + +std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold( + TessBaseAPI *api, + ThresholdMethod method) { + Image pix_binary = nullptr; + Image pix_thresholds = nullptr; + + if (pix_channels_ == 0) { + // We have a binary image, but it still has to be copied, as this API + // allows the caller to modify the output. + Image original = GetPixRect(); + pix_binary = original.copy(); + original.destroy(); + return std::make_tuple(true, nullptr, pix_binary, nullptr); + } + + auto pix_grey = GetPixRectGrey(); + + int r; + + l_int32 pix_w, pix_h; + pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr); + + bool thresholding_debug; + api->GetBoolVariable("thresholding_debug", &thresholding_debug); + if (thresholding_debug) { + tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_); + } + + if (method == ThresholdMethod::Sauvola) { + int window_size; + double window_size_factor; + api->GetDoubleVariable("thresholding_window_size", &window_size_factor); + window_size = window_size_factor * yres_; + window_size = std::max(7, window_size); + window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size); + int half_window_size = window_size / 2; + + // factor for image division into tiles; >= 1 + l_int32 nx, ny; + // tiles size will be approx. 250 x 250 pixels + nx = std::max(1, (pix_w + 125) / 250); + ny = std::max(1, (pix_h + 125) / 250); + auto xrat = pix_w / nx; + auto yrat = pix_h / ny; + if (xrat < half_window_size + 2) { + nx = pix_w / (half_window_size + 2); + } + if (yrat < half_window_size + 2) { + ny = pix_h / (half_window_size + 2); + } + + double kfactor; + api->GetDoubleVariable("thresholding_kfactor", &kfactor); + kfactor = std::max(0.0, kfactor); + + if (thresholding_debug) { + tprintf("window size: %d kfactor: %.3f nx:%d ny: %d\n", window_size, kfactor, nx, ny); + } + + r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny, + (PIX**)pix_thresholds, + (PIX**)pix_binary); + } else { // if (method == ThresholdMethod::LeptonicaOtsu) + int tile_size; + double tile_size_factor; + api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor); + tile_size = tile_size_factor * yres_; + tile_size = std::max(16, tile_size); + + int smooth_size; + double smooth_size_factor; + api->GetDoubleVariable("thresholding_smooth_kernel_size", + &smooth_size_factor); + smooth_size_factor = std::max(0.0, smooth_size_factor); + smooth_size = smooth_size_factor * yres_; + int half_smooth_size = smooth_size / 2; + + double score_fraction; + api->GetDoubleVariable("thresholding_score_fraction", &score_fraction); + + if (thresholding_debug) { + tprintf("tile size: %d smooth_size: %d score_fraction: %.2f\n", tile_size, smooth_size, score_fraction); + } + + r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size, + half_smooth_size, half_smooth_size, + score_fraction, + (PIX**)pix_thresholds, + (PIX**)pix_binary); + } + + bool ok = (r == 0); + return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds); +} + +// Threshold the source image as efficiently as possible to the output Pix. +// Creates a Pix and sets pix to point to the resulting pointer. +// Caller must use pixDestroy to free the created Pix. +/// Returns false on error. +bool ImageThresholder::ThresholdToPix(Image *pix) { + if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) { + tprintf("Image too large: (%d, %d)\n", image_width_, image_height_); + return false; + } + Image original = GetPixRect(); + if (pix_channels_ == 0) { + // We have a binary image, but it still has to be copied, as this API + // allows the caller to modify the output. + *pix = original.copy(); + } else { + if (pixGetColormap(original)) { + Image tmp; + Image without_cmap = + pixRemoveColormap(original, REMOVE_CMAP_BASED_ON_SRC); + int depth = pixGetDepth(without_cmap); + if (depth > 1 && depth < 8) { + tmp = pixConvertTo8(without_cmap, false); + } else { + tmp = without_cmap.copy(); + } + without_cmap.destroy(); + OtsuThresholdRectToPix(tmp, pix); + tmp.destroy(); + } else { + OtsuThresholdRectToPix(pix_, pix); + } + } + original.destroy(); + return true; +} + +// Gets a pix that contains an 8 bit threshold value at each pixel. The +// returned pix may be an integer reduction of the binary image such that +// the scale factor may be inferred from the ratio of the sizes, even down +// to the extreme of a 1x1 pixel thresholds image. +// Ideally the 8 bit threshold should be the exact threshold used to generate +// the binary image in ThresholdToPix, but this is not a hard constraint. +// Returns nullptr if the input is binary. PixDestroy after use. +Image ImageThresholder::GetPixRectThresholds() { + if (IsBinary()) { + return nullptr; + } + Image pix_grey = GetPixRectGrey(); + int width = pixGetWidth(pix_grey); + int height = pixGetHeight(pix_grey); + std::vector<int> thresholds; + std::vector<int> hi_values; + OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values); + pix_grey.destroy(); + Image pix_thresholds = pixCreate(width, height, 8); + int threshold = thresholds[0] > 0 ? thresholds[0] : 128; + pixSetAllArbitrary(pix_thresholds, threshold); + return pix_thresholds; +} + +// Common initialization shared between SetImage methods. +void ImageThresholder::Init() { + SetRectangle(0, 0, image_width_, image_height_); +} + +// Get a clone/copy of the source image rectangle. +// The returned Pix must be pixDestroyed. +// This function will be used in the future by the page layout analysis, and +// the layout analysis that uses it will only be available with Leptonica, +// so there is no raw equivalent. +Image ImageThresholder::GetPixRect() { + if (IsFullImage()) { + // Just clone the whole thing. + return pix_.clone(); + } else { + // Crop to the given rectangle. + Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); + Image cropped = pixClipRectangle(pix_, box, nullptr); + boxDestroy(&box); + return cropped; + } +} + +// Get a clone/copy of the source image rectangle, reduced to greyscale, +// and at the same resolution as the output binary. +// The returned Pix must be pixDestroyed. +// Provided to the classifier to extract features from the greyscale image. +Image ImageThresholder::GetPixRectGrey() { + auto pix = GetPixRect(); // May have to be reduced to grey. + int depth = pixGetDepth(pix); + if (depth != 8 || pixGetColormap(pix)) { + if (depth == 24) { + auto tmp = pixConvert24To32(pix); + pix.destroy(); + pix = tmp; + } + auto result = pixConvertTo8(pix, false); + pix.destroy(); + return result; + } + return pix; +} + +// Otsu thresholds the rectangle, taking the rectangle from *this. +void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const { + std::vector<int> thresholds; + std::vector<int> hi_values; + + int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_, + thresholds, hi_values); + ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); +} + +/// Threshold the rectangle, taking everything except the src_pix +/// from the class, using thresholds/hi_values to the output pix. +/// NOTE that num_channels is the size of the thresholds and hi_values +// arrays and also the bytes per pixel in src_pix. +void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds, + const std::vector<int> &hi_values, Image *pix) const { + *pix = pixCreate(rect_width_, rect_height_, 1); + uint32_t *pixdata = pixGetData(*pix); + int wpl = pixGetWpl(*pix); + int src_wpl = pixGetWpl(src_pix); + uint32_t *srcdata = pixGetData(src_pix); + pixSetXRes(*pix, pixGetXRes(src_pix)); + pixSetYRes(*pix, pixGetYRes(src_pix)); + for (int y = 0; y < rect_height_; ++y) { + const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl; + uint32_t *pixline = pixdata + y * wpl; + for (int x = 0; x < rect_width_; ++x) { + bool white_result = true; + for (int ch = 0; ch < num_channels; ++ch) { + int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch); + if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) { + white_result = false; + break; + } + } + if (white_result) { + CLEAR_DATA_BIT(pixline, x); + } else { + SET_DATA_BIT(pixline, x); + } + } + } +} + +} // namespace tesseract.
