Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/alignedblob.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/alignedblob.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,545 @@ +/////////////////////////////////////////////////////////////////////// +// File: alignedblob.cpp +// Description: Subclass of BBGrid to find vertically aligned blobs. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif + +#include "alignedblob.h" + +#include <algorithm> + +namespace tesseract { + +INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); +INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); +static INT_VAR(textord_testregion_left, -1, + "Left edge of debug reporting rectangle in Leptonica coords " + "(bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_top, INT32_MAX, + "Top edge of debug reporting rectangle in Leptonica coords " + "(bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_right, INT32_MAX, + "Right edge of debug rectangle in Leptonica coords " + "(bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_bottom, -1, + "Bottom edge of debug rectangle in Leptonica coords " + "(bottom=0/top=height), with horizontal lines x/y-flipped"); +BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); + +// Fraction of resolution used as alignment tolerance for aligned tabs. +const double kAlignedFraction = 0.03125; +// Fraction of resolution used as alignment tolerance for ragged tabs. +const double kRaggedFraction = 2.5; +// Fraction of height used as a minimum gutter gap for aligned blobs. +const double kAlignedGapFraction = 0.75; +// Fraction of height used as a minimum gutter gap for ragged tabs. +const double kRaggedGapFraction = 1.0; +// Constant number of pixels used as alignment tolerance for line finding. +const int kVLineAlignment = 3; +// Constant number of pixels used as gutter gap tolerance for line finding. +const int kVLineGutter = 1; +// Constant number of pixels used as the search size for line finding. +const int kVLineSearchSize = 150; +// Min number of points to accept for a ragged tab stop. +const int kMinRaggedTabs = 5; +// Min number of points to accept for an aligned tab stop. +const int kMinAlignedTabs = 4; +// Constant number of pixels minimum height of a vertical line. +const int kVLineMinLength = 300; +// Minimum gradient for a vertical tab vector. Used to prune away junk +// tab vectors with what would be a ridiculously large skew angle. +// Value corresponds to tan(90 - max allowed skew angle) +const double kMinTabGradient = 4.0; +// Tolerance to skew on top of current estimate of skew. Divide x or y length +// by kMaxSkewFactor to get the y or x skew distance. +// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. +const int kMaxSkewFactor = 15; + +// Constructor to set the parameters for finding aligned and ragged tabs. +// Vertical_x and vertical_y are the current estimates of the true vertical +// direction (up) in the image. Height is the height of the starter blob. +// v_gap_multiple is the multiple of height that will be used as a limit +// on vertical gap before giving up and calling the line ended. +// resolution is the original image resolution, and align0 indicates the +// type of tab stop to be found. +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int height, int v_gap_multiple, + int min_gutter_width, int resolution, TabAlignment align0) + : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED) + , ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED) + , alignment(align0) + , confirmed_type(TT_CONFIRMED) + , min_length(0) { + // Set the tolerances according to the type of line sought. + // For tab search, these are based on the image resolution for most, or + // the height of the starting blob for the maximum vertical gap. + max_v_gap = height * v_gap_multiple; + if (ragged) { + // In the case of a ragged edge, we are much more generous with the + // inside alignment fraction, but also require a much bigger gutter. + gutter_fraction = kRaggedGapFraction; + if (alignment == TA_RIGHT_RAGGED) { + l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + } else { + l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); + } + min_points = kMinRaggedTabs; + } else { + gutter_fraction = kAlignedGapFraction; + l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + min_points = kMinAlignedTabs; + } + min_gutter = static_cast<int>(height * gutter_fraction + 0.5); + if (min_gutter < min_gutter_width) { + min_gutter = min_gutter_width; + } + // Fit the vertical vector into an ICOORD, which is 16 bit. + set_vertical(vertical_x, vertical_y); +} + +// Constructor to set the parameters for finding vertical lines. +// Vertical_x and vertical_y are the current estimates of the true vertical +// direction (up) in the image. Width is the width of the starter blob. +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int width) + : gutter_fraction(0.0) + , right_tab(false) + , ragged(false) + , alignment(TA_SEPARATOR) + , confirmed_type(TT_VLINE) + , max_v_gap(kVLineSearchSize) + , min_gutter(kVLineGutter) + , min_points(1) + , min_length(kVLineMinLength) { + // Compute threshold for left and right alignment. + l_align_tolerance = std::max(kVLineAlignment, width); + r_align_tolerance = std::max(kVLineAlignment, width); + + // Fit the vertical vector into an ICOORD, which is 16 bit. + set_vertical(vertical_x, vertical_y); +} + +// Fit the vertical vector into an ICOORD, which is 16 bit. +void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { + int factor = 1; + if (vertical_y > INT16_MAX) { + factor = vertical_y / INT16_MAX + 1; + } + vertical.set_x(vertical_x / factor); + vertical.set_y(vertical_y / factor); +} + +AlignedBlob::AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright) + : BlobGrid(gridsize, bleft, tright) {} + +// Return true if the given coordinates are within the test rectangle +// and the debug level is at least the given detail level. +bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { + if (textord_debug_tabfind < detail_level) { + return false; + } + return x >= textord_testregion_left && x <= textord_testregion_right && + y <= textord_testregion_top && y >= textord_testregion_bottom; +} + +#ifndef GRAPHICS_DISABLED + +// Display the tab codes of the BLOBNBOXes in this grid. +ScrollView *AlignedBlob::DisplayTabs(const char *window_name, ScrollView *tab_win) { + if (tab_win == nullptr) { + tab_win = MakeWindow(0, 50, window_name); + } + // For every tab in the grid, display it. + BlobGridSearch gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX *bbox; + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + const TBOX &box = bbox->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + TabType tabtype = bbox->left_tab_type(); + if (tabtype != TT_NONE) { + if (tabtype == TT_MAYBE_ALIGNED) { + tab_win->Pen(ScrollView::BLUE); + } else if (tabtype == TT_MAYBE_RAGGED) { + tab_win->Pen(ScrollView::YELLOW); + } else if (tabtype == TT_CONFIRMED) { + tab_win->Pen(ScrollView::GREEN); + } else { + tab_win->Pen(ScrollView::GREY); + } + tab_win->Line(left_x, top_y, left_x, bottom_y); + } + tabtype = bbox->right_tab_type(); + if (tabtype != TT_NONE) { + if (tabtype == TT_MAYBE_ALIGNED) { + tab_win->Pen(ScrollView::MAGENTA); + } else if (tabtype == TT_MAYBE_RAGGED) { + tab_win->Pen(ScrollView::ORANGE); + } else if (tabtype == TT_CONFIRMED) { + tab_win->Pen(ScrollView::RED); + } else { + tab_win->Pen(ScrollView::GREY); + } + tab_win->Line(right_x, top_y, right_x, bottom_y); + } + } + tab_win->Update(); + return tab_win; +} + +#endif // !GRAPHICS_DISABLED + +// Helper returns true if the total number of line_crossings of all the blobs +// in the list is at least 2. +static bool AtLeast2LineCrossings(BLOBNBOX_CLIST *blobs) { + BLOBNBOX_C_IT it(blobs); + int total_crossings = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + total_crossings += it.data()->line_crossings(); + } + return total_crossings >= 2; +} + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +AlignedBlob::~AlignedBlob() = default; + +// Finds a vector corresponding to a set of vertically aligned blob edges +// running through the given box. The type of vector returned and the +// search parameters are determined by the AlignedBlobParams. +// vertical_x and y are updated with an estimate of the real +// vertical direction. (skew finding.) +// Returns nullptr if no decent vector can be found. +TabVector *AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, + int *vertical_x, int *vertical_y) { + int ext_start_y, ext_end_y; + BLOBNBOX_CLIST good_points; + // Search up and then down from the starting bbox. + TBOX box = bbox->bounding_box(); + bool debug = WithinTestRegion(2, box.left(), box.bottom()); + int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); + pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); + BLOBNBOX_C_IT it(&good_points); + it.move_to_last(); + box = it.data()->bounding_box(); + int end_y = box.top(); + int end_x = align_params.right_tab ? box.right() : box.left(); + it.move_to_first(); + box = it.data()->bounding_box(); + int start_x = align_params.right_tab ? box.right() : box.left(); + int start_y = box.bottom(); + // Acceptable tab vectors must have a minimum number of points, + // have a minimum acceptable length, and have a minimum gradient. + // The gradient corresponds to the skew angle. + // Ragged tabs don't need to satisfy the gradient condition, as they + // will always end up parallel to the vertical direction. + bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); + if ((pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length && + (align_params.ragged || end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || + at_least_2_crossings) { + int confirmed_points = 0; + // Count existing confirmed points to see if vector is acceptable. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (align_params.right_tab) { + if (bbox->right_tab_type() == align_params.confirmed_type) { + ++confirmed_points; + } + } else { + if (bbox->left_tab_type() == align_params.confirmed_type) { + ++confirmed_points; + } + } + } + // Ragged vectors are not allowed to use too many already used points. + if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { + const TBOX &box = bbox->bounding_box(); + if (debug) { + tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), + box.bottom()); + } + // Flag all the aligned neighbours as confirmed . + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (align_params.right_tab) { + bbox->set_right_tab_type(align_params.confirmed_type); + } else { + bbox->set_left_tab_type(align_params.confirmed_type); + } + if (debug) { + bbox->bounding_box().print(); + } + } + // Now make the vector and return it. + TabVector *result = + TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, + ext_end_y, &good_points, vertical_x, vertical_y); + result->set_intersects_other_lines(at_least_2_crossings); + if (debug) { + tprintf("Box was %d, %d\n", box.left(), box.bottom()); + result->Print("After fitting"); + } + return result; + } else if (debug) { + tprintf("Ragged tab used too many used points: %d out of %d\n", confirmed_points, pt_count); + } + } else if (debug) { + tprintf( + "Tab vector failed basic tests: pt count %d vs min %d, " + "length %d vs min %d, min grad %g\n", + pt_count, align_params.min_points, end_y - start_y, align_params.min_length, + abs(end_x - start_x) * kMinTabGradient); + } + return nullptr; +} + +// Find a set of blobs that are aligned in the given vertical +// direction with the given blob. Returns a list of aligned +// blobs and the number in the list. +// For other parameters see FindAlignedBlob below. +int AlignedBlob::AlignTabs(const AlignedBlobParams ¶ms, bool top_to_bottom, BLOBNBOX *bbox, + BLOBNBOX_CLIST *good_points, int *end_y) { + int ptcount = 0; + BLOBNBOX_C_IT it(good_points); + + TBOX box = bbox->bounding_box(); + bool debug = WithinTestRegion(2, box.left(), box.bottom()); + if (debug) { + tprintf("Starting alignment run at blob:"); + box.print(); + } + int x_start = params.right_tab ? box.right() : box.left(); + while (bbox != nullptr) { + // Add the blob to the list if the appropriate side is a tab candidate, + // or if we are working on a ragged tab. + TabType type = params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); + if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && + (it.empty() || it.data() != bbox)) { + if (top_to_bottom) { + it.add_before_then_move(bbox); + } else { + it.add_after_then_move(bbox); + } + ++ptcount; + } + // Find the next blob that is aligned with the current one. + // FindAlignedBlob guarantees that forward progress will be made in the + // top_to_bottom direction, and therefore eventually it will return nullptr, + // making this while (bbox != nullptr) loop safe. + bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); + if (bbox != nullptr) { + box = bbox->bounding_box(); + if (!params.ragged) { + x_start = params.right_tab ? box.right() : box.left(); + } + } + } + if (debug) { + tprintf("Alignment run ended with %d pts at blob:", ptcount); + box.print(); + } + return ptcount; +} + +// Search vertically for a blob that is aligned with the input bbox. +// The search parameters are determined by AlignedBlobParams. +// top_to_bottom tells whether to search down or up. +// The return value is nullptr if nothing was found in the search box +// or if a blob was found in the gutter. On a nullptr return, end_y +// is set to the edge of the search box or the leading edge of the +// gutter blob if one was found. +BLOBNBOX *AlignedBlob::FindAlignedBlob(const AlignedBlobParams &p, bool top_to_bottom, + BLOBNBOX *bbox, int x_start, int *end_y) { + TBOX box = bbox->bounding_box(); + // If there are separator lines, get the column edges. + int left_column_edge = bbox->left_rule(); + int right_column_edge = bbox->right_rule(); + // start_y is used to guarantee that forward progress is made and the + // search does not go into an infinite loop. New blobs must extend the + // line beyond start_y. + int start_y = top_to_bottom ? box.bottom() : box.top(); + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", box.left(), box.top(), + box.right(), box.bottom(), left_column_edge, right_column_edge); + } + // Compute skew tolerance. + int skew_tolerance = p.max_v_gap / kMaxSkewFactor; + // Calculate xmin and xmax of the search box so that it contains + // all possibly relevant boxes up to p.max_v_gap above or below according + // to top_to_bottom. + // Start with a notion of vertical with the current estimate. + int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y() / 2) / p.vertical.y(); + if (top_to_bottom) { + x2 = x_start - x2; + *end_y = start_y - p.max_v_gap; + } else { + x2 = x_start + x2; + *end_y = start_y + p.max_v_gap; + } + // Expand the box by an additional skew tolerance + int xmin = std::min(x_start, x2) - skew_tolerance; + int xmax = std::max(x_start, x2) + skew_tolerance; + // Now add direction-specific tolerances. + if (p.right_tab) { + xmax += p.min_gutter; + xmin -= p.l_align_tolerance; + } else { + xmax += p.r_align_tolerance; + xmin -= p.min_gutter; + } + // Setup a vertical search for an aligned blob. + BlobGridSearch vsearch(this); + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", + p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", xmin, xmax, start_y, + p.max_v_gap, p.min_gutter); + } + vsearch.StartVerticalSearch(xmin, xmax, start_y); + // result stores the best real return value. + BLOBNBOX *result = nullptr; + // The backup_result is not a tab candidate and can be used if no + // real tab candidate result is found. + BLOBNBOX *backup_result = nullptr; + // neighbour is the blob that is currently being investigated. + BLOBNBOX *neighbour = nullptr; + while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { + if (neighbour == bbox) { + continue; + } + TBOX nbox = neighbour->bounding_box(); + int n_y = (nbox.top() + nbox.bottom()) / 2; + if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || + (top_to_bottom && n_y < start_y - p.max_v_gap)) { + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", nbox.left(), nbox.bottom(), nbox.right(), + nbox.top()); + } + break; // Gone far enough. + } + // It is CRITICAL to ensure that forward progress is made, (strictly + // in/decreasing n_y) or the caller could loop infinitely, while + // waiting for a sequence of blobs in a line to end. + // NextVerticalSearch alone does not guarantee this, as there may be + // more than one blob in a grid cell. See comment in AlignTabs. + if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) { + continue; // Only look in the required direction. + } + if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize()) { + return result; // This result is clear. + } + if (backup_result != nullptr && p.ragged && result == nullptr && + backup_result->bounding_box().y_gap(nbox) > gridsize()) { + return backup_result; // This result is clear. + } + + // If the neighbouring blob is the wrong side of a separator line, then it + // "doesn't exist" as far as we are concerned. + int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); + if (x_at_n_y < neighbour->left_crossing_rule() || x_at_n_y > neighbour->right_crossing_rule()) { + continue; // Separator line in the way. + } + int n_left = nbox.left(); + int n_right = nbox.right(); + int n_x = p.right_tab ? n_right : n_left; + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", nbox.left(), + nbox.bottom(), nbox.right(), nbox.top(), n_x, n_y, x_at_n_y); + } + if (p.right_tab && n_left < x_at_n_y + p.min_gutter && + n_right > x_at_n_y + p.r_align_tolerance && + (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { + // In the gutter so end of line. + if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) { + bbox->set_right_tab_type(TT_DELETED); + } + *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("gutter\n"); + } + return nullptr; + } + if (!p.right_tab && n_left < x_at_n_y - p.l_align_tolerance && + n_right > x_at_n_y - p.min_gutter && + (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { + // In the gutter so end of line. + if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) { + bbox->set_left_tab_type(TT_DELETED); + } + *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("gutter\n"); + } + return nullptr; + } + if ((p.right_tab && neighbour->leader_on_right()) || + (!p.right_tab && neighbour->leader_on_left())) { + continue; // Neighbours of leaders are not allowed to be used. + } + if (n_x <= x_at_n_y + p.r_align_tolerance && n_x >= x_at_n_y - p.l_align_tolerance) { + // Aligned so keep it. If it is a marked tab save it as result, + // otherwise keep it as backup_result to return in case of later failure. + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("aligned, seeking%d, l=%d, r=%d\n", p.right_tab, neighbour->left_tab_type(), + neighbour->right_tab_type()); + } + TabType n_type = p.right_tab ? neighbour->right_tab_type() : neighbour->left_tab_type(); + if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { + if (result == nullptr) { + result = neighbour; + } else { + // Keep the closest neighbour by Euclidean distance. + // This prevents it from picking a tab blob in another column. + const TBOX &old_box = result->bounding_box(); + int x_diff = p.right_tab ? old_box.right() : old_box.left(); + x_diff -= x_at_n_y; + int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; + int old_dist = x_diff * x_diff + y_diff * y_diff; + x_diff = n_x - x_at_n_y; + y_diff = n_y - start_y; + int new_dist = x_diff * x_diff + y_diff * y_diff; + if (new_dist < old_dist) { + result = neighbour; + } + } + } else if (backup_result == nullptr) { + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Backup\n"); + } + backup_result = neighbour; + } else { + TBOX backup_box = backup_result->bounding_box(); + if ((p.right_tab && backup_box.right() < nbox.right()) || + (!p.right_tab && backup_box.left() > nbox.left())) { + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Better backup\n"); + } + backup_result = neighbour; + } + } + } + } + return result != nullptr ? result : backup_result; +} + +} // namespace tesseract.
