Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/ccstruct/blobbox.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/ccstruct/blobbox.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,1082 @@ +/********************************************************************** + * File: blobbox.cpp (Formerly blobnbox.c) + * Description: Code for the textord blob class. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif + +#include "blobbox.h" +#include "blobs.h" // for TPOINT +#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE, C_OUTLINE_LIST +#include "environ.h" // for l_uint32 +#include "host.h" // for NearlyEqual +#include "points.h" // for operator+=, ICOORD::rotate + +#include "helpers.h" // for UpdateRange, IntCastRounded + +#include <allheaders.h> // for pixGetHeight, pixGetPixel + +#include <algorithm> // for max, min +#include <cmath> +#include <cstdint> // for INT32_MAX, INT16_MAX + +#define PROJECTION_MARGIN 10 // arbitrary + +namespace tesseract { + +// Up to 30 degrees is allowed for rotations of diacritic blobs. +const double kCosSmallAngle = 0.866; +// Min aspect ratio for a joined word to indicate an obvious flow direction. +const double kDefiniteAspectRatio = 2.0; +// Multiple of short length in perimeter to make a joined word. +const double kComplexShapePerimeterRatio = 1.5; +// Min multiple of linesize for medium-sized blobs in ReFilterBlobs. +const double kMinMediumSizeRatio = 0.25; +// Max multiple of linesize for medium-sized blobs in ReFilterBlobs. +const double kMaxMediumSizeRatio = 4.0; + +// Rotates the box and the underlying blob. +void BLOBNBOX::rotate(FCOORD rotation) { + cblob_ptr->rotate(rotation); + rotate_box(rotation); + compute_bounding_box(); +} + +// Reflect the box in the y-axis, leaving the underlying blob untouched. +void BLOBNBOX::reflect_box_in_y_axis() { + int left = -box.right(); + box.set_right(-box.left()); + box.set_left(left); +} + +// Rotates the box by the angle given by rotation. +// If the blob is a diacritic, then only small rotations for skew +// correction can be applied. +void BLOBNBOX::rotate_box(FCOORD rotation) { + if (IsDiacritic()) { + ASSERT_HOST(rotation.x() >= kCosSmallAngle); + ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); + ICOORD bottom_pt(top_pt.x(), base_char_bottom_); + top_pt.rotate(rotation); + base_char_top_ = top_pt.y(); + bottom_pt.rotate(rotation); + base_char_bottom_ = bottom_pt.y(); + box.rotate(rotation); + } else { + box.rotate(rotation); + set_diacritic_box(box); + } +} + +/********************************************************************** + * BLOBNBOX::merge + * + * Merge this blob with the given blob, which should be after this. + **********************************************************************/ +void BLOBNBOX::merge( // merge blobs + BLOBNBOX *nextblob // blob to join with +) { + box += nextblob->box; // merge boxes + set_diacritic_box(box); + nextblob->joined = true; +} + +// Merge this with other, taking the outlines from other. +// Other is not deleted, but left for the caller to handle. +void BLOBNBOX::really_merge(BLOBNBOX *other) { + if (other->cblob_ptr != nullptr) { + C_OUTLINE_IT ol_it(cblob_ptr->out_list()); + ol_it.add_list_after(other->cblob_ptr->out_list()); + } + compute_bounding_box(); +} + +/********************************************************************** + * BLOBNBOX::chop + * + * Chop this blob into equal sized pieces using the x height as a guide. + * The blob is not actually chopped. Instead, fake blobs are inserted + * with the relevant bounding boxes. + **********************************************************************/ + +void BLOBNBOX::chop( // chop blobs + BLOBNBOX_IT *start_it, // location of this + BLOBNBOX_IT *end_it, // iterator + FCOORD rotation, // for landscape + float xheight // of line +) { + int16_t blobcount; // no of blobs + BLOBNBOX *newblob; // fake blob + BLOBNBOX *blob; // current blob + int16_t blobindex; // number of chop + int16_t leftx; // left edge of blob + float blobwidth; // width of each + float rightx; // right edge to scan + float ymin, ymax; // limits of new blob + float test_ymin, test_ymax; // limits of part blob + ICOORD bl, tr; // corners of box + BLOBNBOX_IT blob_it; // blob iterator + + // get no of chops + blobcount = static_cast<int16_t>(std::floor(box.width() / xheight)); + if (blobcount > 1 && cblob_ptr != nullptr) { + // width of each + blobwidth = static_cast<float>(box.width() + 1) / blobcount; + for (blobindex = blobcount - 1, rightx = box.right(); blobindex >= 0; + blobindex--, rightx -= blobwidth) { + ymin = static_cast<float>(INT32_MAX); + ymax = static_cast<float>(-INT32_MAX); + blob_it = *start_it; + do { + blob = blob_it.data(); + find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, rightx, + /*rotation, */ test_ymin, test_ymax); + blob_it.forward(); + UpdateRange(test_ymin, test_ymax, &ymin, &ymax); + } while (blob != end_it->data()); + if (ymin < ymax) { + leftx = static_cast<int16_t>(std::floor(rightx - blobwidth)); + if (leftx < box.left()) { + leftx = box.left(); // clip to real box + } + bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin))); + tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax))); + if (blobindex == 0) { + box = TBOX(bl, tr); // change box + } else { + newblob = new BLOBNBOX; + // box is all it has + newblob->box = TBOX(bl, tr); + // stay on current + newblob->base_char_top_ = tr.y(); + newblob->base_char_bottom_ = bl.y(); + end_it->add_after_stay_put(newblob); + } + } + } + } +} + +// Returns the box gaps between this and its neighbours_ in an array +// indexed by BlobNeighbourDir. +void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { + for (int dir = 0; dir < BND_COUNT; ++dir) { + gaps[dir] = INT16_MAX; + BLOBNBOX *neighbour = neighbours_[dir]; + if (neighbour != nullptr) { + const TBOX &n_box = neighbour->bounding_box(); + if (dir == BND_LEFT || dir == BND_RIGHT) { + gaps[dir] = box.x_gap(n_box); + } else { + gaps[dir] = box.y_gap(n_box); + } + } + } +} +// Returns the min and max horizontal and vertical gaps (from NeighbourGaps) +// modified so that if the max exceeds the max dimension of the blob, and +// the min is less, the max is replaced with the min. +// The objective is to catch cases where there is only a single neighbour +// and avoid reporting the other gap as a ridiculously large number +void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const { + int max_dimension = std::max(box.width(), box.height()); + int gaps[BND_COUNT]; + NeighbourGaps(gaps); + *h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]); + *h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]); + if (*h_max > max_dimension && *h_min < max_dimension) { + *h_max = *h_min; + } + *v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]); + *v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]); + if (*v_max > max_dimension && *v_min < max_dimension) { + *v_max = *v_min; + } +} + +// Nulls out any neighbours that are DeletableNoise to remove references. +void BLOBNBOX::CleanNeighbours() { + for (int dir = 0; dir < BND_COUNT; ++dir) { + BLOBNBOX *neighbour = neighbours_[dir]; + if (neighbour != nullptr && neighbour->DeletableNoise()) { + neighbours_[dir] = nullptr; + good_stroke_neighbours_[dir] = false; + } + } +} + +// Returns positive if there is at least one side neighbour that has a similar +// stroke width and is not on the other side of a rule line. +int BLOBNBOX::GoodTextBlob() const { + int score = 0; + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + if (good_stroke_neighbour(bnd)) { + ++score; + } + } + return score; +} + +// Returns the number of side neighbours that are of type BRT_NOISE. +int BLOBNBOX::NoisyNeighbours() const { + int count = 0; + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + BLOBNBOX *blob = neighbour(bnd); + if (blob != nullptr && blob->region_type() == BRT_NOISE) { + ++count; + } + } + return count; +} + +// Returns true, and sets vert_possible/horz_possible if the blob has some +// feature that makes it individually appear to flow one way. +// eg if it has a high aspect ratio, yet has a complex shape, such as a +// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. +bool BLOBNBOX::DefiniteIndividualFlow() { + if (cblob() == nullptr) { + return false; + } + int box_perimeter = 2 * (box.height() + box.width()); + if (box.width() > box.height() * kDefiniteAspectRatio) { + // Attempt to distinguish a wide joined word from a dash. + // If it is a dash, then its perimeter is approximately + // 2 * (box width + stroke width), but more if the outline is noisy, + // so perimeter - 2*(box width + stroke width) should be close to zero. + // A complex shape such as a joined word should have a much larger value. + int perimeter = cblob()->perimeter(); + if (vert_stroke_width() > 0 || perimeter <= 0) { + perimeter -= 2 * vert_stroke_width(); + } else { + perimeter -= 4 * cblob()->area() / perimeter; + } + perimeter -= 2 * box.width(); + // Use a multiple of the box perimeter as a threshold. + if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { + set_vert_possible(false); + set_horz_possible(true); + return true; + } + } + if (box.height() > box.width() * kDefiniteAspectRatio) { + // As above, but for a putative vertical word vs a I/1/l. + int perimeter = cblob()->perimeter(); + if (horz_stroke_width() > 0 || perimeter <= 0) { + perimeter -= 2 * horz_stroke_width(); + } else { + perimeter -= 4 * cblob()->area() / perimeter; + } + perimeter -= 2 * box.height(); + if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { + set_vert_possible(true); + set_horz_possible(false); + return true; + } + } + return false; +} + +// Returns true if there is no tabstop violation in merging this and other. +bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const { + if (box.left() < other.box.left() && box.left() < other.left_rule_) { + return false; + } + if (other.box.left() < box.left() && other.box.left() < left_rule_) { + return false; + } + if (box.right() > other.box.right() && box.right() > other.right_rule_) { + return false; + } + if (other.box.right() > box.right() && other.box.right() > right_rule_) { + return false; + } + return true; +} + +// Returns true if other has a similar stroke width to this. +bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, + double constant_tolerance) const { + // The perimeter-based width is used as a backup in case there is + // no information in the blob. + double p_width = area_stroke_width(); + double n_p_width = other.area_stroke_width(); + float h_tolerance = horz_stroke_width_ * fractional_tolerance + constant_tolerance; + float v_tolerance = vert_stroke_width_ * fractional_tolerance + constant_tolerance; + double p_tolerance = p_width * fractional_tolerance + constant_tolerance; + bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; + bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; + bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, other.horz_stroke_width_, h_tolerance); + bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, other.vert_stroke_width_, v_tolerance); + bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); + // For a match, at least one of the horizontal and vertical widths + // must match, and the other one must either match or be zero. + // Only if both are zero will we look at the perimeter metric. + return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); +} + +// Returns a bounding box of the outline contained within the +// given horizontal range. +TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { + FCOORD no_rotation(1.0f, 0.0f); + float top = box.top(); + float bottom = box.bottom(); + if (cblob_ptr != nullptr) { + find_cblob_limits(cblob_ptr, static_cast<float>(left), static_cast<float>(right), no_rotation, + bottom, top); + } + + if (top < bottom) { + top = box.top(); + bottom = box.bottom(); + } + FCOORD bot_left(left, bottom); + FCOORD top_right(right, top); + TBOX shrunken_box(bot_left); + TBOX shrunken_box2(top_right); + shrunken_box += shrunken_box2; + return shrunken_box; +} + +// Estimates and stores the baseline position based on the shape of the +// outline. +void BLOBNBOX::EstimateBaselinePosition() { + baseline_y_ = box.bottom(); // The default. + if (cblob_ptr == nullptr) { + return; + } + baseline_y_ = cblob_ptr->EstimateBaselinePosition(); +} + +// Helper to call CleanNeighbours on all blobs on the list. +void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST *blobs) { + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob_it.data()->CleanNeighbours(); + } +} + +// Helper to delete all the deletable blobs on the list. +void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) { + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX *blob = blob_it.data(); + if (blob->DeletableNoise()) { + delete blob->remove_cblob(); + delete blob_it.extract(); + } + } +} + +// Helper to compute edge offsets for all the blobs on the list. +// See coutln.h for an explanation of edge offsets. +void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) { + int grey_height = 0; + int thr_height = 0; + int scale_factor = 1; + if (thresholds != nullptr && grey != nullptr) { + grey_height = pixGetHeight(grey); + thr_height = pixGetHeight(thresholds); + scale_factor = IntCastRounded(static_cast<double>(grey_height) / thr_height); + } + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX *blob = blob_it.data(); + if (blob->cblob() != nullptr) { + // Get the threshold that applies to this blob. + l_uint32 threshold = 128; + if (thresholds != nullptr && grey != nullptr) { + const TBOX &box = blob->cblob()->bounding_box(); + // Transform the coordinates if required. + TPOINT pt((box.left() + box.right()) / 2, (box.top() + box.bottom()) / 2); + pixGetPixel(thresholds, pt.x / scale_factor, thr_height - 1 - pt.y / scale_factor, + &threshold); + } + blob->cblob()->ComputeEdgeOffsets(threshold, grey); + } + } +} + +#ifndef GRAPHICS_DISABLED +// Helper to draw all the blobs on the list in the given body_colour, +// with child outlines in the child_colour. +void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour, + ScrollView::Color child_colour, ScrollView *win) { + BLOBNBOX_IT it(list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(win, body_colour, child_colour); + } +} + +// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the +// given list in the given body_colour, with child outlines in the +// child_colour. +void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour, + ScrollView::Color child_colour, ScrollView *win) { + BLOBNBOX_IT it(list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX *blob = it.data(); + if (blob->DeletableNoise()) { + blob->plot(win, body_colour, child_colour); + } + } +} + +ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type) { + switch (region_type) { + case BRT_HLINE: + return ScrollView::BROWN; + case BRT_VLINE: + return ScrollView::DARK_GREEN; + case BRT_RECTIMAGE: + return ScrollView::RED; + case BRT_POLYIMAGE: + return ScrollView::ORANGE; + case BRT_UNKNOWN: + return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; + case BRT_VERT_TEXT: + if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) { + return ScrollView::GREEN; + } + if (flow_type == BTFT_CHAIN) { + return ScrollView::LIME_GREEN; + } + return ScrollView::YELLOW; + case BRT_TEXT: + if (flow_type == BTFT_STRONG_CHAIN) { + return ScrollView::BLUE; + } + if (flow_type == BTFT_TEXT_ON_IMAGE) { + return ScrollView::LIGHT_BLUE; + } + if (flow_type == BTFT_CHAIN) { + return ScrollView::MEDIUM_BLUE; + } + if (flow_type == BTFT_LEADER) { + return ScrollView::WHEAT; + } + if (flow_type == BTFT_NONTEXT) { + return ScrollView::PINK; + } + return ScrollView::MAGENTA; + default: + return ScrollView::GREY; + } +} + +// Keep in sync with BlobRegionType. +ScrollView::Color BLOBNBOX::BoxColor() const { + return TextlineColor(region_type_, flow_); +} + +void BLOBNBOX::plot(ScrollView *window, // window to draw in + ScrollView::Color blob_colour, // for outer bits + ScrollView::Color child_colour) { // for holes + if (cblob_ptr != nullptr) { + cblob_ptr->plot(window, blob_colour, child_colour); + } +} +#endif +/********************************************************************** + * find_cblob_limits + * + * Scan the outlines of the cblob to locate the y min and max + * between the given x limits. + **********************************************************************/ + +void find_cblob_limits( // get y limits + C_BLOB *blob, // blob to search + float leftx, // x limits + float rightx, + FCOORD rotation, // for landscape + float &ymin, // output y limits + float &ymax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE *outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + ymin = static_cast<float>(INT32_MAX); + ymax = static_cast<float>(-INT32_MAX); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + pos.rotate(rotation); + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.x() >= leftx && pos.x() <= rightx) { + UpdateRange(pos.y(), &ymin, &ymax); + } + vec = outline->step(stepindex); + vec.rotate(rotation); + pos += vec; // move to next + } + } +} + +/********************************************************************** + * find_cblob_vlimits + * + * Scan the outlines of the cblob to locate the y min and max + * between the given x limits. + **********************************************************************/ + +void find_cblob_vlimits( // get y limits + C_BLOB *blob, // blob to search + float leftx, // x limits + float rightx, + float &ymin, // output y limits + float &ymax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE *outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + ymin = static_cast<float>(INT32_MAX); + ymax = static_cast<float>(-INT32_MAX); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.x() >= leftx && pos.x() <= rightx) { + UpdateRange(pos.y(), &ymin, &ymax); + } + vec = outline->step(stepindex); + pos += vec; // move to next + } + } +} + +/********************************************************************** + * find_cblob_hlimits + * + * Scan the outlines of the cblob to locate the x min and max + * between the given y limits. + **********************************************************************/ + +void find_cblob_hlimits( // get x limits + C_BLOB *blob, // blob to search + float bottomy, // y limits + float topy, + float &xmin, // output x limits + float &xmax) { + int16_t stepindex; // current point + ICOORD pos; // current coords + ICOORD vec; // rotated step + C_OUTLINE *outline; // current outline + // outlines + C_OUTLINE_IT out_it = blob->out_list(); + + xmin = static_cast<float>(INT32_MAX); + xmax = static_cast<float>(-INT32_MAX); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.data(); + pos = outline->start_pos(); // get coords + for (stepindex = 0; stepindex < outline->pathlength(); stepindex++) { + // inside + if (pos.y() >= bottomy && pos.y() <= topy) { + UpdateRange(pos.x(), &xmin, &xmax); + } + vec = outline->step(stepindex); + pos += vec; // move to next + } + } +} + +/********************************************************************** + * crotate_cblob + * + * Rotate the copy by the given vector and return a C_BLOB. + **********************************************************************/ + +C_BLOB *crotate_cblob( // rotate it + C_BLOB *blob, // blob to search + FCOORD rotation // for landscape +) { + C_OUTLINE_LIST out_list; // output outlines + // input outlines + C_OUTLINE_IT in_it = blob->out_list(); + // output outlines + C_OUTLINE_IT out_it = &out_list; + + for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) { + out_it.add_after_then_move(new C_OUTLINE(in_it.data(), rotation)); + } + return new C_BLOB(&out_list); +} + +/********************************************************************** + * box_next + * + * Compute the bounding box of this blob with merging of x overlaps + * but no pre-chopping. + * Then move the iterator on to the start of the next blob. + **********************************************************************/ + +TBOX box_next( // get bounding box + BLOBNBOX_IT *it // iterator to blobds +) { + BLOBNBOX *blob; // current blob + TBOX result; // total box + + blob = it->data(); + result = blob->bounding_box(); + do { + it->forward(); + blob = it->data(); + if (blob->cblob() == nullptr) { + // was pre-chopped + result += blob->bounding_box(); + } + } + // until next real blob + while ((blob->cblob() == nullptr) || blob->joined_to_prev()); + return result; +} + +/********************************************************************** + * box_next_pre_chopped + * + * Compute the bounding box of this blob with merging of x overlaps + * but WITH pre-chopping. + * Then move the iterator on to the start of the next pre-chopped blob. + **********************************************************************/ + +TBOX box_next_pre_chopped( // get bounding box + BLOBNBOX_IT *it // iterator to blobds +) { + BLOBNBOX *blob; // current blob + TBOX result; // total box + + blob = it->data(); + result = blob->bounding_box(); + do { + it->forward(); + blob = it->data(); + } + // until next real blob + while (blob->joined_to_prev()); + return result; +} + +/********************************************************************** + * TO_ROW::TO_ROW + * + * Constructor to make a row from a blob. + **********************************************************************/ + +TO_ROW::TO_ROW( // constructor + BLOBNBOX *blob, // first blob + float top, // corrected top + float bottom, // of row + float row_size // ideal +) { + clear(); + y_min = bottom; + y_max = top; + initial_y_min = bottom; + + float diff; // in size + BLOBNBOX_IT it = &blobs; // list of blobs + + it.add_to_end(blob); + diff = top - bottom - row_size; + if (diff > 0) { + y_max -= diff / 2; + y_min += diff / 2; + } + // very small object + else if ((top - bottom) * 3 < row_size) { + diff = row_size / 3 + bottom - top; + y_max += diff / 2; + y_min -= diff / 2; + } +} + +void TO_ROW::print() const { + tprintf( + "pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," + " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," + " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", + pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, spacing, xheight, + y_origin, xheight_evidence, ascrise, descdrop, body_size, min_space, max_nonspace, + space_threshold, kern_size, space_size); +} + +/********************************************************************** + * TO_ROW:add_blob + * + * Add the blob to the end of the row. + **********************************************************************/ + +void TO_ROW::add_blob( // constructor + BLOBNBOX *blob, // first blob + float top, // corrected top + float bottom, // of row + float row_size // ideal +) { + float allowed; // allowed expansion + float available; // expansion + BLOBNBOX_IT it = &blobs; // list of blobs + + it.add_to_end(blob); + allowed = row_size + y_min - y_max; + if (allowed > 0) { + available = top > y_max ? top - y_max : 0; + if (bottom < y_min) { + // total available + available += y_min - bottom; + } + if (available > 0) { + available += available; // do it gradually + if (available < allowed) { + available = allowed; + } + if (bottom < y_min) { + y_min -= (y_min - bottom) * allowed / available; + } + if (top > y_max) { + y_max += (top - y_max) * allowed / available; + } + } + } +} + +/********************************************************************** + * TO_ROW:insert_blob + * + * Add the blob to the row in the correct position. + **********************************************************************/ + +void TO_ROW::insert_blob( // constructor + BLOBNBOX *blob // first blob +) { + BLOBNBOX_IT it = &blobs; // list of blobs + + if (it.empty()) { + it.add_before_then_move(blob); + } else { + it.mark_cycle_pt(); + while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) { + it.forward(); + } + if (it.cycled_list()) { + it.add_to_end(blob); + } else { + it.add_before_stay_put(blob); + } + } +} + +/********************************************************************** + * TO_ROW::compute_vertical_projection + * + * Compute the vertical projection of a TO_ROW from its blobs. + **********************************************************************/ + +void TO_ROW::compute_vertical_projection() { // project whole row + TBOX row_box; // bound of row + BLOBNBOX *blob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT blob_it = blob_list(); + + if (blob_it.empty()) { + return; + } + row_box = blob_it.data()->bounding_box(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + row_box += blob_it.data()->bounding_box(); + } + + projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1); + projection_left = row_box.left() - PROJECTION_MARGIN; + projection_right = row_box.right() + PROJECTION_MARGIN; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.data(); + if (blob->cblob() != nullptr) { + vertical_cblob_projection(blob->cblob(), &projection); + } + } +} + +/********************************************************************** + * TO_ROW::clear + * + * Zero out all scalar members. + **********************************************************************/ +void TO_ROW::clear() { + all_caps = false; + used_dm_model = false; + projection_left = 0; + projection_right = 0; + pitch_decision = PITCH_DUNNO; + fixed_pitch = 0.0; + fp_space = 0.0; + fp_nonsp = 0.0; + pr_space = 0.0; + pr_nonsp = 0.0; + spacing = 0.0; + xheight = 0.0; + xheight_evidence = 0; + body_size = 0.0; + ascrise = 0.0; + descdrop = 0.0; + min_space = 0; + max_nonspace = 0; + space_threshold = 0; + kern_size = 0.0; + space_size = 0.0; + y_min = 0.0; + y_max = 0.0; + initial_y_min = 0.0; + m = 0.0; + c = 0.0; + error = 0.0; + para_c = 0.0; + para_error = 0.0; + y_origin = 0.0; + credibility = 0.0; + num_repeated_sets_ = -1; +} + +/********************************************************************** + * vertical_cblob_projection + * + * Compute the vertical projection of a cblob from its outlines + * and add to the given STATS. + **********************************************************************/ + +void vertical_cblob_projection( // project outlines + C_BLOB *blob, // blob to project + STATS *stats // output +) { + // outlines of blob + C_OUTLINE_IT out_it = blob->out_list(); + + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_coutline_projection(out_it.data(), stats); + } +} + +/********************************************************************** + * vertical_coutline_projection + * + * Compute the vertical projection of an outline from its outlines + * and add to the given STATS. + **********************************************************************/ + +void vertical_coutline_projection( // project outlines + C_OUTLINE *outline, // outline to project + STATS *stats // output +) { + ICOORD pos; // current point + ICOORD step; // edge step + int32_t length; // of outline + int16_t stepindex; // current step + C_OUTLINE_IT out_it = outline->child(); + + pos = outline->start_pos(); + length = outline->pathlength(); + for (stepindex = 0; stepindex < length; stepindex++) { + step = outline->step(stepindex); + if (step.x() > 0) { + stats->add(pos.x(), -pos.y()); + } else if (step.x() < 0) { + stats->add(pos.x() - 1, pos.y()); + } + pos += step; + } + + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + vertical_coutline_projection(out_it.data(), stats); + } +} + +/********************************************************************** + * TO_BLOCK::TO_BLOCK + * + * Constructor to make a TO_BLOCK from a real block. + **********************************************************************/ + +TO_BLOCK::TO_BLOCK( // make a block + BLOCK *src_block // real block +) { + clear(); + block = src_block; +} + +/********************************************************************** + * TO_BLOCK::clear + * + * Zero out all scalar members. + **********************************************************************/ +void TO_BLOCK::clear() { + block = nullptr; + pitch_decision = PITCH_DUNNO; + line_spacing = 0.0; + line_size = 0.0; + max_blob_size = 0.0; + baseline_offset = 0.0; + xheight = 0.0; + fixed_pitch = 0.0; + kern_size = 0.0; + space_size = 0.0; + min_space = 0; + max_nonspace = 0; + fp_space = 0.0; + fp_nonsp = 0.0; + pr_space = 0.0; + pr_nonsp = 0.0; + key_row = nullptr; +} + +TO_BLOCK::~TO_BLOCK() { + // Any residual BLOBNBOXes at this stage own their blobs, so delete them. + BLOBNBOX::clear_blobnboxes(&blobs); + BLOBNBOX::clear_blobnboxes(&underlines); + BLOBNBOX::clear_blobnboxes(&noise_blobs); + BLOBNBOX::clear_blobnboxes(&small_blobs); + BLOBNBOX::clear_blobnboxes(&large_blobs); +} + +// Helper function to divide the input blobs over noise, small, medium +// and large lists. Blobs small in height and (small in width or large in width) +// go in the noise list. Dash (-) candidates go in the small list, and +// medium and large are by height. +// SIDE-EFFECT: reset all blobs to initial state by calling Init(). +static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_list, + BLOBNBOX_LIST *noise_list, BLOBNBOX_LIST *small_list, + BLOBNBOX_LIST *medium_list, BLOBNBOX_LIST *large_list) { + BLOBNBOX_IT noise_it(noise_list); + BLOBNBOX_IT small_it(small_list); + BLOBNBOX_IT medium_it(medium_list); + BLOBNBOX_IT large_it(large_list); + for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { + BLOBNBOX *blob = src_it.extract(); + blob->ReInit(); + int width = blob->bounding_box().width(); + int height = blob->bounding_box().height(); + if (height < min_height && (width < min_height || width > max_height)) { + noise_it.add_after_then_move(blob); + } else if (height > max_height) { + large_it.add_after_then_move(blob); + } else if (height < min_height) { + small_it.add_after_then_move(blob); + } else { + medium_it.add_after_then_move(blob); + } + } +} + +// Reorganize the blob lists with a different definition of small, medium +// and large, compared to the original definition. +// Height is still the primary filter key, but medium width blobs of small +// height become small, and very wide blobs of small height stay noise, along +// with small dot-shaped blobs. +void TO_BLOCK::ReSetAndReFilterBlobs() { + int min_height = IntCastRounded(kMinMediumSizeRatio * line_size); + int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size); + BLOBNBOX_LIST noise_list; + BLOBNBOX_LIST small_list; + BLOBNBOX_LIST medium_list; + BLOBNBOX_LIST large_list; + SizeFilterBlobs(min_height, max_height, &blobs, &noise_list, &small_list, &medium_list, + &large_list); + SizeFilterBlobs(min_height, max_height, &large_blobs, &noise_list, &small_list, &medium_list, + &large_list); + SizeFilterBlobs(min_height, max_height, &small_blobs, &noise_list, &small_list, &medium_list, + &large_list); + SizeFilterBlobs(min_height, max_height, &noise_blobs, &noise_list, &small_list, &medium_list, + &large_list); + BLOBNBOX_IT blob_it(&blobs); + blob_it.add_list_after(&medium_list); + blob_it.set_to_list(&large_blobs); + blob_it.add_list_after(&large_list); + blob_it.set_to_list(&small_blobs); + blob_it.add_list_after(&small_list); + blob_it.set_to_list(&noise_blobs); + blob_it.add_list_after(&noise_list); +} + +// Deletes noise blobs from all lists where not owned by a ColPartition. +void TO_BLOCK::DeleteUnownedNoise() { + BLOBNBOX::CleanNeighbours(&blobs); + BLOBNBOX::CleanNeighbours(&small_blobs); + BLOBNBOX::CleanNeighbours(&noise_blobs); + BLOBNBOX::CleanNeighbours(&large_blobs); + BLOBNBOX::DeleteNoiseBlobs(&blobs); + BLOBNBOX::DeleteNoiseBlobs(&small_blobs); + BLOBNBOX::DeleteNoiseBlobs(&noise_blobs); + BLOBNBOX::DeleteNoiseBlobs(&large_blobs); +} + +// Computes and stores the edge offsets on each blob for use in feature +// extraction, using greyscale if the supplied grey and thresholds pixes +// are 8-bit or otherwise (if nullptr or not 8 bit) the original binary +// edge step outlines. +// Thresholds must either be the same size as grey or an integer down-scale +// of grey. +// See coutln.h for an explanation of edge offsets. +void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) { + BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs); + BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs); + BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs); +} + +#ifndef GRAPHICS_DISABLED +// Draw the noise blobs from all lists in red. +void TO_BLOCK::plot_noise_blobs(ScrollView *win) { + BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win); + BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win); + BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win); + BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win); +} + +// Draw the blobs on the various lists in the block in different colors. +void TO_BLOCK::plot_graded_blobs(ScrollView *win) { + BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win); + BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, win); + BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, win); + BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win); +} + +/********************************************************************** + * plot_blob_list + * + * Draw a list of blobs. + **********************************************************************/ + +void plot_blob_list(ScrollView *win, // window to draw in + BLOBNBOX_LIST *list, // blob list + ScrollView::Color body_colour, // colour to draw + ScrollView::Color child_colour) { // colour of child + BLOBNBOX_IT it = list; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->plot(win, body_colour, child_colour); + } +} +#endif // !GRAPHICS_DISABLED + +} // namespace tesseract
