Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/bbgrid.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/bbgrid.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,280 @@ +/////////////////////////////////////////////////////////////////////// +// File: bbgrid.cpp +// Description: Class to hold BLOBNBOXs in a grid for fast access +// to neighbours. +// Author: Ray Smith +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "bbgrid.h" +#include "helpers.h" +#include "ocrblock.h" + +namespace tesseract { + +/////////////////////////////////////////////////////////////////////// +// BBGrid IMPLEMENTATION. +/////////////////////////////////////////////////////////////////////// +GridBase::GridBase(int gridsize, const ICOORD &bleft, const ICOORD &tright) { + Init(gridsize, bleft, tright); +} + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +GridBase::~GridBase() = default; + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +void GridBase::Init(int gridsize, const ICOORD &bleft, const ICOORD &tright) { + gridsize_ = gridsize; + bleft_ = bleft; + tright_ = tright; + if (gridsize_ == 0) { + gridsize_ = 1; + } + gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; + gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; + gridbuckets_ = gridwidth_ * gridheight_; +} + +// Compute the given grid coordinates from image coords. +void GridBase::GridCoords(int x, int y, int *grid_x, int *grid_y) const { + *grid_x = (x - bleft_.x()) / gridsize_; + *grid_y = (y - bleft_.y()) / gridsize_; + ClipGridCoords(grid_x, grid_y); +} + +// Clip the given grid coordinates to fit within the grid. +void GridBase::ClipGridCoords(int *x, int *y) const { + *x = ClipToRange(*x, 0, gridwidth_ - 1); + *y = ClipToRange(*y, 0, gridheight_ - 1); +} + +IntGrid::IntGrid() { + grid_ = nullptr; +} + +IntGrid::IntGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright) : grid_(nullptr) { + Init(gridsize, bleft, tright); +} + +IntGrid::~IntGrid() { + delete[] grid_; +} + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +void IntGrid::Init(int gridsize, const ICOORD &bleft, const ICOORD &tright) { + GridBase::Init(gridsize, bleft, tright); + delete[] grid_; + grid_ = new int[gridbuckets_]; + Clear(); +} + +// Clear all the ints in the grid to zero. +void IntGrid::Clear() { + for (int i = 0; i < gridbuckets_; ++i) { + grid_[i] = 0; + } +} + +// Rotate the grid by rotation, keeping cell contents. +// rotation must be a multiple of 90 degrees. +// NOTE: due to partial cells, cell coverage in the rotated grid will be +// inexact. This is why there is no Rotate for the generic BBGrid. +// TODO(rays) investigate fixing this inaccuracy by moving the origin after +// rotation. +void IntGrid::Rotate(const FCOORD &rotation) { + ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); + ICOORD old_bleft(bleft()); + // ICOORD old_tright(tright()); + int old_width = gridwidth(); + int old_height = gridheight(); + TBOX box(bleft(), tright()); + box.rotate(rotation); + int *old_grid = grid_; + grid_ = nullptr; + Init(gridsize(), box.botleft(), box.topright()); + // Iterate over the old grid, copying data to the rotated position in the new. + int oldi = 0; + FCOORD x_step(rotation); + x_step *= gridsize(); + for (int oldy = 0; oldy < old_height; ++oldy) { + FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); + line_pos.rotate(rotation); + for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { + int grid_x, grid_y; + GridCoords(static_cast<int>(line_pos.x() + 0.5), static_cast<int>(line_pos.y() + 0.5), + &grid_x, &grid_y); + grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; + } + } + delete[] old_grid; +} + +// Returns a new IntGrid containing values equal to the sum of all the +// neighbouring cells. The returned grid must be deleted after use. +// For ease of implementation, edge cells are double counted, to make them +// have the same range as the non-edge cells. +IntGrid *IntGrid::NeighbourhoodSum() const { + auto *sumgrid = new IntGrid(gridsize(), bleft(), tright()); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int cell_count = 0; + for (int yoffset = -1; yoffset <= 1; ++yoffset) { + for (int xoffset = -1; xoffset <= 1; ++xoffset) { + int grid_x = x + xoffset; + int grid_y = y + yoffset; + ClipGridCoords(&grid_x, &grid_y); + cell_count += GridCellValue(grid_x, grid_y); + } + } + if (GridCellValue(x, y) > 1) { + sumgrid->SetGridCell(x, y, cell_count); + } + } + } + return sumgrid; +} + +// Returns true if more than half the area of the rect is covered by grid +// cells that are over the threshold. +bool IntGrid::RectMostlyOverThreshold(const TBOX &rect, int threshold) const { + int min_x, min_y, max_x, max_y; + GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); + GridCoords(rect.right(), rect.top(), &max_x, &max_y); + int total_area = 0; + for (int y = min_y; y <= max_y; ++y) { + for (int x = min_x; x <= max_x; ++x) { + int value = GridCellValue(x, y); + if (value > threshold) { + TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, (y + 1) * gridsize_); + cell_box &= rect; // This is in-place box intersection. + total_area += cell_box.area(); + } + } + } + return total_area * 2 > rect.area(); +} + +// Returns true if any cell value in the given rectangle is zero. +bool IntGrid::AnyZeroInRect(const TBOX &rect) const { + int min_x, min_y, max_x, max_y; + GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); + GridCoords(rect.right(), rect.top(), &max_x, &max_y); + for (int y = min_y; y <= max_y; ++y) { + for (int x = min_x; x <= max_x; ++x) { + if (GridCellValue(x, y) == 0) { + return true; + } + } + } + return false; +} + +// Returns a full-resolution binary pix in which each cell over the given +// threshold is filled as a black square. pixDestroy after use. +// Edge cells, which have a zero 4-neighbour, are not marked. +Image IntGrid::ThresholdToPix(int threshold) const { + Image pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); + int cellsize = gridsize(); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + if (GridCellValue(x, y) > threshold && GridCellValue(x - 1, y) > 0 && + GridCellValue(x + 1, y) > 0 && GridCellValue(x, y - 1) > 0 && + GridCellValue(x, y + 1) > 0) { + pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), cellsize, cellsize, + PIX_SET, nullptr, 0, 0); + } + } + } + return pix; +} + +// Make a Pix of the correct scaled size for the TraceOutline functions. +static Image GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) { + // Compute grid bounds of the outline and pad all round by 1. + int grid_left = (box.left() - bleft.x()) / gridsize - 1; + int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; + int grid_right = (box.right() - bleft.x()) / gridsize + 1; + int grid_top = (box.top() - bleft.y()) / gridsize + 1; + *left = grid_left; + *bottom = grid_bottom; + return pixCreate(grid_right - grid_left + 1, grid_top - grid_bottom + 1, 1); +} + +// Helper function to return a scaled Pix with one pixel per grid cell, +// set (black) where the given outline enters the corresponding grid cell, +// and clear where the outline does not touch the grid cell. +// Also returns the grid coords of the bottom-left of the Pix, in *left +// and *bottom, which corresponds to (0, 0) on the Pix. +// Note that the Pix is used upside-down, with (0, 0) being the bottom-left. +Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, + int *bottom) { + const TBOX &box = outline->bounding_box(); + Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); + int wpl = pixGetWpl(pix); + l_uint32 *data = pixGetData(pix); + int length = outline->pathlength(); + ICOORD pos = outline->start_pos(); + for (int i = 0; i < length; ++i) { + int grid_x = (pos.x() - bleft.x()) / gridsize - *left; + int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; + SET_DATA_BIT(data + grid_y * wpl, grid_x); + pos += outline->step(i); + } + return pix; +} +#if 0 // Example code shows how to use TraceOutlineOnReducedPix. + C_OUTLINE_IT ol_it(blob->cblob()->out_list()); + int grid_left, grid_bottom; + Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, + &grid_left, &grid_bottom); + grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob); + pix.destroy(); +#endif + +// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. +Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) { + const TBOX &box = block->pdblk.bounding_box(); + Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); + int wpl = pixGetWpl(pix); + l_uint32 *data = pixGetData(pix); + ICOORDELT_IT it(block->pdblk.poly_block()->points()); + for (it.mark_cycle_pt(); !it.cycled_list();) { + ICOORD pos = *it.data(); + it.forward(); + ICOORD next_pos = *it.data(); + ICOORD line_vector = next_pos - pos; + int major, minor; + ICOORD major_step, minor_step; + line_vector.setup_render(&major_step, &minor_step, &major, &minor); + int accumulator = major / 2; + while (pos != next_pos) { + int grid_x = (pos.x() - bleft.x()) / gridsize - *left; + int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; + SET_DATA_BIT(data + grid_y * wpl, grid_x); + pos += major_step; + accumulator += minor; + if (accumulator >= major) { + accumulator -= major; + pos += minor_step; + } + } + } + return pix; +} + +} // namespace tesseract.
