Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/bbgrid.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /////////////////////////////////////////////////////////////////////// | |
| 2 // File: bbgrid.cpp | |
| 3 // Description: Class to hold BLOBNBOXs in a grid for fast access | |
| 4 // to neighbours. | |
| 5 // Author: Ray Smith | |
| 6 // | |
| 7 // (C) Copyright 2007, Google Inc. | |
| 8 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 9 // you may not use this file except in compliance with the License. | |
| 10 // You may obtain a copy of the License at | |
| 11 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 // Unless required by applicable law or agreed to in writing, software | |
| 13 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 15 // See the License for the specific language governing permissions and | |
| 16 // limitations under the License. | |
| 17 // | |
| 18 /////////////////////////////////////////////////////////////////////// | |
| 19 | |
| 20 #include "bbgrid.h" | |
| 21 #include "helpers.h" | |
| 22 #include "ocrblock.h" | |
| 23 | |
| 24 namespace tesseract { | |
| 25 | |
| 26 /////////////////////////////////////////////////////////////////////// | |
| 27 // BBGrid IMPLEMENTATION. | |
| 28 /////////////////////////////////////////////////////////////////////// | |
| 29 GridBase::GridBase(int gridsize, const ICOORD &bleft, const ICOORD &tright) { | |
| 30 Init(gridsize, bleft, tright); | |
| 31 } | |
| 32 | |
| 33 // Destructor. | |
| 34 // It is defined here, so the compiler can create a single vtable | |
| 35 // instead of weak vtables in every compilation unit. | |
| 36 GridBase::~GridBase() = default; | |
| 37 | |
| 38 // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, | |
| 39 // and bleft, tright are the bounding box of everything to go in it. | |
| 40 void GridBase::Init(int gridsize, const ICOORD &bleft, const ICOORD &tright) { | |
| 41 gridsize_ = gridsize; | |
| 42 bleft_ = bleft; | |
| 43 tright_ = tright; | |
| 44 if (gridsize_ == 0) { | |
| 45 gridsize_ = 1; | |
| 46 } | |
| 47 gridwidth_ = (tright.x() - bleft.x() + gridsize_ - 1) / gridsize_; | |
| 48 gridheight_ = (tright.y() - bleft.y() + gridsize_ - 1) / gridsize_; | |
| 49 gridbuckets_ = gridwidth_ * gridheight_; | |
| 50 } | |
| 51 | |
| 52 // Compute the given grid coordinates from image coords. | |
| 53 void GridBase::GridCoords(int x, int y, int *grid_x, int *grid_y) const { | |
| 54 *grid_x = (x - bleft_.x()) / gridsize_; | |
| 55 *grid_y = (y - bleft_.y()) / gridsize_; | |
| 56 ClipGridCoords(grid_x, grid_y); | |
| 57 } | |
| 58 | |
| 59 // Clip the given grid coordinates to fit within the grid. | |
| 60 void GridBase::ClipGridCoords(int *x, int *y) const { | |
| 61 *x = ClipToRange(*x, 0, gridwidth_ - 1); | |
| 62 *y = ClipToRange(*y, 0, gridheight_ - 1); | |
| 63 } | |
| 64 | |
| 65 IntGrid::IntGrid() { | |
| 66 grid_ = nullptr; | |
| 67 } | |
| 68 | |
| 69 IntGrid::IntGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright) : grid_(nullptr) { | |
| 70 Init(gridsize, bleft, tright); | |
| 71 } | |
| 72 | |
| 73 IntGrid::~IntGrid() { | |
| 74 delete[] grid_; | |
| 75 } | |
| 76 | |
| 77 // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, | |
| 78 // and bleft, tright are the bounding box of everything to go in it. | |
| 79 void IntGrid::Init(int gridsize, const ICOORD &bleft, const ICOORD &tright) { | |
| 80 GridBase::Init(gridsize, bleft, tright); | |
| 81 delete[] grid_; | |
| 82 grid_ = new int[gridbuckets_]; | |
| 83 Clear(); | |
| 84 } | |
| 85 | |
| 86 // Clear all the ints in the grid to zero. | |
| 87 void IntGrid::Clear() { | |
| 88 for (int i = 0; i < gridbuckets_; ++i) { | |
| 89 grid_[i] = 0; | |
| 90 } | |
| 91 } | |
| 92 | |
| 93 // Rotate the grid by rotation, keeping cell contents. | |
| 94 // rotation must be a multiple of 90 degrees. | |
| 95 // NOTE: due to partial cells, cell coverage in the rotated grid will be | |
| 96 // inexact. This is why there is no Rotate for the generic BBGrid. | |
| 97 // TODO(rays) investigate fixing this inaccuracy by moving the origin after | |
| 98 // rotation. | |
| 99 void IntGrid::Rotate(const FCOORD &rotation) { | |
| 100 ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); | |
| 101 ICOORD old_bleft(bleft()); | |
| 102 // ICOORD old_tright(tright()); | |
| 103 int old_width = gridwidth(); | |
| 104 int old_height = gridheight(); | |
| 105 TBOX box(bleft(), tright()); | |
| 106 box.rotate(rotation); | |
| 107 int *old_grid = grid_; | |
| 108 grid_ = nullptr; | |
| 109 Init(gridsize(), box.botleft(), box.topright()); | |
| 110 // Iterate over the old grid, copying data to the rotated position in the new. | |
| 111 int oldi = 0; | |
| 112 FCOORD x_step(rotation); | |
| 113 x_step *= gridsize(); | |
| 114 for (int oldy = 0; oldy < old_height; ++oldy) { | |
| 115 FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); | |
| 116 line_pos.rotate(rotation); | |
| 117 for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { | |
| 118 int grid_x, grid_y; | |
| 119 GridCoords(static_cast<int>(line_pos.x() + 0.5), static_cast<int>(line_pos.y() + 0.5), | |
| 120 &grid_x, &grid_y); | |
| 121 grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; | |
| 122 } | |
| 123 } | |
| 124 delete[] old_grid; | |
| 125 } | |
| 126 | |
| 127 // Returns a new IntGrid containing values equal to the sum of all the | |
| 128 // neighbouring cells. The returned grid must be deleted after use. | |
| 129 // For ease of implementation, edge cells are double counted, to make them | |
| 130 // have the same range as the non-edge cells. | |
| 131 IntGrid *IntGrid::NeighbourhoodSum() const { | |
| 132 auto *sumgrid = new IntGrid(gridsize(), bleft(), tright()); | |
| 133 for (int y = 0; y < gridheight(); ++y) { | |
| 134 for (int x = 0; x < gridwidth(); ++x) { | |
| 135 int cell_count = 0; | |
| 136 for (int yoffset = -1; yoffset <= 1; ++yoffset) { | |
| 137 for (int xoffset = -1; xoffset <= 1; ++xoffset) { | |
| 138 int grid_x = x + xoffset; | |
| 139 int grid_y = y + yoffset; | |
| 140 ClipGridCoords(&grid_x, &grid_y); | |
| 141 cell_count += GridCellValue(grid_x, grid_y); | |
| 142 } | |
| 143 } | |
| 144 if (GridCellValue(x, y) > 1) { | |
| 145 sumgrid->SetGridCell(x, y, cell_count); | |
| 146 } | |
| 147 } | |
| 148 } | |
| 149 return sumgrid; | |
| 150 } | |
| 151 | |
| 152 // Returns true if more than half the area of the rect is covered by grid | |
| 153 // cells that are over the threshold. | |
| 154 bool IntGrid::RectMostlyOverThreshold(const TBOX &rect, int threshold) const { | |
| 155 int min_x, min_y, max_x, max_y; | |
| 156 GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); | |
| 157 GridCoords(rect.right(), rect.top(), &max_x, &max_y); | |
| 158 int total_area = 0; | |
| 159 for (int y = min_y; y <= max_y; ++y) { | |
| 160 for (int x = min_x; x <= max_x; ++x) { | |
| 161 int value = GridCellValue(x, y); | |
| 162 if (value > threshold) { | |
| 163 TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, (y + 1) * gridsize_); | |
| 164 cell_box &= rect; // This is in-place box intersection. | |
| 165 total_area += cell_box.area(); | |
| 166 } | |
| 167 } | |
| 168 } | |
| 169 return total_area * 2 > rect.area(); | |
| 170 } | |
| 171 | |
| 172 // Returns true if any cell value in the given rectangle is zero. | |
| 173 bool IntGrid::AnyZeroInRect(const TBOX &rect) const { | |
| 174 int min_x, min_y, max_x, max_y; | |
| 175 GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); | |
| 176 GridCoords(rect.right(), rect.top(), &max_x, &max_y); | |
| 177 for (int y = min_y; y <= max_y; ++y) { | |
| 178 for (int x = min_x; x <= max_x; ++x) { | |
| 179 if (GridCellValue(x, y) == 0) { | |
| 180 return true; | |
| 181 } | |
| 182 } | |
| 183 } | |
| 184 return false; | |
| 185 } | |
| 186 | |
| 187 // Returns a full-resolution binary pix in which each cell over the given | |
| 188 // threshold is filled as a black square. pixDestroy after use. | |
| 189 // Edge cells, which have a zero 4-neighbour, are not marked. | |
| 190 Image IntGrid::ThresholdToPix(int threshold) const { | |
| 191 Image pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); | |
| 192 int cellsize = gridsize(); | |
| 193 for (int y = 0; y < gridheight(); ++y) { | |
| 194 for (int x = 0; x < gridwidth(); ++x) { | |
| 195 if (GridCellValue(x, y) > threshold && GridCellValue(x - 1, y) > 0 && | |
| 196 GridCellValue(x + 1, y) > 0 && GridCellValue(x, y - 1) > 0 && | |
| 197 GridCellValue(x, y + 1) > 0) { | |
| 198 pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), cellsize, cellsize, | |
| 199 PIX_SET, nullptr, 0, 0); | |
| 200 } | |
| 201 } | |
| 202 } | |
| 203 return pix; | |
| 204 } | |
| 205 | |
| 206 // Make a Pix of the correct scaled size for the TraceOutline functions. | |
| 207 static Image GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) { | |
| 208 // Compute grid bounds of the outline and pad all round by 1. | |
| 209 int grid_left = (box.left() - bleft.x()) / gridsize - 1; | |
| 210 int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; | |
| 211 int grid_right = (box.right() - bleft.x()) / gridsize + 1; | |
| 212 int grid_top = (box.top() - bleft.y()) / gridsize + 1; | |
| 213 *left = grid_left; | |
| 214 *bottom = grid_bottom; | |
| 215 return pixCreate(grid_right - grid_left + 1, grid_top - grid_bottom + 1, 1); | |
| 216 } | |
| 217 | |
| 218 // Helper function to return a scaled Pix with one pixel per grid cell, | |
| 219 // set (black) where the given outline enters the corresponding grid cell, | |
| 220 // and clear where the outline does not touch the grid cell. | |
| 221 // Also returns the grid coords of the bottom-left of the Pix, in *left | |
| 222 // and *bottom, which corresponds to (0, 0) on the Pix. | |
| 223 // Note that the Pix is used upside-down, with (0, 0) being the bottom-left. | |
| 224 Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, | |
| 225 int *bottom) { | |
| 226 const TBOX &box = outline->bounding_box(); | |
| 227 Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); | |
| 228 int wpl = pixGetWpl(pix); | |
| 229 l_uint32 *data = pixGetData(pix); | |
| 230 int length = outline->pathlength(); | |
| 231 ICOORD pos = outline->start_pos(); | |
| 232 for (int i = 0; i < length; ++i) { | |
| 233 int grid_x = (pos.x() - bleft.x()) / gridsize - *left; | |
| 234 int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; | |
| 235 SET_DATA_BIT(data + grid_y * wpl, grid_x); | |
| 236 pos += outline->step(i); | |
| 237 } | |
| 238 return pix; | |
| 239 } | |
| 240 #if 0 // Example code shows how to use TraceOutlineOnReducedPix. | |
| 241 C_OUTLINE_IT ol_it(blob->cblob()->out_list()); | |
| 242 int grid_left, grid_bottom; | |
| 243 Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, | |
| 244 &grid_left, &grid_bottom); | |
| 245 grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob); | |
| 246 pix.destroy(); | |
| 247 #endif | |
| 248 | |
| 249 // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. | |
| 250 Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) { | |
| 251 const TBOX &box = block->pdblk.bounding_box(); | |
| 252 Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); | |
| 253 int wpl = pixGetWpl(pix); | |
| 254 l_uint32 *data = pixGetData(pix); | |
| 255 ICOORDELT_IT it(block->pdblk.poly_block()->points()); | |
| 256 for (it.mark_cycle_pt(); !it.cycled_list();) { | |
| 257 ICOORD pos = *it.data(); | |
| 258 it.forward(); | |
| 259 ICOORD next_pos = *it.data(); | |
| 260 ICOORD line_vector = next_pos - pos; | |
| 261 int major, minor; | |
| 262 ICOORD major_step, minor_step; | |
| 263 line_vector.setup_render(&major_step, &minor_step, &major, &minor); | |
| 264 int accumulator = major / 2; | |
| 265 while (pos != next_pos) { | |
| 266 int grid_x = (pos.x() - bleft.x()) / gridsize - *left; | |
| 267 int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; | |
| 268 SET_DATA_BIT(data + grid_y * wpl, grid_x); | |
| 269 pos += major_step; | |
| 270 accumulator += minor; | |
| 271 if (accumulator >= major) { | |
| 272 accumulator -= major; | |
| 273 pos += minor_step; | |
| 274 } | |
| 275 } | |
| 276 } | |
| 277 return pix; | |
| 278 } | |
| 279 | |
| 280 } // namespace tesseract. |
