Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/scanedg.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: scanedg.cpp (Formerly scanedge.c) | |
| 3 * Description: Raster scanning crack based edge extractor. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1991, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #include "scanedg.h" | |
| 20 | |
| 21 #include "crakedge.h" | |
| 22 #include "edgloop.h" | |
| 23 #include "pdblock.h" | |
| 24 | |
| 25 #include <allheaders.h> | |
| 26 | |
| 27 #include <memory> // std::unique_ptr | |
| 28 | |
| 29 namespace tesseract { | |
| 30 | |
| 31 #define WHITE_PIX 1 /*thresholded colours */ | |
| 32 #define BLACK_PIX 0 | |
| 33 // Flips between WHITE_PIX and BLACK_PIX. | |
| 34 #define FLIP_COLOUR(pix) (1 - (pix)) | |
| 35 | |
| 36 struct CrackPos { | |
| 37 CRACKEDGE **free_cracks; // Freelist for fast allocation. | |
| 38 int x; // Position of new edge. | |
| 39 int y; | |
| 40 }; | |
| 41 | |
| 42 static void free_crackedges(CRACKEDGE *start); | |
| 43 | |
| 44 static void join_edges(CRACKEDGE *edge1, CRACKEDGE *edge2, CRACKEDGE **free_cracks, | |
| 45 C_OUTLINE_IT *outline_it); | |
| 46 | |
| 47 static void line_edges(TDimension x, TDimension y, TDimension xext, uint8_t uppercolour, uint8_t *bwpos, | |
| 48 CRACKEDGE **prevline, CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it); | |
| 49 | |
| 50 static void make_margins(PDBLK *block, BLOCK_LINE_IT *line_it, uint8_t *pixels, uint8_t margin, | |
| 51 TDimension left, TDimension right, TDimension y); | |
| 52 | |
| 53 static CRACKEDGE *h_edge(int sign, CRACKEDGE *join, CrackPos *pos); | |
| 54 static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos); | |
| 55 | |
| 56 /********************************************************************** | |
| 57 * block_edges | |
| 58 * | |
| 59 * Extract edges from a PDBLK. | |
| 60 **********************************************************************/ | |
| 61 | |
| 62 void block_edges(Image t_pix, // thresholded image | |
| 63 PDBLK *block, // block in image | |
| 64 C_OUTLINE_IT *outline_it) { | |
| 65 ICOORD bleft; // bounding box | |
| 66 ICOORD tright; | |
| 67 BLOCK_LINE_IT line_it = block; // line iterator | |
| 68 | |
| 69 int width = pixGetWidth(t_pix); | |
| 70 int height = pixGetHeight(t_pix); | |
| 71 int wpl = pixGetWpl(t_pix); | |
| 72 // lines in progress | |
| 73 std::unique_ptr<CRACKEDGE *[]> ptrline(new CRACKEDGE *[width + 1]); | |
| 74 CRACKEDGE *free_cracks = nullptr; | |
| 75 | |
| 76 block->bounding_box(bleft, tright); // block box | |
| 77 ASSERT_HOST(tright.x() <= width); | |
| 78 ASSERT_HOST(tright.y() <= height); | |
| 79 int block_width = tright.x() - bleft.x(); | |
| 80 for (int x = block_width; x >= 0; x--) { | |
| 81 ptrline[x] = nullptr; // no lines in progress | |
| 82 } | |
| 83 | |
| 84 std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]); | |
| 85 | |
| 86 const uint8_t margin = WHITE_PIX; | |
| 87 | |
| 88 for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) { | |
| 89 if (y >= bleft.y() && y < tright.y()) { | |
| 90 // Get the binary pixels from the image. | |
| 91 l_uint32 *line = pixGetData(t_pix) + wpl * (height - 1 - y); | |
| 92 for (int x = 0; x < block_width; ++x) { | |
| 93 bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1; | |
| 94 } | |
| 95 make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y); | |
| 96 } else { | |
| 97 memset(bwline.get(), margin, block_width * sizeof(bwline[0])); | |
| 98 } | |
| 99 line_edges(bleft.x(), y, block_width, margin, bwline.get(), ptrline.get(), &free_cracks, | |
| 100 outline_it); | |
| 101 } | |
| 102 | |
| 103 free_crackedges(free_cracks); // really free them | |
| 104 } | |
| 105 | |
| 106 /********************************************************************** | |
| 107 * make_margins | |
| 108 * | |
| 109 * Get an image line and set to margin non-text pixels. | |
| 110 **********************************************************************/ | |
| 111 | |
| 112 static void make_margins( // get a line | |
| 113 PDBLK *block, // block in image | |
| 114 BLOCK_LINE_IT *line_it, // for old style | |
| 115 uint8_t *pixels, // pixels to strip | |
| 116 uint8_t margin, // white-out pixel | |
| 117 TDimension left, // block edges | |
| 118 TDimension right, | |
| 119 TDimension y // line coord | |
| 120 ) { | |
| 121 ICOORDELT_IT seg_it; | |
| 122 | |
| 123 if (block->poly_block() != nullptr) { | |
| 124 std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT(block->poly_block())); | |
| 125 const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line(y)); | |
| 126 if (!segments->empty()) { | |
| 127 seg_it.set_to_list(segments.get()); | |
| 128 seg_it.mark_cycle_pt(); | |
| 129 auto start = seg_it.data()->x(); | |
| 130 auto xext = seg_it.data()->y(); | |
| 131 for (auto xindex = left; xindex < right; xindex++) { | |
| 132 if (xindex >= start && !seg_it.cycled_list()) { | |
| 133 xindex = start + xext - 1; | |
| 134 seg_it.forward(); | |
| 135 start = seg_it.data()->x(); | |
| 136 xext = seg_it.data()->y(); | |
| 137 } else { | |
| 138 pixels[xindex - left] = margin; | |
| 139 } | |
| 140 } | |
| 141 } else { | |
| 142 for (auto xindex = left; xindex < right; xindex++) { | |
| 143 pixels[xindex - left] = margin; | |
| 144 } | |
| 145 } | |
| 146 } else { | |
| 147 TDimension xext; // of segment | |
| 148 auto start = line_it->get_line(y, xext); | |
| 149 for (auto xindex = left; xindex < start; xindex++) { | |
| 150 pixels[xindex - left] = margin; | |
| 151 } | |
| 152 for (auto xindex = start + xext; xindex < right; xindex++) { | |
| 153 pixels[xindex - left] = margin; | |
| 154 } | |
| 155 } | |
| 156 } | |
| 157 | |
| 158 /********************************************************************** | |
| 159 * line_edges | |
| 160 * | |
| 161 * Scan a line for edges and update the edges in progress. | |
| 162 * When edges close into loops, send them for approximation. | |
| 163 **********************************************************************/ | |
| 164 | |
| 165 static void line_edges(TDimension x, // coord of line start | |
| 166 TDimension y, // coord of line | |
| 167 TDimension xext, // width of line | |
| 168 uint8_t uppercolour, // start of prev line | |
| 169 uint8_t *bwpos, // thresholded line | |
| 170 CRACKEDGE **prevline, // edges in progress | |
| 171 CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { | |
| 172 CrackPos pos = {free_cracks, x, y}; | |
| 173 int xmax; // max x coord | |
| 174 int prevcolour; // of previous pixel | |
| 175 CRACKEDGE *current; // current h edge | |
| 176 CRACKEDGE *newcurrent; // new h edge | |
| 177 | |
| 178 xmax = x + xext; // max allowable coord | |
| 179 prevcolour = uppercolour; // forced plain margin | |
| 180 current = nullptr; // nothing yet | |
| 181 | |
| 182 // do each pixel | |
| 183 for (; pos.x < xmax; pos.x++, prevline++) { | |
| 184 const int colour = *bwpos++; // current pixel | |
| 185 if (*prevline != nullptr) { | |
| 186 // changed above | |
| 187 // change colour | |
| 188 uppercolour = FLIP_COLOUR(uppercolour); | |
| 189 if (colour == prevcolour) { | |
| 190 if (colour == uppercolour) { | |
| 191 // finish a line | |
| 192 join_edges(current, *prevline, free_cracks, outline_it); | |
| 193 current = nullptr; // no edge now | |
| 194 } else { | |
| 195 // new horiz edge | |
| 196 current = h_edge(uppercolour - colour, *prevline, &pos); | |
| 197 } | |
| 198 *prevline = nullptr; // no change this time | |
| 199 } else { | |
| 200 if (colour == uppercolour) { | |
| 201 *prevline = v_edge(colour - prevcolour, *prevline, &pos); | |
| 202 // 8 vs 4 connection | |
| 203 } else if (colour == WHITE_PIX) { | |
| 204 join_edges(current, *prevline, free_cracks, outline_it); | |
| 205 current = h_edge(uppercolour - colour, nullptr, &pos); | |
| 206 *prevline = v_edge(colour - prevcolour, current, &pos); | |
| 207 } else { | |
| 208 newcurrent = h_edge(uppercolour - colour, *prevline, &pos); | |
| 209 *prevline = v_edge(colour - prevcolour, current, &pos); | |
| 210 current = newcurrent; // right going h edge | |
| 211 } | |
| 212 prevcolour = colour; // remember new colour | |
| 213 } | |
| 214 } else { | |
| 215 if (colour != prevcolour) { | |
| 216 *prevline = current = v_edge(colour - prevcolour, current, &pos); | |
| 217 prevcolour = colour; | |
| 218 } | |
| 219 if (colour != uppercolour) { | |
| 220 current = h_edge(uppercolour - colour, current, &pos); | |
| 221 } else { | |
| 222 current = nullptr; // no edge now | |
| 223 } | |
| 224 } | |
| 225 } | |
| 226 if (current != nullptr) { | |
| 227 // out of block | |
| 228 if (*prevline != nullptr) { // got one to join to? | |
| 229 join_edges(current, *prevline, free_cracks, outline_it); | |
| 230 *prevline = nullptr; // tidy now | |
| 231 } else { | |
| 232 // fake vertical | |
| 233 *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, current, &pos); | |
| 234 } | |
| 235 } else if (*prevline != nullptr) { | |
| 236 // continue fake | |
| 237 *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, *prevline, &pos); | |
| 238 } | |
| 239 } | |
| 240 | |
| 241 /********************************************************************** | |
| 242 * h_edge | |
| 243 * | |
| 244 * Create a new horizontal CRACKEDGE and join it to the given edge. | |
| 245 **********************************************************************/ | |
| 246 | |
| 247 static CRACKEDGE *h_edge(int sign, // sign of edge | |
| 248 CRACKEDGE *join, // edge to join to | |
| 249 CrackPos *pos) { | |
| 250 CRACKEDGE *newpt; // return value | |
| 251 | |
| 252 if (*pos->free_cracks != nullptr) { | |
| 253 newpt = *pos->free_cracks; | |
| 254 *pos->free_cracks = newpt->next; // get one fast | |
| 255 } else { | |
| 256 newpt = new CRACKEDGE; | |
| 257 } | |
| 258 newpt->pos.set_y(pos->y + 1); // coords of pt | |
| 259 newpt->stepy = 0; // edge is horizontal | |
| 260 | |
| 261 if (sign > 0) { | |
| 262 newpt->pos.set_x(pos->x + 1); // start location | |
| 263 newpt->stepx = -1; | |
| 264 newpt->stepdir = 0; | |
| 265 } else { | |
| 266 newpt->pos.set_x(pos->x); // start location | |
| 267 newpt->stepx = 1; | |
| 268 newpt->stepdir = 2; | |
| 269 } | |
| 270 | |
| 271 if (join == nullptr) { | |
| 272 newpt->next = newpt; // ptrs to other ends | |
| 273 newpt->prev = newpt; | |
| 274 } else { | |
| 275 if (newpt->pos.x() + newpt->stepx == join->pos.x() && newpt->pos.y() == join->pos.y()) { | |
| 276 newpt->prev = join->prev; // update other ends | |
| 277 newpt->prev->next = newpt; | |
| 278 newpt->next = join; // join up | |
| 279 join->prev = newpt; | |
| 280 } else { | |
| 281 newpt->next = join->next; // update other ends | |
| 282 newpt->next->prev = newpt; | |
| 283 newpt->prev = join; // join up | |
| 284 join->next = newpt; | |
| 285 } | |
| 286 } | |
| 287 return newpt; | |
| 288 } | |
| 289 | |
| 290 /********************************************************************** | |
| 291 * v_edge | |
| 292 * | |
| 293 * Create a new vertical CRACKEDGE and join it to the given edge. | |
| 294 **********************************************************************/ | |
| 295 | |
| 296 static CRACKEDGE *v_edge(int sign, // sign of edge | |
| 297 CRACKEDGE *join, CrackPos *pos) { | |
| 298 CRACKEDGE *newpt; // return value | |
| 299 | |
| 300 if (*pos->free_cracks != nullptr) { | |
| 301 newpt = *pos->free_cracks; | |
| 302 *pos->free_cracks = newpt->next; // get one fast | |
| 303 } else { | |
| 304 newpt = new CRACKEDGE; | |
| 305 } | |
| 306 newpt->pos.set_x(pos->x); // coords of pt | |
| 307 newpt->stepx = 0; // edge is vertical | |
| 308 | |
| 309 if (sign > 0) { | |
| 310 newpt->pos.set_y(pos->y); // start location | |
| 311 newpt->stepy = 1; | |
| 312 newpt->stepdir = 3; | |
| 313 } else { | |
| 314 newpt->pos.set_y(pos->y + 1); // start location | |
| 315 newpt->stepy = -1; | |
| 316 newpt->stepdir = 1; | |
| 317 } | |
| 318 | |
| 319 if (join == nullptr) { | |
| 320 newpt->next = newpt; // ptrs to other ends | |
| 321 newpt->prev = newpt; | |
| 322 } else { | |
| 323 if (newpt->pos.x() == join->pos.x() && newpt->pos.y() + newpt->stepy == join->pos.y()) { | |
| 324 newpt->prev = join->prev; // update other ends | |
| 325 newpt->prev->next = newpt; | |
| 326 newpt->next = join; // join up | |
| 327 join->prev = newpt; | |
| 328 } else { | |
| 329 newpt->next = join->next; // update other ends | |
| 330 newpt->next->prev = newpt; | |
| 331 newpt->prev = join; // join up | |
| 332 join->next = newpt; | |
| 333 } | |
| 334 } | |
| 335 return newpt; | |
| 336 } | |
| 337 | |
| 338 /********************************************************************** | |
| 339 * join_edges | |
| 340 * | |
| 341 * Join 2 edges together. Send the outline for approximation when a | |
| 342 * closed loop is formed. | |
| 343 **********************************************************************/ | |
| 344 | |
| 345 static void join_edges(CRACKEDGE *edge1, // edges to join | |
| 346 CRACKEDGE *edge2, // no specific order | |
| 347 CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { | |
| 348 if (edge1->pos.x() + edge1->stepx != edge2->pos.x() || | |
| 349 edge1->pos.y() + edge1->stepy != edge2->pos.y()) { | |
| 350 CRACKEDGE *tempedge = edge1; | |
| 351 edge1 = edge2; // swap around | |
| 352 edge2 = tempedge; | |
| 353 } | |
| 354 | |
| 355 if (edge1->next == edge2) { | |
| 356 // already closed | |
| 357 complete_edge(edge1, outline_it); | |
| 358 // attach freelist to end | |
| 359 edge1->prev->next = *free_cracks; | |
| 360 *free_cracks = edge1; // and free list | |
| 361 } else { | |
| 362 // update opposite ends | |
| 363 edge2->prev->next = edge1->next; | |
| 364 edge1->next->prev = edge2->prev; | |
| 365 edge1->next = edge2; // make joins | |
| 366 edge2->prev = edge1; | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 /********************************************************************** | |
| 371 * free_crackedges | |
| 372 * | |
| 373 * Really free the CRACKEDGEs by giving them back to delete. | |
| 374 **********************************************************************/ | |
| 375 | |
| 376 static void free_crackedges(CRACKEDGE *start) { | |
| 377 CRACKEDGE *current; // current edge to free | |
| 378 CRACKEDGE *next; // next one to free | |
| 379 | |
| 380 for (current = start; current != nullptr; current = next) { | |
| 381 next = current->next; | |
| 382 delete current; // delete them all | |
| 383 } | |
| 384 } | |
| 385 | |
| 386 } // namespace tesseract |
