Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/scanedg.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/scanedg.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,386 @@ +/********************************************************************** + * File: scanedg.cpp (Formerly scanedge.c) + * Description: Raster scanning crack based edge extractor. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "scanedg.h" + +#include "crakedge.h" +#include "edgloop.h" +#include "pdblock.h" + +#include <allheaders.h> + +#include <memory> // std::unique_ptr + +namespace tesseract { + +#define WHITE_PIX 1 /*thresholded colours */ +#define BLACK_PIX 0 +// Flips between WHITE_PIX and BLACK_PIX. +#define FLIP_COLOUR(pix) (1 - (pix)) + +struct CrackPos { + CRACKEDGE **free_cracks; // Freelist for fast allocation. + int x; // Position of new edge. + int y; +}; + +static void free_crackedges(CRACKEDGE *start); + +static void join_edges(CRACKEDGE *edge1, CRACKEDGE *edge2, CRACKEDGE **free_cracks, + C_OUTLINE_IT *outline_it); + +static void line_edges(TDimension x, TDimension y, TDimension xext, uint8_t uppercolour, uint8_t *bwpos, + CRACKEDGE **prevline, CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it); + +static void make_margins(PDBLK *block, BLOCK_LINE_IT *line_it, uint8_t *pixels, uint8_t margin, + TDimension left, TDimension right, TDimension y); + +static CRACKEDGE *h_edge(int sign, CRACKEDGE *join, CrackPos *pos); +static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos); + +/********************************************************************** + * block_edges + * + * Extract edges from a PDBLK. + **********************************************************************/ + +void block_edges(Image t_pix, // thresholded image + PDBLK *block, // block in image + C_OUTLINE_IT *outline_it) { + ICOORD bleft; // bounding box + ICOORD tright; + BLOCK_LINE_IT line_it = block; // line iterator + + int width = pixGetWidth(t_pix); + int height = pixGetHeight(t_pix); + int wpl = pixGetWpl(t_pix); + // lines in progress + std::unique_ptr<CRACKEDGE *[]> ptrline(new CRACKEDGE *[width + 1]); + CRACKEDGE *free_cracks = nullptr; + + block->bounding_box(bleft, tright); // block box + ASSERT_HOST(tright.x() <= width); + ASSERT_HOST(tright.y() <= height); + int block_width = tright.x() - bleft.x(); + for (int x = block_width; x >= 0; x--) { + ptrline[x] = nullptr; // no lines in progress + } + + std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]); + + const uint8_t margin = WHITE_PIX; + + for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) { + if (y >= bleft.y() && y < tright.y()) { + // Get the binary pixels from the image. + l_uint32 *line = pixGetData(t_pix) + wpl * (height - 1 - y); + for (int x = 0; x < block_width; ++x) { + bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1; + } + make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y); + } else { + memset(bwline.get(), margin, block_width * sizeof(bwline[0])); + } + line_edges(bleft.x(), y, block_width, margin, bwline.get(), ptrline.get(), &free_cracks, + outline_it); + } + + free_crackedges(free_cracks); // really free them +} + +/********************************************************************** + * make_margins + * + * Get an image line and set to margin non-text pixels. + **********************************************************************/ + +static void make_margins( // get a line + PDBLK *block, // block in image + BLOCK_LINE_IT *line_it, // for old style + uint8_t *pixels, // pixels to strip + uint8_t margin, // white-out pixel + TDimension left, // block edges + TDimension right, + TDimension y // line coord +) { + ICOORDELT_IT seg_it; + + if (block->poly_block() != nullptr) { + std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT(block->poly_block())); + const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line(y)); + if (!segments->empty()) { + seg_it.set_to_list(segments.get()); + seg_it.mark_cycle_pt(); + auto start = seg_it.data()->x(); + auto xext = seg_it.data()->y(); + for (auto xindex = left; xindex < right; xindex++) { + if (xindex >= start && !seg_it.cycled_list()) { + xindex = start + xext - 1; + seg_it.forward(); + start = seg_it.data()->x(); + xext = seg_it.data()->y(); + } else { + pixels[xindex - left] = margin; + } + } + } else { + for (auto xindex = left; xindex < right; xindex++) { + pixels[xindex - left] = margin; + } + } + } else { + TDimension xext; // of segment + auto start = line_it->get_line(y, xext); + for (auto xindex = left; xindex < start; xindex++) { + pixels[xindex - left] = margin; + } + for (auto xindex = start + xext; xindex < right; xindex++) { + pixels[xindex - left] = margin; + } + } +} + +/********************************************************************** + * line_edges + * + * Scan a line for edges and update the edges in progress. + * When edges close into loops, send them for approximation. + **********************************************************************/ + +static void line_edges(TDimension x, // coord of line start + TDimension y, // coord of line + TDimension xext, // width of line + uint8_t uppercolour, // start of prev line + uint8_t *bwpos, // thresholded line + CRACKEDGE **prevline, // edges in progress + CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { + CrackPos pos = {free_cracks, x, y}; + int xmax; // max x coord + int prevcolour; // of previous pixel + CRACKEDGE *current; // current h edge + CRACKEDGE *newcurrent; // new h edge + + xmax = x + xext; // max allowable coord + prevcolour = uppercolour; // forced plain margin + current = nullptr; // nothing yet + + // do each pixel + for (; pos.x < xmax; pos.x++, prevline++) { + const int colour = *bwpos++; // current pixel + if (*prevline != nullptr) { + // changed above + // change colour + uppercolour = FLIP_COLOUR(uppercolour); + if (colour == prevcolour) { + if (colour == uppercolour) { + // finish a line + join_edges(current, *prevline, free_cracks, outline_it); + current = nullptr; // no edge now + } else { + // new horiz edge + current = h_edge(uppercolour - colour, *prevline, &pos); + } + *prevline = nullptr; // no change this time + } else { + if (colour == uppercolour) { + *prevline = v_edge(colour - prevcolour, *prevline, &pos); + // 8 vs 4 connection + } else if (colour == WHITE_PIX) { + join_edges(current, *prevline, free_cracks, outline_it); + current = h_edge(uppercolour - colour, nullptr, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + } else { + newcurrent = h_edge(uppercolour - colour, *prevline, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + current = newcurrent; // right going h edge + } + prevcolour = colour; // remember new colour + } + } else { + if (colour != prevcolour) { + *prevline = current = v_edge(colour - prevcolour, current, &pos); + prevcolour = colour; + } + if (colour != uppercolour) { + current = h_edge(uppercolour - colour, current, &pos); + } else { + current = nullptr; // no edge now + } + } + } + if (current != nullptr) { + // out of block + if (*prevline != nullptr) { // got one to join to? + join_edges(current, *prevline, free_cracks, outline_it); + *prevline = nullptr; // tidy now + } else { + // fake vertical + *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, current, &pos); + } + } else if (*prevline != nullptr) { + // continue fake + *prevline = v_edge(FLIP_COLOUR(prevcolour) - prevcolour, *prevline, &pos); + } +} + +/********************************************************************** + * h_edge + * + * Create a new horizontal CRACKEDGE and join it to the given edge. + **********************************************************************/ + +static CRACKEDGE *h_edge(int sign, // sign of edge + CRACKEDGE *join, // edge to join to + CrackPos *pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != nullptr) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { + newpt = new CRACKEDGE; + } + newpt->pos.set_y(pos->y + 1); // coords of pt + newpt->stepy = 0; // edge is horizontal + + if (sign > 0) { + newpt->pos.set_x(pos->x + 1); // start location + newpt->stepx = -1; + newpt->stepdir = 0; + } else { + newpt->pos.set_x(pos->x); // start location + newpt->stepx = 1; + newpt->stepdir = 2; + } + + if (join == nullptr) { + newpt->next = newpt; // ptrs to other ends + newpt->prev = newpt; + } else { + if (newpt->pos.x() + newpt->stepx == join->pos.x() && newpt->pos.y() == join->pos.y()) { + newpt->prev = join->prev; // update other ends + newpt->prev->next = newpt; + newpt->next = join; // join up + join->prev = newpt; + } else { + newpt->next = join->next; // update other ends + newpt->next->prev = newpt; + newpt->prev = join; // join up + join->next = newpt; + } + } + return newpt; +} + +/********************************************************************** + * v_edge + * + * Create a new vertical CRACKEDGE and join it to the given edge. + **********************************************************************/ + +static CRACKEDGE *v_edge(int sign, // sign of edge + CRACKEDGE *join, CrackPos *pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != nullptr) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { + newpt = new CRACKEDGE; + } + newpt->pos.set_x(pos->x); // coords of pt + newpt->stepx = 0; // edge is vertical + + if (sign > 0) { + newpt->pos.set_y(pos->y); // start location + newpt->stepy = 1; + newpt->stepdir = 3; + } else { + newpt->pos.set_y(pos->y + 1); // start location + newpt->stepy = -1; + newpt->stepdir = 1; + } + + if (join == nullptr) { + newpt->next = newpt; // ptrs to other ends + newpt->prev = newpt; + } else { + if (newpt->pos.x() == join->pos.x() && newpt->pos.y() + newpt->stepy == join->pos.y()) { + newpt->prev = join->prev; // update other ends + newpt->prev->next = newpt; + newpt->next = join; // join up + join->prev = newpt; + } else { + newpt->next = join->next; // update other ends + newpt->next->prev = newpt; + newpt->prev = join; // join up + join->next = newpt; + } + } + return newpt; +} + +/********************************************************************** + * join_edges + * + * Join 2 edges together. Send the outline for approximation when a + * closed loop is formed. + **********************************************************************/ + +static void join_edges(CRACKEDGE *edge1, // edges to join + CRACKEDGE *edge2, // no specific order + CRACKEDGE **free_cracks, C_OUTLINE_IT *outline_it) { + if (edge1->pos.x() + edge1->stepx != edge2->pos.x() || + edge1->pos.y() + edge1->stepy != edge2->pos.y()) { + CRACKEDGE *tempedge = edge1; + edge1 = edge2; // swap around + edge2 = tempedge; + } + + if (edge1->next == edge2) { + // already closed + complete_edge(edge1, outline_it); + // attach freelist to end + edge1->prev->next = *free_cracks; + *free_cracks = edge1; // and free list + } else { + // update opposite ends + edge2->prev->next = edge1->next; + edge1->next->prev = edge2->prev; + edge1->next = edge2; // make joins + edge2->prev = edge1; + } +} + +/********************************************************************** + * free_crackedges + * + * Really free the CRACKEDGEs by giving them back to delete. + **********************************************************************/ + +static void free_crackedges(CRACKEDGE *start) { + CRACKEDGE *current; // current edge to free + CRACKEDGE *next; // next one to free + + for (current = start; current != nullptr; current = next) { + next = current->next; + delete current; // delete them all + } +} + +} // namespace tesseract
