Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/wordrec/findseam.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/wordrec/findseam.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,350 @@ +/****************************************************************************** + * + * File: findseam.cpp (Formerly findseam.c) + * Author: Mark Seaman, OCR Technology + * + * (c) Copyright 1987, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *****************************************************************************/ +/*---------------------------------------------------------------------- + I n c l u d e s +----------------------------------------------------------------------*/ +#include "findseam.h" +#include "outlines.h" +#include "plotedges.h" +#include "seam.h" +#include "wordrec.h" + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +# include "config_auto.h" +#endif + +/********************************************************************** + * partial_split_priority + * + * Assign a priority to this split based on the features that it has. + * Grade it according to the different rating schemes and return the + * value of its goodness. + **********************************************************************/ + +#define partial_split_priority(split) (grade_split_length(split) + grade_sharpness(split)) + +/*---------------------------------------------------------------------- + T y p e s +----------------------------------------------------------------------*/ +#define SPLIT_CLOSENESS 20 /* Difference in x value */ + /* How many to keep */ +#define MAX_NUM_SEAMS 150 +/* How many to keep */ +#define NO_FULL_PRIORITY (-1) // Special marker for pri. + /* Evaluate right away */ +#define BAD_PRIORITY 9999.0 + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +namespace tesseract { + +/********************************************************************** + * add_seam_to_queue + * + * Adds the given new_seam to the seams priority queue, unless it is full + * and the new seam is worse than the worst. + **********************************************************************/ +void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams) { + if (new_seam == nullptr) { + return; + } + if (chop_debug) { + tprintf("Pushing new seam with priority %g :", new_priority); + new_seam->Print("seam: "); + } + if (seams->size() >= MAX_NUM_SEAMS) { + SeamPair old_pair(0, nullptr); + if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) { + if (chop_debug) { + tprintf("Old seam staying with priority %g\n", old_pair.key()); + } + delete new_seam; + seams->Push(&old_pair); + return; + } else if (chop_debug) { + tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority, + old_pair.key()); + } + } + SeamPair new_pair(new_priority, new_seam); + seams->Push(&new_pair); +} + +/********************************************************************** + * choose_best_seam + * + * Choose the best seam that can be created by assembling this a + * collection of splits. A queue of all the possible seams is + * maintained. Each new split received is placed in that queue with + * its partial priority value. These values in the seam queue are + * evaluated and combined until a good enough seam is found. If no + * further good seams are being found then this function returns to the + * caller, who will send more splits. If this function is called with + * a split of nullptr, then no further splits can be supplied by the + * caller. + **********************************************************************/ +void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, + SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) { + SEAM *seam; + float my_priority; + /* Add seam of split */ + my_priority = priority; + if (split != nullptr) { + TPOINT split_point = split->point1->pos; + split_point += split->point2->pos; + split_point /= 2; + seam = new SEAM(my_priority, split_point, *split); + if (chop_debug > 1) { + seam->Print("Partial priority "); + } + add_seam_to_queue(my_priority, seam, seam_queue); + + if (my_priority > chop_good_split) { + return; + } + } + + TBOX bbox = blob->bounding_box(); + /* Queue loop */ + while (!seam_queue->empty()) { + SeamPair seam_pair; + seam_queue->Pop(&seam_pair); + seam = seam_pair.extract_data(); + /* Set full priority */ + my_priority = + seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth, + chop_center_knob, chop_width_change_knob); + if (chop_debug) { + char str[80]; + snprintf(str, sizeof(str), "Full my_priority %0.0f, ", my_priority); + seam->Print(str); + } + + if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) && + my_priority < chop_ok_split) { + /* No crossing */ + if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) { + delete *seam_result; + *seam_result = new SEAM(*seam); + (*seam_result)->set_priority(my_priority); + } else { + delete seam; + seam = nullptr; + my_priority = BAD_PRIORITY; + } + } + + if (my_priority < chop_good_split) { + delete seam; + return; /* Made good answer */ + } + + if (seam) { + /* Combine with others */ + if (seam_pile->size() < chop_seam_pile_size) { + combine_seam(*seam_pile, seam, seam_queue); + SeamDecPair pair(seam_pair.key(), seam); + seam_pile->Push(&pair); + } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size && + seam_pile->PeekTop().key() > seam_pair.key()) { + combine_seam(*seam_pile, seam, seam_queue); + SeamDecPair pair; + seam_pile->Pop(&pair); // pop the worst. + // Replace the seam in pair (deleting the old one) with + // the new seam and score, then push back into the heap. + pair.set_key(seam_pair.key()); + pair.set_data(seam); + seam_pile->Push(&pair); + } else { + delete seam; + } + } + + my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key(); + if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) { + return; + } + } +} + +/********************************************************************** + * combine_seam + * + * Find other seams to combine with this one. The new seams that result + * from this union should be added to the seam queue. The return value + * tells whether or not any additional seams were added to the queue. + **********************************************************************/ +void Wordrec::combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) { + for (int x = 0; x < seam_pile.size(); ++x) { + const SEAM *this_one = seam_pile.get(x).data(); + if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { + SEAM *new_one = new SEAM(*seam); + new_one->CombineWith(*this_one); + if (chop_debug > 1) { + new_one->Print("Combo priority "); + } + add_seam_to_queue(new_one->priority(), new_one, seam_queue); + } + } +} + +/********************************************************************** + * pick_good_seam + * + * Find and return a good seam that will split this blob into two pieces. + * Work from the outlines provided. + **********************************************************************/ +SEAM *Wordrec::pick_good_seam(TBLOB *blob) { + SeamPile seam_pile(chop_seam_pile_size); + EDGEPT *points[MAX_NUM_POINTS]; + EDGEPT_CLIST new_points; + SEAM *seam = nullptr; + TESSLINE *outline; + int16_t num_points = 0; + +#ifndef GRAPHICS_DISABLED + if (chop_debug > 2) { + wordrec_display_splits.set_value(true); + } + + draw_blob_edges(blob); +#endif + + PointHeap point_heap(MAX_NUM_POINTS); + for (outline = blob->outlines; outline; outline = outline->next) { + prioritize_points(outline, &point_heap); + } + + while (!point_heap.empty() && num_points < MAX_NUM_POINTS) { + points[num_points++] = point_heap.PeekTop().data(); + point_heap.Pop(nullptr); + } + + /* Initialize queue */ + SeamQueue seam_queue(MAX_NUM_SEAMS); + + try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob); + try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob); + + if (seam == nullptr) { + choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile); + } else if (seam->priority() > chop_good_split) { + choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile); + } + + EDGEPT_C_IT it(&new_points); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + EDGEPT *inserted_point = it.data(); + if (seam == nullptr || !seam->UsesPoint(inserted_point)) { + for (outline = blob->outlines; outline; outline = outline->next) { + if (outline->loop == inserted_point) { + outline->loop = outline->loop->next; + } + } + remove_edgept(inserted_point); + } + } + + if (seam) { + if (seam->priority() > chop_ok_split) { + delete seam; + seam = nullptr; + } +#ifndef GRAPHICS_DISABLED + else if (wordrec_display_splits) { + seam->Mark(edge_window); + if (chop_debug > 2) { + edge_window->Update(); + edge_window->Wait(); + } + } +#endif + } + + if (chop_debug) { + wordrec_display_splits.set_value(false); + } + + return (seam); +} + +/********************************************************************** + * try_point_pairs + * + * Try all the splits that are produced by pairing critical points + * together. See if any of them are suitable for use. Use a seam + * queue and seam pile that have already been initialized and used. + **********************************************************************/ +void Wordrec::try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, + SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, + TBLOB *blob) { + int16_t x; + int16_t y; + PRIORITY priority; + + for (x = 0; x < num_points; x++) { + for (y = x + 1; y < num_points; y++) { + if (points[y] && + points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length && + points[x] != points[y]->next && points[y] != points[x]->next && + !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) { + SPLIT split(points[x], points[y]); + priority = partial_split_priority(&split); + + choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); + } + } + } +} + +/********************************************************************** + * try_vertical_splits + * + * Try all the splits that are produced by vertical projection to see + * if any of them are suitable for use. Use a seam queue and seam pile + * that have already been initialized and used. + * Return in new_points a collection of points that were inserted into + * the blob while examining vertical splits and which may safely be + * removed once a seam is chosen if they are not part of the seam. + **********************************************************************/ +void Wordrec::try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, + EDGEPT_CLIST *new_points, SeamQueue *seam_queue, + SeamPile *seam_pile, SEAM **seam, TBLOB *blob) { + EDGEPT *vertical_point = nullptr; + int16_t x; + PRIORITY priority; + TESSLINE *outline; + + for (x = 0; x < num_points; x++) { + vertical_point = nullptr; + for (outline = blob->outlines; outline; outline = outline->next) { + vertical_projection_point(points[x], outline->loop, &vertical_point, new_points); + } + + if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next && + points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) { + SPLIT split(points[x], vertical_point); + priority = partial_split_priority(&split); + choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); + } + } +} + +} // namespace tesseract
