Python2/PyMuPDF: mupdf-source/thirdparty/tesseract/src/textord/colfind.h comparison

comparison mupdf-source/thirdparty/tesseract/src/textord/colfind.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children

comparison

equal deleted inserted replaced

-:1d09e1dec1d9
+:b50eed0cc0ef
+///////////////////////////////////////////////////////////////////////
+// File:        colfind.h
+// Description: Class to find columns in the grid of BLOBNBOXes.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+#ifndef TESSERACT_TEXTORD_COLFIND_H_
+#define TESSERACT_TEXTORD_COLFIND_H_
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "debugpixa.h"
+#include "imagefind.h"
+#include "ocrblock.h"
+#include "tabfind.h"
+#include "textlineprojection.h"
+class BLOCK_LIST;
+struct Boxa;
+struct Pixa;
+class DENORM;
+class ScrollView;
+class STATS;
+class TO_BLOCK;
+namespace tesseract {
+class ColPartitionSet;
+class ColPartitionSet_LIST;
+class ColSegment_LIST;
+class ColumnGroup_LIST;
+class LineSpacing;
+class StrokeWidth;
+class TempColumn_LIST;
+class EquationDetectBase;
+// The ColumnFinder class finds columns in the grid of BLOBNBOXes; it extends TabFind.
+class TESS_API ColumnFinder : public TabFind {
+public:
+// gridsize is an estimate of the text size in the image. A suitable value
+// is in TO_BLOCK::line_size after find_components has been used to make
+// the blobs.
+// bleft and tright are the bounds of the image (rectangle) being processed.
+// vlines is a (possibly empty) list of TabVectors; vertical_x and vertical_y are
+// the sum logical vertical vector produced by LineFinder::FindVerticalLines.
+// If cjk_script is true, then broken CJK characters are fixed during
+// layout analysis to assist in detecting horizontal vs vertically written
+// textlines. NOTE(review): hlines, resolution, aligned_gap_fraction are undocumented here.
+ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution,
+bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines,
+TabVector_LIST *hlines, int vertical_x, int vertical_y);
+~ColumnFinder() override;
+// Accessors (for testing) and the cjk_script_ setter.
+const DENORM *denorm() const {
+return denorm_;
+}
+const TextlineProjection *projection() const {
+return &projection_;
+}
+void set_cjk_script(bool is_cjk) {
+cjk_script_ = is_cjk;
+}
+// ======================================================================
+// The main function of ColumnFinder is broken into pieces to facilitate
+// optional insertion of orientation and script detection in an efficient
+// way. The calling sequence IS MANDATORY however, whether or not
+// OSD is being used:
+// 1. Construction.
+// 2. SetupAndFilterNoise.
+// 3. IsVerticallyAlignedText.
+// 4. CorrectOrientation.
+// 5. FindBlocks.
+// 6. Destruction. Use of a single column finder for multiple images does not
+//    make sense.
+// Throughout these steps, the ColPartitions are owned by part_grid_, which
+// means that it must be kept correct. Exception: big_parts_ owns its
+// own ColPartitions.
+// The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
+// for a phase in FindBlocks before TransformToBlocks, when they become
+// owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
+// indicates more of a betrothal for the majority of layout analysis, ie
+// which ColPartition will take ownership when the blobs are released from
+// the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
+// are part of the image regions, as they are not on any TO_BLOCK list.
+// TODO(rays) break up column finder further into smaller classes, as
+// there is a lot more to it than column finding now.
+// ======================================================================
+// Performs initial processing on the blobs in the input_block:
+// Sets up the part_grid_, stroke_width_ and nontext_map_.
+// Obvious noise blobs are filtered out and used to mark the nontext_map_.
+// Initial stroke-width analysis is used to get local text alignment
+// direction, so the textline projection_ map can be set up.
+// On return, IsVerticallyAlignedText may be called (now optionally) to
+// determine the gross textline alignment of the page.
+void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block);
+// Tests for vertical alignment of text (returning true if so), and generates
+// a list of blobs (in osd_blobs) for orientation and script detection.
+// block is the single block for the whole page or rectangle to be OCRed.
+// Note that the vertical alignment may be due to text whose writing direction
+// is vertical, like say Japanese, or due to text whose writing direction is
+// horizontal but whose text appears vertically aligned because the image is
+// not the right way up.
+// find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block,
+BLOBNBOX_CLIST *osd_blobs);
+// Rotates the blobs and the TabVectors so that the gross writing direction
+// (text lines) is horizontal and lines are read down the page.
+// Applied rotation stored in rotation_.
+// A second rotation is calculated for application during recognition to
+// make the rotated blobs upright for recognition.
+// Subsequent rotation stored in text_rotation_.
+//
+// Arguments:
+//   vertical_text_lines is true if the text lines are vertical.
+//   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
+//   rotations from osd required for the text to be upright and readable.
+void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation);
+// Finds blocks of text, image, rule line, table etc, returning them in the
+// blocks and to_blocks.
+// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
+// Image blocks are generated by a combination of photo_mask_pix (which may
+// NOT be nullptr) and the rejected text found during preliminary textline
+// finding.
+// The input_block is the result of a call to find_components, and contains
+// the blobs found in the image or rectangle to be OCRed. These blobs will be
+// removed and placed in the output blocks, while unused ones will be deleted.
+// If single_column is true, the input is treated as single column, but
+// it is still divided into blocks of equal line spacing/text size.
+// scaled_color is scaled down by scaled_factor from the input color image,
+// and may be nullptr if the input was not color.
+// grey_pix is optional, but if present must match the photo_mask_pix in size,
+// and must be a *real* grey image instead of binary_pix * 255.
+// thresholds_pix is expected to be present iff grey_pix is present and
+// can be an integer factor reduction of the grey_pix. It represents the
+// thresholds that were used to create the binary_pix from the grey_pix.
+// Small blobs that confuse the segmentation into lines are placed into
+// diacritic_blobs, with the intention that they be put into the most
+// appropriate word after the rest of layout analysis.
+// Returns -1 if the user hits the 'd' key in the blocks window while running
+// in debug mode, which requests a retry with more debug info.
+int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block,
+Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug,
+BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks);
+// Get the rotation required to deskew, and its inverse rotation.
+void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew);
+// Set the equation detection pointer.
+void SetEquationDetect(EquationDetectBase *detect);
+private:
+// Displays the blob and block bounding boxes in a window called Blocks.
+void DisplayBlocks(BLOCK_LIST *blocks);
+// Displays the column edges at each grid y coordinate defined by
+// best_columns_.
+void DisplayColumnBounds(PartSetVector *sets);
+////// Functions involved in determining the columns used on the page. /////
+// Sets up column_sets_ (the determined column layout at each horizontal
+// slice). Returns false if the page is empty.
+bool MakeColumns(bool single_column);
+// Attempt to improve the column_candidates by expanding the columns
+// and adding new partitions from the partition sets in src_sets.
+// Src_sets may be equal to column_candidates, in which case it will
+// use them as a source to improve themselves. (column_candidates: presumably column_sets.)
+void ImproveColumnCandidates(PartSetVector *src_sets, PartSetVector *column_sets);
+// Prints debug information on the column candidates.
+void PrintColumnCandidates(const char *title);
+// Finds the optimal set of columns that cover the entire image with as
+// few changes in column partition as possible.
+// Returns true if any part of the page is multi-column.
+bool AssignColumns(const PartSetVector &part_sets);
+// Finds the biggest range in part_sets_ that has no assigned column, but
+// column assignment is possible.
+bool BiggestUnassignedRange(int set_count, const bool *any_columns_possible, int *start,
+int *end);
+// Finds the modal compatible column_set_ index within the given range.
+int RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start, int end);
+// Given that there are many column_set_id compatible columns in the range,
+// shrinks the range to the longest contiguous run of compatibility, allowing
+// gaps where no columns are possible, but not where competing columns are
+// possible.
+void ShrinkRangeToLongestRun(int **column_set_costs, const int *assigned_costs,
+const bool *any_columns_possible, int column_set_id, int *best_start,
+int *best_end);
+// Moves start in the direction of step, up to, but not including end while
+// the only incompatible regions are no more than kMaxIncompatibleColumnCount
+// in size, and the compatible regions beyond are bigger.
+void ExtendRangePastSmallGaps(int **column_set_costs, const int *assigned_costs,
+const bool *any_columns_possible, int column_set_id, int step,
+int end, int *start);
+// Assigns the given column_set_id to the part_sets_ in the given range.
+void AssignColumnToRange(int column_set_id, int start, int end, int **column_set_costs,
+int *assigned_costs);
+// Computes the mean_column_gap_.
+void ComputeMeanColumnGap(bool any_multi_column);
+//////// Functions that manipulate ColPartitions in the part_grid_ /////
+//////// to split, merge, find margins, and find types.  //////////////
+// Hoovers up all un-owned blobs and deletes them.
+// The rest get released from the block so the ColPartitions can pass
+// ownership to the output blocks.
+void ReleaseBlobsAndCleanupUnused(TO_BLOCK *block);
+// Splits partitions that cross columns where they have nothing in the gap.
+void GridSplitPartitions();
+// Merges partitions where there is vertical overlap, within a single column,
+// and the horizontal gap is small enough.
+void GridMergePartitions();
+// Inserts remaining noise blobs into the most applicable partition if any.
+// If there is no applicable partition, then the blobs are deleted.
+void InsertRemainingNoise(TO_BLOCK *block);
+// Remove partitions that come from horizontal lines that look like
+// underlines, but are not part of a table.
+void GridRemoveUnderlinePartitions();
+// Add horizontal line separators as partitions.
+void GridInsertHLinePartitions();
+// Add vertical line separators as partitions.
+void GridInsertVLinePartitions();
+// For every ColPartition in the grid, sets its type based on position
+// in the columns.
+void SetPartitionTypes();
+// Only images remain with multiple types in a run of partners.
+// Sets the type of all in the group to the maximum of the group.
+void SmoothPartnerRuns();
+//////// Functions that make the final output blocks             ///////
+// Helper functions for TransformToBlocks.
+// Add the part to the temp list in the correct order.
+void AddToTempPartList(ColPartition *part, ColPartition_CLIST *temp_list);
+// Add everything from the temp list to the work_set assuming correct order.
+void EmptyTempPartList(ColPartition_CLIST *temp_list, WorkingPartSet_LIST *work_set);
+// Transform the grid of partitions to the output blocks.
+void TransformToBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
+// Reflect the blob boxes (but not the outlines) in the y-axis so that
+// the blocks get created in the correct RTL order. Rotates the blobs
+// in the input_block and the bblobs list.
+// The reflection is undone in RotateAndReskewBlocks by
+// reflecting the blocks themselves, and then recomputing the blob bounding
+// boxes.
+void ReflectForRtl(TO_BLOCK *input_block, BLOBNBOX_LIST *bblobs);
+// Undo the deskew that was done in FindTabVectors, as recognition is done
+// without correcting blobs or blob outlines for skew.
+// Reskew the completed blocks to put them back to the original rotated coords
+// that were created by CorrectOrientation.
+// If the input_is_rtl, then reflect the blocks in the y-axis to undo the
+// reflection that was done before FindTabVectors.
+// Blocks that were identified as vertical text (relative to the rotated
+// coordinates) are further rotated so the text lines are horizontal.
+// blob polygonal outlines are rotated to match the position of the blocks
+// that they are in, and their bounding boxes are recalculated to be accurate.
+// Record appropriate inverse transformations and required
+// classifier transformation in the blocks.
+void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *to_blocks);
+// Computes the rotations for the block (to make textlines horizontal) and
+// for the blobs (for classification) and sets the appropriate members
+// of the given block.
+// Returns the rotation that needs to be applied to the blobs to make
+// them sit in the rotated block.
+FCOORD ComputeBlockAndClassifyRotation(BLOCK *block);
+// If true then the page language is cjk, so it is safe to perform
+// FixBrokenCJK.
+bool cjk_script_;
+// The minimum gutter width to apply for finding columns.
+// Modified when vertical text is detected to prevent detection of
+// vertical text lines as columns.
+int min_gutter_width_;
+// The mean gap between columns over the page.
+int mean_column_gap_;
+// Config param saved at construction time. Modifies min_gutter_width_ with
+// vertical text to prevent detection of vertical text as columns.
+double tabfind_aligned_gap_fraction_;
+// The rotation vector needed to convert original coords to deskewed.
+FCOORD deskew_;
+// The rotation vector needed to convert deskewed back to original coords.
+FCOORD reskew_;
+// The rotation vector used to rotate vertically oriented pages.
+FCOORD rotation_;
+// The rotation vector needed to convert the rotated back to original coords.
+FCOORD rerotate_;
+// The additional rotation vector needed to rotate text for recognition.
+FCOORD text_rotation_;
+// The column_sets_ contain the ordered candidate ColPartitionSets that
+// define the possible divisions of the page into columns.
+PartSetVector column_sets_;
+// A simple array of pointers to the best assigned column division at
+// each grid y coordinate.
+ColPartitionSet **best_columns_;
+// The grid used for creating initial partitions with strokewidth.
+StrokeWidth *stroke_width_;
+// The grid used to hold ColPartitions after the columns have been determined.
+ColPartitionGrid part_grid_;
+// List of ColPartitions that are no longer needed after they have been
+// turned into regions, but are kept around because they are referenced
+// by the part_grid_.
+ColPartition_LIST good_parts_;
+// List of ColPartitions that are big and might be dropcap or vertically
+// joined.
+ColPartition_LIST big_parts_;
+// List of ColPartitions that have been declared noise.
+ColPartition_LIST noise_parts_;
+// The fake blobs that are made from the images.
+BLOBNBOX_LIST image_bblobs_;
+// Horizontal line separators.
+TabVector_LIST horizontal_lines_;
+// Image map of photo/noise areas on the page.
+Image nontext_map_;
+// Textline projection map.
+TextlineProjection projection_;
+// Sequence of DENORMS that indicate how to get back to the original image
+// coordinate space. The destructor must delete all the DENORMs in the chain.
+DENORM *denorm_;
+// The equation region detector pointer. Note: This pointer is passed in by
+// member function SetEquationDetect; this class does NOT own it and is not
+// responsible for releasing it.
+EquationDetectBase *equation_detect_;
+#ifndef GRAPHICS_DISABLED
+// Various debug windows that automatically go away on completion.
+ScrollView *input_blobs_win_ = nullptr;
+// Allow a subsequent instance to reuse the blocks window.
+// Not thread-safe, but multiple threads shouldn't be using windows anyway.
+static ScrollView *blocks_win_;
+#endif
+};
+} // namespace tesseract.
+#endif // TESSERACT_TEXTORD_COLFIND_H_

Mercurial > hgrepos > Python2 > PyMuPDF

comparison mupdf-source/thirdparty/tesseract/src/textord/colfind.h @ 2:b50eed0cc0ef upstream