diff mupdf-source/thirdparty/tesseract/src/textord/strokewidth.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/src/textord/strokewidth.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,333 @@
+///////////////////////////////////////////////////////////////////////
+// File:        strokewidth.h
+// Description: Subclass of BBGrid to find uniformity of strokewidth.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_
+#define TESSERACT_TEXTORD_STROKEWIDTH_H_
+
+#include "blobbox.h"  // BlobNeighbourDir.
+#include "blobgrid.h" // Base class.
+#include "colpartitiongrid.h"
+#include "textlineprojection.h"
+
+class DENORM;
+class ScrollView;
+class TO_BLOCK;
+
+namespace tesseract {
+
+class ColPartition_LIST;
+class TabFind;
+class TextlineProjection;
+
+// Names the left or right side in place of a bare bool direction argument (see MarkLeaderNeighbours).
+enum LeftOrRight { LR_LEFT, LR_RIGHT };
+
+// Return value from StrokeWidth::FindInitialPartitions, indicating whether severe skew or
+// noise was detected. Any value other than PFR_OK means the job is incomplete and must be redone.
+enum PartitionFindResult {
+  PFR_OK,   // Everything is OK.
+  PFR_SKEW, // Skew was detected and rotated (marked TODO in the FindInitialPartitions comment).
+  PFR_NOISE // Noise was detected and removed.
+};
+
+/**
+ * The StrokeWidth class is a BlobGrid that holds all the normal and large blobs.
+ * It is used to find good large blobs and move them to the normal blobs
+ * by virtue of having a neighbour with a reasonably compatible strokewidth.
+ */
+class StrokeWidth : public BlobGrid {
+public:
+  StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright);
+  ~StrokeWidth() override;
+
+  // Sets the neighbours member of the medium-sized blobs in the block.
+  // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
+  // blobs and sets pointers to the good neighbours.
+  void SetNeighboursOnMediumBlobs(TO_BLOCK *block);
+
+  // Sets the neighbour/textline writing direction members of the medium
+  // and large blobs with optional repair of broken CJK characters first.
+  // Repair of broken CJK is needed here because broken CJK characters
+  // can fool the textline direction detection algorithm.
+  void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge,
+                                            TO_BLOCK *input_block);
+
+  // To save computation, the process of generating partitions is broken
+  // into the following 4 steps:
+  // TestVerticalTextDirection
+  // CorrectForRotation (used only if a rotation is to be applied)
+  // FindLeaderPartitions
+  // GradeBlobsIntoPartitions.
+  // These functions are all required, in sequence, except for
+  // CorrectForRotation, which is not needed if no rotation is applied.
+
+  // Types all the blobs as vertical or horizontal text or unknown and
+  // returns true if the majority are vertical.
+  // If the blobs are rotated, it is necessary to call CorrectForRotation
+  // after rotating everything, otherwise the work done here will be enough.
+  // If osd_blobs is not null, a list of blobs from the dominant textline
+  // direction is returned in it for use in orientation and script detection.
+  // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+  bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block,
+                                 BLOBNBOX_CLIST *osd_blobs);
+
+  // Corrects the data structures for the given rotation.
+  void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid);
+
+  // Finds leader partitions and inserts them into the given grid.
+  void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid);
+
+  // Finds those blobs that look like bits of vertical lines, which would
+  // otherwise screw up layout analysis, and marks them as noise.
+  void RemoveLineResidue(ColPartition_LIST *big_part_list);
+
+  // Types all the blobs as vertical text or horizontal text or unknown and
+  // puts them into initial ColPartitions in the supplied part_grid.
+  // rerotation determines how to get back to the image coordinates from the
+  // blob coordinates (since they may have been rotated for vertical text).
+  // block is the single block for the whole page or rectangle to be OCRed.
+  // nontext_pix (full-size) is a binary mask used to prevent merges across
+  // photo/text boundaries. It is not kept beyond this function.
+  // denorm provides a mapping back to the image from the current blob
+  // coordinate space.
+  // projection provides a measure of textline density over the image and
+  // provides functions to assist with diacritic detection. It should be a
+  // pointer to a new TextlineProjection, and will be setup here.
+  // part_grid is the output grid of textline partitions.
+  // Large blobs that cause overlap are put in separate partitions and added
+  // to the big_parts list.
+  void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,
+                                Image nontext_pix, const DENORM *denorm, bool cjk_script,
+                                TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,
+                                ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
+
+  // Handles a click event in a display window.
+  void HandleClick(int x, int y) override;
+
+private:
+  // Computes the noise_density_ by summing the number of elements in a neighbourhood of each
+  // grid cell. NOTE(review): noise_density_ is not declared in this class - confirm still current.
+  void ComputeNoiseDensity(TO_BLOCK *block, TabFind *line_grid);
+
+  // Detects and marks leader dots/dashes.
+  //    Leaders are horizontal chains of small or noise blobs that look
+  //    monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+  // Detected leaders become the only occupants of the block->small_blobs list.
+  // Non-leader small blobs get moved to the blobs list.
+  // Non-leader noise blobs remain singletons in the noise list.
+  // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+  // block is the single block for the whole page or rectangle to be OCRed.
+  // leader_parts is the output.
+  void FindLeadersAndMarkNoise(TO_BLOCK *block, ColPartition_LIST *leader_parts);
+
+  /** Inserts the block blobs (normal and large) into this grid.
+   * Blobs remain owned by the block. */
+  void InsertBlobs(TO_BLOCK *block);
+
+  // Fix broken CJK characters, using the fake joined blobs mechanism.
+  // Blobs are really merged, ie the master takes all the outlines and the
+  // others are deleted.
+  // Returns true if sufficient blobs are merged that it may be worth running
+  // again, due to a better estimate of character size.
+  bool FixBrokenCJK(TO_BLOCK *block);
+
+  // Collect blobs that overlap or are within max_dist of the input bbox.
+  // Return them in the list of blobs and expand the bbox to be the union
+  // of all the boxes. not_this is excluded from the search, as are blobs
+  // that cause the merged box to exceed max_size in either dimension.
+  void AccumulateOverlaps(const BLOBNBOX *not_this, bool debug, int max_size, int max_dist,
+                          TBOX *bbox, BLOBNBOX_CLIST *blobs);
+
+  // For each blob in this grid, finds the textline direction to be horizontal
+  // or vertical according to distance to neighbours and 1st and 2nd order
+  // neighbours. Non-text tends to end up without a definite direction.
+  // Result is setting of the neighbours and vert_possible/horz_possible
+  // flags in the BLOBNBOXes currently in this grid.
+  // This function is called more than once if page orientation is uncertain,
+  // so display_if_debugging is true on the final call to display the results.
+  void FindTextlineFlowDirection(PageSegMode pageseg_mode, bool display_if_debugging);
+
+  // Sets the neighbours and good_stroke_neighbours members of the blob by
+  // searching close on all 4 sides.
+  // When finding leader dots/dashes, there is a slightly different rule for
+  // what makes a good neighbour.
+  // If activate_line_trap, then line-like objects are found and isolated.
+  void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX *blob);
+
+  // Sets the good_stroke_neighbours member of the blob if it has a
+  // GoodNeighbour on the given side.
+  // Also sets the neighbour in the blob, whether or not a good one is found.
+  // Return value is the number of neighbours in the line trap size range.
+  // Leaders get extra special lenient treatment.
+  int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX *blob);
+
+  // Marks the blob as only horizontal or only vertical where evidence
+  // is clear based on gaps of 2nd order neighbours.
+  void SetNeighbourFlows(BLOBNBOX *blob);
+
+  // Nullify the neighbours in the wrong directions where the direction
+  // is clear-cut based on a distance margin. Good for isolating vertical
+  // text from neighbouring horizontal text.
+  void SimplifyObviousNeighbours(BLOBNBOX *blob);
+
+  // Smoothes the vertical/horizontal type of the blob based on the
+  // 2nd-order neighbours. If reset_all (named `desperate` in this declaration) is true,
+  // then all blobs are changed. Otherwise, only ambiguous blobs are processed.
+  void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, BLOBNBOX *blob);
+
+  // Checks the left or right side of the given leader partition and sets the
+  // (opposite) leader_on_right or leader_on_left flags for blobs
+  // that are next to the given side of the given leader partition.
+  void MarkLeaderNeighbours(const ColPartition *part, LeftOrRight side);
+
+  // Partition creation. Accumulates vertical and horizontal text chains,
+  // puts the remaining blobs in as unknowns, and then merges/splits to
+  // minimize overlap and smoothes the types with neighbours and the color
+  // image if provided. rerotation is used to rotate the coordinate space
+  // back to the nontext_map_ image.
+  // If find_problems is true, detects possible noise pollution by the amount
+  // of partition overlap that is created by the diacritics. If excessive, the
+  // noise is separated out into diacritic blobs, and PFR_NOISE is returned.
+  // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
+  // the components, saves the skew_angle and returns PFR_SKEW.] If the return
+  // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
+  // called again after cleaning up the partly done work.
+  PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,
+                                            bool find_problems, TO_BLOCK *block,
+                                            BLOBNBOX_LIST *diacritic_blobs,
+                                            ColPartitionGrid *part_grid,
+                                            ColPartition_LIST *big_parts, FCOORD *skew_angle);
+  // Detects noise by a significant increase in partition overlap from
+  // pre_overlap to now, and removes noise from the union of all the overlapping
+  // partitions, placing the blobs in diacritic_blobs. Returns true if any noise
+  // was found and removed.
+  bool DetectAndRemoveNoise(int pre_overlap, const TBOX &grid_box, TO_BLOCK *block,
+                            ColPartitionGrid *part_grid, BLOBNBOX_LIST *diacritic_blobs);
+  // Finds vertical chains of text-like blobs and puts them in ColPartitions.
+  void FindVerticalTextChains(ColPartitionGrid *part_grid);
+  // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
+  void FindHorizontalTextChains(ColPartitionGrid *part_grid);
+  // Finds diacritics and saves their base character in the blob.
+  void TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block);
+  // Searches this grid for an appropriately close and sized neighbour of the
+  // given [small] blob. If such a blob is found, the diacritic base is saved
+  // in the blob and true is returned.
+  // The small_grid is a secondary grid that contains the small/noise objects
+  // that are not in this grid, but may be useful for determining a connection
+  // between blob and its potential base character. (See DiacriticXGapFilled.)
+  bool DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob);
+  // Returns true if there is no gap between the base char and the diacritic
+  // bigger than a fraction of the height of the base char:
+  // Eg: line end.....'
+  // The quote is a long way from the end of the line, yet it needs to be a
+  // diacritic. To determine that the quote is not part of an image, or
+  // a different text block, we check for other marks in the gap between
+  // the base char and the diacritic.
+  //                          '<--Diacritic
+  // |---------|
+  // |         |<-toobig-gap->
+  // | Base    |<ok gap>
+  // |---------|        x<-----Dot occupying gap
+  // The grid is const really.
+  bool DiacriticXGapFilled(BlobGrid *grid, const TBOX &diacritic_box, const TBOX &base_box);
+  // Merges diacritics with the ColPartition of the base character blob.
+  void MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid);
+  // Any blobs on the large_blobs list of block that are still unowned by a
+  // ColPartition are probably drop-cap or vertically touching, so the blobs
+  // are removed to the big_parts list and treated separately.
+  void RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid,
+                              ColPartition_LIST *big_parts);
+
+  // All remaining unused blobs are put in individual ColPartitions.
+  void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid *part_grid);
+
+  // If combine, put all blobs in the cell_list into a single partition,
+  // otherwise put each one into its own partition.
+  void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
+                                  ColPartitionGrid *part_grid, BLOBNBOX_CLIST *cell_list);
+
+  // Helper function to finish setting up a ColPartition and insert into
+  // part_grid.
+  void CompletePartition(PageSegMode pageseg_mode, ColPartition *part, ColPartitionGrid *part_grid);
+
+  // Helper returns true if we are looking only for vertical textlines, taking into account any
+  // rotation done: a non-zero rerotation_.y() swaps the result with FindingHorizontalOnly's.
+  bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
+    if (rerotation_.y() == 0.0f) {
+      return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+    }
+    return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+  }
+  // Helper returns true if we are looking only for horizontal textlines, taking into account any
+  // rotation done: a non-zero rerotation_.y() swaps the result with FindingVerticalOnly's.
+  bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
+    if (rerotation_.y() == 0.0f) {
+      return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+    }
+    return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+  }
+
+  // Merge partitions where the merge appears harmless.
+  void EasyMerges(ColPartitionGrid *part_grid);
+
+  // Compute a search box based on the orientation of the partition.
+  // Returns true if a suitable box can be calculated.
+  // Callback for EasyMerges.
+  bool OrientationSearchBox(ColPartition *part, TBOX *box);
+
+  // Merge confirmation callback for EasyMerges.
+  bool ConfirmEasyMerge(const ColPartition *p1, const ColPartition *p2);
+
+  // Returns true if there is no significant noise in between the boxes.
+  bool NoNoiseInBetween(const TBOX &box1, const TBOX &box2) const;
+
+#ifndef GRAPHICS_DISABLED
+  // Displays the blobs colored according to the number of good neighbours
+  // and the vertical/horizontal flow.
+  ScrollView *DisplayGoodBlobs(const char *window_name, int x, int y);
+
+  // Displays blobs colored according to whether or not they are diacritics.
+  ScrollView *DisplayDiacritics(const char *window_name, int x, int y, TO_BLOCK *block);
+#endif
+
+private:
+  // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
+  Image nontext_map_;
+  // Textline projection map. Borrowed pointer.
+  TextlineProjection *projection_;
+  // DENORM used by projection_ to get back to image coords. Borrowed pointer.
+  const DENORM *denorm_;
+  // Bounding box of the grid.
+  TBOX grid_box_;
+  // Rerotation to get back to the original image.
+  FCOORD rerotation_;
+#ifndef GRAPHICS_DISABLED
+  // Windows for debug display.
+  ScrollView *leaders_win_ = nullptr;
+  ScrollView *initial_widths_win_ = nullptr;
+  ScrollView *widths_win_ = nullptr;
+  ScrollView *chains_win_ = nullptr;
+  ScrollView *diacritics_win_ = nullptr;
+  ScrollView *textlines_win_ = nullptr;
+  ScrollView *smoothed_win_ = nullptr;
+#endif
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_STROKEWIDTH_H_