Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/makerow.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/makerow.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,253 @@ +/********************************************************************** + * File: makerow.h (Formerly makerows.h) + * Description: Code to arrange blobs into rows of text. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef MAKEROW_H +#define MAKEROW_H + +#include "blobbox.h" +#include "blobs.h" +#include "ocrblock.h" +#include "params.h" +#include "statistc.h" + +namespace tesseract { + +enum OVERLAP_STATE { + ASSIGN, // assign it to row + REJECT, // reject it - dual overlap + NEW_ROW +}; + +enum ROW_CATEGORY { + ROW_ASCENDERS_FOUND, + ROW_DESCENDERS_FOUND, + ROW_UNKNOWN, + ROW_INVALID, +}; + +extern BOOL_VAR_H(textord_heavy_nr); +extern BOOL_VAR_H(textord_show_initial_rows); +extern BOOL_VAR_H(textord_show_parallel_rows); +extern BOOL_VAR_H(textord_show_expanded_rows); +extern BOOL_VAR_H(textord_show_final_rows); +extern BOOL_VAR_H(textord_show_final_blobs); +extern BOOL_VAR_H(textord_test_landscape); +extern BOOL_VAR_H(textord_parallel_baselines); +extern BOOL_VAR_H(textord_straight_baselines); +extern BOOL_VAR_H(textord_old_baselines); +extern BOOL_VAR_H(textord_old_xheight); +extern BOOL_VAR_H(textord_fix_xheight_bug); +extern BOOL_VAR_H(textord_fix_makerow_bug); +extern BOOL_VAR_H(textord_debug_xheights); +extern INT_VAR_H(textord_test_x); +extern INT_VAR_H(textord_test_y); +extern INT_VAR_H(textord_min_blobs_in_row); +extern INT_VAR_H(textord_spline_minblobs); +extern INT_VAR_H(textord_spline_medianwin); +extern INT_VAR_H(textord_min_xheight); +extern double_VAR_H(textord_spline_shift_fraction); +extern double_VAR_H(textord_skew_ile); +extern double_VAR_H(textord_skew_lag); +extern double_VAR_H(textord_linespace_iqrlimit); +extern double_VAR_H(textord_width_limit); +extern double_VAR_H(textord_chop_width); +extern double_VAR_H(textord_minxh); +extern double_VAR_H(textord_min_linesize); +extern double_VAR_H(textord_excess_blobsize); +extern double_VAR_H(textord_occupancy_threshold); +extern double_VAR_H(textord_underline_width); +extern double_VAR_H(textord_min_blob_height_fraction); +extern double_VAR_H(textord_xheight_mode_fraction); +extern double_VAR_H(textord_ascheight_mode_fraction); +extern double_VAR_H(textord_ascx_ratio_min); +extern double_VAR_H(textord_ascx_ratio_max); +extern double_VAR_H(textord_descx_ratio_min); +extern double_VAR_H(textord_descx_ratio_max); +extern double_VAR_H(textord_xheight_error_margin); +extern INT_VAR_H(textord_lms_line_trials); +extern BOOL_VAR_H(textord_new_initial_xheight); +extern BOOL_VAR_H(textord_debug_blob); + +inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) { + *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh)); + if (*min_height < textord_min_xheight) { + *min_height = textord_min_xheight; + } + *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0)); +} + +inline ROW_CATEGORY get_row_category(const TO_ROW *row) { + if (row->xheight <= 0) { + return ROW_INVALID; + } + return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND + : (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; +} + +inline bool within_error_margin(float test, float num, float margin) { + return (test >= num * (1 - margin) && test <= num * (1 + margin)); +} + +void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, + STATS *floating_heights); + +float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks); +float make_rows(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks); +void make_initial_textrows(ICOORD page_tr, + TO_BLOCK *block, // block to do + FCOORD rotation, // for drawing + bool testing_on); // correct orientation +void fit_lms_line(TO_ROW *row); +void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks + float &page_m, // average gradient + float &page_err); // average error +void vigorous_noise_removal(TO_BLOCK *block); +void cleanup_rows_making(ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on); // correct orientation +void delete_non_dropout_rows( // find lines + TO_BLOCK *block, // block to do + float gradient, // global skew + FCOORD rotation, // deskew vector + int32_t block_edge, // left edge + bool testing_on // correct orientation +); +bool find_best_dropout_row( // find neighbours + TO_ROW *row, // row to test + int32_t distance, // dropout dist + float dist_limit, // threshold distance + int32_t line_index, // index of row + TO_ROW_IT *row_it, // current position + bool testing_on // correct orientation +); +TBOX deskew_block_coords( // block box + TO_BLOCK *block, // block to do + float gradient // global skew +); +void compute_line_occupation( // project blobs + TO_BLOCK *block, // block to do + float gradient, // global skew + int32_t min_y, // min coord in block + int32_t max_y, // in block + int32_t *occupation, // output projection + int32_t *deltas // derivative +); +void compute_occupation_threshold( // project blobs + int32_t low_window, // below result point + int32_t high_window, // above result point + int32_t line_count, // array sizes + int32_t *occupation, // input projection + int32_t *thresholds // output thresholds +); +void compute_dropout_distances( // project blobs + int32_t *occupation, // input projection + int32_t *thresholds, // output thresholds + int32_t line_count // array sizes +); +void expand_rows( // find lines + ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on // correct orientation +); +void adjust_row_limits( // tidy limits + TO_BLOCK *block // block to do +); +void compute_row_stats( // find lines + TO_BLOCK *block, // block to do + bool testing_on // correct orientation +); +float median_block_xheight( // find lines + TO_BLOCK *block, // block to do + float gradient // global skew +); + +int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, + int min_height, int max_height, float *xheight, float *ascrise); + +int32_t compute_row_descdrop(TO_ROW *row, // row to do + float gradient, // global skew + int xheight_blob_count, STATS *heights); +int32_t compute_height_modes(STATS *heights, // stats to search + int32_t min_height, // bottom of range + int32_t max_height, // top of range + int32_t *modes, // output array + int32_t maxmodes); // size of modes +void correct_row_xheight(TO_ROW *row, // row to fix + float xheight, // average values + float ascrise, float descdrop); +void separate_underlines(TO_BLOCK *block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape + bool testing_on); // correct orientation +void pre_associate_blobs(ICOORD page_tr, // top right + TO_BLOCK *block, // block to do + FCOORD rotation, // inverse landscape + bool testing_on); // correct orientation +void fit_parallel_rows(TO_BLOCK *block, // block to do + float gradient, // gradient to fit + FCOORD rotation, // for drawing + int32_t block_edge, // edge of block + bool testing_on); // correct orientation +void fit_parallel_lms(float gradient, // forced gradient + TO_ROW *row); // row to fit +void make_baseline_spline(TO_ROW *row, // row to fit + TO_BLOCK *block); // block it came from +bool segment_baseline( // split baseline + TO_ROW *row, // row to fit + TO_BLOCK *block, // block it came from + int32_t &segments, // no fo segments + int32_t *xstarts // coords of segments +); +double *linear_spline_baseline( // split baseline + TO_ROW *row, // row to fit + TO_BLOCK *block, // block it came from + int32_t &segments, // no fo segments + int32_t xstarts[] // coords of segments +); +void assign_blobs_to_rows( // find lines + TO_BLOCK *block, // block to do + float *gradient, // block skew + int pass, // identification + bool reject_misses, // chuck big ones out + bool make_new_rows, // add rows for unmatched + bool drawing_skew // draw smoothed skew +); +// find best row +OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator + TO_ROW *&best_row, // output row + float top, // top of blob + float bottom, // bottom of blob + float rowsize, // max row size + bool testing_blob // test stuff +); +int blob_x_order( // sort function + const void *item1, // items to compare + const void *item2); + +void mark_repeated_chars(TO_ROW *row); + +} // namespace tesseract + +#endif
