Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/tesseract/src/textord/topitch.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/tesseract/src/textord/topitch.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,178 @@ +/********************************************************************** + * File: topitch.h (Formerly to_pitch.h) + * Description: Code to determine fixed pitchness and the pitch if fixed. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef TOPITCH_H +#define TOPITCH_H + +#include "blobbox.h" + +namespace tesseract { + +class Tesseract; + +extern BOOL_VAR_H(textord_debug_pitch_test); +extern BOOL_VAR_H(textord_debug_pitch_metric); +extern BOOL_VAR_H(textord_show_row_cuts); +extern BOOL_VAR_H(textord_show_page_cuts); +extern BOOL_VAR_H(textord_blockndoc_fixed); +extern BOOL_VAR_H(textord_fast_pitch_test); +extern double_VAR_H(textord_projection_scale); +extern double_VAR_H(textord_balance_factor); + +void compute_fixed_pitch(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks, // input list + float gradient, // page skew + FCOORD rotation, // for drawing + bool testing_on); // correct orientation +void fix_row_pitch( // get some value + TO_ROW *bad_row, // row to fix + TO_BLOCK *bad_block, // block of bad_row + TO_BLOCK_LIST *blocks, // blocks to scan + int32_t row_target, // number of row + int32_t block_target // number of block +); +void 
compute_block_pitch(TO_BLOCK *block, // input list + FCOORD rotation, // for drawing + int32_t block_index, // block number + bool testing_on); // correct orientation +bool compute_rows_pitch( // find line stats + TO_BLOCK *block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation +); +bool try_doc_fixed( // determine pitch + ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks, // input list + float gradient // page skew +); +bool try_block_fixed( // find line stats + TO_BLOCK *block, // block to do + int32_t block_index // block number +); +bool try_rows_fixed( // find line stats + TO_BLOCK *block, // block to do + int32_t block_index, // block number + bool testing_on // correct orientation +); +void print_block_counts( // find line stats + TO_BLOCK *block, // block to do + int32_t block_index // block number +); +void count_block_votes( // find line stats + TO_BLOCK *block, // block to do + int32_t &def_fixed, // add to counts + int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, + int32_t &corr_prop, int32_t &dunno); +bool row_pitch_stats( // find line stats + TO_ROW *row, // current row + int32_t maxwidth, // of spaces + bool testing_on // correct orientation +); +bool find_row_pitch( // find lines + TO_ROW *row, // row to do + int32_t maxwidth, // max permitted space + int32_t dm_gap, // ignorable gaps + TO_BLOCK *block, // block of row + int32_t block_index, // block_number + int32_t row_index, // number of row + bool testing_on // correct orientation +); +bool fixed_pitch_row( // find lines + TO_ROW *row, // row to do + BLOCK *block, + int32_t block_index // block_number +); +bool count_pitch_stats( // find lines + TO_ROW *row, // row to do + STATS *gap_stats, // blob gaps + STATS *pitch_stats, // centre-centre stats + float initial_pitch, // guess at pitch + float min_space, // estimate space size + bool ignore_outsize, // discard big objects + bool split_outsize, // split big 
objects + int32_t dm_gap // ignorable gaps +); +float tune_row_pitch( // find fp cells + TO_ROW *row, // row to do + STATS *projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float &initial_pitch, // guess at pitch + float &best_sp_sd, // space sd + int16_t &best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST *best_cells, // row cells + bool testing_on // individual words +); +float tune_row_pitch2( // find fp cells + TO_ROW *row, // row to do + STATS *projection, // vertical projection + int16_t projection_left, // edge of projection + int16_t projection_right, // edge of projection + float space_size, // size of blank + float &initial_pitch, // guess at pitch + float &best_sp_sd, // space sd + int16_t &best_mid_cuts, // no of cheap cuts + ICOORDELT_LIST *best_cells, // row cells + bool testing_on // individual words +); +float compute_pitch_sd( // find fp cells + TO_ROW *row, // row to do + STATS *projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float space_size, // size of blank + float initial_pitch, // guess at pitch + float &sp_sd, // space sd + int16_t &mid_cuts, // no of free cuts + ICOORDELT_LIST *row_cells, // list of chop pts + bool testing_on, // individual words + int16_t start = 0, // start of good range + int16_t end = 0 // end of good range +); +float compute_pitch_sd2( // find fp cells + TO_ROW *row, // row to do + STATS *projection, // vertical projection + int16_t projection_left, // edge + int16_t projection_right, // edge + float initial_pitch, // guess at pitch + int16_t &occupation, // no of occupied cells + int16_t &mid_cuts, // no of free cuts + ICOORDELT_LIST *row_cells, // list of chop pts + bool testing_on, // individual words + int16_t start = 0, // start of good range + int16_t end = 0 // end of good range +); +void print_pitch_sd( // find fp cells + TO_ROW 
*row, // row to do + STATS *projection, // vertical projection + int16_t projection_left, // edges //size of blank + int16_t projection_right, float space_size, + float initial_pitch // guess at pitch +); +void find_repeated_chars(TO_BLOCK *block, // Block to search. + bool testing_on); // Debug mode. +void plot_fp_word( // draw block of words + TO_BLOCK *block, // block to draw + float pitch, // pitch to draw with + float nonspace // for space threshold +); + +} // namespace tesseract + +#endif
