Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/src/textord/makerow.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /********************************************************************** | |
| 2 * File: makerow.h (Formerly makerows.h) | |
| 3 * Description: Code to arrange blobs into rows of text. | |
| 4 * Author: Ray Smith | |
| 5 * | |
| 6 * (C) Copyright 1992, Hewlett-Packard Ltd. | |
| 7 ** Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 ** you may not use this file except in compliance with the License. | |
| 9 ** You may obtain a copy of the License at | |
| 10 ** http://www.apache.org/licenses/LICENSE-2.0 | |
| 11 ** Unless required by applicable law or agreed to in writing, software | |
| 12 ** distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 ** See the License for the specific language governing permissions and | |
| 15 ** limitations under the License. | |
| 16 * | |
| 17 **********************************************************************/ | |
| 18 | |
| 19 #ifndef MAKEROW_H | |
| 20 #define MAKEROW_H | |
| 21 | |
| 22 #include "blobbox.h" | |
| 23 #include "blobs.h" | |
| 24 #include "ocrblock.h" | |
| 25 #include "params.h" | |
| 26 #include "statistc.h" | |
| 27 | |
| 28 namespace tesseract { | |
| 29 | |
| 30 enum OVERLAP_STATE { | |
| 31 ASSIGN, // assign it to row | |
| 32 REJECT, // reject it - dual overlap | |
| 33 NEW_ROW | |
| 34 }; | |
| 35 | |
| 36 enum ROW_CATEGORY { | |
| 37 ROW_ASCENDERS_FOUND, | |
| 38 ROW_DESCENDERS_FOUND, | |
| 39 ROW_UNKNOWN, | |
| 40 ROW_INVALID, | |
| 41 }; | |
| 42 | |
| 43 extern BOOL_VAR_H(textord_heavy_nr); | |
| 44 extern BOOL_VAR_H(textord_show_initial_rows); | |
| 45 extern BOOL_VAR_H(textord_show_parallel_rows); | |
| 46 extern BOOL_VAR_H(textord_show_expanded_rows); | |
| 47 extern BOOL_VAR_H(textord_show_final_rows); | |
| 48 extern BOOL_VAR_H(textord_show_final_blobs); | |
| 49 extern BOOL_VAR_H(textord_test_landscape); | |
| 50 extern BOOL_VAR_H(textord_parallel_baselines); | |
| 51 extern BOOL_VAR_H(textord_straight_baselines); | |
| 52 extern BOOL_VAR_H(textord_old_baselines); | |
| 53 extern BOOL_VAR_H(textord_old_xheight); | |
| 54 extern BOOL_VAR_H(textord_fix_xheight_bug); | |
| 55 extern BOOL_VAR_H(textord_fix_makerow_bug); | |
| 56 extern BOOL_VAR_H(textord_debug_xheights); | |
| 57 extern INT_VAR_H(textord_test_x); | |
| 58 extern INT_VAR_H(textord_test_y); | |
| 59 extern INT_VAR_H(textord_min_blobs_in_row); | |
| 60 extern INT_VAR_H(textord_spline_minblobs); | |
| 61 extern INT_VAR_H(textord_spline_medianwin); | |
| 62 extern INT_VAR_H(textord_min_xheight); | |
| 63 extern double_VAR_H(textord_spline_shift_fraction); | |
| 64 extern double_VAR_H(textord_skew_ile); | |
| 65 extern double_VAR_H(textord_skew_lag); | |
| 66 extern double_VAR_H(textord_linespace_iqrlimit); | |
| 67 extern double_VAR_H(textord_width_limit); | |
| 68 extern double_VAR_H(textord_chop_width); | |
| 69 extern double_VAR_H(textord_minxh); | |
| 70 extern double_VAR_H(textord_min_linesize); | |
| 71 extern double_VAR_H(textord_excess_blobsize); | |
| 72 extern double_VAR_H(textord_occupancy_threshold); | |
| 73 extern double_VAR_H(textord_underline_width); | |
| 74 extern double_VAR_H(textord_min_blob_height_fraction); | |
| 75 extern double_VAR_H(textord_xheight_mode_fraction); | |
| 76 extern double_VAR_H(textord_ascheight_mode_fraction); | |
| 77 extern double_VAR_H(textord_ascx_ratio_min); | |
| 78 extern double_VAR_H(textord_ascx_ratio_max); | |
| 79 extern double_VAR_H(textord_descx_ratio_min); | |
| 80 extern double_VAR_H(textord_descx_ratio_max); | |
| 81 extern double_VAR_H(textord_xheight_error_margin); | |
| 82 extern INT_VAR_H(textord_lms_line_trials); | |
| 83 extern BOOL_VAR_H(textord_new_initial_xheight); | |
| 84 extern BOOL_VAR_H(textord_debug_blob); | |
| 85 | |
| 86 inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) { | |
| 87 *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh)); | |
| 88 if (*min_height < textord_min_xheight) { | |
| 89 *min_height = textord_min_xheight; | |
| 90 } | |
| 91 *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0)); | |
| 92 } | |
| 93 | |
| 94 inline ROW_CATEGORY get_row_category(const TO_ROW *row) { | |
| 95 if (row->xheight <= 0) { | |
| 96 return ROW_INVALID; | |
| 97 } | |
| 98 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND | |
| 99 : (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN; | |
| 100 } | |
| 101 | |
| 102 inline bool within_error_margin(float test, float num, float margin) { | |
| 103 return (test >= num * (1 - margin) && test <= num * (1 + margin)); | |
| 104 } | |
| 105 | |
| 106 void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, | |
| 107 STATS *floating_heights); | |
| 108 | |
| 109 float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks); | |
| 110 float make_rows(ICOORD page_tr, // top right | |
| 111 TO_BLOCK_LIST *port_blocks); | |
| 112 void make_initial_textrows(ICOORD page_tr, | |
| 113 TO_BLOCK *block, // block to do | |
| 114 FCOORD rotation, // for drawing | |
| 115 bool testing_on); // correct orientation | |
| 116 void fit_lms_line(TO_ROW *row); | |
| 117 void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks | |
| 118 float &page_m, // average gradient | |
| 119 float &page_err); // average error | |
| 120 void vigorous_noise_removal(TO_BLOCK *block); | |
| 121 void cleanup_rows_making(ICOORD page_tr, // top right | |
| 122 TO_BLOCK *block, // block to do | |
| 123 float gradient, // gradient to fit | |
| 124 FCOORD rotation, // for drawing | |
| 125 int32_t block_edge, // edge of block | |
| 126 bool testing_on); // correct orientation | |
| 127 void delete_non_dropout_rows( // find lines | |
| 128 TO_BLOCK *block, // block to do | |
| 129 float gradient, // global skew | |
| 130 FCOORD rotation, // deskew vector | |
| 131 int32_t block_edge, // left edge | |
| 132 bool testing_on // correct orientation | |
| 133 ); | |
| 134 bool find_best_dropout_row( // find neighbours | |
| 135 TO_ROW *row, // row to test | |
| 136 int32_t distance, // dropout dist | |
| 137 float dist_limit, // threshold distance | |
| 138 int32_t line_index, // index of row | |
| 139 TO_ROW_IT *row_it, // current position | |
| 140 bool testing_on // correct orientation | |
| 141 ); | |
| 142 TBOX deskew_block_coords( // block box | |
| 143 TO_BLOCK *block, // block to do | |
| 144 float gradient // global skew | |
| 145 ); | |
| 146 void compute_line_occupation( // project blobs | |
| 147 TO_BLOCK *block, // block to do | |
| 148 float gradient, // global skew | |
| 149 int32_t min_y, // min coord in block | |
| 150 int32_t max_y, // in block | |
| 151 int32_t *occupation, // output projection | |
| 152 int32_t *deltas // derivative | |
| 153 ); | |
| 154 void compute_occupation_threshold( // compute thresholds | |
| 155 int32_t low_window, // below result point | |
| 156 int32_t high_window, // above result point | |
| 157 int32_t line_count, // array sizes | |
| 158 int32_t *occupation, // input projection | |
| 159 int32_t *thresholds // output thresholds | |
| 160 ); | |
| 161 void compute_dropout_distances( // compute distances | |
| 162 int32_t *occupation, // input projection | |
| 163 int32_t *thresholds, // output thresholds | |
| 164 int32_t line_count // array sizes | |
| 165 ); | |
| 166 void expand_rows( // find lines | |
| 167 ICOORD page_tr, // top right | |
| 168 TO_BLOCK *block, // block to do | |
| 169 float gradient, // gradient to fit | |
| 170 FCOORD rotation, // for drawing | |
| 171 int32_t block_edge, // edge of block | |
| 172 bool testing_on // correct orientation | |
| 173 ); | |
| 174 void adjust_row_limits( // tidy limits | |
| 175 TO_BLOCK *block // block to do | |
| 176 ); | |
| 177 void compute_row_stats( // find lines | |
| 178 TO_BLOCK *block, // block to do | |
| 179 bool testing_on // correct orientation | |
| 180 ); | |
| 181 float median_block_xheight( // find lines | |
| 182 TO_BLOCK *block, // block to do | |
| 183 float gradient // global skew | |
| 184 ); | |
| 185 | |
| 186 int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, | |
| 187 int min_height, int max_height, float *xheight, float *ascrise); | |
| 188 | |
| 189 int32_t compute_row_descdrop(TO_ROW *row, // row to do | |
| 190 float gradient, // global skew | |
| 191 int xheight_blob_count, STATS *heights); | |
| 192 int32_t compute_height_modes(STATS *heights, // stats to search | |
| 193 int32_t min_height, // bottom of range | |
| 194 int32_t max_height, // top of range | |
| 195 int32_t *modes, // output array | |
| 196 int32_t maxmodes); // size of modes | |
| 197 void correct_row_xheight(TO_ROW *row, // row to fix | |
| 198 float xheight, // average values | |
| 199 float ascrise, float descdrop); | |
| 200 void separate_underlines(TO_BLOCK *block, // block to do | |
| 201 float gradient, // skew angle | |
| 202 FCOORD rotation, // inverse landscape | |
| 203 bool testing_on); // correct orientation | |
| 204 void pre_associate_blobs(ICOORD page_tr, // top right | |
| 205 TO_BLOCK *block, // block to do | |
| 206 FCOORD rotation, // inverse landscape | |
| 207 bool testing_on); // correct orientation | |
| 208 void fit_parallel_rows(TO_BLOCK *block, // block to do | |
| 209 float gradient, // gradient to fit | |
| 210 FCOORD rotation, // for drawing | |
| 211 int32_t block_edge, // edge of block | |
| 212 bool testing_on); // correct orientation | |
| 213 void fit_parallel_lms(float gradient, // forced gradient | |
| 214 TO_ROW *row); // row to fit | |
| 215 void make_baseline_spline(TO_ROW *row, // row to fit | |
| 216 TO_BLOCK *block); // block it came from | |
| 217 bool segment_baseline( // split baseline | |
| 218 TO_ROW *row, // row to fit | |
| 219 TO_BLOCK *block, // block it came from | |
| 220 int32_t &segments, // no. of segments | |
| 221 int32_t *xstarts // coords of segments | |
| 222 ); | |
| 223 double *linear_spline_baseline( // split baseline | |
| 224 TO_ROW *row, // row to fit | |
| 225 TO_BLOCK *block, // block it came from | |
| 226 int32_t &segments, // no. of segments | |
| 227 int32_t xstarts[] // coords of segments | |
| 228 ); | |
| 229 void assign_blobs_to_rows( // find lines | |
| 230 TO_BLOCK *block, // block to do | |
| 231 float *gradient, // block skew | |
| 232 int pass, // identification | |
| 233 bool reject_misses, // chuck big ones out | |
| 234 bool make_new_rows, // add rows for unmatched | |
| 235 bool drawing_skew // draw smoothed skew | |
| 236 ); | |
| 237 // find best row | |
| 238 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator | |
| 239 TO_ROW *&best_row, // output row | |
| 240 float top, // top of blob | |
| 241 float bottom, // bottom of blob | |
| 242 float rowsize, // max row size | |
| 243 bool testing_blob // test stuff | |
| 244 ); | |
| 245 int blob_x_order( // sort function | |
| 246 const void *item1, // items to compare | |
| 247 const void *item2); | |
| 248 | |
| 249 void mark_repeated_chars(TO_ROW *row); | |
| 250 | |
| 251 } // namespace tesseract | |
| 252 | |
| 253 #endif |
