comparison mupdf-source/thirdparty/tesseract/src/textord/topitch.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /**********************************************************************
2 * File: topitch.cpp (Formerly to_pitch.c)
3 * Description: Code to determine fixed pitchness and the pitch if fixed.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1993, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 // Include automatically generated configuration file if running autoconf.
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23
24 #include "topitch.h"
25
26 #include "blobbox.h"
27 #include "drawtord.h"
28 #include "makerow.h"
29 #include "pithsync.h"
30 #include "pitsync1.h"
31 #include "statistc.h"
32 #include "tovars.h"
33 #include "wordseg.h"
34
35 #include "helpers.h"
36
37 #include <memory>
38
39 namespace tesseract {
40
41 static BOOL_VAR(textord_all_prop, false, "All doc is proportial text");
42 BOOL_VAR(textord_debug_pitch_test, false, "Debug on fixed pitch test");
43 static BOOL_VAR(textord_disable_pitch_test, false, "Turn off dp fixed pitch algorithm");
44 BOOL_VAR(textord_fast_pitch_test, false, "Do even faster pitch algorithm");
45 BOOL_VAR(textord_debug_pitch_metric, false, "Write full metric stuff");
46 BOOL_VAR(textord_show_row_cuts, false, "Draw row-level cuts");
47 BOOL_VAR(textord_show_page_cuts, false, "Draw page-level cuts");
48 BOOL_VAR(textord_blockndoc_fixed, false, "Attempt whole doc/block fixed pitch");
49 double_VAR(textord_projection_scale, 0.200, "Ding rate for mid-cuts");
50 double_VAR(textord_balance_factor, 1.0, "Ding rate for unbalanced char cells");
51
52 #define BLOCK_STATS_CLUSTERS 10
53 #define MAX_ALLOWED_PITCH 100 // max pixel pitch.
54
55 // qsort function to sort 2 floats.
56 static int sort_floats(const void *arg1, const void *arg2) {
57 float diff = *reinterpret_cast<const float *>(arg1) - *reinterpret_cast<const float *>(arg2);
58 if (diff > 0) {
59 return 1;
60 } else if (diff < 0) {
61 return -1;
62 } else {
63 return 0;
64 }
65 }
66
67 /**********************************************************************
68 * compute_fixed_pitch
69 *
70 * Decide whether each row is fixed pitch individually.
71 * Correlate definite and uncertain results to obtain an individual
72 * result for each row in the TO_ROW class.
73 **********************************************************************/
74
75 void compute_fixed_pitch(ICOORD page_tr, // top right
76 TO_BLOCK_LIST *port_blocks, // input list
77 float gradient, // page skew
78 FCOORD rotation, // for drawing
79 bool testing_on) { // correct orientation
80 TO_BLOCK_IT block_it; // iterator
81 TO_BLOCK *block; // current block;
82 TO_ROW *row; // current row
83 int block_index; // block number
84 int row_index; // row number
85
86 #ifndef GRAPHICS_DISABLED
87 if (textord_show_initial_words && testing_on) {
88 if (to_win == nullptr) {
89 create_to_win(page_tr);
90 }
91 }
92 #endif
93
94 block_it.set_to_list(port_blocks);
95 block_index = 1;
96 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
97 block = block_it.data();
98 compute_block_pitch(block, rotation, block_index, testing_on);
99 block_index++;
100 }
101
102 if (!try_doc_fixed(page_tr, port_blocks, gradient)) {
103 block_index = 1;
104 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
105 block = block_it.data();
106 if (!try_block_fixed(block, block_index)) {
107 try_rows_fixed(block, block_index, testing_on);
108 }
109 block_index++;
110 }
111 }
112
113 block_index = 1;
114 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
115 block = block_it.data();
116 POLY_BLOCK *pb = block->block->pdblk.poly_block();
117 if (pb != nullptr && !pb->IsText()) {
118 continue; // Non-text doesn't exist!
119 }
120 // row iterator
121 TO_ROW_IT row_it(block->get_rows());
122 row_index = 1;
123 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
124 row = row_it.data();
125 fix_row_pitch(row, block, port_blocks, row_index, block_index);
126 row_index++;
127 }
128 block_index++;
129 }
130 #ifndef GRAPHICS_DISABLED
131 if (textord_show_initial_words && testing_on) {
132 ScrollView::Update();
133 }
134 #endif
135 }
136
137 /**********************************************************************
138 * fix_row_pitch
139 *
140 * Get a pitch_decision for this row by voting among similar rows in the
141 * block, then similar rows over all the page, or any other rows at all.
142 **********************************************************************/
143
144 void fix_row_pitch(TO_ROW *bad_row, // row to fix
145 TO_BLOCK *bad_block, // block of bad_row
146 TO_BLOCK_LIST *blocks, // blocks to scan
147 int32_t row_target, // number of row
148 int32_t block_target) { // number of block
149 int16_t mid_cuts;
150 int block_votes; // votes in block
151 int like_votes; // votes over page
152 int other_votes; // votes of unlike blocks
153 int block_index; // number of block
154 int maxwidth; // max pitch
155 TO_BLOCK_IT block_it = blocks; // block iterator
156 TO_BLOCK *block; // current block
157 TO_ROW *row; // current row
158 float sp_sd; // space deviation
159 STATS block_stats; // pitches in block
160 STATS like_stats; // pitches in page
161
162 block_votes = like_votes = other_votes = 0;
163 maxwidth = static_cast<int32_t>(ceil(bad_row->xheight * textord_words_maxspace));
164 if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) {
165 block_stats.set_range(0, maxwidth - 1);
166 like_stats.set_range(0, maxwidth - 1);
167 block_index = 1;
168 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
169 block = block_it.data();
170 POLY_BLOCK *pb = block->block->pdblk.poly_block();
171 if (pb != nullptr && !pb->IsText()) {
172 continue; // Non text doesn't exist!
173 }
174 TO_ROW_IT row_it(block->get_rows());
175 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
176 row = row_it.data();
177 if ((bad_row->all_caps &&
178 row->xheight + row->ascrise <
179 (bad_row->xheight + bad_row->ascrise) * (1 + textord_pitch_rowsimilarity) &&
180 row->xheight + row->ascrise >
181 (bad_row->xheight + bad_row->ascrise) * (1 - textord_pitch_rowsimilarity)) ||
182 (!bad_row->all_caps &&
183 row->xheight < bad_row->xheight * (1 + textord_pitch_rowsimilarity) &&
184 row->xheight > bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
185 if (block_index == block_target) {
186 if (row->pitch_decision == PITCH_DEF_FIXED) {
187 block_votes += textord_words_veto_power;
188 block_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
189 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
190 row->pitch_decision == PITCH_CORR_FIXED) {
191 block_votes++;
192 block_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
193 } else if (row->pitch_decision == PITCH_DEF_PROP) {
194 block_votes -= textord_words_veto_power;
195 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
196 row->pitch_decision == PITCH_CORR_PROP) {
197 block_votes--;
198 }
199 } else {
200 if (row->pitch_decision == PITCH_DEF_FIXED) {
201 like_votes += textord_words_veto_power;
202 like_stats.add(static_cast<int32_t>(row->fixed_pitch), textord_words_veto_power);
203 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
204 row->pitch_decision == PITCH_CORR_FIXED) {
205 like_votes++;
206 like_stats.add(static_cast<int32_t>(row->fixed_pitch), 1);
207 } else if (row->pitch_decision == PITCH_DEF_PROP) {
208 like_votes -= textord_words_veto_power;
209 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
210 row->pitch_decision == PITCH_CORR_PROP) {
211 like_votes--;
212 }
213 }
214 } else {
215 if (row->pitch_decision == PITCH_DEF_FIXED) {
216 other_votes += textord_words_veto_power;
217 } else if (row->pitch_decision == PITCH_MAYBE_FIXED ||
218 row->pitch_decision == PITCH_CORR_FIXED) {
219 other_votes++;
220 } else if (row->pitch_decision == PITCH_DEF_PROP) {
221 other_votes -= textord_words_veto_power;
222 } else if (row->pitch_decision == PITCH_MAYBE_PROP ||
223 row->pitch_decision == PITCH_CORR_PROP) {
224 other_votes--;
225 }
226 }
227 }
228 block_index++;
229 }
230 if (block_votes > textord_words_veto_power) {
231 bad_row->fixed_pitch = block_stats.ile(0.5);
232 bad_row->pitch_decision = PITCH_CORR_FIXED;
233 } else if (block_votes <= textord_words_veto_power && like_votes > 0) {
234 bad_row->fixed_pitch = like_stats.ile(0.5);
235 bad_row->pitch_decision = PITCH_CORR_FIXED;
236 } else {
237 bad_row->pitch_decision = PITCH_CORR_PROP;
238 if (block_votes == 0 && like_votes == 0 && other_votes > 0 &&
239 (textord_debug_pitch_test || textord_debug_pitch_metric)) {
240 tprintf(
241 "Warning:row %d of block %d set prop with no like rows against "
242 "trend\n",
243 row_target, block_target);
244 }
245 }
246 }
247 if (textord_debug_pitch_metric) {
248 tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", block_votes, like_votes, other_votes);
249 tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
250 }
251 if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
252 if (bad_row->fixed_pitch < textord_min_xheight) {
253 if (block_votes > 0) {
254 bad_row->fixed_pitch = block_stats.ile(0.5);
255 } else if (block_votes == 0 && like_votes > 0) {
256 bad_row->fixed_pitch = like_stats.ile(0.5);
257 } else {
258 tprintf("Warning:guessing pitch as xheight on row %d, block %d\n", row_target,
259 block_target);
260 bad_row->fixed_pitch = bad_row->xheight;
261 }
262 }
263 if (bad_row->fixed_pitch < textord_min_xheight) {
264 bad_row->fixed_pitch = (float)textord_min_xheight;
265 }
266 bad_row->kern_size = bad_row->fixed_pitch / 4;
267 bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
268 bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
269 bad_row->space_threshold = (bad_row->min_space + bad_row->max_nonspace) / 2;
270 bad_row->space_size = bad_row->fixed_pitch;
271 if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
272 tune_row_pitch(bad_row, &bad_row->projection, bad_row->projection_left,
273 bad_row->projection_right,
274 (bad_row->fixed_pitch + bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
275 sp_sd, mid_cuts, &bad_row->char_cells, false);
276 }
277 } else if (bad_row->pitch_decision == PITCH_CORR_PROP ||
278 bad_row->pitch_decision == PITCH_DEF_PROP) {
279 bad_row->fixed_pitch = 0.0f;
280 bad_row->char_cells.clear();
281 }
282 }
283
284 /**********************************************************************
285 * compute_block_pitch
286 *
287 * Decide whether each block is fixed pitch individually.
288 **********************************************************************/
289
290 void compute_block_pitch(TO_BLOCK *block, // input list
291 FCOORD rotation, // for drawing
292 int32_t block_index, // block number
293 bool testing_on) { // correct orientation
294 TBOX block_box; // bounding box
295
296 block_box = block->block->pdblk.bounding_box();
297 if (testing_on && textord_debug_pitch_test) {
298 tprintf("Block %d at (%d,%d)->(%d,%d)\n", block_index, block_box.left(), block_box.bottom(),
299 block_box.right(), block_box.top());
300 }
301 block->min_space = static_cast<int32_t>(floor(block->xheight * textord_words_default_minspace));
302 block->max_nonspace = static_cast<int32_t>(ceil(block->xheight * textord_words_default_nonspace));
303 block->fixed_pitch = 0.0f;
304 block->space_size = static_cast<float>(block->min_space);
305 block->kern_size = static_cast<float>(block->max_nonspace);
306 block->pr_nonsp = block->xheight * words_default_prop_nonspace;
307 block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
308 if (!block->get_rows()->empty()) {
309 ASSERT_HOST(block->xheight > 0);
310 find_repeated_chars(block, textord_show_initial_words && testing_on);
311 #ifndef GRAPHICS_DISABLED
312 if (textord_show_initial_words && testing_on) {
313 // overlap_picture_ops(true);
314 ScrollView::Update();
315 }
316 #endif
317 compute_rows_pitch(block, block_index, textord_debug_pitch_test && testing_on);
318 }
319 }
320
321 /**********************************************************************
322 * compute_rows_pitch
323 *
324 * Decide whether each row is fixed pitch individually.
325 **********************************************************************/
326
327 bool compute_rows_pitch( // find line stats
328 TO_BLOCK *block, // block to do
329 int32_t block_index, // block number
330 bool testing_on // correct orientation
331 ) {
332 int32_t maxwidth; // of spaces
333 TO_ROW *row; // current row
334 int32_t row_index; // row number.
335 float lower, upper; // cluster thresholds
336 TO_ROW_IT row_it = block->get_rows();
337
338 row_index = 1;
339 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
340 row = row_it.data();
341 ASSERT_HOST(row->xheight > 0);
342 row->compute_vertical_projection();
343 maxwidth = static_cast<int32_t>(ceil(row->xheight * textord_words_maxspace));
344 if (row_pitch_stats(row, maxwidth, testing_on) &&
345 find_row_pitch(row, maxwidth, textord_dotmatrix_gap + 1, block, block_index, row_index,
346 testing_on)) {
347 if (row->fixed_pitch == 0) {
348 lower = row->pr_nonsp;
349 upper = row->pr_space;
350 row->space_size = upper;
351 row->kern_size = lower;
352 }
353 } else {
354 row->fixed_pitch = 0.0f; // insufficient data
355 row->pitch_decision = PITCH_DUNNO;
356 }
357 row_index++;
358 }
359 return false;
360 }
361
362 /**********************************************************************
363 * try_doc_fixed
364 *
365 * Attempt to call the entire document fixed pitch.
366 **********************************************************************/
367
368 bool try_doc_fixed( // determine pitch
369 ICOORD page_tr, // top right
370 TO_BLOCK_LIST *port_blocks, // input list
371 float gradient // page skew
372 ) {
373 int16_t master_x; // uniform shifts
374 int16_t pitch; // median pitch.
375 int x; // profile coord
376 int prop_blocks; // correct counts
377 int fixed_blocks;
378 int total_row_count; // total in page
379 // iterator
380 TO_BLOCK_IT block_it = port_blocks;
381 TO_BLOCK *block; // current block;
382 TO_ROW *row; // current row
383 int16_t projection_left; // edges
384 int16_t projection_right;
385 int16_t row_left; // edges of row
386 int16_t row_right;
387 float master_y; // uniform shifts
388 float shift_factor; // page skew correction
389 float final_pitch; // output pitch
390 float row_y; // baseline
391 STATS projection; // entire page
392 STATS pitches(0, MAX_ALLOWED_PITCH - 1);
393 // for median
394 float sp_sd; // space sd
395 int16_t mid_cuts; // no of cheap cuts
396 float pitch_sd; // sync rating
397
398 if (!textord_blockndoc_fixed ||
399 block_it.empty() || block_it.data()->get_rows()->empty()) {
400 return false;
401 }
402 shift_factor = gradient / (gradient * gradient + 1);
403 // row iterator
404 TO_ROW_IT row_it(block_it.data()->get_rows());
405 master_x = row_it.data()->projection_left;
406 master_y = row_it.data()->baseline.y(master_x);
407 projection_left = INT16_MAX;
408 projection_right = -INT16_MAX;
409 prop_blocks = 0;
410 fixed_blocks = 0;
411 total_row_count = 0;
412
413 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
414 block = block_it.data();
415 row_it.set_to_list(block->get_rows());
416 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
417 row = row_it.data();
418 total_row_count++;
419 if (row->fixed_pitch > 0) {
420 pitches.add(static_cast<int32_t>(row->fixed_pitch), 1);
421 }
422 // find median
423 row_y = row->baseline.y(master_x);
424 row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
425 row_right = static_cast<int16_t>(row->projection_right - shift_factor * (master_y - row_y));
426 if (row_left < projection_left) {
427 projection_left = row_left;
428 }
429 if (row_right > projection_right) {
430 projection_right = row_right;
431 }
432 }
433 }
434 if (pitches.get_total() == 0) {
435 return false;
436 }
437 projection.set_range(projection_left, projection_right - 1);
438
439 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
440 block = block_it.data();
441 row_it.set_to_list(block->get_rows());
442 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
443 row = row_it.data();
444 row_y = row->baseline.y(master_x);
445 row_left = static_cast<int16_t>(row->projection_left - shift_factor * (master_y - row_y));
446 for (x = row->projection_left; x < row->projection_right; x++, row_left++) {
447 projection.add(row_left, row->projection.pile_count(x));
448 }
449 }
450 }
451
452 row_it.set_to_list(block_it.data()->get_rows());
453 row = row_it.data();
454 #ifndef GRAPHICS_DISABLED
455 if (textord_show_page_cuts && to_win != nullptr) {
456 projection.plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
457 }
458 #endif
459 final_pitch = pitches.ile(0.5);
460 pitch = static_cast<int16_t>(final_pitch);
461 pitch_sd = tune_row_pitch(row, &projection, projection_left, projection_right, pitch * 0.75,
462 final_pitch, sp_sd, mid_cuts, &row->char_cells, false);
463
464 if (textord_debug_pitch_metric) {
465 tprintf(
466 "try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%"
467 "g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
468 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, pitch_sd / total_row_count,
469 pitch_sd / pitch, pitch_sd / total_row_count / pitch);
470 }
471
472 #ifndef GRAPHICS_DISABLED
473 if (textord_show_page_cuts && to_win != nullptr) {
474 float row_shift; // shift for row
475 ICOORDELT_LIST *master_cells; // cells for page
476 master_cells = &row->char_cells;
477 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
478 block = block_it.data();
479 row_it.set_to_list(block->get_rows());
480 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
481 row = row_it.data();
482 row_y = row->baseline.y(master_x);
483 row_shift = shift_factor * (master_y - row_y);
484 plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
485 }
486 }
487 }
488 #endif
489 row->char_cells.clear();
490 return false;
491 }
492
493 /**********************************************************************
494 * try_block_fixed
495 *
496 * Try to call the entire block fixed.
497 **********************************************************************/
498
499 bool try_block_fixed( // find line stats
500 TO_BLOCK *block, // block to do
501 int32_t block_index // block number
502 ) {
503 return false;
504 }
505
506 /**********************************************************************
507 * try_rows_fixed
508 *
509 * Decide whether each row is fixed pitch individually.
510 **********************************************************************/
511
512 bool try_rows_fixed( // find line stats
513 TO_BLOCK *block, // block to do
514 int32_t block_index, // block number
515 bool testing_on // correct orientation
516 ) {
517 TO_ROW *row; // current row
518 int32_t def_fixed = 0; // counters
519 int32_t def_prop = 0;
520 int32_t maybe_fixed = 0;
521 int32_t maybe_prop = 0;
522 int32_t dunno = 0;
523 int32_t corr_fixed = 0;
524 int32_t corr_prop = 0;
525 float lower, upper; // cluster thresholds
526 TO_ROW_IT row_it = block->get_rows();
527
528 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
529 row = row_it.data();
530 ASSERT_HOST(row->xheight > 0);
531 if (row->fixed_pitch > 0 && fixed_pitch_row(row, block->block, block_index)) {
532 if (row->fixed_pitch == 0) {
533 lower = row->pr_nonsp;
534 upper = row->pr_space;
535 row->space_size = upper;
536 row->kern_size = lower;
537 }
538 }
539 }
540 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
541 dunno);
542 if (testing_on &&
543 (textord_debug_pitch_test || textord_blocksall_prop || textord_blocksall_fixed)) {
544 tprintf("Initially:");
545 print_block_counts(block, block_index);
546 }
547 if (def_fixed > def_prop * textord_words_veto_power) {
548 block->pitch_decision = PITCH_DEF_FIXED;
549 } else if (def_prop > def_fixed * textord_words_veto_power) {
550 block->pitch_decision = PITCH_DEF_PROP;
551 } else if (def_fixed > 0 || def_prop > 0) {
552 block->pitch_decision = PITCH_DUNNO;
553 } else if (maybe_fixed > maybe_prop * textord_words_veto_power) {
554 block->pitch_decision = PITCH_MAYBE_FIXED;
555 } else if (maybe_prop > maybe_fixed * textord_words_veto_power) {
556 block->pitch_decision = PITCH_MAYBE_PROP;
557 } else {
558 block->pitch_decision = PITCH_DUNNO;
559 }
560 return false;
561 }
562
563 /**********************************************************************
564 * print_block_counts
565 *
566 * Count up how many rows have what decision and print the results.
567 **********************************************************************/
568
569 void print_block_counts( // find line stats
570 TO_BLOCK *block, // block to do
571 int32_t block_index // block number
572 ) {
573 int32_t def_fixed = 0; // counters
574 int32_t def_prop = 0;
575 int32_t maybe_fixed = 0;
576 int32_t maybe_prop = 0;
577 int32_t dunno = 0;
578 int32_t corr_fixed = 0;
579 int32_t corr_prop = 0;
580
581 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
582 dunno);
583 tprintf("Block %d has (%d,%d,%d)", block_index, def_fixed, maybe_fixed, corr_fixed);
584 if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed)) {
585 tprintf(" (Wrongly)");
586 }
587 tprintf(" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
588 if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop)) {
589 tprintf(" (Wrongly)");
590 }
591 tprintf(" prop, %d dunno\n", dunno);
592 }
593
594 /**********************************************************************
595 * count_block_votes
596 *
597 * Count the number of rows in the block with each kind of pitch_decision.
598 **********************************************************************/
599
600 void count_block_votes( // find line stats
601 TO_BLOCK *block, // block to do
602 int32_t &def_fixed, // add to counts
603 int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed,
604 int32_t &corr_prop, int32_t &dunno) {
605 TO_ROW *row; // current row
606 TO_ROW_IT row_it = block->get_rows();
607
608 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
609 row = row_it.data();
610 switch (row->pitch_decision) {
611 case PITCH_DUNNO:
612 dunno++;
613 break;
614 case PITCH_DEF_PROP:
615 def_prop++;
616 break;
617 case PITCH_MAYBE_PROP:
618 maybe_prop++;
619 break;
620 case PITCH_DEF_FIXED:
621 def_fixed++;
622 break;
623 case PITCH_MAYBE_FIXED:
624 maybe_fixed++;
625 break;
626 case PITCH_CORR_PROP:
627 corr_prop++;
628 break;
629 case PITCH_CORR_FIXED:
630 corr_fixed++;
631 break;
632 }
633 }
634 }
635
636 /**********************************************************************
637 * row_pitch_stats
638 *
639 * Decide whether each row is fixed pitch individually.
640 **********************************************************************/
641
642 bool row_pitch_stats( // find line stats
643 TO_ROW *row, // current row
644 int32_t maxwidth, // of spaces
645 bool testing_on // correct orientation
646 ) {
647 BLOBNBOX *blob; // current blob
648 int gap_index; // current gap
649 int32_t prev_x; // end of prev blob
650 int32_t cluster_count; // no of clusters
651 int32_t prev_count; // of clusters
652 int32_t smooth_factor; // for smoothing stats
653 TBOX blob_box; // bounding box
654 float lower, upper; // cluster thresholds
655 // gap sizes
656 float gaps[BLOCK_STATS_CLUSTERS];
657 // blobs
658 BLOBNBOX_IT blob_it = row->blob_list();
659 STATS gap_stats(0, maxwidth - 1);
660 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
661 // clusters
662
663 smooth_factor = static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
664 if (!blob_it.empty()) {
665 prev_x = blob_it.data()->bounding_box().right();
666 blob_it.forward();
667 while (!blob_it.at_first()) {
668 blob = blob_it.data();
669 if (!blob->joined_to_prev()) {
670 blob_box = blob->bounding_box();
671 if (blob_box.left() - prev_x < maxwidth) {
672 gap_stats.add(blob_box.left() - prev_x, 1);
673 }
674 prev_x = blob_box.right();
675 }
676 blob_it.forward();
677 }
678 }
679 if (gap_stats.get_total() == 0) {
680 return false;
681 }
682 cluster_count = 0;
683 lower = row->xheight * words_initial_lower;
684 upper = row->xheight * words_initial_upper;
685 gap_stats.smooth(smooth_factor);
686 do {
687 prev_count = cluster_count;
688 cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop,
689 BLOCK_STATS_CLUSTERS, cluster_stats);
690 } while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
691 if (cluster_count < 1) {
692 return false;
693 }
694 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
695 gaps[gap_index] = cluster_stats[gap_index + 1].ile(0.5);
696 }
697 // get medians
698 if (testing_on) {
699 tprintf("cluster_count=%d:", cluster_count);
700 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
701 tprintf(" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
702 }
703 tprintf("\n");
704 }
705 qsort(gaps, cluster_count, sizeof(float), sort_floats);
706
707 // Try to find proportional non-space and space for row.
708 lower = row->xheight * words_default_prop_nonspace;
709 upper = row->xheight * textord_words_min_minspace;
710 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < lower; gap_index++) {
711 ;
712 }
713 if (gap_index == 0) {
714 if (testing_on) {
715 tprintf("No clusters below nonspace threshold!!\n");
716 }
717 if (cluster_count > 1) {
718 row->pr_nonsp = gaps[0];
719 row->pr_space = gaps[1];
720 } else {
721 row->pr_nonsp = lower;
722 row->pr_space = gaps[0];
723 }
724 } else {
725 row->pr_nonsp = gaps[gap_index - 1];
726 while (gap_index < cluster_count && gaps[gap_index] < upper) {
727 gap_index++;
728 }
729 if (gap_index == cluster_count) {
730 if (testing_on) {
731 tprintf("No clusters above nonspace threshold!!\n");
732 }
733 row->pr_space = lower * textord_spacesize_ratioprop;
734 } else {
735 row->pr_space = gaps[gap_index];
736 }
737 }
738
739 // Now try to find the fixed pitch space and non-space.
740 upper = row->xheight * words_default_fixed_space;
741 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < upper; gap_index++) {
742 ;
743 }
744 if (gap_index == 0) {
745 if (testing_on) {
746 tprintf("No clusters below space threshold!!\n");
747 }
748 row->fp_nonsp = upper;
749 row->fp_space = gaps[0];
750 } else {
751 row->fp_nonsp = gaps[gap_index - 1];
752 if (gap_index == cluster_count) {
753 if (testing_on) {
754 tprintf("No clusters above space threshold!!\n");
755 }
756 row->fp_space = row->xheight;
757 } else {
758 row->fp_space = gaps[gap_index];
759 }
760 }
761 if (testing_on) {
762 tprintf(
763 "Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, "
764 "fp_space=%g\n",
765 row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
766 }
767 return true; // computed some stats
768 }
769
770 /**********************************************************************
771 * find_row_pitch
772 *
773 * Check to see if this row could be fixed pitch using the given spacings.
774 * Blobs with gaps smaller than the lower threshold are assumed to be one.
775 * The larger threshold is the word gap threshold.
776 **********************************************************************/
777
778 bool find_row_pitch( // find lines
779 TO_ROW *row, // row to do
780 int32_t maxwidth, // max permitted space
781 int32_t dm_gap, // ignorable gaps
782 TO_BLOCK *block, // block of row
783 int32_t block_index, // block_number
784 int32_t row_index, // number of row
785 bool testing_on // correct orientation
786 ) {
787 bool used_dm_model; // looks like dot matrix
788 float min_space; // estimate threshold
789 float non_space; // gap size
790 float gap_iqr; // interquartile range
791 float pitch_iqr;
792 float dm_gap_iqr; // interquartile range
793 float dm_pitch_iqr;
794 float dm_pitch; // pitch with dm on
795 float pitch; // revised estimate
796 float initial_pitch; // guess at pitch
797 STATS gap_stats(0, maxwidth - 1);
798 // centre-centre
799 STATS pitch_stats(0, maxwidth - 1);
800
801 row->fixed_pitch = 0.0f;
802 initial_pitch = row->fp_space;
803 if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) {
804 initial_pitch = row->xheight; // keep pitch decent
805 }
806 non_space = row->fp_nonsp;
807 if (non_space > initial_pitch) {
808 non_space = initial_pitch;
809 }
810 min_space = (initial_pitch + non_space) / 2;
811
812 if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false,
813 dm_gap)) {
814 dm_gap_iqr = 0.0001f;
815 dm_pitch_iqr = maxwidth * 2.0f;
816 dm_pitch = initial_pitch;
817 } else {
818 dm_gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
819 dm_pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
820 dm_pitch = pitch_stats.ile(0.5);
821 }
822 gap_stats.clear();
823 pitch_stats.clear();
824 if (!count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space, true, false, 0)) {
825 gap_iqr = 0.0001f;
826 pitch_iqr = maxwidth * 3.0f;
827 } else {
828 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
829 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
830 if (testing_on) {
831 tprintf(
832 "First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
833 "pitch=%g\n",
834 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
835 }
836 initial_pitch = pitch_stats.ile(0.5);
837 if (min_space > initial_pitch && count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch,
838 initial_pitch, true, false, 0)) {
839 min_space = initial_pitch;
840 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
841 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
842 if (testing_on) {
843 tprintf(
844 "Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
845 "pitch=%g\n",
846 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile(0.5));
847 }
848 initial_pitch = pitch_stats.ile(0.5);
849 }
850 }
851 if (textord_debug_pitch_metric) {
852 tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", block_index,
853 row_index, 'X', pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
854 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth
855 ? 'D'
856 : (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
857 }
858 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
859 row->pitch_decision = PITCH_DUNNO;
860 if (textord_debug_pitch_metric) {
861 tprintf("\n");
862 }
863 return false; // insufficient data
864 }
865 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
866 if (testing_on) {
867 tprintf(
868 "Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
869 "dm_gap_iqr=%g\n",
870 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
871 }
872 gap_iqr = gap_stats.ile(0.75) - gap_stats.ile(0.25);
873 pitch_iqr = pitch_stats.ile(0.75) - pitch_stats.ile(0.25);
874 pitch = pitch_stats.ile(0.5);
875 used_dm_model = false;
876 } else {
877 if (testing_on) {
878 tprintf(
879 "Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
880 "dm_gap_iqr=%g\n",
881 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
882 }
883 gap_iqr = dm_gap_iqr;
884 pitch_iqr = dm_pitch_iqr;
885 pitch = dm_pitch;
886 used_dm_model = true;
887 }
888 if (textord_debug_pitch_metric) {
889 tprintf("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", pitch_iqr, gap_iqr, pitch);
890 tprintf("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
891 pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
892 pitch_iqr < block->xheight * textord_max_pitch_iqr &&
893 pitch < block->xheight * textord_words_default_maxspace
894 ? 'F'
895 : 'P');
896 }
897 if (pitch_iqr < gap_iqr * textord_fpiqr_ratio &&
898 pitch_iqr < block->xheight * textord_max_pitch_iqr &&
899 pitch < block->xheight * textord_words_default_maxspace) {
900 row->pitch_decision = PITCH_MAYBE_FIXED;
901 } else {
902 row->pitch_decision = PITCH_MAYBE_PROP;
903 }
904 row->fixed_pitch = pitch;
905 row->kern_size = gap_stats.ile(0.5);
906 row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
907 if (row->min_space > row->fixed_pitch) {
908 row->min_space = static_cast<int32_t>(row->fixed_pitch);
909 }
910 row->max_nonspace = row->min_space;
911 row->space_size = row->fixed_pitch;
912 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
913 row->used_dm_model = used_dm_model;
914 return true;
915 }
916
917 /**********************************************************************
918 * fixed_pitch_row
919 *
920 * Check to see if this row could be fixed pitch using the given spacings.
921 * Blobs with gaps smaller than the lower threshold are assumed to be one.
922 * The larger threshold is the word gap threshold.
923 **********************************************************************/
924
925 bool fixed_pitch_row(TO_ROW *row, // row to do
926 BLOCK *block,
927 int32_t block_index // block_number
928 ) {
929 const char *res_string; // pitch result
930 int16_t mid_cuts; // no of cheap cuts
931 float non_space; // gap size
932 float pitch_sd; // error on pitch
933 float sp_sd = 0.0f; // space sd
934
935 non_space = row->fp_nonsp;
936 if (non_space > row->fixed_pitch) {
937 non_space = row->fixed_pitch;
938 }
939 POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
940 if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
941 // Set the decision to definitely proportional.
942 pitch_sd = textord_words_def_prop * row->fixed_pitch;
943 row->pitch_decision = PITCH_DEF_PROP;
944 } else {
945 pitch_sd = tune_row_pitch(row, &row->projection, row->projection_left, row->projection_right,
946 (row->fixed_pitch + non_space * 3) / 4, row->fixed_pitch, sp_sd,
947 mid_cuts, &row->char_cells, block_index == textord_debug_block);
948 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch &&
949 ((pitsync_linear_version & 3) < 3 ||
950 ((pitsync_linear_version & 3) >= 3 &&
951 (row->used_dm_model || sp_sd > 20 || (pitch_sd == 0 && sp_sd > 10))))) {
952 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch && !row->all_caps &&
953 ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) {
954 row->pitch_decision = PITCH_DEF_FIXED;
955 } else {
956 row->pitch_decision = PITCH_MAYBE_FIXED;
957 }
958 } else if ((pitsync_linear_version & 3) < 3 || sp_sd > 20 || mid_cuts > 0 ||
959 pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
960 if (pitch_sd < textord_words_def_prop * row->fixed_pitch) {
961 row->pitch_decision = PITCH_MAYBE_PROP;
962 } else {
963 row->pitch_decision = PITCH_DEF_PROP;
964 }
965 } else {
966 row->pitch_decision = PITCH_DUNNO;
967 }
968 }
969
970 if (textord_debug_pitch_metric) {
971 res_string = "??";
972 switch (row->pitch_decision) {
973 case PITCH_DEF_PROP:
974 res_string = "DP";
975 break;
976 case PITCH_MAYBE_PROP:
977 res_string = "MP";
978 break;
979 case PITCH_DEF_FIXED:
980 res_string = "DF";
981 break;
982 case PITCH_MAYBE_FIXED:
983 res_string = "MF";
984 break;
985 default:
986 res_string = "??";
987 }
988 tprintf(":sd/p=%g:occ=%g:init_res=%s\n", pitch_sd / row->fixed_pitch, sp_sd, res_string);
989 }
990 return true;
991 }
992
993 /**********************************************************************
994 * count_pitch_stats
995 *
996 * Count up the gap and pitch stats on the block to see if it is fixed pitch.
997 * Blobs with gaps smaller than the lower threshold are assumed to be one.
998 * The larger threshold is the word gap threshold.
999 * The return value indicates whether there were any decent values to use.
1000 **********************************************************************/
1001
1002 bool count_pitch_stats( // find lines
1003 TO_ROW *row, // row to do
1004 STATS *gap_stats, // blob gaps
1005 STATS *pitch_stats, // centre-centre stats
1006 float initial_pitch, // guess at pitch
1007 float min_space, // estimate space size
1008 bool ignore_outsize, // discard big objects
1009 bool split_outsize, // split big objects
1010 int32_t dm_gap // ignorable gaps
1011 ) {
1012 bool prev_valid; // not word broken
1013 BLOBNBOX *blob; // current blob
1014 // blobs
1015 BLOBNBOX_IT blob_it = row->blob_list();
1016 int32_t prev_right; // end of prev blob
1017 int32_t prev_centre; // centre of previous blob
1018 int32_t x_centre; // centre of this blob
1019 int32_t blob_width; // width of blob
1020 int32_t width_units; // no of widths in blob
1021 float width; // blob width
1022 TBOX blob_box; // bounding box
1023 TBOX joined_box; // of super blob
1024
1025 gap_stats->clear();
1026 pitch_stats->clear();
1027 if (blob_it.empty()) {
1028 return false;
1029 }
1030 prev_valid = false;
1031 prev_centre = 0;
1032 prev_right = 0; // stop compiler warning
1033 joined_box = blob_it.data()->bounding_box();
1034 do {
1035 blob_it.forward();
1036 blob = blob_it.data();
1037 if (!blob->joined_to_prev()) {
1038 blob_box = blob->bounding_box();
1039 if ((blob_box.left() - joined_box.right() < dm_gap && !blob_it.at_first()) ||
1040 blob->cblob() == nullptr) {
1041 joined_box += blob_box; // merge blobs
1042 } else {
1043 blob_width = joined_box.width();
1044 if (split_outsize) {
1045 width_units =
1046 static_cast<int32_t>(floor(static_cast<float>(blob_width) / initial_pitch + 0.5));
1047 if (width_units < 1) {
1048 width_units = 1;
1049 }
1050 width_units--;
1051 } else if (ignore_outsize) {
1052 width = static_cast<float>(blob_width) / initial_pitch;
1053 width_units =
1054 width < 1 + words_default_fixed_limit && width > 1 - words_default_fixed_limit ? 0
1055 : -1;
1056 } else {
1057 width_units = 0; // everything in
1058 }
1059 x_centre = static_cast<int32_t>(joined_box.left() +
1060 (blob_width - width_units * initial_pitch) / 2);
1061 if (prev_valid && width_units >= 0) {
1062 // if (width_units>0)
1063 // {
1064 // tprintf("wu=%d,
1065 // width=%d,
1066 // xc=%d, adding
1067 // %d\n",
1068 // width_units,blob_width,x_centre,x_centre-prev_centre);
1069 // }
1070 gap_stats->add(joined_box.left() - prev_right, 1);
1071 pitch_stats->add(x_centre - prev_centre, 1);
1072 }
1073 prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
1074 prev_right = joined_box.right();
1075 prev_valid = blob_box.left() - joined_box.right() < min_space;
1076 prev_valid = prev_valid && width_units >= 0;
1077 joined_box = blob_box;
1078 }
1079 }
1080 } while (!blob_it.at_first());
1081 return gap_stats->get_total() >= 3;
1082 }
1083
1084 /**********************************************************************
1085 * tune_row_pitch
1086 *
1087 * Use a dp algorithm to fit the character cells and return the sd of
1088 * the cell size over the row.
1089 **********************************************************************/
1090
1091 float tune_row_pitch( // find fp cells
1092 TO_ROW *row, // row to do
1093 STATS *projection, // vertical projection
1094 int16_t projection_left, // edge of projection
1095 int16_t projection_right, // edge of projection
1096 float space_size, // size of blank
1097 float &initial_pitch, // guess at pitch
1098 float &best_sp_sd, // space sd
1099 int16_t &best_mid_cuts, // no of cheap cuts
1100 ICOORDELT_LIST *best_cells, // row cells
1101 bool testing_on // individual words
1102 ) {
1103 int pitch_delta; // offset pitch
1104 int16_t mid_cuts; // cheap cuts
1105 float pitch_sd; // current sd
1106 float best_sd; // best result
1107 float best_pitch; // pitch for best result
1108 float initial_sd; // starting error
1109 float sp_sd; // space sd
1110 ICOORDELT_LIST test_cells; // row cells
1111 ICOORDELT_IT best_it; // start of best list
1112
1113 if (textord_fast_pitch_test) {
1114 return tune_row_pitch2(row, projection, projection_left, projection_right, space_size,
1115 initial_pitch, best_sp_sd,
1116 // space sd
1117 best_mid_cuts, best_cells, testing_on);
1118 }
1119 if (textord_disable_pitch_test) {
1120 best_sp_sd = initial_pitch;
1121 return initial_pitch;
1122 }
1123 initial_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1124 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on);
1125 best_sd = initial_sd;
1126 best_pitch = initial_pitch;
1127 if (testing_on) {
1128 tprintf("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1129 }
1130 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1131 pitch_sd =
1132 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1133 initial_pitch + pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1134 if (testing_on) {
1135 tprintf("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, pitch_sd);
1136 }
1137 if (pitch_sd < best_sd) {
1138 best_sd = pitch_sd;
1139 best_mid_cuts = mid_cuts;
1140 best_sp_sd = sp_sd;
1141 best_pitch = initial_pitch + pitch_delta;
1142 best_cells->clear();
1143 best_it.set_to_list(best_cells);
1144 best_it.add_list_after(&test_cells);
1145 } else {
1146 test_cells.clear();
1147 }
1148 if (pitch_sd > initial_sd) {
1149 break; // getting worse
1150 }
1151 }
1152 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1153 pitch_sd =
1154 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1155 initial_pitch - pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1156 if (testing_on) {
1157 tprintf("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, pitch_sd);
1158 }
1159 if (pitch_sd < best_sd) {
1160 best_sd = pitch_sd;
1161 best_mid_cuts = mid_cuts;
1162 best_sp_sd = sp_sd;
1163 best_pitch = initial_pitch - pitch_delta;
1164 best_cells->clear();
1165 best_it.set_to_list(best_cells);
1166 best_it.add_list_after(&test_cells);
1167 } else {
1168 test_cells.clear();
1169 }
1170 if (pitch_sd > initial_sd) {
1171 break;
1172 }
1173 }
1174 initial_pitch = best_pitch;
1175
1176 if (textord_debug_pitch_metric) {
1177 print_pitch_sd(row, projection, projection_left, projection_right, space_size, best_pitch);
1178 }
1179
1180 return best_sd;
1181 }
1182
1183 /**********************************************************************
1184 * tune_row_pitch
1185 *
1186 * Use a dp algorithm to fit the character cells and return the sd of
1187 * the cell size over the row.
1188 **********************************************************************/
1189
1190 float tune_row_pitch2( // find fp cells
1191 TO_ROW *row, // row to do
1192 STATS *projection, // vertical projection
1193 int16_t projection_left, // edge of projection
1194 int16_t projection_right, // edge of projection
1195 float space_size, // size of blank
1196 float &initial_pitch, // guess at pitch
1197 float &best_sp_sd, // space sd
1198 int16_t &best_mid_cuts, // no of cheap cuts
1199 ICOORDELT_LIST *best_cells, // row cells
1200 bool testing_on // individual words
1201 ) {
1202 int pitch_delta; // offset pitch
1203 int16_t pixel; // pixel coord
1204 int16_t best_pixel; // pixel coord
1205 int16_t best_delta; // best pitch
1206 int16_t best_pitch; // best pitch
1207 int16_t start; // of good range
1208 int16_t end; // of good range
1209 int32_t best_count; // lowest sum
1210 float best_sd; // best result
1211
1212 best_sp_sd = initial_pitch;
1213
1214 best_pitch = static_cast<int>(initial_pitch);
1215 if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1216 return initial_pitch;
1217 }
1218 std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection
1219
1220 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1221 sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta);
1222 }
1223 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1224 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1225 sum_proj[textord_pitch_range + pitch_delta].add(
1226 (pixel - projection_left) % (best_pitch + pitch_delta), projection->pile_count(pixel));
1227 }
1228 }
1229 best_count = sum_proj[textord_pitch_range].pile_count(0);
1230 best_delta = 0;
1231 best_pixel = 0;
1232 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
1233 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1234 if (sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel) < best_count) {
1235 best_count = sum_proj[textord_pitch_range + pitch_delta].pile_count(pixel);
1236 best_delta = pitch_delta;
1237 best_pixel = pixel;
1238 }
1239 }
1240 }
1241 if (testing_on) {
1242 tprintf("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", initial_pitch, best_delta,
1243 best_count);
1244 }
1245 best_pitch += best_delta;
1246 initial_pitch = best_pitch;
1247 best_count++;
1248 best_count += best_count;
1249 for (start = best_pixel - 2;
1250 start > best_pixel - best_pitch &&
1251 sum_proj[textord_pitch_range + best_delta].pile_count(start % best_pitch) <= best_count;
1252 start--) {
1253 ;
1254 }
1255 for (end = best_pixel + 2;
1256 end < best_pixel + best_pitch &&
1257 sum_proj[textord_pitch_range + best_delta].pile_count(end % best_pitch) <= best_count;
1258 end++) {
1259 ;
1260 }
1261
1262 best_sd = compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1263 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on,
1264 start, end);
1265 if (testing_on) {
1266 tprintf("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, best_sd);
1267 }
1268
1269 if (textord_debug_pitch_metric) {
1270 print_pitch_sd(row, projection, projection_left, projection_right, space_size, initial_pitch);
1271 }
1272
1273 return best_sd;
1274 }
1275
1276 /**********************************************************************
1277 * compute_pitch_sd
1278 *
1279 * Use a dp algorithm to fit the character cells and return the sd of
1280 * the cell size over the row.
1281 **********************************************************************/
1282
1283 float compute_pitch_sd( // find fp cells
1284 TO_ROW *row, // row to do
1285 STATS *projection, // vertical projection
1286 int16_t projection_left, // edge
1287 int16_t projection_right, // edge
1288 float space_size, // size of blank
1289 float initial_pitch, // guess at pitch
1290 float &sp_sd, // space sd
1291 int16_t &mid_cuts, // no of free cuts
1292 ICOORDELT_LIST *row_cells, // list of chop pts
1293 bool testing_on, // individual words
1294 int16_t start, // start of good range
1295 int16_t end // end of good range
1296 ) {
1297 int16_t occupation; // no of cells in word.
1298 // blobs
1299 BLOBNBOX_IT blob_it = row->blob_list();
1300 BLOBNBOX_IT start_it; // start of word
1301 BLOBNBOX_IT plot_it; // for plotting
1302 int16_t blob_count; // no of blobs
1303 TBOX blob_box; // bounding box
1304 TBOX prev_box; // of super blob
1305 int32_t prev_right; // of word sync
1306 int scale_factor; // on scores for big words
1307 int32_t sp_count; // spaces
1308 FPSEGPT_LIST seg_list; // char cells
1309 FPSEGPT_IT seg_it; // iterator
1310 int16_t segpos; // position of segment
1311 int16_t cellpos; // previous cell boundary
1312 // iterator
1313 ICOORDELT_IT cell_it = row_cells;
1314 ICOORDELT *cell; // new cell
1315 double sqsum; // sum of squares
1316 double spsum; // of spaces
1317 double sp_var; // space error
1318 double word_sync; // result for word
1319 int32_t total_count; // total blobs
1320
1321 if ((pitsync_linear_version & 3) > 1) {
1322 word_sync = compute_pitch_sd2(row, projection, projection_left, projection_right, initial_pitch,
1323 occupation, mid_cuts, row_cells, testing_on, start, end);
1324 sp_sd = occupation;
1325 return word_sync;
1326 }
1327 mid_cuts = 0;
1328 cellpos = 0;
1329 total_count = 0;
1330 sqsum = 0;
1331 sp_count = 0;
1332 spsum = 0;
1333 prev_right = -1;
1334 if (blob_it.empty()) {
1335 return space_size * 10;
1336 }
1337 #ifndef GRAPHICS_DISABLED
1338 if (testing_on && to_win != nullptr) {
1339 blob_box = blob_it.data()->bounding_box();
1340 projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
1341 }
1342 #endif
1343 start_it = blob_it;
1344 blob_count = 0;
1345 blob_box = box_next(&blob_it); // first blob
1346 blob_it.mark_cycle_pt();
1347 do {
1348 for (; blob_count > 0; blob_count--) {
1349 box_next(&start_it);
1350 }
1351 do {
1352 prev_box = blob_box;
1353 blob_count++;
1354 blob_box = box_next(&blob_it);
1355 } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
1356 plot_it = start_it;
1357 if (pitsync_linear_version & 3) {
1358 word_sync = check_pitch_sync2(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1359 projection, projection_left, projection_right,
1360 row->xheight * textord_projection_scale, occupation, &seg_list,
1361 start, end);
1362 } else {
1363 word_sync = check_pitch_sync(&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1364 projection, &seg_list);
1365 }
1366 if (testing_on) {
1367 tprintf("Word ending at (%d,%d), len=%d, sync rating=%g, ", prev_box.right(), prev_box.top(),
1368 seg_list.length() - 1, word_sync);
1369 seg_it.set_to_list(&seg_list);
1370 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1371 if (seg_it.data()->faked) {
1372 tprintf("(F)");
1373 }
1374 tprintf("%d, ", seg_it.data()->position());
1375 // tprintf("C=%g, s=%g, sq=%g\n",
1376 // seg_it.data()->cost_function(),
1377 // seg_it.data()->sum(),
1378 // seg_it.data()->squares());
1379 }
1380 tprintf("\n");
1381 }
1382 #ifndef GRAPHICS_DISABLED
1383 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
1384 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1385 }
1386 #endif
1387 seg_it.set_to_list(&seg_list);
1388 if (prev_right >= 0) {
1389 sp_var = seg_it.data()->position() - prev_right;
1390 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1391 sp_var *= sp_var;
1392 spsum += sp_var;
1393 sp_count++;
1394 }
1395 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1396 segpos = seg_it.data()->position();
1397 if (cell_it.empty() || segpos > cellpos + initial_pitch / 2) {
1398 // big gap
1399 while (!cell_it.empty() && segpos > cellpos + initial_pitch * 3 / 2) {
1400 cell = new ICOORDELT(cellpos + static_cast<int16_t>(initial_pitch), 0);
1401 cell_it.add_after_then_move(cell);
1402 cellpos += static_cast<int16_t>(initial_pitch);
1403 }
1404 // make new one
1405 cell = new ICOORDELT(segpos, 0);
1406 cell_it.add_after_then_move(cell);
1407 cellpos = segpos;
1408 } else if (segpos > cellpos - initial_pitch / 2) {
1409 cell = cell_it.data();
1410 // average positions
1411 cell->set_x((cellpos + segpos) / 2);
1412 cellpos = cell->x();
1413 }
1414 }
1415 seg_it.move_to_last();
1416 prev_right = seg_it.data()->position();
1417 if (textord_pitch_scalebigwords) {
1418 scale_factor = (seg_list.length() - 2) / 2;
1419 if (scale_factor < 1) {
1420 scale_factor = 1;
1421 }
1422 } else {
1423 scale_factor = 1;
1424 }
1425 sqsum += word_sync * scale_factor;
1426 total_count += (seg_list.length() - 1) * scale_factor;
1427 seg_list.clear();
1428 } while (!blob_it.cycled_list());
1429 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1430 return total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1431 }
1432
1433 /**********************************************************************
1434 * compute_pitch_sd2
1435 *
1436 * Use a dp algorithm to fit the character cells and return the sd of
1437 * the cell size over the row.
1438 **********************************************************************/
1439
1440 float compute_pitch_sd2( // find fp cells
1441 TO_ROW *row, // row to do
1442 STATS *projection, // vertical projection
1443 int16_t projection_left, // edge
1444 int16_t projection_right, // edge
1445 float initial_pitch, // guess at pitch
1446 int16_t &occupation, // no of occupied cells
1447 int16_t &mid_cuts, // no of free cuts
1448 ICOORDELT_LIST *row_cells, // list of chop pts
1449 bool testing_on, // individual words
1450 int16_t start, // start of good range
1451 int16_t end // end of good range
1452 ) {
1453 // blobs
1454 BLOBNBOX_IT blob_it = row->blob_list();
1455 BLOBNBOX_IT plot_it;
1456 int16_t blob_count; // no of blobs
1457 TBOX blob_box; // bounding box
1458 FPSEGPT_LIST seg_list; // char cells
1459 FPSEGPT_IT seg_it; // iterator
1460 int16_t segpos; // position of segment
1461 // iterator
1462 ICOORDELT_IT cell_it = row_cells;
1463 ICOORDELT *cell; // new cell
1464 double word_sync; // result for word
1465
1466 mid_cuts = 0;
1467 if (blob_it.empty()) {
1468 occupation = 0;
1469 return initial_pitch * 10;
1470 }
1471 #ifndef GRAPHICS_DISABLED
1472 if (testing_on && to_win != nullptr) {
1473 projection->plot(to_win, projection_left, row->intercept(), 1.0f, -1.0f, ScrollView::CORAL);
1474 }
1475 #endif
1476 blob_count = 0;
1477 blob_it.mark_cycle_pt();
1478 do {
1479 // first blob
1480 blob_box = box_next(&blob_it);
1481 blob_count++;
1482 } while (!blob_it.cycled_list());
1483 plot_it = blob_it;
1484 word_sync = check_pitch_sync2(
1485 &blob_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
1486 projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, start, end);
1487 if (testing_on) {
1488 tprintf("Row ending at (%d,%d), len=%d, sync rating=%g, ", blob_box.right(), blob_box.top(),
1489 seg_list.length() - 1, word_sync);
1490 seg_it.set_to_list(&seg_list);
1491 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1492 if (seg_it.data()->faked) {
1493 tprintf("(F)");
1494 }
1495 tprintf("%d, ", seg_it.data()->position());
1496 // tprintf("C=%g, s=%g, sq=%g\n",
1497 // seg_it.data()->cost_function(),
1498 // seg_it.data()->sum(),
1499 // seg_it.data()->squares());
1500 }
1501 tprintf("\n");
1502 }
1503 #ifndef GRAPHICS_DISABLED
1504 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) {
1505 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1506 }
1507 #endif
1508 seg_it.set_to_list(&seg_list);
1509 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1510 segpos = seg_it.data()->position();
1511 // make new one
1512 cell = new ICOORDELT(segpos, 0);
1513 cell_it.add_after_then_move(cell);
1514 if (seg_it.at_last()) {
1515 mid_cuts = seg_it.data()->cheap_cuts();
1516 }
1517 }
1518 seg_list.clear();
1519 return occupation > 0 ? sqrt(word_sync / occupation) : initial_pitch * 10;
1520 }
1521
1522 /**********************************************************************
1523 * print_pitch_sd
1524 *
1525 * Use a dp algorithm to fit the character cells and return the sd of
1526 * the cell size over the row.
1527 **********************************************************************/
1528
1529 void print_pitch_sd( // find fp cells
1530 TO_ROW *row, // row to do
1531 STATS *projection, // vertical projection
1532 int16_t projection_left, // edges //size of blank
1533 int16_t projection_right, float space_size,
1534 float initial_pitch // guess at pitch
1535 ) {
1536 const char *res2; // pitch result
1537 int16_t occupation; // used cells
1538 float sp_sd; // space sd
1539 // blobs
1540 BLOBNBOX_IT blob_it = row->blob_list();
1541 BLOBNBOX_IT start_it; // start of word
1542 BLOBNBOX_IT row_start; // start of row
1543 int16_t blob_count; // no of blobs
1544 int16_t total_blob_count; // total blobs in line
1545 TBOX blob_box; // bounding box
1546 TBOX prev_box; // of super blob
1547 int32_t prev_right; // of word sync
1548 int scale_factor; // on scores for big words
1549 int32_t sp_count; // spaces
1550 FPSEGPT_LIST seg_list; // char cells
1551 FPSEGPT_IT seg_it; // iterator
1552 double sqsum; // sum of squares
1553 double spsum; // of spaces
1554 double sp_var; // space error
1555 double word_sync; // result for word
1556 double total_count; // total cuts
1557
1558 if (blob_it.empty()) {
1559 return;
1560 }
1561 row_start = blob_it;
1562 total_blob_count = 0;
1563
1564 total_count = 0;
1565 sqsum = 0;
1566 sp_count = 0;
1567 spsum = 0;
1568 prev_right = -1;
1569 blob_it = row_start;
1570 start_it = blob_it;
1571 blob_count = 0;
1572 blob_box = box_next(&blob_it); // first blob
1573 blob_it.mark_cycle_pt();
1574 do {
1575 for (; blob_count > 0; blob_count--) {
1576 box_next(&start_it);
1577 }
1578 do {
1579 prev_box = blob_box;
1580 blob_count++;
1581 blob_box = box_next(&blob_it);
1582 } while (!blob_it.cycled_list() && blob_box.left() - prev_box.right() < space_size);
1583 word_sync = check_pitch_sync2(
1584 &start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, projection, projection_left,
1585 projection_right, row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
1586 total_blob_count += blob_count;
1587 seg_it.set_to_list(&seg_list);
1588 if (prev_right >= 0) {
1589 sp_var = seg_it.data()->position() - prev_right;
1590 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1591 sp_var *= sp_var;
1592 spsum += sp_var;
1593 sp_count++;
1594 }
1595 seg_it.move_to_last();
1596 prev_right = seg_it.data()->position();
1597 if (textord_pitch_scalebigwords) {
1598 scale_factor = (seg_list.length() - 2) / 2;
1599 if (scale_factor < 1) {
1600 scale_factor = 1;
1601 }
1602 } else {
1603 scale_factor = 1;
1604 }
1605 sqsum += word_sync * scale_factor;
1606 total_count += (seg_list.length() - 1) * scale_factor;
1607 seg_list.clear();
1608 } while (!blob_it.cycled_list());
1609 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1610 word_sync = total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1611 tprintf("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", word_sync, word_sync / initial_pitch, sp_sd,
1612 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P');
1613
1614 start_it = row_start;
1615 blob_it = row_start;
1616 word_sync =
1617 check_pitch_sync2(&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1618 projection, projection_left, projection_right,
1619 row->xheight * textord_projection_scale, occupation, &seg_list, 0, 0);
1620 if (occupation > 1) {
1621 word_sync /= occupation;
1622 }
1623 word_sync = sqrt(word_sync);
1624
1625 #ifndef GRAPHICS_DISABLED
1626 if (textord_show_row_cuts && to_win != nullptr) {
1627 plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1628 }
1629 #endif
1630 seg_list.clear();
1631 if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1632 if (word_sync < textord_words_def_fixed * initial_pitch && !row->all_caps) {
1633 res2 = "DF";
1634 } else {
1635 res2 = "MF";
1636 }
1637 } else {
1638 res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1639 }
1640 tprintf(
1641 "row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, "
1642 "all_caps=%d\n",
1643 word_sync, word_sync / initial_pitch,
1644 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', occupation, res2,
1645 initial_pitch, row->fixed_pitch, row->all_caps);
1646 }
1647
1648 /**********************************************************************
1649 * find_repeated_chars
1650 *
1651 * Extract marked leader blobs and put them
1652 * into words in advance of fixed pitch checking and word generation.
1653 **********************************************************************/
1654 void find_repeated_chars(TO_BLOCK *block, // Block to search.
1655 bool testing_on) { // Debug mode.
1656 POLY_BLOCK *pb = block->block->pdblk.poly_block();
1657 if (pb != nullptr && !pb->IsText()) {
1658 return; // Don't find repeated chars in non-text blocks.
1659 }
1660
1661 TO_ROW *row;
1662 BLOBNBOX_IT box_it;
1663 BLOBNBOX_IT search_it; // forward search
1664 WERD *word; // new word
1665 TBOX word_box; // for plotting
1666 int blobcount, repeated_set;
1667
1668 TO_ROW_IT row_it = block->get_rows();
1669 if (row_it.empty()) {
1670 return; // empty block
1671 }
1672 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1673 row = row_it.data();
1674 box_it.set_to_list(row->blob_list());
1675 if (box_it.empty()) {
1676 continue; // no blobs in this row
1677 }
1678 if (!row->rep_chars_marked()) {
1679 mark_repeated_chars(row);
1680 }
1681 if (row->num_repeated_sets() == 0) {
1682 continue; // nothing to do for this row
1683 }
1684 // new words
1685 WERD_IT word_it(&row->rep_words);
1686 do {
1687 if (box_it.data()->repeated_set() != 0 && !box_it.data()->joined_to_prev()) {
1688 blobcount = 1;
1689 repeated_set = box_it.data()->repeated_set();
1690 search_it = box_it;
1691 search_it.forward();
1692 while (!search_it.at_first() && search_it.data()->repeated_set() == repeated_set) {
1693 blobcount++;
1694 search_it.forward();
1695 }
1696 // After the call to make_real_word() all the blobs from this
1697 // repeated set will be removed from the blob list. box_it will be
1698 // set to point to the blob after the end of the extracted sequence.
1699 word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
1700 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1701 tprintf("Bad box joined to prev at");
1702 box_it.data()->bounding_box().print();
1703 tprintf("After repeated word:");
1704 word->bounding_box().print();
1705 }
1706 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1707 word->set_flag(W_REP_CHAR, true);
1708 word->set_flag(W_DONT_CHOP, true);
1709 word_it.add_after_then_move(word);
1710 } else {
1711 box_it.forward();
1712 }
1713 } while (!box_it.at_first());
1714 }
1715 }
1716
1717 /**********************************************************************
1718 * plot_fp_word
1719 *
1720 * Plot a block of words as if fixed pitch.
1721 **********************************************************************/
1722
1723 #ifndef GRAPHICS_DISABLED
1724 void plot_fp_word( // draw block of words
1725 TO_BLOCK *block, // block to draw
1726 float pitch, // pitch to draw with
1727 float nonspace // for space threshold
1728 ) {
1729 TO_ROW *row; // current row
1730 TO_ROW_IT row_it = block->get_rows();
1731
1732 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1733 row = row_it.data();
1734 row->min_space = static_cast<int32_t>((pitch + nonspace) / 2);
1735 row->max_nonspace = row->min_space;
1736 row->space_threshold = row->min_space;
1737 plot_word_decisions(to_win, static_cast<int16_t>(pitch), row);
1738 }
1739 }
1740 #endif
1741
1742 } // namespace tesseract