comparison mupdf-source/thirdparty/tesseract/src/textord/oldbasel.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /**********************************************************************
2 * File: oldbasel.h (Formerly oldbl.h)
3 * Description: A re-implementation of the old baseline algorithm.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1993, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19 #ifndef OLDBASEL_H
20 #define OLDBASEL_H
21
22 #include "blobbox.h"
23 #include "params.h"
24
25 namespace tesseract {
26
27 extern BOOL_VAR_H(textord_oldbl_debug); // boolean parameter, declared only; NOTE(review): presumably toggles old-baseline debug output - confirm at its definition
28
29 int get_blob_coords( // get the blob boxes of a row
30 TO_ROW *row, // row to take the blobs from
31 int32_t lineheight, // block-level line height
32 TBOX *blobcoords, // output: blob boxes
33 bool &holed_line, // by reference: the line lost a lot of blobs
34 int &outcount // by reference: number of real blobs
35 );
36 void make_first_baseline( // make the initial baseline approximation
37 TBOX blobcoords[], /* blob bounding boxes */
38 int blobcount, /* number of entries in blobcoords */
39 int xcoords[], /* x coordinates for the spline approximator */
40 int ycoords[], /* y coordinates for the spline approximator */
41 QSPLINE *spline, /* initial spline */
42 QSPLINE *baseline, /* output spline */
43 float jumplimit /* guess at half descenders (original wording; NOTE(review): units not visible here) */
44 );
45 void make_holed_baseline( // initial baseline approximation; NOTE(review): name suggests the holed-line case - confirm
46 TBOX blobcoords[], /* blob bounding boxes */
47 int blobcount, /* number of entries in blobcoords */
48 QSPLINE *spline, /* initial spline */
49 QSPLINE *baseline, /* output spline */
50 float gradient // gradient of the line
51 );
52 int partition_line( // partition the blobs of a row
53 TBOX blobcoords[], // blob bounding boxes
54 int blobcount, /* number of blobs on the row */
55 int *numparts, /* number of partitions */
56 char partids[], /* partition number of each blob */
57 int partsizes[], /* number in each partition */
58 QSPLINE *spline, /* curve to fit to */
59 float jumplimit, /* allowed delta change */
60 float ydiffs[] /* difference from the spline */
61 );
62 void merge_oldbl_parts( // merge partitions (per name; original summary read "partition blobs" - NOTE(review): confirm)
63 TBOX blobcoords[], // blob bounding boxes
64 int blobcount, /* number of blobs on the row */
65 char partids[], /* partition number of each blob */
66 int partsizes[], /* number in each partition */
67 int biggestpart, // major partition
68 float jumplimit /* allowed delta change */
69 );
70 int get_ydiffs( // evaluate differences
71 TBOX blobcoords[], // blob bounding boxes
72 int blobcount, /* number of blobs */
73 QSPLINE *spline, /* approximating spline */
74 float ydiffs[] /* output: the differences */
75 );
76 int choose_partition( // select a partition
77 float diff, /* difference from the spline */
78 float partdiffs[], /* difference on all partitions */
79 int lastpart, /* last assigned partition */
80 float jumplimit, /* threshold for a new partition */
81 float *drift, float *last_delta, int *partcount /* partcount: number of partitions; drift and last_delta are undocumented here */
82 );
83 int partition_coords( // find the relevant coordinates
84 TBOX blobcoords[], // blob bounding boxes
85 int blobcount, /* number of blobs in the row */
86 char partids[], /* partition number of each blob */
87 int bestpart, /* best new partition */
88 int xcoords[], /* x points to work on */
89 int ycoords[] /* y points to work on */
90 );
91 int segment_spline( // make xstarts
92 TBOX blobcoords[], // bounding boxes
93 int blobcount, /* number of blobs in the row */
94 int xcoords[], /* x points to work on */
95 int ycoords[], /* y points to work on */
96 int degree, int pointcount, /* pointcount: number of points; degree is undocumented here */
97 int xstarts[] // result
98 );
99 bool split_stepped_spline( // make xstarts; NOTE(review): name suggests splitting a spline that has a step - confirm
100 QSPLINE *baseline, // current shot at the baseline
101 float jumplimit, // maximum step (original wording: "max step function")
102 int *xcoords, /* points to work on */
103 int *xstarts, // result
104 int &segments // by reference: number of segments
105 );
106 void insert_spline_point( // insert a point into xstarts (per name; original summary read "get descenders" - NOTE(review): confirm)
107 int xstarts[], // starts to shuffle
108 int segment, // insertion point
109 int coord1, // first of the coordinates to add
110 int coord2, int &segments // coord2: second coordinate to add; segments: total segments, by reference
111 );
112 void find_lesser_parts( // get descenders
113 TO_ROW *row, // row to process
114 TBOX blobcoords[], // blob bounding boxes
115 int blobcount, /* number of blobs */
116 char partids[], /* partition of each blob */
117 int partsizes[], /* size of each partition */
118 int partcount, /* number of partitions */
119 int bestpart /* biggest partition */
120 );
121
122 void old_first_xheight( // "the wiseowl way" (original wording); NOTE(review): per name, a first x-height estimate - confirm
123 TO_ROW *row, /* current row */
124 TBOX blobcoords[], /* blob bounding boxes */
125 int initialheight, // initial guess
126 int blobcount, /* number of blobs in blobcoords */
127 QSPLINE *baseline, /* established baseline */
128 float jumplimit /* minimum ascender height */
129 );
130
131 void make_first_xheight( // find the x-height
132 TO_ROW *row, /* current row */
133 TBOX blobcoords[], /* blob bounding boxes */
134 int lineheight, // initial guess
135 int init_lineheight, // block-level guess
136 int blobcount, /* number of blobs in blobcoords */
137 QSPLINE *baseline, /* established baseline */
138 float jumplimit /* minimum ascender height */
139 );
140
141 int *make_height_array( // get an array of heights; NOTE(review): ownership of the returned pointer is not visible in this header
142 TBOX blobcoords[], /* blob bounding boxes */
143 int blobcount, /* number of blobs in blobcoords */
144 QSPLINE *baseline /* established baseline */
145 );
146
147 void find_top_modes( // get the modes
148 STATS *stats, // stats to hack
149 int statnum, // number of piles
150 int modelist[], int modenum // modenum: number of modes to get; modelist is undocumented here
151 );
152
153 void pick_x_height(TO_ROW *row, // row to process
154 int modelist[], int lefts[], int rights[], STATS *heightstat,
155 int mode_threshold); // NOTE(review): all parameters other than row are undocumented in this header
156
157 } // namespace tesseract
158
159 #endif