comparison mupdf-source/thirdparty/leptonica/src/dewarp1.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file dewarp1.c
29 * <pre>
30 *
31 * Basic operations and serialization
32 *
33 * Create/destroy dewarp
34 * L_DEWARP *dewarpCreate()
35 * L_DEWARP *dewarpCreateRef()
36 * void dewarpDestroy()
37 *
38 * Create/destroy dewarpa
39 * L_DEWARPA *dewarpaCreate()
40 * L_DEWARPA *dewarpaCreateFromPixacomp()
41 * void dewarpaDestroy()
42 * l_int32 dewarpaDestroyDewarp()
43 *
44 * Dewarpa insertion/extraction
45 * l_int32 dewarpaInsertDewarp()
46 * static l_int32 dewarpaExtendArraysToSize()
47 * L_DEWARP *dewarpaGetDewarp()
48 *
49 * Setting parameters to control rendering from the model
50 * l_int32 dewarpaSetCurvatures()
51 * l_int32 dewarpaUseBothArrays()
52 * l_int32 dewarpaSetCheckColumns()
53 * l_int32 dewarpaSetMaxDistance()
54 *
55 * Dewarp serialized I/O
56 * L_DEWARP *dewarpRead()
57 * L_DEWARP *dewarpReadStream()
58 * L_DEWARP *dewarpReadMem()
59 * l_int32 dewarpWrite()
60 * l_int32 dewarpWriteStream()
61 * l_int32 dewarpWriteMem()
62 *
63 * Dewarpa serialized I/O
64 * L_DEWARPA *dewarpaRead()
65 * L_DEWARPA *dewarpaReadStream()
66 * L_DEWARPA *dewarpaReadMem()
67 * l_int32 dewarpaWrite()
68 * l_int32 dewarpaWriteStream()
69 * l_int32 dewarpaWriteMem()
70 *
71 *
72 * Examples of usage
73 * =================
74 *
75 * See dewarpaCreateFromPixacomp() for an example of the basic
76 * operations, starting from a set of 1 bpp images.
77 *
78 * Basic functioning to dewarp a specific single page:
79 * \code
80 * // Make the Dewarpa for the pages
81 * L_Dewarpa *dewa = dewarpaCreate(1, 30, 1, 15, 50);
82 * dewarpaSetCurvatures(dewa, -1, 50, -1, -1, -1, -1);
83 * dewarpaUseBothArrays(dewa, 1); // try to use both disparity
84 * // arrays for this example
85 *
86 * // Do the page: start with a binarized image
87 * Pix *pixb = "binarize"(pixs);
88 * // Initialize a Dewarp for this page (say, page 214)
89 * L_Dewarp *dew = dewarpCreate(pixb, 214);
90 * // Insert in Dewarpa and obtain parameters for building the model
91 * dewarpaInsertDewarp(dewa, dew);
92 * // Do the work
93 * dewarpBuildPageModel(dew, NULL); // no debugging
94 * // Optionally set rendering parameters
95 * // Apply model to the input pixs
96 * Pix *pixd;
97 * dewarpaApplyDisparity(dewa, 214, pixs, 255, 0, 0, &pixd, NULL);
98 * pixDestroy(&pixb);
99 * \endcode
100 *
101 * Basic functioning to dewarp many pages:
102 * \code
103 * // Make the Dewarpa for the set of pages; use fullres 1 bpp
104 * L_Dewarpa *dewa = dewarpaCreate(10, 30, 1, 15, 50);
105 * // Optionally set rendering parameters
106 * dewarpaSetCurvatures(dewa, -1, 30, -1, -1, -1, -1);
107 * dewarpaUseBothArrays(dewa, 0); // just use the vertical disparity
108 * // array for this example
109 *
110 * // Do first page: start with a binarized image
111 * Pix *pixb = "binarize"(pixs);
112 * // Initialize a Dewarp for this page (say, page 1)
113 * L_Dewarp *dew = dewarpCreate(pixb, 1);
114 * // Insert in Dewarpa and obtain parameters for building the model
115 * dewarpaInsertDewarp(dewa, dew);
116 * // Do the work
117 * dewarpBuildPageModel(dew, NULL); // no debugging
118 * dewarpMinimize(dew); // remove most heap storage
119 * pixDestroy(&pixb);
120 *
121 * // Do the other pages the same way
122 * ...
123 *
124 * // Apply models to each page; if the page model is invalid,
125 * // try to use a valid neighboring model. Note that the call
126 * // to dewarpaInsertRefModels() is optional, because it is called
127 * // by dewarpaApplyDisparity() on the first page it acts on.
128 * dewarpaInsertRefModels(dewa, 0, 1); // use debug flag to get more
129 * // detailed information about the page models
130 * [For each page, where pixs is the fullres image to be dewarped] {
131 * L_Dewarp *dew = dewarpaGetDewarp(dewa, pageno);
132 * if (dew) { // disparity model exists
133 * Pix *pixd;
134 * dewarpaApplyDisparity(dewa, pageno, pixs, 255,
135 * 0, 0, &pixd, NULL);
136 * dewarpMinimize(dew); // clean out the pix and fpix arrays
137 * // Squirrel pixd away somewhere ...)
138 * }
139 * }
140 * \endcode
141 *
142 * Basic functioning to dewarp a small set of pages, potentially
143 * using models from nearby pages:
144 * \code
145 * // (1) Generate a set of binarized images in the vicinity of the
146 * // pages to be dewarped. We will attempt to compute models
147 * // for pages from 'firstpage' to 'lastpage'.
148 * // Store the binarized images in a compressed array of
149 * // size 'n', where 'n' is the number of images to be stored,
150 * // and where the offset is the first page.
151 * PixaComp *pixac = pixacompCreateInitialized(n, firstpage, NULL,
152 * IFF_TIFF_G4);
153 * for (i = firstpage; i <= lastpage; i++) {
154 * Pix *pixb = "binarize"(pixs);
155 * pixacompReplacePix(pixac, i, pixb, IFF_TIFF_G4);
156 * pixDestroy(&pixb);
157 * }
158 *
159 * // (2) Make the Dewarpa for the pages.
160 * L_Dewarpa *dewa =
161 * dewarpaCreateFromPixacomp(pixac, 1, 30, 15, 20);
162 * dewarpaUseBothArrays(dewa, 1); // try to use both disparity arrays
163 * // in this example
164 *
165 * // (3) Finally, apply the models. For page 'firstpage' with image pixs:
166 * L_Dewarp *dew = dewarpaGetDewarp(dewa, firstpage);
167 * if (dew) { // disparity model exists
168 * Pix *pixd;
169 * dewarpaApplyDisparity(dewa, firstpage, pixs, 255, 0, 0, &pixd, NULL);
170 * dewarpMinimize(dew);
171 * }
172 * \endcode
173 *
174 * Because in general some pages will not have enough text to build a
175 * model, we fill in for those pages with a reference to the page
176 * model to use. Both the target page and the reference page must
177 * have the same parity. We can also choose to use either a partial model
178 * (with only vertical disparity) or the full model of a nearby page.
179 *
180 * Minimizing the data in a model by stripping out images,
181 * numas, and full resolution disparity arrays:
182 * dewarpMinimize(dew);
183 * This can be done at any time to save memory. Serialization does
184 * not use the data that is stripped.
185 *
186 * You can apply any model (in a dew), stripped or not, to another image:
187 * \code
188 * // For all pages with invalid models, assign the nearest valid
189 * // page model with same parity.
190 * dewarpaInsertRefModels(dewa, 0, 0);
191 * // You can then apply to 'newpix' the page model that was assigned
192 * // to 'pageno', giving the result in pixd:
193 * Pix *pixd;
194 * dewarpaApplyDisparity(dewa, pageno, newpix, 255, 0, 0, &pixd, NULL);
195 * \endcode
196 *
197 * You can apply the disparity arrays to a deliberately undercropped
198 * image. Suppose that you undercrop by (left, right, top, bot), so
199 * that the disparity arrays are aligned with their origin at (left, top).
200 * Dewarp the undercropped image with:
201 * \code
202 * Pix *pixd;
203 * dewarpaApplyDisparity(dewa, pageno, undercropped_pix, 255,
204 * left, top, &pixd, NULL);
205 * \endcode
206 *
207 * Description of the approach to analyzing page image distortion
208 * ==============================================================
209 *
210 * When a book page is scanned, there are several possible causes
211 * for the text lines to appear to be curved:
212 * (1) A barrel (fish-eye) effect because the camera is at
213 * a finite distance from the page. Take the normal from
214 * the camera to the page (the 'optic axis'). Lines on
215 * the page "below" this point will appear to curve upward
216 * (negative curvature); lines "above" this will curve downward.
217 * (2) Radial distortion from the camera lens. Probably not
218 * a big factor.
219 * (3) Local curvature of the page in to (or out of) the image
220 * plane (which is perpendicular to the optic axis).
221 * This has no effect if the page is flat.
222 *
223 * In the following, the optic axis is in the z direction and is
224 * perpendicular to the xy plane; the book is assumed to be aligned
225 * so that y is approximately along the binding.
226 * The goal is to compute the "disparity" field, D(x,y), which
227 * is actually a vector composed of the horizontal and vertical
228 * disparity fields H(x,y) and V(x,y). Each of these is a local
229 * function that gives the amount each point in the image is
230 * required to move in order to rectify the horizontal and vertical
231 * lines. It would also be nice to "flatten" the page to compensate
232 * for effect (3), foreshortening due to bending of the page into
233 * the z direction, but that is more difficult.
234 *
235 * Effects (1) and (2) can be directly compensated by calibrating
236 * the scene, using a flat page with horizontal and vertical lines.
237 * Then H(x,y) and V(x,y) can be found as two (non-parametric) arrays
238 * of values. Suppose this has been done. Then the remaining
239 * distortion is due to (3).
240 *
241 * We consider the simple situation where the page bending is independent
242 * of y, and is described by alpha(x), where alpha is the angle between
243 * the normal to the page and the optic axis. cos(alpha(x)) is the local
244 * compression factor of the page image in the horizontal direction, at x.
245 * Thus, if we know alpha(x), we can compute the disparity H(x) required
246 * to flatten the image by simply integrating 1/cos(alpha), and we could
247 * compute the remaining disparities, H(x,y) and V(x,y), from the
248 * page content, as described below. Unfortunately, we don't know
249 * alpha. What do we know? If there are horizontal text lines
250 * on the page, we can compute the vertical disparity, V(x,y), which
251 * is the local translation required to make the text lines parallel
252 * to the rasters. If the margins are left and right aligned, we can
253 * also estimate the horizontal disparity, H(x,y), required to have
254 * uniform margins. All that can be done from the image alone,
255 * assuming we have text lines covering a sufficient part of the page.
256 *
257 * What about alpha(x)? The basic question relating to (3) is this:
258 *
259 * Is it possible, using the shape of the text lines alone,
260 * to compute both the vertical and horizontal disparity fields?
261 *
262 * The underlying problem is to separate the line curvature effects due
263 * to the camera view from those due to actual bending of the page.
264 * I believe the proper way to do this is to make some measurements
265 * based on the camera setup, which will depend mostly on the distance
266 * of the camera from the page, and to a smaller extent on the location
267 * of the optic axis with respect to the page.
268 *
269 * Here is the procedure. Photograph a page with a fine 2D line grid
270 * several times, each with a different slope near the binding.
271 * This can be done by placing the grid page on books that have
272 * different shapes z(x) near the binding. For each one you can
273 * measure, near the binding:
274 * (1) ds/dy, the vertical rate of change of slope of the horizontal lines
275 * (2) the local horizontal compression of the vertical lines due
276 * to the page angle dz/dx.
277 * As mentioned above, the local horizontal compression is simply
278 * cos(dz/dx). But the measurement you can make on an actual book
279 * page is (1). The difficulty is to generate (2) from (1).
280 *
281 * Back to the procedure. The function in (1), ds/dy, likely needs
282 * to be measured at a few y locations, because the relation
283 * between (1) and (2) may weakly depend on the y-location with
284 * respect to the y-coordinate of the optic axis of the camera.
285 * From these measurements you can determine, for the camera setup
286 * that you have, the local horizontal compression, cos(dz/dx), as a
287 * function of both the vertical location (y) and your measured vertical
288 * derivative of the text line slope there, ds/dy. Then with
289 * appropriate smoothing of your measured values, you can set up a
290 * horizontal disparity array to correct for the compression due
291 * to dz/dx.
292 *
293 * Now consider V(x,0) and V(x,h), the vertical disparity along
294 * the top and bottom of the image. With a little thought you
295 * can convince yourself that the local foreshortening,
296 * as a function of x, is proportional to the difference
297 * between the slope of V(x,0) and V(x,h). The horizontal
298 * disparity can then be computed by integrating the local foreshortening
299 * over x. Integration of the slope of V(x,0) and V(x,h) gives
300 * the vertical disparity itself. We have to normalize to h, the
301 * height of the page. So the very simple result is that
302 *
303 * H(x) ~ (V(x,0) - V(x,h)) / h [1]
304 *
305 * which is easily computed. There is a proportionality constant
306 * that depends on the ratio of h to the distance to the camera.
307 * Can we actually believe this for the case where the bending
308 * is independent of y? I believe the answer is yes,
309 * as long as you first remove the apparent distortion due
310 * to the camera being at a finite distance.
311 *
312 * If you know the intersection of the optical axis with the page
313 * and the distance to the camera, and if the page is perpendicular
314 * to the optic axis, you can compute the horizontal and vertical
315 * disparities due to (1) and (2) and remove them. The resulting
316 * distortion should be entirely due to bending (3), for which
317 * the relation
318 *
319 * Hx(x) dx = C * ((Vx(x,0) - Vx(x, h))/h) dx [2]
320 *
321 * holds for each point in x (Hx and Vx are partial derivatives w/rt x).
322 * Integrating over x, and using H(0) = 0, we get the result [1].
323 *
324 * I believe this result holds differentially for each value of y, so
325 * that in the case where the bending is not independent of y,
326 * the expression (V(x,0) - V(x,h)) / h goes over to Vy(x,y). Then
327 *
328 * H(x,y) = Integral(0,x) (Vyx(x,y) dx) [3]
329 *
330 * where Vyx() is the partial derivative of V w/rt both x and y.
331 *
332 * It would be nice if there were a simple mathematical relation between
333 * the horizontal and vertical disparities for the situation
334 * where the paper bends without stretching or kinking.
335 * I had hoped to get a relation between H and V, such as
336 * Hx(x,y) ~ Vy(x,y), which would imply that H and V are real
337 * and imaginary parts of a complex potential, each of which
338 * satisfy the laplace equation. But then the gradients of the
339 * two potentials would be normal, and that does not appear to be the case.
340 * Thus, the questions of proving the relations above (for small bending),
341 * or finding a simpler relation between H and V than those equations,
342 * remain open. So far, we have only used [1] for the horizontal
343 * disparity H(x).
344 *
345 * In the version of the code that follows, we first use text lines
346 * to find V(x,y). Then, we try to compute H(x,y) that will align
347 * the text vertically on the left and right margins. This is not
348 * always possible -- sometimes the right margin is not right justified.
349 * By default, we don't require the horizontal disparity to have a
350 * valid page model for dewarping a page, but this requirement can
351 * be forced using dewarpaUseFullModel().
352 *
353 * As described above, one can add a y-independent component of
354 * the horizontal disparity H(x) to counter the foreshortening
355 * effect due to the bending of the page near the binding.
356 * This requires widening the image on the side near the binding,
357 * and we do not provide this option here. However, we do provide
358 * a function that will generate this disparity field:
359 * fpixExtraHorizDisparity()
360 *
361 * Here is the basic outline for building the disparity arrays.
362 *
363 * (1) Find lines going approximately through the center of the
364 * text in each text line. Accept only lines that are
365 * close in length to the longest line.
366 * (2) Use these lines to generate a regular and highly subsampled
367 * vertical disparity field V(x,y).
368 * (3) Interpolate this to generate a full resolution vertical
369 * disparity field.
370 * (4) For lines that are sufficiently long, assume they are approximately
371 * left and right-justified, and construct a highly subsampled
372 * horizontal disparity field H(x,y) that will bring them into alignment.
373 * (5) Interpolate this to generate a full resolution horizontal
374 * disparity field.
375 * (6) Apply the vertical dewarping, followed by the horizontal dewarping.
376 *
377 * Step (1) is clearly described by the code in pixGetTextlineCenters().
378 *
379 * Steps (2) and (3) follow directly from the data in step (1),
380 * and constitute the bulk of the work done in dewarpBuildPageModel().
381 * Virtually all the noise in the data is smoothed out by doing
382 * least-square quadratic fits, first horizontally to the data
383 * points representing the text line centers, and then vertically.
384 * The trick is to sample these lines on a regular grid.
385 * First each horizontal line is sampled at equally spaced
386 * intervals horizontally. We thus get a set of points,
387 * one in each line, that are vertically aligned, and
388 * the data we represent is the vertical distance of each point
389 * from the min or max value on the curve, depending on the
390 * sign of the curvature component. Each of these vertically
391 * aligned sets of points constitutes a sampled vertical disparity,
392 * and we do a LS quartic fit to each of them, followed by
393 * vertical sampling at regular intervals. We now have a subsampled
394 * grid of points, all equally spaced, giving at each point the local
395 * vertical disparity. Finally, the full resolution vertical disparity
396 * is formed by interpolation. All the least square fits do a
397 * great job of smoothing everything out, as can be observed by
398 * the contour maps that are generated for the vertical disparity field.
399 *
400 * Steps (4) through (6) again use the line data in step (1).
401 * By default, we do separate quadratic fits to the left and right
402 * line edges. There is also the option to do linear fits to the
403 * line edges, which typically does not give as good a fit, but is
404 * safer for some pages that have text in the margins, or have multiple
405 * columns of text with a large space between the columns. There is
406 * an option, which is the default, to check for multiple columns and
407 * if found to skip dewarping based on the line edges -- we compute but
408 * do not use the horizontal disparity array.
409 * </pre>
410 */
411
412 #ifdef HAVE_CONFIG_H
413 #include <config_auto.h>
414 #endif /* HAVE_CONFIG_H */
415
416 #include <math.h>
417 #include "allheaders.h"
418
419 static l_int32 dewarpaExtendArraysToSize(L_DEWARPA *dewa, l_int32 size);
420
421 /* Parameter values used in dewarpaCreate() */
422 static const l_int32 InitialPtrArraySize = 20; /* arbitrary value; used by dewarpaCreate() when nptrs <= 0 */
423 static const l_int32 MaxPtrArraySize = 10000; /* upper limit on nptrs in dewarpaCreate() and on the page number in dewarpaInsertDewarp() */
424 static const l_int32 DefaultArraySampling = 30; /* used when sampling == 0 */
425 static const l_int32 MinArraySampling = 8; /* smaller non-zero sampling values are raised to this, with a warning */
426 static const l_int32 DefaultMinLines = 15; /* used when minlines == 0 */
427 static const l_int32 MinMinLines = 4; /* non-zero minlines below this are not accepted; a warning is issued */
428 static const l_int32 DefaultMaxRefDist = 16; /* used when maxdist < 0 */
429 static const l_int32 DefaultUseBoth = TRUE; /* initial value of dewa->useboth */
430 static const l_int32 DefaultCheckColumns = TRUE; /* initial value of dewa->check_columns */
431
432 /* Parameter values used in dewarpaSetCurvatures(); dewarpaCreate() also
     * copies them into the corresponding dewa fields as initial values */
433 static const l_int32 DefaultMaxLineCurv = 150; /* initial dewa->max_linecurv */
434 static const l_int32 DefaultMinDiffLineCurv = 0; /* initial dewa->min_diff_linecurv */
435 static const l_int32 DefaultMaxDiffLineCurv = 170; /* initial dewa->max_diff_linecurv */
436 static const l_int32 DefaultMaxEdgeCurv = 50; /* initial dewa->max_edgecurv */
437 static const l_int32 DefaultMaxDiffEdgeCurv = 40; /* initial dewa->max_diff_edgecurv */
438 static const l_int32 DefaultMaxEdgeSlope = 80; /* initial dewa->max_edgeslope */
439
440 /*----------------------------------------------------------------------*
441 * Create/destroy Dewarp *
442 *----------------------------------------------------------------------*/
443 /*!
444 * \brief dewarpCreate()
445 *
446 * \param[in] pixs 1 bpp
447 * \param[in] pageno page number
448 * \return dew or NULL on error
449 *
450 * <pre>
451 * Notes:
452 * (1) The input pixs is either full resolution or 2x reduced.
453 * (2) The page number is typically 0-based. If scanned from a book,
454 * the even pages are usually on the left. Disparity arrays
455 * built for even pages should only be applied to even pages.
456 * </pre>
457 */
458 L_DEWARP *
459 dewarpCreate(PIX *pixs,
460 l_int32 pageno)
461 {
462 L_DEWARP *dew;
463
464 if (!pixs)
465 return (L_DEWARP *)ERROR_PTR("pixs not defined", __func__, NULL);
466 if (pixGetDepth(pixs) != 1)
467 return (L_DEWARP *)ERROR_PTR("pixs not 1 bpp", __func__, NULL);
468
469 dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP));
470 dew->pixs = pixClone(pixs);
471 dew->pageno = pageno;
472 dew->w = pixGetWidth(pixs);
473 dew->h = pixGetHeight(pixs);
474 return dew;
475 }
476
477
478 /*!
479 * \brief dewarpCreateRef()
480 *
481 * \param[in] pageno this page number
482 * \param[in] refpage page number of dewarp disparity arrays to be used
483 * \return dew or NULL on error
484 *
485 * <pre>
486 * Notes:
487 * (1) This specifies which dewarp struct should be used for
488 * the given page. It is placed in dewarpa for pages
489 * for which no model can be built.
490 * (2) This page and the reference page have the same parity and
491 * the reference page is the closest page with a disparity model
492 * to this page.
493 * </pre>
494 */
495 L_DEWARP *
496 dewarpCreateRef(l_int32 pageno,
497 l_int32 refpage)
498 {
499 L_DEWARP *dew;
500
501 dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP));
502 dew->pageno = pageno;
503 dew->hasref = 1;
504 dew->refpage = refpage;
505 return dew;
506 }
507
508
509 /*!
510 * \brief dewarpDestroy()
511 *
512 * \param[in,out] pdew will be set to null before returning
513 * \return void
514 */
515 void
516 dewarpDestroy(L_DEWARP **pdew)
517 {
518 L_DEWARP *dew;
519
520 if (pdew == NULL) {
521 L_WARNING("ptr address is null!\n", __func__);
522 return;
523 }
524 if ((dew = *pdew) == NULL)
525 return;
526
527 pixDestroy(&dew->pixs);
528 fpixDestroy(&dew->sampvdispar);
529 fpixDestroy(&dew->samphdispar);
530 fpixDestroy(&dew->sampydispar);
531 fpixDestroy(&dew->fullvdispar);
532 fpixDestroy(&dew->fullhdispar);
533 fpixDestroy(&dew->fullydispar);
534 numaDestroy(&dew->namidys);
535 numaDestroy(&dew->nacurves);
536 LEPT_FREE(dew);
537 *pdew = NULL;
538 }
539
540
541 /*----------------------------------------------------------------------*
542 * Create/destroy Dewarpa *
543 *----------------------------------------------------------------------*/
544 /*!
545 * \brief dewarpaCreate()
546 *
547 * \param[in] nptrs number of dewarp page ptrs; typ. the number of pages
548 * \param[in] sampling use 0 for default value; the minimum allowed is 8
549 * \param[in] redfactor of input images: 1 is full res; 2 is 2x reduced
550 * \param[in] minlines minimum number of lines to accept; use 0 for default
551 * \param[in] maxdist for locating reference disparity; use -1 for default
552 * \return dewa or NULL on error
553 *
554 * <pre>
555 * Notes:
556 * (1) The sampling, minlines and maxdist parameters will be
557 * applied to all images.
558 * (2) The sampling factor is used for generating the disparity arrays
559 * from the input image. For 2x reduced input, use a sampling
560 * factor that is half the sampling you want on the full resolution
561 * images.
562 * (3) Use %redfactor = 1 for full resolution; 2 for 2x reduction.
563 * All input images must be at one of these two resolutions.
564 * (4) %minlines is the minimum number of nearly full-length lines
565 * required to generate a vertical disparity array. The default
566 * number is 15. Use a smaller number to accept a questionable
567 * array, but not smaller than 4.
568 * (5) When a model can't be built for a page, it looks up to %maxdist
569 * in either direction for a valid model with the same page parity.
570 * Use -1 for the default value of %maxdist; use 0 to avoid using
571 * a ref model.
572 * (6) The ptr array is expanded as necessary to accommodate page images.
573 * </pre>
574 */
575 L_DEWARPA *
576 dewarpaCreate(l_int32 nptrs,
577 l_int32 sampling,
578 l_int32 redfactor,
579 l_int32 minlines,
580 l_int32 maxdist)
581 {
582 L_DEWARPA *dewa;
583
584 if (nptrs <= 0)
585 nptrs = InitialPtrArraySize;
586 if (nptrs > MaxPtrArraySize)
587 return (L_DEWARPA *)ERROR_PTR("too many pages", __func__, NULL);
588 if (redfactor != 1 && redfactor != 2)
589 return (L_DEWARPA *)ERROR_PTR("redfactor not in {1,2}",
590 __func__, NULL);
591 if (sampling == 0) {
592 sampling = DefaultArraySampling;
593 } else if (sampling < MinArraySampling) {
594 L_WARNING("sampling too small; setting to %d\n", __func__,
595 MinArraySampling);
596 sampling = MinArraySampling;
597 }
598 if (minlines == 0) {
599 minlines = DefaultMinLines;
600 } else if (minlines < MinMinLines) {
601 L_WARNING("minlines too small; setting to %d\n", __func__,
602 MinMinLines);
603 minlines = DefaultMinLines;
604 }
605 if (maxdist < 0)
606 maxdist = DefaultMaxRefDist;
607
608 dewa = (L_DEWARPA *)LEPT_CALLOC(1, sizeof(L_DEWARPA));
609 dewa->dewarp = (L_DEWARP **)LEPT_CALLOC(nptrs, sizeof(L_DEWARPA *));
610 dewa->dewarpcache = (L_DEWARP **)LEPT_CALLOC(nptrs, sizeof(L_DEWARPA *));
611 if (!dewa->dewarp || !dewa->dewarpcache) {
612 dewarpaDestroy(&dewa);
613 return (L_DEWARPA *)ERROR_PTR("dewarp ptrs not made", __func__, NULL);
614 }
615 dewa->nalloc = nptrs;
616 dewa->sampling = sampling;
617 dewa->redfactor = redfactor;
618 dewa->minlines = minlines;
619 dewa->maxdist = maxdist;
620 dewa->max_linecurv = DefaultMaxLineCurv;
621 dewa->min_diff_linecurv = DefaultMinDiffLineCurv;
622 dewa->max_diff_linecurv = DefaultMaxDiffLineCurv;
623 dewa->max_edgeslope = DefaultMaxEdgeSlope;
624 dewa->max_edgecurv = DefaultMaxEdgeCurv;
625 dewa->max_diff_edgecurv = DefaultMaxDiffEdgeCurv;
626 dewa->check_columns = DefaultCheckColumns;
627 dewa->useboth = DefaultUseBoth;
628 return dewa;
629 }
630
631
632 /*!
633 * \brief dewarpaCreateFromPixacomp()
634 *
635 * \param[in] pixac pixacomp of G4, 1 bpp images; with 1x1x1 placeholders
636 * \param[in] useboth 0 for only vert disparity; 1 for both vert and horiz
637 * \param[in] sampling use -1 or 0 for default value; otherwise minimum of 5
638 * \param[in] minlines minimum number of lines to accept; e.g., 10
639 * \param[in] maxdist for locating reference disparity; use -1 for default
640 * \return dewa or NULL on error
641 *
642 * <pre>
643 * Notes:
644 * (1) The returned dewa has disparity arrays calculated and
645 * is ready for serialization or for use in dewarping.
646 * (2) The sampling, minlines and maxdist parameters are
647 * applied to all images. See notes in dewarpaCreate() for details.
648 * (3) The pixac is full. Placeholders, if any, are w=h=d=1 images,
649 * and the real input images are 1 bpp at full resolution.
650 * They are assumed to be cropped to the actual page regions,
651 * and may be arbitrarily sparse in the array.
652 * (4) The output dewarpa is indexed by the page number.
653 * The offset in the pixac gives the mapping between the
654 * array index in the pixac and the page number.
655 * (5) This adds the ref page models.
656 * (6) This can be used to make models for any desired set of pages.
657 * The direct models are only made for pages with images in
658 * the pixacomp; the ref models are made for pages of the
659 * same parity within %maxdist of the nearest direct model.
660 * </pre>
661 */
662 L_DEWARPA *
663 dewarpaCreateFromPixacomp(PIXAC *pixac,
664 l_int32 useboth,
665 l_int32 sampling,
666 l_int32 minlines,
667 l_int32 maxdist)
668 {
669 l_int32 i, nptrs, pageno;
670 L_DEWARP *dew;
671 L_DEWARPA *dewa;
672 PIX *pixt;
673
674 if (!pixac)
675 return (L_DEWARPA *)ERROR_PTR("pixac not defined", __func__, NULL);
676
677 nptrs = pixacompGetCount(pixac);
678 if ((dewa = dewarpaCreate(pixacompGetOffset(pixac) + nptrs,
679 sampling, 1, minlines, maxdist)) == NULL)
680 return (L_DEWARPA *)ERROR_PTR("dewa not made", __func__, NULL);
681 dewarpaUseBothArrays(dewa, useboth);
682
683 for (i = 0; i < nptrs; i++) {
684 pageno = pixacompGetOffset(pixac) + i; /* index into pixacomp */
685 pixt = pixacompGetPix(pixac, pageno);
686 if (pixt && (pixGetWidth(pixt) > 1)) {
687 dew = dewarpCreate(pixt, pageno);
688 pixDestroy(&pixt);
689 if (!dew) {
690 ERROR_INT("unable to make dew!", __func__, 1);
691 continue;
692 }
693
694 /* Insert into dewa for this page */
695 dewarpaInsertDewarp(dewa, dew);
696
697 /* Build disparity arrays for this page */
698 dewarpBuildPageModel(dew, NULL);
699 if (!dew->vsuccess) { /* will need to use model from nearby page */
700 dewarpaDestroyDewarp(dewa, pageno);
701 L_ERROR("unable to build model for page %d\n", __func__, i);
702 continue;
703 }
704 /* Remove all extraneous data */
705 dewarpMinimize(dew);
706 }
707 pixDestroy(&pixt);
708 }
709 dewarpaInsertRefModels(dewa, 0, 0);
710
711 return dewa;
712 }
713
714
715 /*!
716 * \brief dewarpaDestroy()
717 *
718 * \param[in,out] pdewa will be set to null before returning
719 * \return void
720 */
721 void
722 dewarpaDestroy(L_DEWARPA **pdewa)
723 {
724 l_int32 i;
725 L_DEWARP *dew;
726 L_DEWARPA *dewa;
727
728 if (pdewa == NULL) {
729 L_WARNING("ptr address is null!\n", __func__);
730 return;
731 }
732 if ((dewa = *pdewa) == NULL)
733 return;
734
735 for (i = 0; i < dewa->nalloc; i++) {
736 if ((dew = dewa->dewarp[i]) != NULL)
737 dewarpDestroy(&dew);
738 if ((dew = dewa->dewarpcache[i]) != NULL)
739 dewarpDestroy(&dew);
740 }
741 numaDestroy(&dewa->namodels);
742 numaDestroy(&dewa->napages);
743
744 LEPT_FREE(dewa->dewarp);
745 LEPT_FREE(dewa->dewarpcache);
746 LEPT_FREE(dewa);
747 *pdewa = NULL;
748 }
749
750
751 /*!
752 * \brief dewarpaDestroyDewarp()
753 *
754 * \param[in] dewa
755 * \param[in] pageno of dew to be destroyed
756 * \return 0 if OK, 1 on error
757 */
758 l_ok
759 dewarpaDestroyDewarp(L_DEWARPA *dewa,
760 l_int32 pageno)
761 {
762 L_DEWARP *dew;
763
764 if (!dewa)
765 return ERROR_INT("dewa or dew not defined", __func__, 1);
766 if (pageno < 0 || pageno > dewa->maxpage)
767 return ERROR_INT("page out of bounds", __func__, 1);
768 if ((dew = dewa->dewarp[pageno]) == NULL)
769 return ERROR_INT("dew not defined", __func__, 1);
770
771 dewarpDestroy(&dew);
772 dewa->dewarp[pageno] = NULL;
773 return 0;
774 }
775
776
777 /*----------------------------------------------------------------------*
778 * Dewarpa insertion/extraction *
779 *----------------------------------------------------------------------*/
780 /*!
781 * \brief dewarpaInsertDewarp()
782 *
783 * \param[in] dewa
784 * \param[in] dew to be added
785 * \return 0 if OK, 1 on error
786 *
787 * <pre>
788 * Notes:
789 * (1) This inserts the dewarp into the array, which now owns it.
790 * It also keeps track of the largest page number stored.
791 * It must be done before the disparity model is built.
792 * (2) Note that this differs from the usual method of filling out
793 * arrays in leptonica, where the arrays are compact and
794 * new elements are typically added to the end. Here,
795 * the dewarp can be added anywhere, even beyond the initial
796 * allocation.
797 * </pre>
798 */
799 l_ok
800 dewarpaInsertDewarp(L_DEWARPA *dewa,
801 L_DEWARP *dew)
802 {
803 l_int32 pageno, n, newsize;
804 L_DEWARP *prevdew;
805
806 if (!dewa)
807 return ERROR_INT("dewa not defined", __func__, 1);
808 if (!dew)
809 return ERROR_INT("dew not defined", __func__, 1);
810
811 dew->dewa = dewa;
812 pageno = dew->pageno;
813 if (pageno > MaxPtrArraySize)
814 return ERROR_INT("too many pages", __func__, 1);
815 if (pageno > dewa->maxpage)
816 dewa->maxpage = pageno;
817 dewa->modelsready = 0; /* force re-evaluation at application time */
818
819 /* Extend ptr array if necessary */
820 n = dewa->nalloc;
821 newsize = n;
822 if (pageno >= 2 * n)
823 newsize = 2 * pageno;
824 else if (pageno >= n)
825 newsize = 2 * n;
826 if (newsize > n) {
827 if (dewarpaExtendArraysToSize(dewa, newsize))
828 return ERROR_INT("extension failed", __func__, 1);
829 }
830
831 if ((prevdew = dewarpaGetDewarp(dewa, pageno)) != NULL)
832 dewarpDestroy(&prevdew);
833 dewa->dewarp[pageno] = dew;
834
835 dew->sampling = dewa->sampling;
836 dew->redfactor = dewa->redfactor;
837 dew->minlines = dewa->minlines;
838
839 /* Get the dimensions of the sampled array. This will be
840 * stored in an fpix, and the input resolution version is
841 * guaranteed to be larger than pixs. However, if you
842 * want to apply the disparity to an image with a width
843 * w > nx * s - 2 * s + 2
844 * you will need to extend the input res fpix.
845 * And similarly for h. */
846 dew->nx = (dew->w + 2 * dew->sampling - 2) / dew->sampling;
847 dew->ny = (dew->h + 2 * dew->sampling - 2) / dew->sampling;
848 return 0;
849 }
850
851
852 /*!
853 * \brief dewarpaExtendArraysToSize()
854 *
855 * \param[in] dewa
856 * \param[in] size new size of dewarpa array
857 * \return 0 if OK; 1 on error
858 *
859 * <pre>
860 * Notes:
861 * (1) If necessary, reallocs main and cache dewarpa ptr arrays to %size.
862 * </pre>
863 */
864 static l_int32
865 dewarpaExtendArraysToSize(L_DEWARPA *dewa,
866 l_int32 size)
867 {
868 if (!dewa)
869 return ERROR_INT("dewa not defined", __func__, 1);
870
871 if (size > dewa->nalloc) {
872 if ((dewa->dewarp = (L_DEWARP **)reallocNew((void **)&dewa->dewarp,
873 sizeof(L_DEWARP *) * dewa->nalloc,
874 size * sizeof(L_DEWARP *))) == NULL)
875 return ERROR_INT("new ptr array not returned", __func__, 1);
876 if ((dewa->dewarpcache =
877 (L_DEWARP **)reallocNew((void **)&dewa->dewarpcache,
878 sizeof(L_DEWARP *) * dewa->nalloc,
879 size * sizeof(L_DEWARP *))) == NULL)
880 return ERROR_INT("new ptr cache array not returned", __func__, 1);
881 dewa->nalloc = size;
882 }
883 return 0;
884 }
885
886
887 /*!
888 * \brief dewarpaGetDewarp()
889 *
890 * \param[in] dewa populated with dewarp structs for pages
891 * \param[in] index into dewa: this is the pageno
892 * \return dew handle; still owned by dewa, or NULL on error
893 */
894 L_DEWARP *
895 dewarpaGetDewarp(L_DEWARPA *dewa,
896 l_int32 index)
897 {
898 if (!dewa)
899 return (L_DEWARP *)ERROR_PTR("dewa not defined", __func__, NULL);
900 if (index < 0 || index > dewa->maxpage) {
901 L_ERROR("index = %d is invalid; max index = %d\n",
902 __func__, index, dewa->maxpage);
903 return NULL;
904 }
905
906 return dewa->dewarp[index];
907 }
908
909
910 /*----------------------------------------------------------------------*
911 * Setting parameters to control rendering from the model *
912 *----------------------------------------------------------------------*/
913 /*!
914 * \brief dewarpaSetCurvatures()
915 *
916 * \param[in] dewa
917 * \param[in] max_linecurv -1 for default
918 * \param[in] min_diff_linecurv -1 for default; 0 to accept all models
919 * \param[in] max_diff_linecurv -1 for default
920 * \param[in] max_edgecurv -1 for default; 0 to fit a line
921 * \param[in] max_diff_edgecurv -1 for default
922 * \param[in] max_edgeslope -1 for default
923 * \return 0 if OK, 1 on error
924 *
925 * <pre>
926 * Notes:
927 * (1) Approximating the line by a quadratic, the coefficient
928 * of the quadratic term is the curvature, and distance
929 * units are in pixels (of course). Curvatures are very
930 * small, so we multiply by 10^6 and express the constraints
931 * on the model curvatures in micro-units. The slope parameter
932 * is multiplied by 10^3 and expressed in milli-units.
933 * (2) This sets five curvature thresholds and a slope threshold
934 * for dewarping to take place. Use -1 for default values.
935 * * max_linecurv: the maximum absolute value of the vertical
936 * disparity line curvatures.
937 * * min_diff_linecurv: the minimum absolute value of the
938 * largest difference in vertical disparity line curvatures.
939 * Use a value of 0 to accept all models.
940 * * max_diff_linecurv: the maximum absolute value of the largest
941 * difference in vertical disparity line curvatures.
942 * * max_edgecurv: the maximum absolute value of the left and right
943 * edge curvature for the horizontal disparity. Use a value of
944 * zero to fit a straight line (zero curvature).
945 * * max_diff_edgecurv: the maximum absolute value of the difference
946 * between left and right edge curvature for the horizontal
947 * disparity. This value is ignored if max_edgecurve = 0.
948 * * max_edgeslope: the maximum slope coefficient for left and
949 * right line edges.
950 * (3) An image with a line curvature less than about 0.00001
951 * has fairly straight textlines. This is 10 micro-units.
952 * (4) For example, if %max_linecurv == 100, this would prevent dewarping
953 * if any of the lines has a curvature exceeding 100 micro-units.
954 * A model having maximum line curvature larger than about 150
955 * micro-units should probably not be used.
956 * (5) A model having a left or right edge curvature larger than
957 * about 50 micro-units should probably not be used. Set the
958 * parameter max_edgecurv = 0 for a linear LSF.
959 * </pre>
960 */
961 l_ok
962 dewarpaSetCurvatures(L_DEWARPA *dewa,
963 l_int32 max_linecurv,
964 l_int32 min_diff_linecurv,
965 l_int32 max_diff_linecurv,
966 l_int32 max_edgecurv,
967 l_int32 max_diff_edgecurv,
968 l_int32 max_edgeslope)
969 {
970 if (!dewa)
971 return ERROR_INT("dewa not defined", __func__, 1);
972
973 if (max_linecurv == -1)
974 dewa->max_linecurv = DefaultMaxLineCurv;
975 else
976 dewa->max_linecurv = L_ABS(max_linecurv);
977
978 if (min_diff_linecurv == -1)
979 dewa->min_diff_linecurv = DefaultMinDiffLineCurv;
980 else
981 dewa->min_diff_linecurv = L_ABS(min_diff_linecurv);
982
983 if (max_diff_linecurv == -1)
984 dewa->max_diff_linecurv = DefaultMaxDiffLineCurv;
985 else
986 dewa->max_diff_linecurv = L_ABS(max_diff_linecurv);
987
988 if (max_edgecurv == -1)
989 dewa->max_edgecurv = DefaultMaxEdgeCurv;
990 else
991 dewa->max_edgecurv = L_ABS(max_edgecurv);
992
993 if (max_diff_edgecurv == -1)
994 dewa->max_diff_edgecurv = DefaultMaxDiffEdgeCurv;
995 else
996 dewa->max_diff_edgecurv = L_ABS(max_diff_edgecurv);
997
998 if (max_edgeslope == -1)
999 dewa->max_edgeslope = DefaultMaxEdgeSlope;
1000 else
1001 dewa->max_edgeslope = L_ABS(max_edgeslope);
1002
1003 dewa->modelsready = 0; /* force validation */
1004 return 0;
1005 }
1006
1007
1008 /*!
1009 * \brief dewarpaUseBothArrays()
1010 *
1011 * \param[in] dewa
1012 * \param[in] useboth 0 for false, 1 for true
1013 * \return 0 if OK, 1 on error
1014 *
1015 * <pre>
1016 * Notes:
1017 * (1) This sets the useboth field. If set, this will attempt
1018 * to apply both vertical and horizontal disparity arrays.
1019 * Note that a model with only a vertical disparity array will
1020 * always be valid.
1021 * </pre>
1022 */
1023 l_ok
1024 dewarpaUseBothArrays(L_DEWARPA *dewa,
1025 l_int32 useboth)
1026 {
1027 if (!dewa)
1028 return ERROR_INT("dewa not defined", __func__, 1);
1029
1030 dewa->useboth = useboth;
1031 dewa->modelsready = 0; /* force validation */
1032 return 0;
1033 }
1034
1035
1036 /*!
1037 * \brief dewarpaSetCheckColumns()
1038 *
1039 * \param[in] dewa
1040 * \param[in] check_columns 0 for false, 1 for true
1041 * \return 0 if OK, 1 on error
1042 *
1043 * <pre>
1044 * Notes:
1045 * (1) This sets the 'check_columns" field. If set, and if
1046 * 'useboth' is set, this will count the number of text
1047 * columns. If the number is larger than 1, this will
1048 * prevent the application of horizontal disparity arrays
1049 * if they exist.
1050 * (2) The check_columns field is set to TRUE by default.
1051 * For horizontal disparity correction to take place on a
1052 * single column of text, you must have:
1053 * - a valid horizontal disparity array
1054 * - useboth = 1 (TRUE)
1055 * If there are multiple columns, in addition you need
1056 * - check_columns = 0 (FALSE)
1057 *
1058 * </pre>
1059 */
1060 l_ok
1061 dewarpaSetCheckColumns(L_DEWARPA *dewa,
1062 l_int32 check_columns)
1063 {
1064 if (!dewa)
1065 return ERROR_INT("dewa not defined", __func__, 1);
1066
1067 dewa->check_columns = check_columns;
1068 return 0;
1069 }
1070
1071
1072 /*!
1073 * \brief dewarpaSetMaxDistance()
1074 *
1075 * \param[in] dewa
1076 * \param[in] maxdist for using ref models
1077 * \return 0 if OK, 1 on error
1078 *
1079 * <pre>
1080 * Notes:
1081 * (1) This sets the maxdist field.
1082 * </pre>
1083 */
1084 l_ok
1085 dewarpaSetMaxDistance(L_DEWARPA *dewa,
1086 l_int32 maxdist)
1087 {
1088 if (!dewa)
1089 return ERROR_INT("dewa not defined", __func__, 1);
1090
1091 dewa->maxdist = maxdist;
1092 dewa->modelsready = 0; /* force validation */
1093 return 0;
1094 }
1095
1096
1097 /*----------------------------------------------------------------------*
1098 * Dewarp serialized I/O *
1099 *----------------------------------------------------------------------*/
1100 /*!
1101 * \brief dewarpRead()
1102 *
1103 * \param[in] filename
1104 * \return dew, or NULL on error
1105 */
1106 L_DEWARP *
1107 dewarpRead(const char *filename)
1108 {
1109 FILE *fp;
1110 L_DEWARP *dew;
1111
1112 if (!filename)
1113 return (L_DEWARP *)ERROR_PTR("filename not defined", __func__, NULL);
1114 if ((fp = fopenReadStream(filename)) == NULL)
1115 return (L_DEWARP *)ERROR_PTR_1("stream not opened",
1116 filename, __func__, NULL);
1117
1118 if ((dew = dewarpReadStream(fp)) == NULL) {
1119 fclose(fp);
1120 return (L_DEWARP *)ERROR_PTR_1("dew not read",
1121 filename, __func__, NULL);
1122 }
1123
1124 fclose(fp);
1125 return dew;
1126 }
1127
1128
1129 /*!
1130 * \brief dewarpReadStream()
1131 *
1132 * \param[in] fp file stream
1133 * \return dew dewarp, or NULL on error
1134 *
1135 * <pre>
1136 * Notes:
1137 * (1) The dewarp struct is stored in minimized format, with only
1138 * subsampled disparity arrays.
1139 * (2) The sampling and extra horizontal disparity parameters are
1140 * stored here. During generation of the dewarp struct, they
1141 * are passed in from the dewarpa. In readback, it is assumed
1142 * that they are (a) the same for each page and (b) the same
1143 * as the values used to create the dewarpa.
1144 * </pre>
1145 */
1146 L_DEWARP *
1147 dewarpReadStream(FILE *fp)
1148 {
1149 l_int32 version, sampling, redfactor, minlines, pageno, hasref, refpage;
1150 l_int32 w, h, nx, ny, vdispar, hdispar, nlines;
1151 l_int32 mincurv, maxcurv, leftslope, rightslope, leftcurv, rightcurv;
1152 L_DEWARP *dew;
1153 FPIX *fpixv = NULL, *fpixh = NULL;
1154
1155 if (!fp)
1156 return (L_DEWARP *)ERROR_PTR("stream not defined", __func__, NULL);
1157
1158 if (fscanf(fp, "\nDewarp Version %d\n", &version) != 1)
1159 return (L_DEWARP *)ERROR_PTR("not a dewarp file", __func__, NULL);
1160 if (version != DEWARP_VERSION_NUMBER)
1161 return (L_DEWARP *)ERROR_PTR("invalid dewarp version", __func__, NULL);
1162 if (fscanf(fp, "pageno = %d\n", &pageno) != 1)
1163 return (L_DEWARP *)ERROR_PTR("read fail for pageno", __func__, NULL);
1164 if (fscanf(fp, "hasref = %d, refpage = %d\n", &hasref, &refpage) != 2)
1165 return (L_DEWARP *)ERROR_PTR("read fail for hasref, refpage",
1166 __func__, NULL);
1167 if (fscanf(fp, "sampling = %d, redfactor = %d\n", &sampling, &redfactor)
1168 != 2)
1169 return (L_DEWARP *)ERROR_PTR("read fail for sampling/redfactor",
1170 __func__, NULL);
1171 if (fscanf(fp, "nlines = %d, minlines = %d\n", &nlines, &minlines) != 2)
1172 return (L_DEWARP *)ERROR_PTR("read fail for nlines/minlines",
1173 __func__, NULL);
1174 if (fscanf(fp, "w = %d, h = %d\n", &w, &h) != 2)
1175 return (L_DEWARP *)ERROR_PTR("read fail for w, h", __func__, NULL);
1176 if (fscanf(fp, "nx = %d, ny = %d\n", &nx, &ny) != 2)
1177 return (L_DEWARP *)ERROR_PTR("read fail for nx, ny", __func__, NULL);
1178 if (fscanf(fp, "vert_dispar = %d, horiz_dispar = %d\n", &vdispar, &hdispar)
1179 != 2)
1180 return (L_DEWARP *)ERROR_PTR("read fail for flags", __func__, NULL);
1181 if (vdispar) {
1182 if (fscanf(fp, "min line curvature = %d, max line curvature = %d\n",
1183 &mincurv, &maxcurv) != 2)
1184 return (L_DEWARP *)ERROR_PTR("read fail for mincurv & maxcurv",
1185 __func__, NULL);
1186 }
1187 if (hdispar) {
1188 if (fscanf(fp, "left edge slope = %d, right edge slope = %d\n",
1189 &leftslope, &rightslope) != 2)
1190 return (L_DEWARP *)ERROR_PTR("read fail for leftslope & rightslope",
1191 __func__, NULL);
1192 if (fscanf(fp, "left edge curvature = %d, right edge curvature = %d\n",
1193 &leftcurv, &rightcurv) != 2)
1194 return (L_DEWARP *)ERROR_PTR("read fail for leftcurv & rightcurv",
1195 __func__, NULL);
1196 }
1197 if (vdispar) {
1198 if ((fpixv = fpixReadStream(fp)) == NULL)
1199 return (L_DEWARP *)ERROR_PTR("read fail for vdispar",
1200 __func__, NULL);
1201 }
1202 if (hdispar) {
1203 if ((fpixh = fpixReadStream(fp)) == NULL)
1204 return (L_DEWARP *)ERROR_PTR("read fail for hdispar",
1205 __func__, NULL);
1206 }
1207 getc(fp);
1208
1209 dew = (L_DEWARP *)LEPT_CALLOC(1, sizeof(L_DEWARP));
1210 dew->w = w;
1211 dew->h = h;
1212 dew->pageno = pageno;
1213 dew->sampling = sampling;
1214 dew->redfactor = redfactor;
1215 dew->minlines = minlines;
1216 dew->nlines = nlines;
1217 dew->hasref = hasref;
1218 dew->refpage = refpage;
1219 if (hasref == 0) /* any dew without a ref has an actual model */
1220 dew->vsuccess = 1;
1221 dew->nx = nx;
1222 dew->ny = ny;
1223 if (vdispar) {
1224 dew->mincurv = mincurv;
1225 dew->maxcurv = maxcurv;
1226 dew->vsuccess = 1;
1227 dew->sampvdispar = fpixv;
1228 }
1229 if (hdispar) {
1230 dew->leftslope = leftslope;
1231 dew->rightslope = rightslope;
1232 dew->leftcurv = leftcurv;
1233 dew->rightcurv = rightcurv;
1234 dew->hsuccess = 1;
1235 dew->samphdispar = fpixh;
1236 }
1237
1238 return dew;
1239 }
1240
1241
1242 /*!
1243 * \brief dewarpReadMem()
1244 *
1245 * \param[in] data serialization of dewarp
1246 * \param[in] size of data in bytes
1247 * \return dew dewarp, or NULL on error
1248 */
1249 L_DEWARP *
1250 dewarpReadMem(const l_uint8 *data,
1251 size_t size)
1252 {
1253 FILE *fp;
1254 L_DEWARP *dew;
1255
1256 if (!data)
1257 return (L_DEWARP *)ERROR_PTR("data not defined", __func__, NULL);
1258 if ((fp = fopenReadFromMemory(data, size)) == NULL)
1259 return (L_DEWARP *)ERROR_PTR("stream not opened", __func__, NULL);
1260
1261 dew = dewarpReadStream(fp);
1262 fclose(fp);
1263 if (!dew) L_ERROR("dew not read\n", __func__);
1264 return dew;
1265 }
1266
1267
1268 /*!
1269 * \brief dewarpWrite()
1270 *
1271 * \param[in] filename
1272 * \param[in] dew
1273 * \return 0 if OK, 1 on error
1274 */
1275 l_ok
1276 dewarpWrite(const char *filename,
1277 L_DEWARP *dew)
1278 {
1279 l_int32 ret;
1280 FILE *fp;
1281
1282 if (!filename)
1283 return ERROR_INT("filename not defined", __func__, 1);
1284 if (!dew)
1285 return ERROR_INT("dew not defined", __func__, 1);
1286
1287 if ((fp = fopenWriteStream(filename, "wb")) == NULL)
1288 return ERROR_INT_1("stream not opened", filename, __func__, 1);
1289 ret = dewarpWriteStream(fp, dew);
1290 fclose(fp);
1291 if (ret)
1292 return ERROR_INT_1("dew not written to stream", filename, __func__, 1);
1293 return 0;
1294 }
1295
1296
1297 /*!
1298 * \brief dewarpWriteStream()
1299 *
1300 * \param[in] fp file stream opened for "wb"
1301 * \param[in] dew
1302 * \return 0 if OK, 1 on error
1303 *
1304 * <pre>
1305 * Notes:
1306 * (1) This should not be written if there is no sampled
1307 * vertical disparity array, which means that no model has
1308 * been built for this page.
1309 * </pre>
1310 */
1311 l_ok
1312 dewarpWriteStream(FILE *fp,
1313 L_DEWARP *dew)
1314 {
1315 l_int32 vdispar, hdispar;
1316
1317 if (!fp)
1318 return ERROR_INT("stream not defined", __func__, 1);
1319 if (!dew)
1320 return ERROR_INT("dew not defined", __func__, 1);
1321
1322 fprintf(fp, "\nDewarp Version %d\n", DEWARP_VERSION_NUMBER);
1323 fprintf(fp, "pageno = %d\n", dew->pageno);
1324 fprintf(fp, "hasref = %d, refpage = %d\n", dew->hasref, dew->refpage);
1325 fprintf(fp, "sampling = %d, redfactor = %d\n",
1326 dew->sampling, dew->redfactor);
1327 fprintf(fp, "nlines = %d, minlines = %d\n", dew->nlines, dew->minlines);
1328 fprintf(fp, "w = %d, h = %d\n", dew->w, dew->h);
1329 fprintf(fp, "nx = %d, ny = %d\n", dew->nx, dew->ny);
1330 vdispar = (dew->sampvdispar) ? 1 : 0;
1331 hdispar = (dew->samphdispar) ? 1 : 0;
1332 fprintf(fp, "vert_dispar = %d, horiz_dispar = %d\n", vdispar, hdispar);
1333 if (vdispar)
1334 fprintf(fp, "min line curvature = %d, max line curvature = %d\n",
1335 dew->mincurv, dew->maxcurv);
1336 if (hdispar) {
1337 fprintf(fp, "left edge slope = %d, right edge slope = %d\n",
1338 dew->leftslope, dew->rightslope);
1339 fprintf(fp, "left edge curvature = %d, right edge curvature = %d\n",
1340 dew->leftcurv, dew->rightcurv);
1341 }
1342 if (vdispar) fpixWriteStream(fp, dew->sampvdispar);
1343 if (hdispar) fpixWriteStream(fp, dew->samphdispar);
1344 fprintf(fp, "\n");
1345
1346 if (!vdispar)
1347 L_WARNING("no disparity arrays!\n", __func__);
1348 return 0;
1349 }
1350
1351
1352 /*!
1353 * \brief dewarpWriteMem()
1354 *
1355 * \param[out] pdata data of serialized dewarp (not ascii)
1356 * \param[out] psize size of returned data
1357 * \param[in] dew
1358 * \return 0 if OK, 1 on error
1359 *
1360 * <pre>
1361 * Notes:
1362 * (1) Serializes a dewarp in memory and puts the result in a buffer.
1363 * </pre>
1364 */
1365 l_ok
1366 dewarpWriteMem(l_uint8 **pdata,
1367 size_t *psize,
1368 L_DEWARP *dew)
1369 {
1370 l_int32 ret;
1371 FILE *fp;
1372
1373 if (pdata) *pdata = NULL;
1374 if (psize) *psize = 0;
1375 if (!pdata)
1376 return ERROR_INT("&data not defined", __func__, 1);
1377 if (!psize)
1378 return ERROR_INT("&size not defined", __func__, 1);
1379 if (!dew)
1380 return ERROR_INT("dew not defined", __func__, 1);
1381
1382 #if HAVE_FMEMOPEN
1383 if ((fp = open_memstream((char **)pdata, psize)) == NULL)
1384 return ERROR_INT("stream not opened", __func__, 1);
1385 ret = dewarpWriteStream(fp, dew);
1386 fputc('\0', fp);
1387 fclose(fp);
1388 if (*psize > 0) *psize = *psize - 1;
1389 #else
1390 L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__);
1391 #ifdef _WIN32
1392 if ((fp = fopenWriteWinTempfile()) == NULL)
1393 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1394 #else
1395 if ((fp = tmpfile()) == NULL)
1396 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1397 #endif /* _WIN32 */
1398 ret = dewarpWriteStream(fp, dew);
1399 rewind(fp);
1400 *pdata = l_binaryReadStream(fp, psize);
1401 fclose(fp);
1402 #endif /* HAVE_FMEMOPEN */
1403 return ret;
1404 }
1405
1406
1407 /*----------------------------------------------------------------------*
1408 * Dewarpa serialized I/O *
1409 *----------------------------------------------------------------------*/
1410 /*!
1411 * \brief dewarpaRead()
1412 *
1413 * \param[in] filename
1414 * \return dewa, or NULL on error
1415 */
1416 L_DEWARPA *
1417 dewarpaRead(const char *filename)
1418 {
1419 FILE *fp;
1420 L_DEWARPA *dewa;
1421
1422 if (!filename)
1423 return (L_DEWARPA *)ERROR_PTR("filename not defined", __func__, NULL);
1424 if ((fp = fopenReadStream(filename)) == NULL)
1425 return (L_DEWARPA *)ERROR_PTR_1("stream not opened",
1426 filename, __func__, NULL);
1427
1428 if ((dewa = dewarpaReadStream(fp)) == NULL) {
1429 fclose(fp);
1430 return (L_DEWARPA *)ERROR_PTR_1("dewa not read",
1431 filename, __func__, NULL);
1432 }
1433
1434 fclose(fp);
1435 return dewa;
1436 }
1437
1438
1439 /*!
1440 * \brief dewarpaReadStream()
1441 *
1442 * \param[in] fp file stream
1443 * \return dewa, or NULL on error
1444 *
1445 * <pre>
1446 * Notes:
1447 * (1) The serialized dewarp contains a Numa that gives the
1448 * (increasing) page number of the dewarp structs that are
1449 * contained.
1450 * (2) Reference pages are added in after readback.
1451 * </pre>
1452 */
1453 L_DEWARPA *
1454 dewarpaReadStream(FILE *fp)
1455 {
1456 l_int32 i, version, ndewarp, maxpage;
1457 l_int32 sampling, redfactor, minlines, maxdist, useboth;
1458 l_int32 max_linecurv, min_diff_linecurv, max_diff_linecurv;
1459 l_int32 max_edgeslope, max_edgecurv, max_diff_edgecurv;
1460 L_DEWARP *dew;
1461 L_DEWARPA *dewa;
1462 NUMA *namodels;
1463
1464 if (!fp)
1465 return (L_DEWARPA *)ERROR_PTR("stream not defined", __func__, NULL);
1466
1467 if (fscanf(fp, "\nDewarpa Version %d\n", &version) != 1)
1468 return (L_DEWARPA *)ERROR_PTR("not a dewarpa file", __func__, NULL);
1469 if (version != DEWARP_VERSION_NUMBER)
1470 return (L_DEWARPA *)ERROR_PTR("invalid dewarp version", __func__, NULL);
1471
1472 if (fscanf(fp, "ndewarp = %d, maxpage = %d\n", &ndewarp, &maxpage) != 2)
1473 return (L_DEWARPA *)ERROR_PTR("read fail for maxpage+", __func__, NULL);
1474 if (ndewarp < 1)
1475 return (L_DEWARPA *)ERROR_PTR("pages not >= 1", __func__, NULL);
1476 if (ndewarp > MaxPtrArraySize)
1477 return (L_DEWARPA *)ERROR_PTR("too many pages", __func__, NULL);
1478 if (fscanf(fp,
1479 "sampling = %d, redfactor = %d, minlines = %d, maxdist = %d\n",
1480 &sampling, &redfactor, &minlines, &maxdist) != 4)
1481 return (L_DEWARPA *)ERROR_PTR("read fail for 4 params", __func__, NULL);
1482 if (fscanf(fp,
1483 "max_linecurv = %d, min_diff_linecurv = %d, max_diff_linecurv = %d\n",
1484 &max_linecurv, &min_diff_linecurv, &max_diff_linecurv) != 3)
1485 return (L_DEWARPA *)ERROR_PTR("read fail for linecurv", __func__, NULL);
1486 if (fscanf(fp,
1487 "max_edgeslope = %d, max_edgecurv = %d, max_diff_edgecurv = %d\n",
1488 &max_edgeslope, &max_edgecurv, &max_diff_edgecurv) != 3)
1489 return (L_DEWARPA *)ERROR_PTR("read fail for edgecurv", __func__, NULL);
1490 if (fscanf(fp, "fullmodel = %d\n", &useboth) != 1)
1491 return (L_DEWARPA *)ERROR_PTR("read fail for useboth", __func__, NULL);
1492
1493 dewa = dewarpaCreate(maxpage + 1, sampling, redfactor, minlines, maxdist);
1494 dewa->maxpage = maxpage;
1495 dewa->max_linecurv = max_linecurv;
1496 dewa->min_diff_linecurv = min_diff_linecurv;
1497 dewa->max_diff_linecurv = max_diff_linecurv;
1498 dewa->max_edgeslope = max_edgeslope;
1499 dewa->max_edgecurv = max_edgecurv;
1500 dewa->max_diff_edgecurv = max_diff_edgecurv;
1501 dewa->useboth = useboth;
1502 namodels = numaCreate(ndewarp);
1503 dewa->namodels = namodels;
1504 for (i = 0; i < ndewarp; i++) {
1505 if ((dew = dewarpReadStream(fp)) == NULL) {
1506 L_ERROR("read fail for dew[%d]\n", __func__, i);
1507 dewarpaDestroy(&dewa);
1508 return NULL;
1509 }
1510 dewarpaInsertDewarp(dewa, dew);
1511 numaAddNumber(namodels, dew->pageno);
1512 }
1513
1514 /* Validate the models and insert reference models */
1515 dewarpaInsertRefModels(dewa, 0, 0);
1516 return dewa;
1517 }
1518
1519
1520 /*!
1521 * \brief dewarpaReadMem()
1522 *
1523 * \param[in] data serialization of dewarpa
1524 * \param[in] size of data in bytes
1525 * \return dewa dewarpa, or NULL on error
1526 */
1527 L_DEWARPA *
1528 dewarpaReadMem(const l_uint8 *data,
1529 size_t size)
1530 {
1531 FILE *fp;
1532 L_DEWARPA *dewa;
1533
1534 if (!data)
1535 return (L_DEWARPA *)ERROR_PTR("data not defined", __func__, NULL);
1536 if ((fp = fopenReadFromMemory(data, size)) == NULL)
1537 return (L_DEWARPA *)ERROR_PTR("stream not opened", __func__, NULL);
1538
1539 dewa = dewarpaReadStream(fp);
1540 fclose(fp);
1541 if (!dewa) L_ERROR("dewa not read\n", __func__);
1542 return dewa;
1543 }
1544
1545
1546 /*!
1547 * \brief dewarpaWrite()
1548 *
1549 * \param[in] filename
1550 * \param[in] dewa
1551 * \return 0 if OK, 1 on error
1552 */
1553 l_ok
1554 dewarpaWrite(const char *filename,
1555 L_DEWARPA *dewa)
1556 {
1557 l_int32 ret;
1558 FILE *fp;
1559
1560 if (!filename)
1561 return ERROR_INT("filename not defined", __func__, 1);
1562 if (!dewa)
1563 return ERROR_INT("dewa not defined", __func__, 1);
1564
1565 if ((fp = fopenWriteStream(filename, "wb")) == NULL)
1566 return ERROR_INT_1("stream not opened", filename, __func__, 1);
1567 ret = dewarpaWriteStream(fp, dewa);
1568 fclose(fp);
1569 if (ret)
1570 return ERROR_INT_1("dewa not written to stream", filename, __func__, 1);
1571 return 0;
1572 }
1573
1574
1575 /*!
1576 * \brief dewarpaWriteStream()
1577 *
1578 * \param[in] fp file stream opened for "wb"
1579 * \param[in] dewa
1580 * \return 0 if OK, 1 on error
1581 */
1582 l_ok
1583 dewarpaWriteStream(FILE *fp,
1584 L_DEWARPA *dewa)
1585 {
1586 l_int32 ndewarp, i, pageno;
1587
1588 if (!fp)
1589 return ERROR_INT("stream not defined", __func__, 1);
1590 if (!dewa)
1591 return ERROR_INT("dewa not defined", __func__, 1);
1592
1593 /* Generate the list of page numbers for which a model exists.
1594 * Note that no attempt is made to determine if the model is
1595 * valid, because that determination is associated with
1596 * using the model to remove the warping, which typically
1597 * can happen later, after all the models have been built. */
1598 dewarpaListPages(dewa);
1599 if (!dewa->namodels)
1600 return ERROR_INT("dewa->namodels not made", __func__, 1);
1601 ndewarp = numaGetCount(dewa->namodels); /* with actual page models */
1602
1603 fprintf(fp, "\nDewarpa Version %d\n", DEWARP_VERSION_NUMBER);
1604 fprintf(fp, "ndewarp = %d, maxpage = %d\n", ndewarp, dewa->maxpage);
1605 fprintf(fp, "sampling = %d, redfactor = %d, minlines = %d, maxdist = %d\n",
1606 dewa->sampling, dewa->redfactor, dewa->minlines, dewa->maxdist);
1607 fprintf(fp,
1608 "max_linecurv = %d, min_diff_linecurv = %d, max_diff_linecurv = %d\n",
1609 dewa->max_linecurv, dewa->min_diff_linecurv, dewa->max_diff_linecurv);
1610 fprintf(fp,
1611 "max_edgeslope = %d, max_edgecurv = %d, max_diff_edgecurv = %d\n",
1612 dewa->max_edgeslope, dewa->max_edgecurv, dewa->max_diff_edgecurv);
1613 fprintf(fp, "fullmodel = %d\n", dewa->useboth);
1614 for (i = 0; i < ndewarp; i++) {
1615 numaGetIValue(dewa->namodels, i, &pageno);
1616 dewarpWriteStream(fp, dewarpaGetDewarp(dewa, pageno));
1617 }
1618
1619 return 0;
1620 }
1621
1622
1623 /*!
1624 * \brief dewarpaWriteMem()
1625 *
1626 * \param[out] pdata data of serialized dewarpa (not ascii)
1627 * \param[out] psize size of returned data
1628 * \param[in] dewa
1629 * \return 0 if OK, 1 on error
1630 *
1631 * <pre>
1632 * Notes:
1633 * (1) Serializes a dewarpa in memory and puts the result in a buffer.
1634 * </pre>
1635 */
1636 l_ok
1637 dewarpaWriteMem(l_uint8 **pdata,
1638 size_t *psize,
1639 L_DEWARPA *dewa)
1640 {
1641 l_int32 ret;
1642 FILE *fp;
1643
1644 if (pdata) *pdata = NULL;
1645 if (psize) *psize = 0;
1646 if (!pdata)
1647 return ERROR_INT("&data not defined", __func__, 1);
1648 if (!psize)
1649 return ERROR_INT("&size not defined", __func__, 1);
1650 if (!dewa)
1651 return ERROR_INT("dewa not defined", __func__, 1);
1652
1653 #if HAVE_FMEMOPEN
1654 if ((fp = open_memstream((char **)pdata, psize)) == NULL)
1655 return ERROR_INT("stream not opened", __func__, 1);
1656 ret = dewarpaWriteStream(fp, dewa);
1657 fputc('\0', fp);
1658 fclose(fp);
1659 if (*psize > 0) *psize = *psize - 1;
1660 #else
1661 L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__);
1662 #ifdef _WIN32
1663 if ((fp = fopenWriteWinTempfile()) == NULL)
1664 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1665 #else
1666 if ((fp = tmpfile()) == NULL)
1667 return ERROR_INT("tmpfile stream not opened", __func__, 1);
1668 #endif /* _WIN32 */
1669 ret = dewarpaWriteStream(fp, dewa);
1670 rewind(fp);
1671 *pdata = l_binaryReadStream(fp, psize);
1672 fclose(fp);
1673 #endif /* HAVE_FMEMOPEN */
1674 return ret;
1675 }