comparison mupdf-source/source/fitz/deskew.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24
25 //#define DEBUG_DESKEWER
26
27 //int errprintf_nomem(const char *string, ...);
28 //#define printf errprintf_nomem
29
30 /* We up the mallocs by a small amount to allow for SSE
31 * reading overruns. */
32 #define SSE_SLOP 16
33
34 /* Some notes on the theory.
35 *
36 * It is "well known" that a rotation (of between -90 and 90
37 * degrees at least) can be performed as a process of 3 shears,
38 * one on X, one on Y, one on X. For small angles (such as those
39 * found when deskewing scanned pages), we can do it in just 2;
40 * one on X, then one on Y.
41 *
42 * The standard rotation matrix is:
43 *
44 * R = ( cos(t) -sin(t) )
45 * ( sin(t) cos(t) )
46 *
47 * We can use the following decomposition (thanks to Michael
48 * Vrhel for pointing this out):
49 *
50 * = ( 1 0 ) ( cos(t) -sin(t) )
51 * ( tan(t) 1/cos(t) ) ( 0 1 )
52 *
53 * (noting that tan(t) = sin(t)/cos(t), and sin(t).sin(t)+cos(t)cos(t) = 1)
54 *
55 * = ( 1 0 ) ( 1 0 ) ( 1 -tan(t) ) ( cos(t) 0 )
56 * ( 0 1/cos(t) ) ( sin(t) 1 ) ( 0 1 ) ( 0 1 )
57 *
58 * = scale(Y) . sheer(Y) . sheer(X) . scale(X)
59 *
60 * The process we use to do sub-pixel capable shears allows us to
61 * incorporate 1 dimensional scales for free. If the expected user of
62 * this API is a scanner driver correcting for small errors in document
63 * alignment, let's consider that they may also want to expand/reduce
64 * at the same time.
65 *
66 * I'm not sure whether pre or post scales will be most useful, so
67 * let's do the maths allowing for both, as it'll end up as no
68 * more work.
69 *
70 * Our transformation pipeline can then be naively expressed as:
71 *
72 * (u') = (post_x 0 ) (1 0) (1 0) (1 c) (r 0) (pre_x 0 ) (u)
73 * (v') (0 post_y) (0 R) (b 1) (0 1) (0 1) (0 pre_y) (v)
74 *
75 * where b = sin(t), c = -tan(t), R = 1/cos(t), r = cos(t).
76 *
77 * Rearranging gives us that:
78 *
79 * (u') = (post_x 0 ) (1 0) (1 c) (r.pre_x 0 ) (u)
80 * (v') (0 post_y.R) (b 1) (0 1) (0 pre_y) (v)
81 *
82 * So, let X = post_x, Y = post_y.R, x = r.pre_x, y = pre_y and
83 * we have:
84 *
85 * (u') = (X 0) (1 0) (1 c) (x 0) (u)
86 * (v') (0 Y) (b 1) (0 1) (0 y) (v)
87 *
88 * We need each step in the operation to be of the form:
89 *
90 * (C D) for X and (1 0) for Y
91 * (0 1) (E F)
92 *
93 * for some constants C, D, E and F to work with our 1-d scale/shear
94 * mechanism. So, rearrange the pipeline:
95 *
96 * P = (X 0) (1 0) (1 c) (x 0)
97 * (0 Y) (b 1) (0 1) (0 y)
98 *
99 * = (X 0) (x cy)
100 * (bY Y) (0 y )
101 *
102 * = (1 0) (X 0) (1 0) (x cy)
103 * (bY/X Y) (0 1) (0 y) (0 1 )
104 *
105 * = (1 0) (1 0) (X 0) (x cy)
106 * (bY/X Y) (0 y) (0 1) (0 1 )
107 *
108 * = (1 0 ) (xX cyX) = (xX cyX ) = (xX cyX )
109 * (bY/X yY) (0 1 ) (bxY bcyY + yY) (bxY yY(1+bc))
110 *
111 * The first scale/shear involves us taking an input line of data,
112 * and scaling it into temporary storage. Every line of data supplied
113 * has the same scale done to it, but at a different X offset. We
114 * will therefore have to generate sets of weights for several different
115 * subpixel positions, and pick the appropriate one. 4 or 8 should
116 * be enough?
117 *
118 * For the second scale/shear, we don't run down individual scan-columns.
119 * Instead, we use a block of lines (the output from the first phase) and
120 * traverse through it diagonally, copying data out - effectively
121 * producing 1 pixel output for each of the 'scan columns'.
122 *
123 * This gives us a block of scanlines that we can apply the third
124 * shear to.
125 *
126 * Let us now consider the output position of each of the corners of
127 * our src_w * src_h input rectangle under this transformation. For
128 * simplicity of notation, let's call these just 'w' and 'h'.
129 *
130 * We know that by the properties of 2x2 matrices, (0, 0) maps to
131 * (0, 0) and (w, h) maps the the sum of where (w,0) (0,h) map to,
132 * so we only need calculate the image of 2 corners.
133 *
134 * (1 0 ) (xX cyX) (w 0)
135 * (bY/X Yy) (0 1 ) (0 h)
136 *
137 * = (1 0 ) (wxX chyX)
138 * (bY/X Yy) (0 h )
139 *
140 * = (wxX chyX )
141 * (bwxY cbhYy + hYy)
142 *
143 * = (C E)
144 * (D F)
145 *
146 * where C = wxX
147 * D = bwxY
148 * E = chyX
149 * F = cbhYy + hYy = hYy (1 + cb)
150 *
151 * Some ASCII art to illustrate the images after the different
152 * stages:
153 *
154 * For angles 0..90:
155 *
156 * *--x => *----x => __.x
157 * | | \ \ *' \
158 * o--+ o----+ \ __.+
159 * o'
160 *
161 * For angles 0 to -90:
162 *
163 * *--x => *----x => *.__
164 * | | / / / 'x
165 * o--+ o----+ o.__ /
166 * '+
167 *
168 * How wide does temporary buffer 1 (where the results of the first
169 * scale/shear are stored) need to be?
170 *
171 * From the above diagrams we can see that for angles 0..90 it needs
172 * to be wide enough to stretch horizontally from * to +. i.e. wxX + chyX
173 *
174 * Similarly for angles 0..-90, it needs to be wide enough to stretch
175 * horizontally from o to x. i.e. wxX - chyX
176 *
177 * Given the properties of tan, we can see that it's wxX + |c|hyX in
178 * all cases.
179 *
180 *
181 * How tall is the image after the first scale/shear?
182 *
183 * Well, the height doesn't change, so src_h.
184 *
185 *
186 * How tall does temporary buffer 1 (where the results of the first
187 * scale/shear are stored) need to be?
188 *
189 * Consider the second stage. One of the scanlines produced will be
190 * made by reading in a line of pixels from (0, 0) to (wxX+|c|hyX, i)
191 * and outputting a horizontal line of pixels in the result. What is
192 * the value i?
193 *
194 * We know:
195 *
196 * (1 0 ) (wxX+|c|hyX) = (wx+|c|hy)
197 * (bY/X Yy) (i ) (0 )
198 *
199 * So: bY/X*(wxX+|c|hyX)) + iYy = 0
200 * bY *(wx +|c|hy )) + iYy = 0
201 *
202 * i = -b*(wx+|c|hy)/y (for non-zero Y and y)
203 * = -b*(tmp1_width)/Xy (for non-zero X, Y and y)
204 *
205 * (Sanity check: when t=0, b = 0, no lines needed. As the x
206 * scale increases, we need more lines. Seems sane.)
207 *
208 *
209 * How wide is the image after the second scale/shear?
210 *
211 * Well, the shear on Y doesn't change the width, so it's still
212 * wxX + |c|hyX.
213 *
214 *
215 * How tall is the entire image after second scale/shear?
216 *
217 * For angles 0 to 90, it's the difference in Y between o and x.
218 * i.e. cbhYy + hYy - bwxY
219 *
220 * For angles 0 to -90, it's the difference in Y between + and *.
221 * i.e. cbhYy + hYy + bwxY
222 *
223 * So cbhYy + hYy + |b|wxY in all cases.
224 * = Y*((1+cb)*hy + |b|wx)
225 *
226 *
227 * What is the size of the final image?
228 *
229 * W = wxX + |c|hyX
230 * H = cbhYy + hYy + bwxY
231 *
232 */
233
234
235 /*
236 Border handling:
237
238 We offer 3 styles of border handling, options 0,1,2.
239
240 0: Increase the size of the output image from that of the input image. No pixels are dropped, and new border pixels are generated with the given backdrop color.
241 This is the default, and the safest way to deskew as no pixels are lost.
242
243 1: Keep the size of the page content constant (modulo pre and post scales). Some pixels are dropped, and new border pixels are generated with the given backdrop color.
244
245 2: Decrease the size of the output image from that of the input image. Pixels are dropped. No new border pixels should be generated. If the centre of the input is
246 correct, then this should exactly return the correct unskewed original page.
247
248 */
249
250 #define WEIGHT_SHIFT 12
251 #define WEIGHT_SCALE (1<<WEIGHT_SHIFT)
252 #define WEIGHT_ROUND (1<<(WEIGHT_SHIFT-1))
253
254 /* Auxiliary structures. */
255 typedef struct
256 {
257 uint32_t index; /* index of first element in list of */
258 /* contributors */
259 uint16_t n; /* number of contributors */
260 /* (not multiplied by stride) */
261 uint8_t slow; /* Flag */
262 int32_t first_pixel; /* offset of first value in source data */
263 int32_t last_pixel; /* last pixel number */
264 } index_t;
265
266 #if ARCH_HAS_NEON
267 typedef int16_t weight_t;
268 #else
269 typedef int32_t weight_t;
270 #endif
271
272 typedef void (zoom_y_fn)(uint8_t * dst,
273 const uint8_t * __restrict tmp,
274 const index_t * __restrict index,
275 const weight_t * __restrict weights,
276 uint32_t width,
277 uint32_t channels,
278 uint32_t mod,
279 int32_t y);
280 typedef void (zoom_x_fn)(uint8_t * __restrict tmp,
281 const uint8_t * __restrict src,
282 const index_t * __restrict index,
283 const weight_t * __restrict weights,
284 uint32_t dst_w,
285 uint32_t src_w,
286 uint32_t channels,
287 const uint8_t * __restrict bg);
288
289 #define CLAMP(v, mn, mx)\
290 (v < mn ? mn : v > mx ? mx : v)
291
292 /* Include the cores */
293 #include "deskew_c.h"
294
295 #if ARCH_HAS_SSE
296 #include "deskew_sse.h"
297 #endif
298 #if ARCH_HAS_NEON
299 #include "deskew_neon.h"
300 #endif
301
302 enum {
303 SUB_PIX_X = 4,
304 SUB_PIX_Y = 4,
305 };
306
307 typedef struct
308 {
309 uint32_t channels;
310 uint32_t tmp_w;
311 uint32_t tmp_h;
312 uint32_t src_w;
313 uint32_t src_h;
314 uint32_t dst_w;
315 uint32_t dst_h;
316 uint32_t dst_w_borderless;
317 uint32_t dst_h_borderless;
318 uint32_t skip_l;
319 uint32_t skip_t;
320 index_t *index_x[SUB_PIX_X];
321 weight_t *weights_x[SUB_PIX_X];
322 index_t *index_y[SUB_PIX_Y];
323 weight_t *weights_y[SUB_PIX_Y];
324 uint32_t max_y_weights[SUB_PIX_Y];
325 uint8_t bg[FZ_MAX_COLORS];
326
327 double pre_x_scale;
328 double pre_y_scale;
329 double post_x_scale;
330 double post_y_scale;
331 double beta;
332 double gamma;
333 double chyX;
334 double extent1;
335 double w1;
336 double h2;
337 double diagonal_h;
338 double t_degrees;
339
340 uint32_t pre_fill_y;
341 double start_y;
342
343 zoom_x_fn *zoom_x;
344 zoom_y_fn *zoom_y;
345 } fz_deskewer;
346
347 static void
348 fill_bg(uint8_t *dst, const uint8_t *fill, uint32_t chan, uint32_t len)
349 {
350 while (len--)
351 {
352 memcpy(dst, fill, chan);
353 dst += chan;
354 }
355 }
356
357 #define B (1.0f / 3.0f)
358 #define C (1.0f / 3.0f)
359 static double
360 Mitchell_filter(double t)
361 {
362 double t2 = t * t;
363
364 if (t < 0)
365 t = -t;
366
367 if (t < 1)
368 return ((12 - 9 * B - 6 * C) * (t * t2) +
369 (-18 + 12 * B + 6 * C) * t2 +
370 (6 - 2 * B)) / 6;
371 else if (t < 2)
372 return ((-1 * B - 6 * C) * (t * t2) +
373 (6 * B + 30 * C) * t2 +
374 (-12 * B - 48 * C) * t +
375 (8 * B + 24 * C)) / 6;
376 else
377 return 0;
378 }
379
380 #define FILTER_WIDTH 2
381
382 static void
383 make_x_weights(fz_context *ctx,
384 index_t **indexp,
385 weight_t **weightsp,
386 uint32_t src_w,
387 uint32_t dst_w,
388 double factor,
389 uint32_t offset_f,
390 uint32_t offset_n,
391 int sse_slow)
392 {
393 double squeeze;
394 index_t *index;
395 weight_t *weights;
396 uint32_t i;
397 double offset = ((double)offset_f)/offset_n;
398 uint32_t idx;
399 uint32_t max_weights;
400
401 if (factor <= 1)
402 {
403 squeeze = 1;
404 max_weights = 1 + FILTER_WIDTH * 2;
405 }
406 else
407 {
408 squeeze = factor;
409 max_weights = (uint32_t)ceil(1 + squeeze * FILTER_WIDTH * 2);
410 if (max_weights > 10)
411 {
412 max_weights = 10;
413 squeeze = ((double)max_weights) / (FILTER_WIDTH * 2);
414 }
415 }
416
417 max_weights = (max_weights + 3) & ~3;
418
419 weights = (weight_t *)fz_malloc_aligned(ctx, sizeof(*weights) * max_weights * dst_w + SSE_SLOP, sizeof(weight_t) * 4);
420 memset(weights, 0, sizeof(*weights) * max_weights * dst_w);
421 fz_try(ctx)
422 index = (index_t *)fz_malloc(ctx, sizeof(*index) * dst_w + SSE_SLOP);
423 fz_catch(ctx)
424 {
425 fz_free_aligned(ctx, weights);
426 fz_rethrow(ctx);
427 }
428 *indexp = index;
429 *weightsp = weights;
430
431 idx = 0;
432 for (i = 0; i < dst_w; i++)
433 {
434 /* i is in 0..w (i.e. dst space).
435 * centre, left, right are in 0..src_w (i.e. src_space)
436 */
437 double centre = (i+0.5f)*factor - offset;
438 int32_t left = (int32_t)ceil(centre - squeeze*FILTER_WIDTH);
439 int32_t right = (int32_t)floor(centre + squeeze*FILTER_WIDTH);
440 int32_t j, k;
441
442 if ((centre - left) >= squeeze * FILTER_WIDTH)
443 left++;
444 if ((right - centre) >= squeeze * FILTER_WIDTH)
445 right--;
446
447 /* When we're calculating the second set of X weights, the subpixel adjustment can cause us to
448 * read too far to the right. Adjust for this hackily here. */
449 if (left > (int32_t)src_w) {
450 right -= left - src_w;
451 centre -= left - src_w;
452 left = src_w;
453 }
454
455 assert(right-left+1 <= (int)max_weights && right >= 0 && left <= (int32_t)src_w);
456 index->index = idx;
457 j = left;
458 if (j < 0)
459 {
460 left = -1;
461 weights[idx] = 0;
462 for (; j < 0; j++)
463 {
464 double f = (centre - j) / squeeze;
465 weights[idx] += (int32_t)(Mitchell_filter(f) * WEIGHT_SCALE / squeeze);
466 }
467 idx++;
468 }
469 k = right;
470 if (k > (int32_t)src_w)
471 k = (int32_t)src_w;
472 for (; j <= k; j++)
473 {
474 double f = (centre - j) / squeeze;
475 weights[idx++] = (int32_t)(Mitchell_filter(f) * WEIGHT_SCALE / squeeze);
476 }
477 for (; j < right; j++)
478 {
479 double f = (centre - j) / squeeze;
480 weights[idx-1] += (int32_t)(Mitchell_filter(f) * WEIGHT_SCALE / squeeze);
481 }
482 index->first_pixel = left;
483 index->last_pixel = k;
484 index->n = k-left+1;
485 index->slow = left < 0 || k >= (int32_t)src_w;
486 if (left + sse_slow > (int)src_w)
487 index->slow = 1;
488 idx = (idx + 3) & ~3;
489 index++;
490 }
491 }
492
493 /* The calculations here are different.
494 * We move from offset...offset+h1 in w steps.
495 * At each point, we calculate the weights vertically
496 * with a scale factor of dst_h/src_h.
497 */
498 static void
499 make_y_weights(fz_context *ctx,
500 index_t **indexp,
501 weight_t **weightsp,
502 uint32_t *max_weightsp,
503 uint32_t dst_w,
504 double factor,
505 double factor2,
506 uint32_t offset_f,
507 uint32_t offset_n,
508 uint32_t h)
509 {
510 double squeeze;
511 index_t *index;
512 weight_t *weights;
513 uint32_t i;
514 double offset = ((double)offset_f)/offset_n;
515 uint32_t idx;
516 uint32_t max_weights;
517
518 if (factor >= 1)
519 {
520 squeeze = 1;
521 max_weights = 1 + FILTER_WIDTH * 2;
522 }
523 else
524 {
525 squeeze = 1/factor;
526 max_weights = (uint32_t)ceil(squeeze * FILTER_WIDTH * 2);
527 if (max_weights > 10)
528 {
529 max_weights = 10;
530 squeeze = ((double)max_weights) / (FILTER_WIDTH * 2);
531 }
532 }
533
534 max_weights = (max_weights + 3) & ~3;
535
536 /* Ensure that we never try to access before 0 */
537 offset += (double)FILTER_WIDTH/squeeze;
538
539 weights = (weight_t *)fz_malloc_aligned(ctx, sizeof(*weights) * max_weights * dst_w, sizeof(weight_t) * 4);
540 fz_try(ctx)
541 index = (index_t *)fz_malloc(ctx, sizeof(*index) * dst_w);
542 fz_catch(ctx)
543 {
544 fz_free_aligned(ctx, weights);
545 fz_rethrow(ctx);
546 }
547 *indexp = index;
548 *weightsp = weights;
549 *max_weightsp = max_weights;
550
551 if (factor2 < 0)
552 offset -= (dst_w-1) * factor2;
553
554 idx = 0;
555 for (i = 0; i < dst_w; i++)
556 {
557 /* i is in 0..dst_w (i.e. dst space).
558 * centre, left, right are in 0..src_h (i.e. src_space)
559 */
560 double centre = (i+0.5f)*factor2 + offset;
561 int32_t left = (int32_t)ceil(centre - squeeze*FILTER_WIDTH);
562 int32_t right = (int32_t)floor(centre + squeeze*FILTER_WIDTH);
563 int32_t j;
564
565 if ((centre - left) >= squeeze * FILTER_WIDTH)
566 left++;
567 if ((right - centre) >= squeeze * FILTER_WIDTH)
568 right--;
569
570 assert(right-left+1 <= (int)max_weights);
571 index->index = idx;
572 for (j = left; j <= right; j++)
573 {
574 double f = (centre - j) / squeeze;
575 weights[idx++] = (int32_t)(Mitchell_filter(f) * WEIGHT_SCALE / squeeze);
576 }
577 index->last_pixel = right;
578 index->n = right-left+1;
579 if (left < 0)
580 left += h;
581 index->first_pixel = left;
582 index->slow = 0;
583 index++;
584 idx = (idx + 3) & ~3;
585 }
586 }
587
588 #ifdef DEBUG_DESKEWER
589 static void
590 dump_weights(const index_t *index,
591 const weight_t *weights,
592 uint32_t w,
593 const char *str)
594 {
595 uint32_t i;
596
597 printf("%s weights:\n", str);
598 for (i = 0; i < w; i++)
599 {
600 uint32_t j;
601 int32_t sum = 0;
602 uint32_t n = index[i].n;
603 uint32_t idx = index[i].index;
604 printf(" %d: %d->%d:", i, index[i].first_pixel, index[i].last_pixel);
605 for (j = 0; j < n; j++)
606 {
607 sum += weights[idx];
608 printf(" %x", weights[idx++]);
609 }
610 printf(" (%x)\n", sum);
611 }
612 }
613 #endif
614
615 static void
616 rejig_for_zoom_y1(fz_context *ctx, fz_deskewer *deskewer)
617 {
618 int i, k;
619 uint32_t j, z;
620
621 for (i = 0; i < SUB_PIX_Y; i++)
622 {
623 uint32_t num_w = deskewer->dst_w;
624 weight_t *new_weights = fz_malloc_aligned(ctx, num_w * deskewer->max_y_weights[i] *
625 sizeof(weight_t) * 4, sizeof(weight_t) * 4);
626 index_t *index;
627 weight_t *weights;
628
629 index = deskewer->index_y[i];
630 weights = deskewer->weights_y[i];
631 for (j = 0; j < num_w; j++)
632 {
633 weight_t *neww = new_weights + (index->index * 4);
634 uint32_t n = index[0].n;
635 for (z = 0; z < n; z++)
636 neww[4 * z] = weights[index->index + z];
637 for (k = 1; k < 4 && j + k < num_w; k++)
638 {
639 if (index[0].n != index[k].n)
640 break;
641 if (index[0].first_pixel != index[k].first_pixel)
642 break;
643 for (z = 0; z < n; z++)
644 neww[k + 4 * z] = weights[index[k].index + z];
645 }
646 /* So, we can do j to j+k-1 (inclusive) all in one hit. */
647 index->slow = k;
648 index++;
649 }
650 fz_free_aligned(ctx, deskewer->weights_y[i]);
651 deskewer->weights_y[i] = new_weights;
652 }
653 }
654
655 static void
656 fz_drop_deskewer(fz_context *ctx, fz_deskewer *deskewer)
657 {
658 int i;
659
660 if (!deskewer)
661 return;
662
663 for (i = 0; i < SUB_PIX_X; i++)
664 {
665 fz_free(ctx, deskewer->index_x[i]);
666 fz_free_aligned(ctx, deskewer->weights_x[i]);
667 }
668 for (i = 0; i < SUB_PIX_Y; i++)
669 {
670 fz_free(ctx, deskewer->index_y[i]);
671 fz_free_aligned(ctx, deskewer->weights_y[i]);
672 }
673
674 fz_free(ctx, deskewer);
675 }
676
677 static fz_deskewer *
678 fz_new_deskewer(fz_context *ctx,
679 unsigned int src_w, unsigned int src_h,
680 unsigned int *dst_w, unsigned int *dst_h,
681 double t_degrees, int border,
682 double pre_x_scale, double pre_y_scale,
683 double post_x_scale, double post_y_scale,
684 unsigned char *bg,
685 unsigned int channels)
686 {
687 fz_deskewer *deskewer;
688 double w1, h2, extent1, beta, gamma, chyX, one_plus_cb;
689 double diagonal_h, h2_div_Y;
690 int i;
691
692 #define SIMD_SWITCH_CONDITION 1
693
694 #if ARCH_HAS_SSE
695 #define SIMD_SWITCH(A,B,C) (SIMD_SWITCH_CONDITION ? (A) : (C))
696 #elif ARCH_HAS_NEON
697 #define SIMD_SWITCH(A,B,C) (SIMD_SWITCH_CONDITION ? (B) : (C))
698 #else
699 #define SIMD_SWITCH(A,B,C) (C)
700 #endif
701
702 deskewer = fz_malloc_struct(ctx, fz_deskewer);
703
704 deskewer->channels = channels;
705
706 /* Rotation coefficients */
707 gamma = -tan(t_degrees * M_PI / 180);
708 beta = sin(t_degrees * M_PI / 180);
709
710 /* After the first shear/scale, the image will be w1 x src_h in size. */
711 chyX = gamma * src_h * pre_y_scale * post_x_scale;
712 extent1 = pre_x_scale * post_x_scale * src_w;
713 w1 = extent1 + fabs(chyX);
714 diagonal_h = fabs(beta) * w1 / pre_y_scale / post_x_scale;
715
716 /* After the second shear/scale, the image will be w1 x h2 in size. */
717
718 /* Calculate the size of the destination buffer */
719 one_plus_cb = 1 + gamma * beta;
720 h2_div_Y = src_h * pre_y_scale * one_plus_cb + fabs(src_w * pre_x_scale * beta);
721 h2 = post_y_scale * h2_div_Y;
722
723 deskewer->tmp_h = (uint32_t)ceil(diagonal_h) + FILTER_WIDTH*2 + 1;
724 if (deskewer->tmp_h == 0) /* Allow for the zero degree case */
725 deskewer->tmp_h = 1;
726 deskewer->tmp_w = (uint32_t)ceil(extent1)+1; /* +1 allows for subpixel positioning */
727 deskewer->src_w = src_w;
728 deskewer->src_h = src_h;
729 deskewer->dst_w = (uint32_t)ceil(w1);
730 deskewer->dst_h = (uint32_t)ceil(h2);
731 deskewer->pre_x_scale = pre_x_scale;
732 deskewer->pre_y_scale = pre_y_scale;
733 deskewer->post_x_scale= post_x_scale;
734 deskewer->post_y_scale= post_y_scale;
735 deskewer->beta = beta;
736 deskewer->gamma = gamma;
737 deskewer->chyX = chyX;
738 deskewer->extent1 = extent1;
739 deskewer->w1 = w1;
740 deskewer->diagonal_h = diagonal_h;
741 deskewer->t_degrees = t_degrees;
742 deskewer->h2 = h2;
743 deskewer->skip_l = 0;
744 deskewer->skip_t = 0;
745 switch(border)
746 {
747 case 2:
748 {
749 int w = (int)(src_w * pre_x_scale * post_x_scale + 0.5);
750 int h = (int)(src_h * pre_y_scale * post_y_scale + 0.5);
751 deskewer->skip_l = (deskewer->dst_w - w);
752 deskewer->skip_t = (deskewer->dst_h - h);
753 *dst_w = w - deskewer->skip_l;
754 *dst_h = h - deskewer->skip_t;
755 break;
756 }
757 case 1:
758 {
759 int expected_w = (int)(src_w * pre_x_scale * post_x_scale + 0.5);
760 int expected_h = (int)(src_h * pre_y_scale * post_y_scale + 0.5);
761 deskewer->skip_l = (deskewer->dst_w - expected_w + 1) >> 1;
762 deskewer->skip_t = (deskewer->dst_h - expected_h + 1) >> 1;
763 *dst_w = expected_w;
764 *dst_h = expected_h;
765 break;
766 }
767 default:
768 case 0:
769 *dst_w = deskewer->dst_w;
770 *dst_h = deskewer->dst_h;
771 break;
772 }
773 deskewer->dst_w_borderless = *dst_w;
774 deskewer->dst_h_borderless = *dst_h;
775
776 fz_try(ctx)
777 {
778 memcpy(deskewer->bg, bg, channels);
779
780 /*
781 * Once we have scaled/sheared lines into the tmp1, we have
782 * data such as:
783 *
784 * -ve angles +ve angles
785 * +------+ or +-----+
786 * / / \ \
787 * / / \ \
788 * / / \ \
789 * / / \ \
790 * +-----+ +-----+
791 *
792 * We then need to copy data out into the output. This is done by
793 * reading a series of parallel diagonal lines out. The first
794 * one is as shown here:
795 *
796 * _. or ._ ----
797 * _.-' '-._ } pre fill region
798 * _.+-----+ +-----+._ ----
799 * .-' / / \ \ '-. ____ } lines of data required before we can start
800 * / / \ \
801 * / / \ \
802 * / / \ \
803 * +-----+ +-----+
804 *
805 * We can see that we need to fill the temporary buffer with some
806 * empty lines to start with.
807 *
808 * The total Y extent of the diagonal line has been calculated as
809 * diagonal_h already. The length of the horizontal edge of the data
810 * region is extent1, and the remainder of the width is |c|hyX.
811 *
812 * Thus we need diagonal_h * extent1 / (extent1 + |c|hyX) in the pre
813 * fill region. We can generate our first line out once we have
814 * h1 lines in.
815 */
816
817 deskewer->pre_fill_y = ((uint32_t)ceil(diagonal_h*extent1/w1) + FILTER_WIDTH*2);
818 deskewer->start_y = diagonal_h - deskewer->pre_fill_y;
819
820 /* Each line we scale into the destination starts at a different
821 * X position. We hold that as x2.
822 *
823 * We calculated earlier where the corners of our source rectangle
824 * were mapped to to give us the smallest dst_w x dst_h.
825 * dst_w = X * (wx + hy|c|)
826 * dst_h = Y * (hy(1+cb) + |b|wx)
827 */
828
829 for (i = 0; i < SUB_PIX_X; i++)
830 {
831 make_x_weights(ctx,
832 &deskewer->index_x[i],
833 &deskewer->weights_x[i],
834 src_w,
835 deskewer->tmp_w,
836 (double)src_w / extent1,
837 i, SUB_PIX_X,
838 (16+channels-1)/channels);
839 #ifdef DEBUG_DESKEWER
840 {
841 char text[16];
842 sprintf(text, "X[%d]", i);
843 dump_weights(deskewer->index_x[i],
844 deskewer->weights_x[i],
845 deskewer->tmp_w,
846 text);
847 }
848 #endif
849 }
850
851 for (i = 0; i < SUB_PIX_Y; i++)
852 {
853 make_y_weights(ctx,
854 &deskewer->index_y[i],
855 &deskewer->weights_y[i],
856 &deskewer->max_y_weights[i],
857 deskewer->dst_w,
858 post_y_scale * pre_y_scale,
859 (t_degrees >= 0 ? diagonal_h : -diagonal_h) / w1,
860 i, SUB_PIX_Y,
861 deskewer->tmp_h);
862 #ifdef DEBUG_DESKEWER
863 {
864 char text[16];
865 sprintf(text, "Y[%d]", i);
866 dump_weights(deskewer->index_y[i],
867 deskewer->weights_y[i],
868 deskewer->dst_w,
869 text);
870 }
871 #endif
872 }
873
874 switch (channels)
875 {
876 case 1:
877 deskewer->zoom_x = SIMD_SWITCH(zoom_x1_sse, zoom_x1_neon, zoom_x1);
878 deskewer->zoom_y = SIMD_SWITCH(zoom_y1_sse, zoom_y1_neon, zoom_y1);
879 break;
880 case 3:
881 deskewer->zoom_x = SIMD_SWITCH(zoom_x3_sse, zoom_x3_neon, zoom_x3);
882 deskewer->zoom_y = SIMD_SWITCH(zoom_y3_sse, zoom_y3_neon, zoom_y3);
883 break;
884 case 4:
885 deskewer->zoom_x = SIMD_SWITCH(zoom_x4_sse, zoom_x4_neon, zoom_x4);
886 deskewer->zoom_y = SIMD_SWITCH(zoom_y4_sse, zoom_y4_neon, zoom_y4);
887 break;
888 default:
889 deskewer->zoom_x = zoom_x;
890 deskewer->zoom_y = zoom_y;
891 break;
892 }
893
894 if (channels == 1)
895 rejig_for_zoom_y1(ctx, deskewer);
896 }
897 fz_catch(ctx)
898 {
899 fz_drop_deskewer(ctx, deskewer);
900 fz_rethrow(ctx);
901 }
902
903 return deskewer;
904 }
905
906 /*
907 Our overall forward transform is:
908
909 (u') = (1 0 ) (xX cyX) (u)
910 (v') (bY/X yY) (0 1 ) (v)
911
912 (right hand one is the X shear, left hand one is the Y shear).
913
914 (u') = (xX cyX ) (u)
915 (v') (bxY yY(1+bc)) (v)
916
917 So, inverse...
918
919 (u) = 1/det . (yY(1+bc) -cyX) (u')
920 (v) (-bxY xX ) (v')
921
922 where det = (xyXY.(1+bc) - cyX.bxY) = xyXY(1+bc - bc) = xyXY
923
924 So inverse...
925
926 (u) = ((1+bc)/xX -c/xY) (u')
927 (v) (-b/yX 1/yY) (v')
928
929 Sanity check:
930
931 ((1+bc)/xX -c/xY) (xX cyX ) = ((1+bc) - bc cy(1+bc)/x - cy(1+bc)/x) = (1 0)
932 (-b/yX 1/yY) (bxY yY(1+bc)) (-bx/y + bx/y -bc + (1+bc) ) (0 1)
933 */
934
935 typedef struct fz_deskewer_bander_s {
936 fz_deskewer *deskewer;
937 double diag_y;
938 double diag_dy;
939 uint32_t in_y;
940 uint32_t out_y;
941 uint32_t tmp_width;
942 uint8_t *tmp;
943 uint32_t tmp_size;
944 double tmp_x;
945 double tmp_dx;
946 unsigned int dst_x0;
947 unsigned int dst_x1;
948 unsigned int dst_y0;
949 } fz_deskewer_bander;
950
951 static fz_deskewer_bander *
952 fz_new_deskewer_band(fz_context *ctx,
953 fz_deskewer *deskewer,
954 unsigned int src_y0,
955 unsigned int dst_y0)
956 {
957 fz_deskewer_bander *bander = fz_malloc_struct(ctx, fz_deskewer_bander);
958 bander->deskewer = deskewer;
959
960 bander->tmp_width = deskewer->dst_w_borderless;
961 bander->tmp_size = bander->tmp_width * deskewer->channels * deskewer->tmp_h;
962 bander->dst_x0 = deskewer->skip_l;
963 bander->dst_x1 = deskewer->dst_w_borderless + deskewer->skip_l;
964 bander->dst_y0 = dst_y0;
965
966 fz_try(ctx)
967 bander->tmp = (uint8_t *)fz_malloc(ctx, bander->tmp_size + SSE_SLOP);
968 fz_catch(ctx)
969 {
970 fz_free(ctx, bander);
971 fz_rethrow(ctx);
972 }
973
974 bander->dst_y0 += deskewer->skip_t;
975
976 /* Each line we scale into tmp starts at a different x position.
977 * We hold that as x. Over src_h lines, we want to move from
978 * 0 to chy (or chy to 0). */
979 bander->tmp_x = (deskewer->chyX >= 0 ? deskewer->chyX : 0);
980 bander->tmp_dx = -deskewer->chyX/deskewer->src_h;
981 /* Do a half step */
982 bander->tmp_x += bander->tmp_dx/2;
983 bander->tmp_x += src_y0 * bander->tmp_dx;
984
985 bander->diag_y = FILTER_WIDTH-(double)deskewer->pre_fill_y;
986 bander->diag_dy = (deskewer->src_h + deskewer->diagonal_h * (2 * deskewer->extent1 / deskewer->w1 - 1)) / deskewer->h2;
987 /* Do a half step on y */
988 bander->diag_y += bander->diag_dy/2;
989 bander->diag_y += bander->dst_y0 * bander->diag_dy;
990
991 /* diag_y = Where we start to read the diagonal line from */
992 /* in_y = All lines < this have been written into tmp. */
993 /* out_y = All lines smaller than this have been written out */
994
995 /* The first source line we have is src_y0. After the X skew this will still be src_y0.
996 * The Y skew will mean that this line extends across a range of output scanlines.
997 * Find the range of scanlines that we touch. */
998 bander->in_y = src_y0;
999 /* We need to fill the lines up to and including tmp_y with the background color. */
1000 {
1001 int first_y = (bander->in_y - deskewer->pre_fill_y + deskewer->tmp_h) % deskewer->tmp_h;
1002 int count = deskewer->pre_fill_y;
1003 if (first_y + count > (int)deskewer->tmp_h)
1004 {
1005 int s = deskewer->tmp_h - first_y;
1006 fill_bg(bander->tmp + first_y * bander->tmp_width * deskewer->channels, deskewer->bg, deskewer->channels, bander->tmp_width * s);
1007 first_y = 0;
1008 count -= s;
1009 }
1010 fill_bg(bander->tmp + first_y * bander->tmp_width * deskewer->channels, deskewer->bg, deskewer->channels, bander->tmp_width * count);
1011 }
1012
1013 bander->out_y = dst_y0;
1014
1015 return bander;
1016 }
1017
1018 static int
1019 fz_deskewer_band_pull(fz_deskewer_bander *bander,
1020 unsigned char *dst)
1021 {
1022 /* If we have enough data to make a new y2 line, do so. */
1023 double y = bander->diag_y + 1.0/(2 * SUB_PIX_Y);
1024 int iy = (int)floor(y);
1025 int which_y = (int)floor((y - iy) * SUB_PIX_Y);
1026 fz_deskewer *deskewer = bander->deskewer;
1027
1028 /* Ensure we have enough input lines to generate an output one. */
1029 while (1)
1030 {
1031 /* Which source line do we need to have to generate the next destination line? */
1032 int need_y = deskewer->index_y[which_y][deskewer->t_degrees >= 0 ? bander->dst_x1 - 1 : bander->dst_x0].last_pixel + iy;
1033
1034 /* Have we got enough lines? */
1035 if ((int)bander->in_y >= need_y)
1036 break; /* Yes, break out of the loop to process it. */
1037
1038 /* No. Can we ask the caller for one? */
1039 if (bander->in_y < deskewer->src_h)
1040 /* We are still in the range where we can ask for one. */
1041 return 0;
1042
1043 /* Fill in a line with background pixels. */
1044 fill_bg(&bander->tmp[bander->tmp_width * deskewer->channels * (bander->in_y % deskewer->tmp_h)],
1045 deskewer->bg, deskewer->channels, bander->tmp_width);
1046 bander->in_y++;
1047 }
1048
1049 deskewer->zoom_y(dst,
1050 bander->tmp,
1051 &deskewer->index_y[which_y][bander->dst_x0],
1052 deskewer->weights_y[which_y],
1053 bander->tmp_width,
1054 deskewer->channels,
1055 bander->tmp_size,
1056 (iy + deskewer->tmp_h) % deskewer->tmp_h);
1057 bander->out_y++;
1058 bander->diag_y += bander->diag_dy;
1059
1060 return 1;
1061 }
1062
1063 static void
1064 fz_deskewer_band_push(fz_deskewer_bander *bander,
1065 const unsigned char *src)
1066 {
1067 fz_deskewer *deskewer = bander->deskewer;
1068 uint8_t *line = &bander->tmp[bander->tmp_width * deskewer->channels * (bander->in_y % deskewer->tmp_h)];
1069 double x = bander->tmp_x + 1.0 / (SUB_PIX_X * 2);
1070 int xi = (int)floor(x);
1071 int which = (int)floor((x - xi) * SUB_PIX_X);
1072
1073 {
1074 int r = (int)bander->dst_x1;
1075 if (r > xi)
1076 r = xi;
1077 if ((int)bander->dst_x0 < r)
1078 fill_bg(line,
1079 deskewer->bg,
1080 deskewer->channels,
1081 r - (int)bander->dst_x0);
1082 }
1083 {
1084 int l = xi + deskewer->tmp_w - 1; /* Undo the +1 earlier */
1085 if (l < (int)bander->dst_x0)
1086 l = (int)bander->dst_x0;
1087 if (l < (int)bander->dst_x1)
1088 fill_bg(&line[(l - bander->dst_x0) * deskewer->channels],
1089 deskewer->bg,
1090 deskewer->channels,
1091 (int)bander->dst_x1 - l);
1092 }
1093 {
1094 int l = xi;
1095 int r = xi + deskewer->tmp_w;
1096 if (l < (int)bander->dst_x0)
1097 l = (int)bander->dst_x0;
1098 if (r > (int)bander->dst_x1)
1099 r = (int)bander->dst_x1;
1100 if (l < r)
1101 {
1102 #if 0
1103 memcpy(&line[(l - (int)bander->dst_x0) * deskewer->channels],
1104 src,
1105 (r-l) * deskewer->channels);
1106 #else
1107 deskewer->zoom_x(&line[(l - (int)bander->dst_x0) * deskewer->channels],
1108 src,
1109 &deskewer->index_x[which][l - xi],
1110 deskewer->weights_x[which],
1111 r-l,
1112 deskewer->src_w,
1113 deskewer->channels,
1114 deskewer->bg);
1115 #endif
1116 }
1117 }
1118 bander->in_y++;
1119 bander->tmp_x += bander->tmp_dx;
1120 }
1121
1122 static void
1123 fz_drop_deskewer_band(fz_context *ctx, fz_deskewer_bander *bander)
1124 {
1125 if (bander == NULL)
1126 return;
1127
1128 fz_free(ctx, bander->tmp);
1129 fz_free(ctx, bander);
1130 }
1131
1132 static void
1133 fz_deskewer_band(fz_context *ctx,
1134 fz_deskewer *deskewer,
1135 const unsigned char *src,
1136 int src_stride,
1137 unsigned int src_y0,
1138 unsigned int src_y1,
1139 unsigned char *dst,
1140 int dst_stride,
1141 unsigned int dst_y0,
1142 unsigned int dst_y1)
1143 {
1144 fz_deskewer_bander *bander;
1145 int row = 0;
1146
1147 bander = fz_new_deskewer_band(ctx, deskewer, src_y0, dst_y0);
1148
1149 while (dst_y0 < dst_y1)
1150 {
1151 if (fz_deskewer_band_pull(bander, dst + dst_stride * row) == 1)
1152 {
1153 row++;
1154 dst_y0++;
1155 continue;
1156 }
1157 fz_deskewer_band_push(bander, src + src_stride * row);
1158 }
1159
1160 fz_drop_deskewer_band(ctx, bander);
1161 }
1162
1163 fz_pixmap *fz_deskew_pixmap(fz_context *ctx,
1164 fz_pixmap *src,
1165 double degrees,
1166 int border)
1167 {
1168 uint8_t bg[FZ_MAX_COLORS] = { 0 };
1169 uint8_t bg_rgb[FZ_MAX_COLORS] = { 255, 255, 255 };
1170 unsigned int dst_w, dst_h;
1171 fz_deskewer *deskewer = fz_new_deskewer(ctx, src->w, src->h, &dst_w, &dst_h, degrees, border, 1, 1, 1, 1, src->n - src->alpha > 3 ? bg : bg_rgb, src->n);
1172 fz_pixmap *dst = NULL;
1173
1174 fz_var(dst);
1175
1176 fz_try(ctx)
1177 {
1178 dst = fz_new_pixmap(ctx, src->colorspace, dst_w, dst_h, NULL, src->alpha);
1179
1180 fz_deskewer_band(ctx, deskewer, src->samples, src->stride, 0, src->h, dst->samples, dst->stride, 0, dst->h);
1181 }
1182 fz_always(ctx)
1183 {
1184 fz_drop_deskewer(ctx, deskewer);
1185 }
1186 fz_catch(ctx)
1187 {
1188 fz_drop_pixmap(ctx, dst);
1189 fz_rethrow(ctx);
1190 }
1191
1192 return dst;
1193
1194 }