comparison mupdf-source/thirdparty/libjpeg/jidctint.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*
2 * jidctint.c
3 *
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2018 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
12 *
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
17 *
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
26 * scaled fixed-point arithmetic, with a minimal number of shifts.
27 *
28 * We also provide IDCT routines with various output sample block sizes for
29 * direct resolution reduction or enlargement and for directly resolving the
30 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
32 *
33 * For N<8 we simply take the corresponding low-frequency coefficients of
34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain
37 * point of view rather than the usual spatial domain point of view,
38 * yielding significant computational savings and results at least
39 * as good as common bilinear (averaging) spatial downsampling.
40 *
41 * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients
42 * as the lower frequencies and assuming the higher frequencies to be zero.
43 * It turns out that the computational effort is similar to the 8x8 IDCT
44 * regarding the output size.
45 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
46 *
47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48 * since there would be too many additional constants to pre-calculate.
49 */
50
51 #define JPEG_INTERNALS
52 #include "jinclude.h"
53 #include "jpeglib.h"
54 #include "jdct.h" /* Private declarations for DCT subsystem */
55
56 #ifdef DCT_ISLOW_SUPPORTED
57
58
59 /*
60 * This module is specialized to the case DCTSIZE = 8.
61 */
62
63 #if DCTSIZE != 8
64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65 #endif
66
67
68 /*
69 * The poop on this scaling stuff is as follows:
70 *
71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72 * larger than the true IDCT outputs. The final outputs are therefore
73 * a factor of N larger than desired; since N=8 this can be cured by
74 * a simple right shift at the end of the algorithm. The advantage of
75 * this arrangement is that we save two multiplications per 1-D IDCT,
76 * because the y0 and y4 inputs need not be divided by sqrt(N).
77 *
78 * We have to do addition and subtraction of the integer inputs, which
79 * is no problem, and multiplication by fractional constants, which is
80 * a problem to do in integer arithmetic. We multiply all the constants
81 * by CONST_SCALE and convert them to integer constants (thus retaining
82 * CONST_BITS bits of precision in the constants). After doing a
83 * multiplication we have to divide the product by CONST_SCALE, with proper
84 * rounding, to produce the correct output. This division can be done
85 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
86 * as long as possible so that partial sums can be added together with
87 * full fractional precision.
88 *
89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90 * they are represented to better-than-integral precision. These outputs
91 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
92 * with the recommended scaling. (To scale up 12-bit sample data further, an
93 * intermediate INT32 array would be needed.)
94 *
95 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
96 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
97 * shows that the values given below are the most effective.
98 */
99
100 #if BITS_IN_JSAMPLE == 8
101 #define CONST_BITS 13
102 #define PASS1_BITS 2
103 #else
104 #define CONST_BITS 13
105 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
106 #endif
107
108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
109 * causing a lot of useless floating-point operations at run time.
110 * To get around this we use the following pre-calculated constants.
111 * If you change CONST_BITS you may want to add appropriate values.
112 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
113 */
114
115 #if CONST_BITS == 13
116 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
117 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
118 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
119 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
120 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
121 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
122 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
123 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
124 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
125 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
126 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
127 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
128 #else
129 #define FIX_0_298631336 FIX(0.298631336)
130 #define FIX_0_390180644 FIX(0.390180644)
131 #define FIX_0_541196100 FIX(0.541196100)
132 #define FIX_0_765366865 FIX(0.765366865)
133 #define FIX_0_899976223 FIX(0.899976223)
134 #define FIX_1_175875602 FIX(1.175875602)
135 #define FIX_1_501321110 FIX(1.501321110)
136 #define FIX_1_847759065 FIX(1.847759065)
137 #define FIX_1_961570560 FIX(1.961570560)
138 #define FIX_2_053119869 FIX(2.053119869)
139 #define FIX_2_562915447 FIX(2.562915447)
140 #define FIX_3_072711026 FIX(3.072711026)
141 #endif
142
143
144 /* Clamp DC value to acceptable range for bug 697186 */
145 #define CLAMP_DC(dcval) \
146 { \
147 if (dcval < -1024) \
148 dcval = -1024; \
149 else if (dcval > 1023) \
150 dcval = 1023; \
151 }
152
153 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
154 * For 8-bit samples with the recommended scaling, all the variable
155 * and constant values involved are no more than 16 bits wide, so a
156 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
157 * For 12-bit samples, a full 32-bit multiplication will be needed.
158 */
159
160 #if BITS_IN_JSAMPLE == 8
161 #define MULTIPLY(var,const) MULTIPLY16C16(var,const)
162 #else
163 #define MULTIPLY(var,const) ((var) * (const))
164 #endif
165
166
167 /* Dequantize a coefficient by multiplying it by the multiplier-table
168 * entry; produce an int result. In this module, both inputs and result
169 * are 16 bits or less, so either int or short multiply will work.
170 */
171
172 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
173
174
175 /*
176 * Perform dequantization and inverse DCT on one block of coefficients.
177 *
178 * Optimized algorithm with 12 multiplications in the 1-D kernel.
179 * cK represents sqrt(2) * cos(K*pi/16).
180 */
181
182 GLOBAL(void)
183 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
184 JCOEFPTR coef_block,
185 JSAMPARRAY output_buf, JDIMENSION output_col)
186 {
187 INT32 tmp0, tmp1, tmp2, tmp3;
188 INT32 tmp10, tmp11, tmp12, tmp13;
189 INT32 z1, z2, z3;
190 JCOEFPTR inptr;
191 ISLOW_MULT_TYPE * quantptr;
192 int * wsptr;
193 JSAMPROW outptr;
194 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
195 int ctr;
196 int workspace[DCTSIZE2]; /* buffers data between passes */
197 SHIFT_TEMPS
198
199 /* Pass 1: process columns from input, store into work array.
200 * Note results are scaled up by sqrt(8) compared to a true IDCT;
201 * furthermore, we scale the results by 2**PASS1_BITS.
202 */
203
204 inptr = coef_block;
205 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
206 wsptr = workspace;
207 for (ctr = DCTSIZE; ctr > 0; ctr--) {
208 /* Due to quantization, we will usually find that many of the input
209 * coefficients are zero, especially the AC terms. We can exploit this
210 * by short-circuiting the IDCT calculation for any column in which all
211 * the AC terms are zero. In that case each output is equal to the
212 * DC coefficient (with scale factor as needed).
213 * With typical images and quantization tables, half or more of the
214 * column DCT calculations can be simplified this way.
215 */
216
217 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
218 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
219 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
220 inptr[DCTSIZE*7] == 0) {
221 /* AC terms all zero */
222 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
223 if (ctr == DCTSIZE)
224 CLAMP_DC(dcval);
225 dcval <<= PASS1_BITS;
226 wsptr[DCTSIZE*0] = dcval;
227 wsptr[DCTSIZE*1] = dcval;
228 wsptr[DCTSIZE*2] = dcval;
229 wsptr[DCTSIZE*3] = dcval;
230 wsptr[DCTSIZE*4] = dcval;
231 wsptr[DCTSIZE*5] = dcval;
232 wsptr[DCTSIZE*6] = dcval;
233 wsptr[DCTSIZE*7] = dcval;
234
235 inptr++; /* advance pointers to next column */
236 quantptr++;
237 wsptr++;
238 continue;
239 }
240
241 /* Even part: reverse the even part of the forward DCT.
242 * The rotator is c(-6).
243 */
244
245 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
246 if (ctr == DCTSIZE)
247 CLAMP_DC(z2);
248 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
249 z2 <<= CONST_BITS;
250 z3 <<= CONST_BITS;
251 /* Add fudge factor here for final descale. */
252 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
253
254 tmp0 = z2 + z3;
255 tmp1 = z2 - z3;
256
257 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
258 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
259
260 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
261 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
262 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
263
264 tmp10 = tmp0 + tmp2;
265 tmp13 = tmp0 - tmp2;
266 tmp11 = tmp1 + tmp3;
267 tmp12 = tmp1 - tmp3;
268
269 /* Odd part per figure 8; the matrix is unitary and hence its
270 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
271 */
272
273 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
274 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
275 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
276 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
277
278 z2 = tmp0 + tmp2;
279 z3 = tmp1 + tmp3;
280
281 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
282 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
283 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
284 z2 += z1;
285 z3 += z1;
286
287 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
288 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
289 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
290 tmp0 += z1 + z2;
291 tmp3 += z1 + z3;
292
293 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
294 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
295 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
296 tmp1 += z1 + z3;
297 tmp2 += z1 + z2;
298
299 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
300
301 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
302 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
303 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
304 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
305 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
306 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
307 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
308 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
309
310 inptr++; /* advance pointers to next column */
311 quantptr++;
312 wsptr++;
313 }
314
315 /* Pass 2: process rows from work array, store into output array.
316 * Note that we must descale the results by a factor of 8 == 2**3,
317 * and also undo the PASS1_BITS scaling.
318 */
319
320 wsptr = workspace;
321 for (ctr = 0; ctr < DCTSIZE; ctr++) {
322 outptr = output_buf[ctr] + output_col;
323
324 /* Add range center and fudge factor for final descale and range-limit. */
325 z2 = (INT32) wsptr[0] +
326 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
327 (ONE << (PASS1_BITS+2)));
328
329 /* Rows of zeroes can be exploited in the same way as we did with columns.
330 * However, the column calculation has created many nonzero AC terms, so
331 * the simplification applies less often (typically 5% to 10% of the time).
332 * On machines with very fast multiplication, it's possible that the
333 * test takes more time than it's worth. In that case this section
334 * may be commented out.
335 */
336
337 #ifndef NO_ZERO_ROW_TEST
338 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
339 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
340 /* AC terms all zero */
341 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
342 & RANGE_MASK];
343
344 outptr[0] = dcval;
345 outptr[1] = dcval;
346 outptr[2] = dcval;
347 outptr[3] = dcval;
348 outptr[4] = dcval;
349 outptr[5] = dcval;
350 outptr[6] = dcval;
351 outptr[7] = dcval;
352
353 wsptr += DCTSIZE; /* advance pointer to next row */
354 continue;
355 }
356 #endif
357
358 /* Even part: reverse the even part of the forward DCT.
359 * The rotator is c(-6).
360 */
361
362 z3 = (INT32) wsptr[4];
363
364 tmp0 = (z2 + z3) << CONST_BITS;
365 tmp1 = (z2 - z3) << CONST_BITS;
366
367 z2 = (INT32) wsptr[2];
368 z3 = (INT32) wsptr[6];
369
370 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
371 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
372 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
373
374 tmp10 = tmp0 + tmp2;
375 tmp13 = tmp0 - tmp2;
376 tmp11 = tmp1 + tmp3;
377 tmp12 = tmp1 - tmp3;
378
379 /* Odd part per figure 8; the matrix is unitary and hence its
380 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
381 */
382
383 tmp0 = (INT32) wsptr[7];
384 tmp1 = (INT32) wsptr[5];
385 tmp2 = (INT32) wsptr[3];
386 tmp3 = (INT32) wsptr[1];
387
388 z2 = tmp0 + tmp2;
389 z3 = tmp1 + tmp3;
390
391 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
392 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
393 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
394 z2 += z1;
395 z3 += z1;
396
397 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
398 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
399 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
400 tmp0 += z1 + z2;
401 tmp3 += z1 + z3;
402
403 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
404 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
405 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
406 tmp1 += z1 + z3;
407 tmp2 += z1 + z2;
408
409 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
410
411 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
412 CONST_BITS+PASS1_BITS+3)
413 & RANGE_MASK];
414 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
415 CONST_BITS+PASS1_BITS+3)
416 & RANGE_MASK];
417 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
418 CONST_BITS+PASS1_BITS+3)
419 & RANGE_MASK];
420 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
421 CONST_BITS+PASS1_BITS+3)
422 & RANGE_MASK];
423 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
424 CONST_BITS+PASS1_BITS+3)
425 & RANGE_MASK];
426 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
427 CONST_BITS+PASS1_BITS+3)
428 & RANGE_MASK];
429 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
430 CONST_BITS+PASS1_BITS+3)
431 & RANGE_MASK];
432 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
433 CONST_BITS+PASS1_BITS+3)
434 & RANGE_MASK];
435
436 wsptr += DCTSIZE; /* advance pointer to next row */
437 }
438 }
439
440 #ifdef IDCT_SCALING_SUPPORTED
441
442
443 /*
444 * Perform dequantization and inverse DCT on one block of coefficients,
445 * producing a reduced-size 7x7 output block.
446 *
447 * Optimized algorithm with 12 multiplications in the 1-D kernel.
448 * cK represents sqrt(2) * cos(K*pi/14).
449 */
450
451 GLOBAL(void)
452 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
453 JCOEFPTR coef_block,
454 JSAMPARRAY output_buf, JDIMENSION output_col)
455 {
456 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
457 INT32 z1, z2, z3;
458 JCOEFPTR inptr;
459 ISLOW_MULT_TYPE * quantptr;
460 int * wsptr;
461 JSAMPROW outptr;
462 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
463 int ctr;
464 int workspace[7*7]; /* buffers data between passes */
465 SHIFT_TEMPS
466
467 /* Pass 1: process columns from input, store into work array. */
468
469 inptr = coef_block;
470 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
471 wsptr = workspace;
472 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
473 /* Even part */
474
475 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
476 if (ctr == 0)
477 CLAMP_DC(tmp13);
478 tmp13 <<= CONST_BITS;
479 /* Add fudge factor here for final descale. */
480 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
481
482 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
483 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
484 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
485
486 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
487 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
488 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
489 tmp0 = z1 + z3;
490 z2 -= tmp0;
491 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
492 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
493 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
494 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
495
496 /* Odd part */
497
498 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
499 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
500 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
501
502 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
503 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
504 tmp0 = tmp1 - tmp2;
505 tmp1 += tmp2;
506 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
507 tmp1 += tmp2;
508 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
509 tmp0 += z2;
510 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
511
512 /* Final output stage */
513
514 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
515 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
516 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
517 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
518 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
519 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
520 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
521 }
522
523 /* Pass 2: process 7 rows from work array, store into output array. */
524
525 wsptr = workspace;
526 for (ctr = 0; ctr < 7; ctr++) {
527 outptr = output_buf[ctr] + output_col;
528
529 /* Even part */
530
531 /* Add range center and fudge factor for final descale and range-limit. */
532 tmp13 = (INT32) wsptr[0] +
533 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
534 (ONE << (PASS1_BITS+2)));
535 tmp13 <<= CONST_BITS;
536
537 z1 = (INT32) wsptr[2];
538 z2 = (INT32) wsptr[4];
539 z3 = (INT32) wsptr[6];
540
541 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
542 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
543 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
544 tmp0 = z1 + z3;
545 z2 -= tmp0;
546 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
547 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
548 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
549 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
550
551 /* Odd part */
552
553 z1 = (INT32) wsptr[1];
554 z2 = (INT32) wsptr[3];
555 z3 = (INT32) wsptr[5];
556
557 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
558 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
559 tmp0 = tmp1 - tmp2;
560 tmp1 += tmp2;
561 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
562 tmp1 += tmp2;
563 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
564 tmp0 += z2;
565 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
566
567 /* Final output stage */
568
569 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
570 CONST_BITS+PASS1_BITS+3)
571 & RANGE_MASK];
572 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
573 CONST_BITS+PASS1_BITS+3)
574 & RANGE_MASK];
575 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
576 CONST_BITS+PASS1_BITS+3)
577 & RANGE_MASK];
578 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
579 CONST_BITS+PASS1_BITS+3)
580 & RANGE_MASK];
581 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
582 CONST_BITS+PASS1_BITS+3)
583 & RANGE_MASK];
584 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
585 CONST_BITS+PASS1_BITS+3)
586 & RANGE_MASK];
587 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
588 CONST_BITS+PASS1_BITS+3)
589 & RANGE_MASK];
590
591 wsptr += 7; /* advance pointer to next row */
592 }
593 }
594
595
596 /*
597 * Perform dequantization and inverse DCT on one block of coefficients,
598 * producing a reduced-size 6x6 output block.
599 *
600 * Optimized algorithm with 3 multiplications in the 1-D kernel.
601 * cK represents sqrt(2) * cos(K*pi/12).
602 */
603
604 GLOBAL(void)
605 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
606 JCOEFPTR coef_block,
607 JSAMPARRAY output_buf, JDIMENSION output_col)
608 {
609 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
610 INT32 z1, z2, z3;
611 JCOEFPTR inptr;
612 ISLOW_MULT_TYPE * quantptr;
613 int * wsptr;
614 JSAMPROW outptr;
615 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
616 int ctr;
617 int workspace[6*6]; /* buffers data between passes */
618 SHIFT_TEMPS
619
620 /* Pass 1: process columns from input, store into work array. */
621
622 inptr = coef_block;
623 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
624 wsptr = workspace;
625 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
626 /* Even part */
627
628 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
629 if (ctr == 0)
630 CLAMP_DC(tmp0);
631 tmp0 <<= CONST_BITS;
632 /* Add fudge factor here for final descale. */
633 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
634 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
635 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
636 tmp1 = tmp0 + tmp10;
637 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
638 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
639 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
640 tmp10 = tmp1 + tmp0;
641 tmp12 = tmp1 - tmp0;
642
643 /* Odd part */
644
645 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
646 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
647 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
648 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
649 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
650 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
651 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
652
653 /* Final output stage */
654
655 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
656 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
657 wsptr[6*1] = (int) (tmp11 + tmp1);
658 wsptr[6*4] = (int) (tmp11 - tmp1);
659 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
660 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
661 }
662
663 /* Pass 2: process 6 rows from work array, store into output array. */
664
665 wsptr = workspace;
666 for (ctr = 0; ctr < 6; ctr++) {
667 outptr = output_buf[ctr] + output_col;
668
669 /* Even part */
670
671 /* Add range center and fudge factor for final descale and range-limit. */
672 tmp0 = (INT32) wsptr[0] +
673 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
674 (ONE << (PASS1_BITS+2)));
675 tmp0 <<= CONST_BITS;
676 tmp2 = (INT32) wsptr[4];
677 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
678 tmp1 = tmp0 + tmp10;
679 tmp11 = tmp0 - tmp10 - tmp10;
680 tmp10 = (INT32) wsptr[2];
681 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
682 tmp10 = tmp1 + tmp0;
683 tmp12 = tmp1 - tmp0;
684
685 /* Odd part */
686
687 z1 = (INT32) wsptr[1];
688 z2 = (INT32) wsptr[3];
689 z3 = (INT32) wsptr[5];
690 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
691 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
692 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
693 tmp1 = (z1 - z2 - z3) << CONST_BITS;
694
695 /* Final output stage */
696
697 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
698 CONST_BITS+PASS1_BITS+3)
699 & RANGE_MASK];
700 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
701 CONST_BITS+PASS1_BITS+3)
702 & RANGE_MASK];
703 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
704 CONST_BITS+PASS1_BITS+3)
705 & RANGE_MASK];
706 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
707 CONST_BITS+PASS1_BITS+3)
708 & RANGE_MASK];
709 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
710 CONST_BITS+PASS1_BITS+3)
711 & RANGE_MASK];
712 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
713 CONST_BITS+PASS1_BITS+3)
714 & RANGE_MASK];
715
716 wsptr += 6; /* advance pointer to next row */
717 }
718 }
719
720
721 /*
722 * Perform dequantization and inverse DCT on one block of coefficients,
723 * producing a reduced-size 5x5 output block.
724 *
725 * Optimized algorithm with 5 multiplications in the 1-D kernel.
726 * cK represents sqrt(2) * cos(K*pi/10).
727 */
728
729 GLOBAL(void)
730 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
731 JCOEFPTR coef_block,
732 JSAMPARRAY output_buf, JDIMENSION output_col)
733 {
734 INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
735 INT32 z1, z2, z3;
736 JCOEFPTR inptr;
737 ISLOW_MULT_TYPE * quantptr;
738 int * wsptr;
739 JSAMPROW outptr;
740 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
741 int ctr;
742 int workspace[5*5]; /* buffers data between passes */
743 SHIFT_TEMPS
744
745 /* Pass 1: process columns from input, store into work array. */
746
747 inptr = coef_block;
748 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
749 wsptr = workspace;
750 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
751 /* Even part */
752
753 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
754 if (ctr == 0)
755 CLAMP_DC(tmp12);
756 tmp12 <<= CONST_BITS;
757 /* Add fudge factor here for final descale. */
758 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
759 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
760 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
761 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
762 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
763 z3 = tmp12 + z2;
764 tmp10 = z3 + z1;
765 tmp11 = z3 - z1;
766 tmp12 -= z2 << 2;
767
768 /* Odd part */
769
770 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
771 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
772
773 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
774 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
775 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
776
777 /* Final output stage */
778
779 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
780 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
781 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
782 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
783 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
784 }
785
786 /* Pass 2: process 5 rows from work array, store into output array. */
787
788 wsptr = workspace;
789 for (ctr = 0; ctr < 5; ctr++) {
790 outptr = output_buf[ctr] + output_col;
791
792 /* Even part */
793
794 /* Add range center and fudge factor for final descale and range-limit. */
795 tmp12 = (INT32) wsptr[0] +
796 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
797 (ONE << (PASS1_BITS+2)));
798 tmp12 <<= CONST_BITS;
799 tmp0 = (INT32) wsptr[2];
800 tmp1 = (INT32) wsptr[4];
801 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
802 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
803 z3 = tmp12 + z2;
804 tmp10 = z3 + z1;
805 tmp11 = z3 - z1;
806 tmp12 -= z2 << 2;
807
808 /* Odd part */
809
810 z2 = (INT32) wsptr[1];
811 z3 = (INT32) wsptr[3];
812
813 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
814 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
815 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
816
817 /* Final output stage */
818
819 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
820 CONST_BITS+PASS1_BITS+3)
821 & RANGE_MASK];
822 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
823 CONST_BITS+PASS1_BITS+3)
824 & RANGE_MASK];
825 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
826 CONST_BITS+PASS1_BITS+3)
827 & RANGE_MASK];
828 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
829 CONST_BITS+PASS1_BITS+3)
830 & RANGE_MASK];
831 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
832 CONST_BITS+PASS1_BITS+3)
833 & RANGE_MASK];
834
835 wsptr += 5; /* advance pointer to next row */
836 }
837 }
838
839
840 /*
841 * Perform dequantization and inverse DCT on one block of coefficients,
842 * producing a reduced-size 4x4 output block.
843 *
844 * Optimized algorithm with 3 multiplications in the 1-D kernel.
845 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
846 */
847
848 GLOBAL(void)
849 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
850 JCOEFPTR coef_block,
851 JSAMPARRAY output_buf, JDIMENSION output_col)
852 {
853 INT32 tmp0, tmp2, tmp10, tmp12;
854 INT32 z1, z2, z3;
855 JCOEFPTR inptr;
856 ISLOW_MULT_TYPE * quantptr;
857 int * wsptr;
858 JSAMPROW outptr;
859 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
860 int ctr;
861 int workspace[4*4]; /* buffers data between passes */
862 SHIFT_TEMPS
863
864 /* Pass 1: process columns from input, store into work array. */
865
866 inptr = coef_block;
867 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
868 wsptr = workspace;
869 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
870 /* Even part */
871
872 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
873 if (ctr == 0)
874 CLAMP_DC(tmp0);
875 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
876
877 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
878 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
879
880 /* Odd part */
881 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
882
883 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
884 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
885
886 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
887 /* Add fudge factor here for final descale. */
888 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
889 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
890 CONST_BITS-PASS1_BITS);
891 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
892 CONST_BITS-PASS1_BITS);
893
894 /* Final output stage */
895
896 wsptr[4*0] = (int) (tmp10 + tmp0);
897 wsptr[4*3] = (int) (tmp10 - tmp0);
898 wsptr[4*1] = (int) (tmp12 + tmp2);
899 wsptr[4*2] = (int) (tmp12 - tmp2);
900 }
901
902 /* Pass 2: process 4 rows from work array, store into output array. */
903
904 wsptr = workspace;
905 for (ctr = 0; ctr < 4; ctr++) {
906 outptr = output_buf[ctr] + output_col;
907
908 /* Even part */
909
910 /* Add range center and fudge factor for final descale and range-limit. */
911 tmp0 = (INT32) wsptr[0] +
912 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
913 (ONE << (PASS1_BITS+2)));
914 tmp2 = (INT32) wsptr[2];
915
916 tmp10 = (tmp0 + tmp2) << CONST_BITS;
917 tmp12 = (tmp0 - tmp2) << CONST_BITS;
918
919 /* Odd part */
920 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
921
922 z2 = (INT32) wsptr[1];
923 z3 = (INT32) wsptr[3];
924
925 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
926 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
927 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
928
929 /* Final output stage */
930
931 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
932 CONST_BITS+PASS1_BITS+3)
933 & RANGE_MASK];
934 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
935 CONST_BITS+PASS1_BITS+3)
936 & RANGE_MASK];
937 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
938 CONST_BITS+PASS1_BITS+3)
939 & RANGE_MASK];
940 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
941 CONST_BITS+PASS1_BITS+3)
942 & RANGE_MASK];
943
944 wsptr += 4; /* advance pointer to next row */
945 }
946 }
947
948
949 /*
950 * Perform dequantization and inverse DCT on one block of coefficients,
951 * producing a reduced-size 3x3 output block.
952 *
953 * Optimized algorithm with 2 multiplications in the 1-D kernel.
954 * cK represents sqrt(2) * cos(K*pi/6).
955 */
956
957 GLOBAL(void)
958 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
959 JCOEFPTR coef_block,
960 JSAMPARRAY output_buf, JDIMENSION output_col)
961 {
962 INT32 tmp0, tmp2, tmp10, tmp12;
963 JCOEFPTR inptr;
964 ISLOW_MULT_TYPE * quantptr;
965 int * wsptr;
966 JSAMPROW outptr;
967 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
968 int ctr;
969 int workspace[3*3]; /* buffers data between passes */
970 SHIFT_TEMPS
971
972 /* Pass 1: process columns from input, store into work array. */
973
974 inptr = coef_block;
975 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
976 wsptr = workspace;
977 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
978 /* Even part */
979
980 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
981 if (ctr == 0)
982 CLAMP_DC(tmp0);
983 tmp0 <<= CONST_BITS;
984 /* Add fudge factor here for final descale. */
985 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
986 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
987 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
988 tmp10 = tmp0 + tmp12;
989 tmp2 = tmp0 - tmp12 - tmp12;
990
991 /* Odd part */
992
993 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
994 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
995
996 /* Final output stage */
997
998 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
999 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1000 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
1001 }
1002
1003 /* Pass 2: process 3 rows from work array, store into output array. */
1004
1005 wsptr = workspace;
1006 for (ctr = 0; ctr < 3; ctr++) {
1007 outptr = output_buf[ctr] + output_col;
1008
1009 /* Even part */
1010
1011 /* Add range center and fudge factor for final descale and range-limit. */
1012 tmp0 = (INT32) wsptr[0] +
1013 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1014 (ONE << (PASS1_BITS+2)));
1015 tmp0 <<= CONST_BITS;
1016 tmp2 = (INT32) wsptr[2];
1017 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1018 tmp10 = tmp0 + tmp12;
1019 tmp2 = tmp0 - tmp12 - tmp12;
1020
1021 /* Odd part */
1022
1023 tmp12 = (INT32) wsptr[1];
1024 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1025
1026 /* Final output stage */
1027
1028 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1029 CONST_BITS+PASS1_BITS+3)
1030 & RANGE_MASK];
1031 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1032 CONST_BITS+PASS1_BITS+3)
1033 & RANGE_MASK];
1034 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1035 CONST_BITS+PASS1_BITS+3)
1036 & RANGE_MASK];
1037
1038 wsptr += 3; /* advance pointer to next row */
1039 }
1040 }
1041
1042
1043 /*
1044 * Perform dequantization and inverse DCT on one block of coefficients,
1045 * producing a reduced-size 2x2 output block.
1046 *
1047 * Multiplication-less algorithm.
1048 */
1049
1050 GLOBAL(void)
1051 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1052 JCOEFPTR coef_block,
1053 JSAMPARRAY output_buf, JDIMENSION output_col)
1054 {
1055 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1056 ISLOW_MULT_TYPE * quantptr;
1057 JSAMPROW outptr;
1058 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1059 ISHIFT_TEMPS
1060
1061 /* Pass 1: process columns from input. */
1062
1063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1064
1065 /* Column 0 */
1066 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1067 CLAMP_DC(tmp4);
1068 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1069 /* Add range center and fudge factor for final descale and range-limit. */
1070 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1071
1072 tmp0 = tmp4 + tmp5;
1073 tmp2 = tmp4 - tmp5;
1074
1075 /* Column 1 */
1076 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1077 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1078
1079 tmp1 = tmp4 + tmp5;
1080 tmp3 = tmp4 - tmp5;
1081
1082 /* Pass 2: process 2 rows, store into output array. */
1083
1084 /* Row 0 */
1085 outptr = output_buf[0] + output_col;
1086
1087 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1088 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1089
1090 /* Row 1 */
1091 outptr = output_buf[1] + output_col;
1092
1093 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1094 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1095 }
1096
1097
1098 /*
1099 * Perform dequantization and inverse DCT on one block of coefficients,
1100 * producing a reduced-size 1x1 output block.
1101 *
1102 * We hardly need an inverse DCT routine for this: just take the
1103 * average pixel value, which is one-eighth of the DC coefficient.
1104 */
1105
1106 GLOBAL(void)
1107 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1108 JCOEFPTR coef_block,
1109 JSAMPARRAY output_buf, JDIMENSION output_col)
1110 {
1111 DCTELEM dcval;
1112 ISLOW_MULT_TYPE * quantptr;
1113 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1114 ISHIFT_TEMPS
1115
1116 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1117
1118 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1119
1120 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1121 CLAMP_DC(dcval);
1122 /* Add range center and fudge factor for descale and range-limit. */
1123 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1124
1125 output_buf[0][output_col] =
1126 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1127 }
1128
1129
1130 /*
1131 * Perform dequantization and inverse DCT on one block of coefficients,
1132 * producing a 9x9 output block.
1133 *
1134 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1135 * cK represents sqrt(2) * cos(K*pi/18).
1136 */
1137
1138 GLOBAL(void)
1139 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1140 JCOEFPTR coef_block,
1141 JSAMPARRAY output_buf, JDIMENSION output_col)
1142 {
1143 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1144 INT32 z1, z2, z3, z4;
1145 JCOEFPTR inptr;
1146 ISLOW_MULT_TYPE * quantptr;
1147 int * wsptr;
1148 JSAMPROW outptr;
1149 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1150 int ctr;
1151 int workspace[8*9]; /* buffers data between passes */
1152 SHIFT_TEMPS
1153
1154 /* Pass 1: process columns from input, store into work array. */
1155
1156 inptr = coef_block;
1157 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1158 wsptr = workspace;
1159 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1160 /* Even part */
1161
1162 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1163 if (ctr == 0)
1164 CLAMP_DC(tmp0);
1165 tmp0 <<= CONST_BITS;
1166 /* Add fudge factor here for final descale. */
1167 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1168
1169 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1170 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1171 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1172
1173 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1174 tmp1 = tmp0 + tmp3;
1175 tmp2 = tmp0 - tmp3 - tmp3;
1176
1177 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1178 tmp11 = tmp2 + tmp0;
1179 tmp14 = tmp2 - tmp0 - tmp0;
1180
1181 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1182 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1183 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1184
1185 tmp10 = tmp1 + tmp0 - tmp3;
1186 tmp12 = tmp1 - tmp0 + tmp2;
1187 tmp13 = tmp1 - tmp2 + tmp3;
1188
1189 /* Odd part */
1190
1191 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1192 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1193 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1194 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1195
1196 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1197
1198 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1199 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1200 tmp0 = tmp2 + tmp3 - z2;
1201 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1202 tmp2 += z2 - tmp1;
1203 tmp3 += z2 + tmp1;
1204 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1205
1206 /* Final output stage */
1207
1208 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1209 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1210 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1211 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1212 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1213 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1214 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1215 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1216 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1217 }
1218
1219 /* Pass 2: process 9 rows from work array, store into output array. */
1220
1221 wsptr = workspace;
1222 for (ctr = 0; ctr < 9; ctr++) {
1223 outptr = output_buf[ctr] + output_col;
1224
1225 /* Even part */
1226
1227 /* Add range center and fudge factor for final descale and range-limit. */
1228 tmp0 = (INT32) wsptr[0] +
1229 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1230 (ONE << (PASS1_BITS+2)));
1231 tmp0 <<= CONST_BITS;
1232
1233 z1 = (INT32) wsptr[2];
1234 z2 = (INT32) wsptr[4];
1235 z3 = (INT32) wsptr[6];
1236
1237 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1238 tmp1 = tmp0 + tmp3;
1239 tmp2 = tmp0 - tmp3 - tmp3;
1240
1241 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1242 tmp11 = tmp2 + tmp0;
1243 tmp14 = tmp2 - tmp0 - tmp0;
1244
1245 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1246 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1247 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1248
1249 tmp10 = tmp1 + tmp0 - tmp3;
1250 tmp12 = tmp1 - tmp0 + tmp2;
1251 tmp13 = tmp1 - tmp2 + tmp3;
1252
1253 /* Odd part */
1254
1255 z1 = (INT32) wsptr[1];
1256 z2 = (INT32) wsptr[3];
1257 z3 = (INT32) wsptr[5];
1258 z4 = (INT32) wsptr[7];
1259
1260 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1261
1262 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1263 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1264 tmp0 = tmp2 + tmp3 - z2;
1265 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1266 tmp2 += z2 - tmp1;
1267 tmp3 += z2 + tmp1;
1268 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1269
1270 /* Final output stage */
1271
1272 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1273 CONST_BITS+PASS1_BITS+3)
1274 & RANGE_MASK];
1275 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1276 CONST_BITS+PASS1_BITS+3)
1277 & RANGE_MASK];
1278 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1279 CONST_BITS+PASS1_BITS+3)
1280 & RANGE_MASK];
1281 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1282 CONST_BITS+PASS1_BITS+3)
1283 & RANGE_MASK];
1284 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1285 CONST_BITS+PASS1_BITS+3)
1286 & RANGE_MASK];
1287 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1288 CONST_BITS+PASS1_BITS+3)
1289 & RANGE_MASK];
1290 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1291 CONST_BITS+PASS1_BITS+3)
1292 & RANGE_MASK];
1293 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1294 CONST_BITS+PASS1_BITS+3)
1295 & RANGE_MASK];
1296 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1297 CONST_BITS+PASS1_BITS+3)
1298 & RANGE_MASK];
1299
1300 wsptr += 8; /* advance pointer to next row */
1301 }
1302 }
1303
1304
1305 /*
1306 * Perform dequantization and inverse DCT on one block of coefficients,
1307 * producing a 10x10 output block.
1308 *
1309 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1310 * cK represents sqrt(2) * cos(K*pi/20).
1311 */
1312
1313 GLOBAL(void)
1314 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1315 JCOEFPTR coef_block,
1316 JSAMPARRAY output_buf, JDIMENSION output_col)
1317 {
1318 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1319 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1320 INT32 z1, z2, z3, z4, z5;
1321 JCOEFPTR inptr;
1322 ISLOW_MULT_TYPE * quantptr;
1323 int * wsptr;
1324 JSAMPROW outptr;
1325 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1326 int ctr;
1327 int workspace[8*10]; /* buffers data between passes */
1328 SHIFT_TEMPS
1329
1330 /* Pass 1: process columns from input, store into work array. */
1331
1332 inptr = coef_block;
1333 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1334 wsptr = workspace;
1335 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1336 /* Even part */
1337
1338 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1339 if (ctr == 0)
1340 CLAMP_DC(z3);
1341 z3 <<= CONST_BITS;
1342 /* Add fudge factor here for final descale. */
1343 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1344 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1345 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1346 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1347 tmp10 = z3 + z1;
1348 tmp11 = z3 - z2;
1349
1350 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1351 CONST_BITS-PASS1_BITS);
1352
1353 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1354 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1355
1356 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1357 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1358 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1359
1360 tmp20 = tmp10 + tmp12;
1361 tmp24 = tmp10 - tmp12;
1362 tmp21 = tmp11 + tmp13;
1363 tmp23 = tmp11 - tmp13;
1364
1365 /* Odd part */
1366
1367 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1368 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1369 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1370 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1371
1372 tmp11 = z2 + z4;
1373 tmp13 = z2 - z4;
1374
1375 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1376 z5 = z3 << CONST_BITS;
1377
1378 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1379 z4 = z5 + tmp12;
1380
1381 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1382 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1383
1384 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1385 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1386
1387 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1388
1389 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1390 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1391
1392 /* Final output stage */
1393
1394 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1395 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1396 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1397 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1398 wsptr[8*2] = (int) (tmp22 + tmp12);
1399 wsptr[8*7] = (int) (tmp22 - tmp12);
1400 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1401 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1402 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1403 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1404 }
1405
1406 /* Pass 2: process 10 rows from work array, store into output array. */
1407
1408 wsptr = workspace;
1409 for (ctr = 0; ctr < 10; ctr++) {
1410 outptr = output_buf[ctr] + output_col;
1411
1412 /* Even part */
1413
1414 /* Add range center and fudge factor for final descale and range-limit. */
1415 z3 = (INT32) wsptr[0] +
1416 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1417 (ONE << (PASS1_BITS+2)));
1418 z3 <<= CONST_BITS;
1419 z4 = (INT32) wsptr[4];
1420 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1421 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1422 tmp10 = z3 + z1;
1423 tmp11 = z3 - z2;
1424
1425 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1426
1427 z2 = (INT32) wsptr[2];
1428 z3 = (INT32) wsptr[6];
1429
1430 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1431 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1432 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1433
1434 tmp20 = tmp10 + tmp12;
1435 tmp24 = tmp10 - tmp12;
1436 tmp21 = tmp11 + tmp13;
1437 tmp23 = tmp11 - tmp13;
1438
1439 /* Odd part */
1440
1441 z1 = (INT32) wsptr[1];
1442 z2 = (INT32) wsptr[3];
1443 z3 = (INT32) wsptr[5];
1444 z3 <<= CONST_BITS;
1445 z4 = (INT32) wsptr[7];
1446
1447 tmp11 = z2 + z4;
1448 tmp13 = z2 - z4;
1449
1450 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1451
1452 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1453 z4 = z3 + tmp12;
1454
1455 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1456 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1457
1458 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1459 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1460
1461 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1462
1463 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1464 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1465
1466 /* Final output stage */
1467
1468 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1469 CONST_BITS+PASS1_BITS+3)
1470 & RANGE_MASK];
1471 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1472 CONST_BITS+PASS1_BITS+3)
1473 & RANGE_MASK];
1474 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1475 CONST_BITS+PASS1_BITS+3)
1476 & RANGE_MASK];
1477 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1478 CONST_BITS+PASS1_BITS+3)
1479 & RANGE_MASK];
1480 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1481 CONST_BITS+PASS1_BITS+3)
1482 & RANGE_MASK];
1483 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1484 CONST_BITS+PASS1_BITS+3)
1485 & RANGE_MASK];
1486 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1487 CONST_BITS+PASS1_BITS+3)
1488 & RANGE_MASK];
1489 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1490 CONST_BITS+PASS1_BITS+3)
1491 & RANGE_MASK];
1492 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1493 CONST_BITS+PASS1_BITS+3)
1494 & RANGE_MASK];
1495 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1496 CONST_BITS+PASS1_BITS+3)
1497 & RANGE_MASK];
1498
1499 wsptr += 8; /* advance pointer to next row */
1500 }
1501 }
1502
1503
1504 /*
1505 * Perform dequantization and inverse DCT on one block of coefficients,
1506 * producing an 11x11 output block.
1507 *
1508 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1509 * cK represents sqrt(2) * cos(K*pi/22).
1510 */
1511
1512 GLOBAL(void)
1513 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1514 JCOEFPTR coef_block,
1515 JSAMPARRAY output_buf, JDIMENSION output_col)
1516 {
1517 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1518 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1519 INT32 z1, z2, z3, z4;
1520 JCOEFPTR inptr;
1521 ISLOW_MULT_TYPE * quantptr;
1522 int * wsptr;
1523 JSAMPROW outptr;
1524 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1525 int ctr;
1526 int workspace[8*11]; /* buffers data between passes */
1527 SHIFT_TEMPS
1528
1529 /* Pass 1: process columns from input, store into work array. */
1530
1531 inptr = coef_block;
1532 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1533 wsptr = workspace;
1534 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1535 /* Even part */
1536
1537 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1538 if (ctr == 0)
1539 CLAMP_DC(tmp10);
1540 tmp10 <<= CONST_BITS;
1541 /* Add fudge factor here for final descale. */
1542 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1543
1544 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1545 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1546 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1547
1548 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1549 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1550 z4 = z1 + z3;
1551 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1552 z4 -= z2;
1553 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1554 tmp21 = tmp20 + tmp23 + tmp25 -
1555 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1556 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1557 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1558 tmp24 += tmp25;
1559 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1560 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1561 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1562 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1563
1564 /* Odd part */
1565
1566 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1567 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1568 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1569 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1570
1571 tmp11 = z1 + z2;
1572 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1573 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1574 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1575 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1576 tmp10 = tmp11 + tmp12 + tmp13 -
1577 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1578 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1579 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1580 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1581 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1582 tmp11 += z1;
1583 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1584 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1585 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1586 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1587
1588 /* Final output stage */
1589
1590 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1591 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1592 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1593 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1594 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1595 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1596 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1597 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1598 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1599 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1600 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1601 }
1602
1603 /* Pass 2: process 11 rows from work array, store into output array. */
1604
1605 wsptr = workspace;
1606 for (ctr = 0; ctr < 11; ctr++) {
1607 outptr = output_buf[ctr] + output_col;
1608
1609 /* Even part */
1610
1611 /* Add range center and fudge factor for final descale and range-limit. */
1612 tmp10 = (INT32) wsptr[0] +
1613 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1614 (ONE << (PASS1_BITS+2)));
1615 tmp10 <<= CONST_BITS;
1616
1617 z1 = (INT32) wsptr[2];
1618 z2 = (INT32) wsptr[4];
1619 z3 = (INT32) wsptr[6];
1620
1621 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1622 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1623 z4 = z1 + z3;
1624 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1625 z4 -= z2;
1626 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1627 tmp21 = tmp20 + tmp23 + tmp25 -
1628 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1629 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1630 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1631 tmp24 += tmp25;
1632 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1633 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1634 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1635 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1636
1637 /* Odd part */
1638
1639 z1 = (INT32) wsptr[1];
1640 z2 = (INT32) wsptr[3];
1641 z3 = (INT32) wsptr[5];
1642 z4 = (INT32) wsptr[7];
1643
1644 tmp11 = z1 + z2;
1645 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1646 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1647 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1648 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1649 tmp10 = tmp11 + tmp12 + tmp13 -
1650 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1651 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1652 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1653 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1654 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1655 tmp11 += z1;
1656 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1657 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1658 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1659 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1660
1661 /* Final output stage */
1662
1663 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1664 CONST_BITS+PASS1_BITS+3)
1665 & RANGE_MASK];
1666 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1667 CONST_BITS+PASS1_BITS+3)
1668 & RANGE_MASK];
1669 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1670 CONST_BITS+PASS1_BITS+3)
1671 & RANGE_MASK];
1672 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1673 CONST_BITS+PASS1_BITS+3)
1674 & RANGE_MASK];
1675 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1676 CONST_BITS+PASS1_BITS+3)
1677 & RANGE_MASK];
1678 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1679 CONST_BITS+PASS1_BITS+3)
1680 & RANGE_MASK];
1681 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1682 CONST_BITS+PASS1_BITS+3)
1683 & RANGE_MASK];
1684 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1685 CONST_BITS+PASS1_BITS+3)
1686 & RANGE_MASK];
1687 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1688 CONST_BITS+PASS1_BITS+3)
1689 & RANGE_MASK];
1690 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1691 CONST_BITS+PASS1_BITS+3)
1692 & RANGE_MASK];
1693 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1694 CONST_BITS+PASS1_BITS+3)
1695 & RANGE_MASK];
1696
1697 wsptr += 8; /* advance pointer to next row */
1698 }
1699 }
1700
1701
1702 /*
1703 * Perform dequantization and inverse DCT on one block of coefficients,
1704 * producing a 12x12 output block.
1705 *
1706 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1707 * cK represents sqrt(2) * cos(K*pi/24).
1708 */
1709
1710 GLOBAL(void)
1711 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1712 JCOEFPTR coef_block,
1713 JSAMPARRAY output_buf, JDIMENSION output_col)
1714 {
1715 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1716 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1717 INT32 z1, z2, z3, z4;
1718 JCOEFPTR inptr;
1719 ISLOW_MULT_TYPE * quantptr;
1720 int * wsptr;
1721 JSAMPROW outptr;
1722 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1723 int ctr;
1724 int workspace[8*12]; /* buffers data between passes */
1725 SHIFT_TEMPS
1726
1727 /* Pass 1: process columns from input, store into work array. */
1728
1729 inptr = coef_block;
1730 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1731 wsptr = workspace;
1732 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1733 /* Even part */
1734
1735 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1736 if (ctr == 0)
1737 CLAMP_DC(z3);
1738 z3 <<= CONST_BITS;
1739 /* Add fudge factor here for final descale. */
1740 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1741
1742 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1743 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1744
1745 tmp10 = z3 + z4;
1746 tmp11 = z3 - z4;
1747
1748 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1749 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1750 z1 <<= CONST_BITS;
1751 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1752 z2 <<= CONST_BITS;
1753
1754 tmp12 = z1 - z2;
1755
1756 tmp21 = z3 + tmp12;
1757 tmp24 = z3 - tmp12;
1758
1759 tmp12 = z4 + z2;
1760
1761 tmp20 = tmp10 + tmp12;
1762 tmp25 = tmp10 - tmp12;
1763
1764 tmp12 = z4 - z1 - z2;
1765
1766 tmp22 = tmp11 + tmp12;
1767 tmp23 = tmp11 - tmp12;
1768
1769 /* Odd part */
1770
1771 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1772 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1773 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1774 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1775
1776 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1777 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1778
1779 tmp10 = z1 + z3;
1780 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1781 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1782 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1783 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1784 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1785 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1786 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1787 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1788
1789 z1 -= z4;
1790 z2 -= z3;
1791 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1792 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1793 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1794
1795 /* Final output stage */
1796
1797 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1798 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1799 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1800 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1801 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1802 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1803 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1804 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1805 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1806 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1807 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1808 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1809 }
1810
1811 /* Pass 2: process 12 rows from work array, store into output array. */
1812
1813 wsptr = workspace;
1814 for (ctr = 0; ctr < 12; ctr++) {
1815 outptr = output_buf[ctr] + output_col;
1816
1817 /* Even part */
1818
1819 /* Add range center and fudge factor for final descale and range-limit. */
1820 z3 = (INT32) wsptr[0] +
1821 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1822 (ONE << (PASS1_BITS+2)));
1823 z3 <<= CONST_BITS;
1824
1825 z4 = (INT32) wsptr[4];
1826 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1827
1828 tmp10 = z3 + z4;
1829 tmp11 = z3 - z4;
1830
1831 z1 = (INT32) wsptr[2];
1832 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1833 z1 <<= CONST_BITS;
1834 z2 = (INT32) wsptr[6];
1835 z2 <<= CONST_BITS;
1836
1837 tmp12 = z1 - z2;
1838
1839 tmp21 = z3 + tmp12;
1840 tmp24 = z3 - tmp12;
1841
1842 tmp12 = z4 + z2;
1843
1844 tmp20 = tmp10 + tmp12;
1845 tmp25 = tmp10 - tmp12;
1846
1847 tmp12 = z4 - z1 - z2;
1848
1849 tmp22 = tmp11 + tmp12;
1850 tmp23 = tmp11 - tmp12;
1851
1852 /* Odd part */
1853
1854 z1 = (INT32) wsptr[1];
1855 z2 = (INT32) wsptr[3];
1856 z3 = (INT32) wsptr[5];
1857 z4 = (INT32) wsptr[7];
1858
1859 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1860 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1861
1862 tmp10 = z1 + z3;
1863 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1864 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1865 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1866 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1867 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1868 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1869 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1870 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1871
1872 z1 -= z4;
1873 z2 -= z3;
1874 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1875 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1876 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1877
1878 /* Final output stage */
1879
1880 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1881 CONST_BITS+PASS1_BITS+3)
1882 & RANGE_MASK];
1883 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1884 CONST_BITS+PASS1_BITS+3)
1885 & RANGE_MASK];
1886 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1887 CONST_BITS+PASS1_BITS+3)
1888 & RANGE_MASK];
1889 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1890 CONST_BITS+PASS1_BITS+3)
1891 & RANGE_MASK];
1892 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1893 CONST_BITS+PASS1_BITS+3)
1894 & RANGE_MASK];
1895 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1896 CONST_BITS+PASS1_BITS+3)
1897 & RANGE_MASK];
1898 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1899 CONST_BITS+PASS1_BITS+3)
1900 & RANGE_MASK];
1901 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1902 CONST_BITS+PASS1_BITS+3)
1903 & RANGE_MASK];
1904 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1905 CONST_BITS+PASS1_BITS+3)
1906 & RANGE_MASK];
1907 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1908 CONST_BITS+PASS1_BITS+3)
1909 & RANGE_MASK];
1910 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1911 CONST_BITS+PASS1_BITS+3)
1912 & RANGE_MASK];
1913 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1914 CONST_BITS+PASS1_BITS+3)
1915 & RANGE_MASK];
1916
1917 wsptr += 8; /* advance pointer to next row */
1918 }
1919 }
1920
1921
1922 /*
1923 * Perform dequantization and inverse DCT on one block of coefficients,
1924 * producing a 13x13 output block.
1925 *
1926 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1927 * cK represents sqrt(2) * cos(K*pi/26).
 *
 * Parameters, as used by the body below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - supplies compptr->dct_table, read as ISLOW_MULT_TYPE
 *                dequantization multipliers.
 *   coef_block - input coefficients; elements [DCTSIZE*k + c] are read
 *                for k = 0..7 and c = 0..7.
 *   output_buf - array of output row pointers; rows 0..12 are written.
 *   output_col - offset added to each row pointer; 13 samples are stored
 *                per row starting at that offset.
 *
 * Pass 1 runs the 1-D kernel down each of the 8 input columns and keeps
 * 13 intermediate values per column in an 8-wide workspace.  Pass 2 runs
 * the same kernel along each of the 13 workspace rows and stores 13
 * range-limited samples per row.
1928 */
1929
1930 GLOBAL(void)
1931 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1932 JCOEFPTR coef_block,
1933 JSAMPARRAY output_buf, JDIMENSION output_col)
1934 {
1935 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1936 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1937 INT32 z1, z2, z3, z4;
1938 JCOEFPTR inptr;
1939 ISLOW_MULT_TYPE * quantptr;
1940 int * wsptr;
1941 JSAMPROW outptr;
1942 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1943 int ctr;
1944 int workspace[8*13]; /* buffers data between passes: 13 rows of 8 ints */
1945 SHIFT_TEMPS
1946
1947 /* Pass 1: process columns from input, store into work array. */
1948
1949 inptr = coef_block;
1950 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1951 wsptr = workspace;
/* One iteration per input column; all three pointers step one column. */
1952 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1953 /* Even part */
1954
1955 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only column 0 carries coefficient [0] of the block (the DC term).
 * CLAMP_DC is a macro defined outside this block.
 */
1956 if (ctr == 0)
1957 CLAMP_DC(z1);
/* NOTE(review): if z1 can be negative here, "<<" on a negative signed
 * value is undefined in ISO C - confirm the supported compilers.
 */
1958 z1 <<= CONST_BITS;
1959 /* Add fudge factor here for final descale: half of
 * 2^(CONST_BITS-PASS1_BITS), matching the shift count passed to
 * RIGHT_SHIFT in the output stage of this pass.
 */
1960 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1961
1962 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1963 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1964 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1965
/* Sum and difference of coefficients 4 and 6, shared by all terms below. */
1966 tmp10 = z3 + z4;
1967 tmp11 = z3 - z4;
1968
/* tmp12/tmp13 are recomputed three times below; each pair feeds exactly
 * the two tmp2x results that follow it.
 */
1969 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1970 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1971
1972 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1973 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1974
1975 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1976 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1977
1978 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1979 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1980
1981 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1982 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1983
1984 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1985 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1986
1987 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1988
1989 /* Odd part: tmp20..tmp26 keep the even results; z1..z4 and
 * tmp10..tmp15 are reused from here on.
 */
1990
1991 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1992 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1993 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1994 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1995
1996 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1997 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
/* tmp15 holds the raw sum z1 + z4 until it is scaled by c11 further down. */
1998 tmp15 = z1 + z4;
1999 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2000 tmp10 = tmp11 + tmp12 + tmp13 -
2001 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
/* tmp14 serves as a scratch product three times before it receives its
 * final value.
 */
2002 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2003 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2004 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2005 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2006 tmp11 += tmp14;
2007 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2008 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2009 tmp12 += tmp14;
2010 tmp13 += tmp14;
2011 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2012 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2013 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
/* Coefficient 1 is not read again below, so z1 is reused as scratch. */
2014 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2015 tmp14 += z1;
2016 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2017 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2018
2019 /* Final output stage: workspace rows k and 12-k share an even term and
 * differ only in the sign of the odd term; row 6 has no odd term.
 */
2020
2021 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2022 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2023 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2024 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2025 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2026 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2027 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2028 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2029 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2030 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2031 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2032 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2033 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2034 }
2035
2036 /* Pass 2: process 13 rows from work array, store into output array. */
2037
2038 wsptr = workspace;
2039 for (ctr = 0; ctr < 13; ctr++) {
/* Destination for this row: row pointer ctr shifted by output_col. */
2040 outptr = output_buf[ctr] + output_col;
2041
2042 /* Even part */
2043
2044 /* Add range center and fudge factor for final descale and range-limit.
 * Both are added before the CONST_BITS scaling below, so the fudge
 * term ends up as half of 2^(CONST_BITS+PASS1_BITS+3), the shift count
 * used in the output stage of this pass.
 */
2045 z1 = (INT32) wsptr[0] +
2046 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2047 (ONE << (PASS1_BITS+2)));
2048 z1 <<= CONST_BITS;
2049
2050 z2 = (INT32) wsptr[2];
2051 z3 = (INT32) wsptr[4];
2052 z4 = (INT32) wsptr[6];
2053
/* From here the arithmetic mirrors pass 1, with workspace values in
 * place of dequantized coefficients.
 */
2054 tmp10 = z3 + z4;
2055 tmp11 = z3 - z4;
2056
2057 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2058 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2059
2060 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2061 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2062
2063 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2064 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2065
2066 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2067 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2068
2069 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2070 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2071
2072 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2073 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2074
2075 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2076
2077 /* Odd part */
2078
2079 z1 = (INT32) wsptr[1];
2080 z2 = (INT32) wsptr[3];
2081 z3 = (INT32) wsptr[5];
2082 z4 = (INT32) wsptr[7];
2083
2084 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2085 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2086 tmp15 = z1 + z4;
2087 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2088 tmp10 = tmp11 + tmp12 + tmp13 -
2089 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2090 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2091 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2092 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2093 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2094 tmp11 += tmp14;
2095 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2096 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2097 tmp12 += tmp14;
2098 tmp13 += tmp14;
2099 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2100 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2101 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
/* z1 is reused as scratch from here, as in pass 1. */
2102 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2103 tmp14 += z1;
2104 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2105 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2106
2107 /* Final output stage: each shifted value is masked with RANGE_MASK
 * and used as an index into range_limit[]; samples k and 12-k share an
 * even term, sample 6 uses tmp26 alone.
 */
2108
2109 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2110 CONST_BITS+PASS1_BITS+3)
2111 & RANGE_MASK];
2112 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2113 CONST_BITS+PASS1_BITS+3)
2114 & RANGE_MASK];
2115 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2116 CONST_BITS+PASS1_BITS+3)
2117 & RANGE_MASK];
2118 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2119 CONST_BITS+PASS1_BITS+3)
2120 & RANGE_MASK];
2121 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2122 CONST_BITS+PASS1_BITS+3)
2123 & RANGE_MASK];
2124 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2125 CONST_BITS+PASS1_BITS+3)
2126 & RANGE_MASK];
2127 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2128 CONST_BITS+PASS1_BITS+3)
2129 & RANGE_MASK];
2130 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2131 CONST_BITS+PASS1_BITS+3)
2132 & RANGE_MASK];
2133 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2134 CONST_BITS+PASS1_BITS+3)
2135 & RANGE_MASK];
2136 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2137 CONST_BITS+PASS1_BITS+3)
2138 & RANGE_MASK];
2139 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2140 CONST_BITS+PASS1_BITS+3)
2141 & RANGE_MASK];
2142 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2143 CONST_BITS+PASS1_BITS+3)
2144 & RANGE_MASK];
2145 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2146 CONST_BITS+PASS1_BITS+3)
2147 & RANGE_MASK];
2148
2149 wsptr += 8; /* advance pointer to next row */
2150 }
2151 }
2152
2153
2154 /*
2155 * Perform dequantization and inverse DCT on one block of coefficients,
2156 * producing a 14x14 output block.
2157 *
2158 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2159 * cK represents sqrt(2) * cos(K*pi/28).
 *
 * Parameters, as used by the body below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - supplies compptr->dct_table, read as ISLOW_MULT_TYPE
 *                dequantization multipliers.
 *   coef_block - input coefficients; elements [DCTSIZE*k + c] are read
 *                for k = 0..7 and c = 0..7.
 *   output_buf - array of output row pointers; rows 0..13 are written.
 *   output_col - offset added to each row pointer; 14 samples are stored
 *                per row starting at that offset.
 *
 * Pass 1 fills an 8-wide, 14-row workspace from the 8 input columns;
 * pass 2 turns each workspace row into 14 range-limited samples.
2160 */
2161
2162 GLOBAL(void)
2163 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2164 JCOEFPTR coef_block,
2165 JSAMPARRAY output_buf, JDIMENSION output_col)
2166 {
2167 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2168 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2169 INT32 z1, z2, z3, z4;
2170 JCOEFPTR inptr;
2171 ISLOW_MULT_TYPE * quantptr;
2172 int * wsptr;
2173 JSAMPROW outptr;
2174 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2175 int ctr;
2176 int workspace[8*14]; /* buffers data between passes: 14 rows of 8 ints */
2177 SHIFT_TEMPS
2178
2179 /* Pass 1: process columns from input, store into work array. */
2180
2181 inptr = coef_block;
2182 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2183 wsptr = workspace;
/* One iteration per input column; all three pointers step one column. */
2184 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2185 /* Even part */
2186
2187 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only column 0 carries coefficient [0] of the block (the DC term).
 * CLAMP_DC is a macro defined outside this block.
 */
2188 if (ctr == 0)
2189 CLAMP_DC(z1);
/* NOTE(review): if z1 can be negative here, "<<" on a negative signed
 * value is undefined in ISO C - confirm the supported compilers.
 */
2190 z1 <<= CONST_BITS;
2191 /* Add fudge factor here for final descale: half of
 * 2^(CONST_BITS-PASS1_BITS), matching the shift count passed to
 * RIGHT_SHIFT in this pass.
 */
2192 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
/* Coefficient 4 is multiplied by three constants; z4 is overwritten
 * last because it is the multiplicand of all three.
 */
2193 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2194 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2195 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2196 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2197
2198 tmp10 = z1 + z2;
2199 tmp11 = z1 + z3;
2200 tmp12 = z1 - z4;
2201
/* tmp23 is descaled right here, unlike the other even terms: the output
 * stage stores tmp23 +/- tmp13 without a further RIGHT_SHIFT.
 */
2202 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2203 CONST_BITS-PASS1_BITS);
2204
2205 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2206 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2207
2208 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2209
2210 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2211 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2212 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2213 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2214
/* Combine the two even halves into mirrored pairs (20/26, 21/25, 22/24). */
2215 tmp20 = tmp10 + tmp13;
2216 tmp26 = tmp10 - tmp13;
2217 tmp21 = tmp11 + tmp14;
2218 tmp25 = tmp11 - tmp14;
2219 tmp22 = tmp12 + tmp15;
2220 tmp24 = tmp12 - tmp15;
2221
2222 /* Odd part: tmp20..tmp26 keep the even results; z1..z4 and
 * tmp10..tmp16 are reused from here on.
 */
2223
2224 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2225 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2226 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2227 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
/* Coefficient 7 scaled by 2^CONST_BITS; it is added to or subtracted
 * from MULTIPLY results directly, without a multiplication of its own.
 */
2228 tmp13 = z4 << CONST_BITS;
2229
2230 tmp14 = z1 + z3;
2231 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2232 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2233 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2234 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2235 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
/* z1 becomes (coef1 - coef3) here and (coef1 - coef3 + coef7) two
 * statements later; z4 still holds coefficient 7 at that point.
 */
2236 z1 -= z2;
2237 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2238 tmp16 += tmp15;
2239 z1 += z4;
/* From here z4 is scratch. */
2240 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2241 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2242 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2243 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2244 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2245 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2246
/* (coef1 - coef3 - coef5 + coef7) scaled by 2^PASS1_BITS only, so it is
 * already at workspace scale and pairs with the pre-descaled tmp23.
 */
2247 tmp13 = (z1 - z3) << PASS1_BITS;
2248
2249 /* Final output stage: rows k and 13-k share an even term and differ in
 * the sign of the odd term.  Rows 3 and 10 are stored without
 * RIGHT_SHIFT because both operands were scaled above.
 */
2250
2251 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2252 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2253 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2254 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2255 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2256 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2257 wsptr[8*3] = (int) (tmp23 + tmp13);
2258 wsptr[8*10] = (int) (tmp23 - tmp13);
2259 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2260 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2261 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2262 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2263 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2264 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2265 }
2266
2267 /* Pass 2: process 14 rows from work array, store into output array. */
2268
2269 wsptr = workspace;
2270 for (ctr = 0; ctr < 14; ctr++) {
/* Destination for this row: row pointer ctr shifted by output_col. */
2271 outptr = output_buf[ctr] + output_col;
2272
2273 /* Even part */
2274
2275 /* Add range center and fudge factor for final descale and range-limit.
 * Both are added before the CONST_BITS scaling below, so the fudge
 * term ends up as half of 2^(CONST_BITS+PASS1_BITS+3), the shift count
 * used in the output stage of this pass.
 */
2276 z1 = (INT32) wsptr[0] +
2277 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2278 (ONE << (PASS1_BITS+2)));
2279 z1 <<= CONST_BITS;
2280 z4 = (INT32) wsptr[4];
2281 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2282 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2283 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2284
2285 tmp10 = z1 + z2;
2286 tmp11 = z1 + z3;
2287 tmp12 = z1 - z4;
2288
/* Unlike pass 1, tmp23 is left unshifted here; every output of this
 * pass goes through the same RIGHT_SHIFT.
 */
2289 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2290
2291 z1 = (INT32) wsptr[2];
2292 z2 = (INT32) wsptr[6];
2293
2294 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2295
2296 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2297 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2298 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2299 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2300
2301 tmp20 = tmp10 + tmp13;
2302 tmp26 = tmp10 - tmp13;
2303 tmp21 = tmp11 + tmp14;
2304 tmp25 = tmp11 - tmp14;
2305 tmp22 = tmp12 + tmp15;
2306 tmp24 = tmp12 - tmp15;
2307
2308 /* Odd part */
2309
2310 z1 = (INT32) wsptr[1];
2311 z2 = (INT32) wsptr[3];
2312 z3 = (INT32) wsptr[5];
2313 z4 = (INT32) wsptr[7];
/* In this pass z4 itself carries the 2^CONST_BITS-scaled element 7
 * (pass 1 kept that value in tmp13 instead).
 */
2314 z4 <<= CONST_BITS;
2315
2316 tmp14 = z1 + z3;
2317 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2318 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2319 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2320 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2321 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
/* z1 becomes (ws1 - ws3) and stays that way for the rest of the row. */
2322 z1 -= z2;
2323 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2324 tmp16 += tmp15;
/* tmp13 is scratch until its final assignment below. */
2325 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2326 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2327 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2328 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2329 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2330 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2331
/* (ws1 - ws3 - ws5) scaled by 2^CONST_BITS plus the already scaled
 * element 7.
 */
2332 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2333
2334 /* Final output stage: each shifted value is masked with RANGE_MASK
 * and used as an index into range_limit[]; samples k and 13-k share an
 * even term and differ in the sign of the odd term.
 */
2335
2336 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2337 CONST_BITS+PASS1_BITS+3)
2338 & RANGE_MASK];
2339 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2340 CONST_BITS+PASS1_BITS+3)
2341 & RANGE_MASK];
2342 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2343 CONST_BITS+PASS1_BITS+3)
2344 & RANGE_MASK];
2345 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2346 CONST_BITS+PASS1_BITS+3)
2347 & RANGE_MASK];
2348 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2349 CONST_BITS+PASS1_BITS+3)
2350 & RANGE_MASK];
2351 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2352 CONST_BITS+PASS1_BITS+3)
2353 & RANGE_MASK];
2354 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2355 CONST_BITS+PASS1_BITS+3)
2356 & RANGE_MASK];
2357 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2358 CONST_BITS+PASS1_BITS+3)
2359 & RANGE_MASK];
2360 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2361 CONST_BITS+PASS1_BITS+3)
2362 & RANGE_MASK];
2363 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2364 CONST_BITS+PASS1_BITS+3)
2365 & RANGE_MASK];
2366 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2367 CONST_BITS+PASS1_BITS+3)
2368 & RANGE_MASK];
2369 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2370 CONST_BITS+PASS1_BITS+3)
2371 & RANGE_MASK];
2372 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2373 CONST_BITS+PASS1_BITS+3)
2374 & RANGE_MASK];
2375 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2376 CONST_BITS+PASS1_BITS+3)
2377 & RANGE_MASK];
2378
2379 wsptr += 8; /* advance pointer to next row */
2380 }
2381 }
2382
2383
2384 /*
2385 * Perform dequantization and inverse DCT on one block of coefficients,
2386 * producing a 15x15 output block.
2387 *
2388 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2389 * cK represents sqrt(2) * cos(K*pi/30).
 *
 * Parameters, as used by the body below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - supplies compptr->dct_table, read as ISLOW_MULT_TYPE
 *                dequantization multipliers.
 *   coef_block - input coefficients; elements [DCTSIZE*k + c] are read
 *                for k = 0..7 and c = 0..7.
 *   output_buf - array of output row pointers; rows 0..14 are written.
 *   output_col - offset added to each row pointer; 15 samples are stored
 *                per row starting at that offset.
 *
 * Pass 1 fills an 8-wide, 15-row workspace from the 8 input columns;
 * pass 2 turns each workspace row into 15 range-limited samples.
2390 */
2391
2392 GLOBAL(void)
2393 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2394 JCOEFPTR coef_block,
2395 JSAMPARRAY output_buf, JDIMENSION output_col)
2396 {
2397 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2398 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2399 INT32 z1, z2, z3, z4;
2400 JCOEFPTR inptr;
2401 ISLOW_MULT_TYPE * quantptr;
2402 int * wsptr;
2403 JSAMPROW outptr;
2404 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2405 int ctr;
2406 int workspace[8*15]; /* buffers data between passes: 15 rows of 8 ints */
2407 SHIFT_TEMPS
2408
2409 /* Pass 1: process columns from input, store into work array. */
2410
2411 inptr = coef_block;
2412 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2413 wsptr = workspace;
/* One iteration per input column; all three pointers step one column. */
2414 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2415 /* Even part */
2416
2417 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only column 0 carries coefficient [0] of the block (the DC term).
 * CLAMP_DC is a macro defined outside this block.
 */
2418 if (ctr == 0)
2419 CLAMP_DC(z1);
/* NOTE(review): if z1 can be negative here, "<<" on a negative signed
 * value is undefined in ISO C - confirm the supported compilers.
 */
2420 z1 <<= CONST_BITS;
2421 /* Add fudge factor here for final descale: half of
 * 2^(CONST_BITS-PASS1_BITS), matching the shift count passed to
 * RIGHT_SHIFT in the output stage of this pass.
 */
2422 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2423
2424 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2425 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2426 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2427
2428 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2429 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2430
/* tmp12, tmp13 and the updated z1 are three variants of the scaled DC
 * term, each combined with a different multiple of coefficient 6.
 */
2431 tmp12 = z1 - tmp10;
2432 tmp13 = z1 + tmp11;
2433 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2434
/* The difference must be taken before z3 is overwritten with the sum. */
2435 z4 = z2 - z3;
2436 z3 += z2;
/* tmp10/tmp11 are recomputed three times below; each pair feeds the
 * tmp2x results that follow it.
 */
2437 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2438 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2439 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2440
2441 tmp20 = tmp13 + tmp10 + tmp11;
2442 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2443
2444 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2445 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2446
2447 tmp25 = tmp13 - tmp10 - tmp11;
2448 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2449
2450 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2451 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2452
2453 tmp21 = tmp12 + tmp10 + tmp11;
2454 tmp24 = tmp13 - tmp10 + tmp11;
/* Double tmp11 in place; tmp22 uses it once and tmp27 subtracts it twice. */
2455 tmp11 += tmp11;
2456 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2457 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2458
2459 /* Odd part: tmp20..tmp27 keep the even results; z1..z4 and
 * tmp10..tmp16 are reused from here on.
 */
2460
2461 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2462 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
/* z4 briefly holds coefficient 5 so that z3 can take its c5 product;
 * z4 is then reloaded with coefficient 7.
 */
2463 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2464 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2465 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2466
/* tmp13 and tmp15 are scratch here; both are reassigned after tmp11 and
 * tmp14 have been formed.
 */
2467 tmp13 = z2 - z4;
2468 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2469 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2470 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2471
2472 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2473 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
/* Coefficient 3 is not read again below, so z2 is reused as scratch. */
2474 z2 = z1 - z4;
2475 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2476
2477 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2478 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2479 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2480 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2481 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2482 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2483
2484 /* Final output stage: workspace rows k and 14-k share an even term and
 * differ only in the sign of the odd term; row 7 has no odd term.
 */
2485
2486 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2487 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2488 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2489 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2490 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2491 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2492 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2493 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2494 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2495 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2496 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2497 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2498 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2499 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2500 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2501 }
2502
2503 /* Pass 2: process 15 rows from work array, store into output array. */
2504
2505 wsptr = workspace;
2506 for (ctr = 0; ctr < 15; ctr++) {
/* Destination for this row: row pointer ctr shifted by output_col. */
2507 outptr = output_buf[ctr] + output_col;
2508
2509 /* Even part */
2510
2511 /* Add range center and fudge factor for final descale and range-limit.
 * Both are added before the CONST_BITS scaling below, so the fudge
 * term ends up as half of 2^(CONST_BITS+PASS1_BITS+3), the shift count
 * used in the output stage of this pass.
 */
2512 z1 = (INT32) wsptr[0] +
2513 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2514 (ONE << (PASS1_BITS+2)));
2515 z1 <<= CONST_BITS;
2516
2517 z2 = (INT32) wsptr[2];
2518 z3 = (INT32) wsptr[4];
2519 z4 = (INT32) wsptr[6];
2520
/* From here the arithmetic mirrors pass 1, with workspace values in
 * place of dequantized coefficients.
 */
2521 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2522 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2523
2524 tmp12 = z1 - tmp10;
2525 tmp13 = z1 + tmp11;
2526 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2527
/* The difference must be taken before z3 is overwritten with the sum. */
2528 z4 = z2 - z3;
2529 z3 += z2;
2530 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2531 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2532 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2533
2534 tmp20 = tmp13 + tmp10 + tmp11;
2535 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2536
2537 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2538 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2539
2540 tmp25 = tmp13 - tmp10 - tmp11;
2541 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2542
2543 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2544 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2545
2546 tmp21 = tmp12 + tmp10 + tmp11;
2547 tmp24 = tmp13 - tmp10 + tmp11;
/* Double tmp11 in place; tmp22 uses it once and tmp27 subtracts it twice. */
2548 tmp11 += tmp11;
2549 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2550 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2551
2552 /* Odd part */
2553
2554 z1 = (INT32) wsptr[1];
2555 z2 = (INT32) wsptr[3];
/* z4 briefly holds element 5 so that z3 can take its c5 product; z4 is
 * then reloaded with element 7.
 */
2556 z4 = (INT32) wsptr[5];
2557 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2558 z4 = (INT32) wsptr[7];
2559
2560 tmp13 = z2 - z4;
2561 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2562 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2563 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2564
2565 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2566 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
/* z2 is reused as scratch from here, as in pass 1. */
2567 z2 = z1 - z4;
2568 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2569
2570 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2571 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2572 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2573 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2574 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2575 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2576
2577 /* Final output stage: each shifted value is masked with RANGE_MASK
 * and used as an index into range_limit[]; samples k and 14-k share an
 * even term, sample 7 uses tmp27 alone.
 */
2578
2579 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2580 CONST_BITS+PASS1_BITS+3)
2581 & RANGE_MASK];
2582 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2583 CONST_BITS+PASS1_BITS+3)
2584 & RANGE_MASK];
2585 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2586 CONST_BITS+PASS1_BITS+3)
2587 & RANGE_MASK];
2588 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2589 CONST_BITS+PASS1_BITS+3)
2590 & RANGE_MASK];
2591 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2592 CONST_BITS+PASS1_BITS+3)
2593 & RANGE_MASK];
2594 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2595 CONST_BITS+PASS1_BITS+3)
2596 & RANGE_MASK];
2597 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2598 CONST_BITS+PASS1_BITS+3)
2599 & RANGE_MASK];
2600 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2601 CONST_BITS+PASS1_BITS+3)
2602 & RANGE_MASK];
2603 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2604 CONST_BITS+PASS1_BITS+3)
2605 & RANGE_MASK];
2606 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2607 CONST_BITS+PASS1_BITS+3)
2608 & RANGE_MASK];
2609 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2610 CONST_BITS+PASS1_BITS+3)
2611 & RANGE_MASK];
2612 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2613 CONST_BITS+PASS1_BITS+3)
2614 & RANGE_MASK];
2615 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2616 CONST_BITS+PASS1_BITS+3)
2617 & RANGE_MASK];
2618 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2619 CONST_BITS+PASS1_BITS+3)
2620 & RANGE_MASK];
2621 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2622 CONST_BITS+PASS1_BITS+3)
2623 & RANGE_MASK];
2624
2625 wsptr += 8; /* advance pointer to next row */
2626 }
2627 }
2628
2629
2630 /*
2631 * Perform dequantization and inverse DCT on one block of coefficients,
2632 * producing a 16x16 output block.
2633 *
2634 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2635 * cK represents sqrt(2) * cos(K*pi/32).
2636 */
2637
2638 GLOBAL(void)
2639 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2640 JCOEFPTR coef_block,
2641 JSAMPARRAY output_buf, JDIMENSION output_col)
2642 {
2643 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2644 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2645 INT32 z1, z2, z3, z4;
2646 JCOEFPTR inptr;
2647 ISLOW_MULT_TYPE * quantptr;
2648 int * wsptr;
2649 JSAMPROW outptr;
2650 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2651 int ctr;
2652 int workspace[8*16]; /* buffers data between passes */
2653 SHIFT_TEMPS
2654
2655 /* Pass 1: process columns from input, store into work array. */
2656
2657 inptr = coef_block;
2658 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2659 wsptr = workspace;
2660 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2661 /* Even part */
2662
2663 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2664 if (ctr == 0)
2665 CLAMP_DC(tmp0);
2666 tmp0 <<= CONST_BITS;
2667 /* Add fudge factor here for final descale. */
2668 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2669
2670 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2671 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2672 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2673
2674 tmp10 = tmp0 + tmp1;
2675 tmp11 = tmp0 - tmp1;
2676 tmp12 = tmp0 + tmp2;
2677 tmp13 = tmp0 - tmp2;
2678
2679 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2680 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2681 z3 = z1 - z2;
2682 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2683 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2684
2685 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2686 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2687 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2688 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2689
2690 tmp20 = tmp10 + tmp0;
2691 tmp27 = tmp10 - tmp0;
2692 tmp21 = tmp12 + tmp1;
2693 tmp26 = tmp12 - tmp1;
2694 tmp22 = tmp13 + tmp2;
2695 tmp25 = tmp13 - tmp2;
2696 tmp23 = tmp11 + tmp3;
2697 tmp24 = tmp11 - tmp3;
2698
2699 /* Odd part */
2700
2701 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2702 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2703 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2704 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2705
2706 tmp11 = z1 + z3;
2707
2708 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2709 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2710 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2711 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2712 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2713 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2714 tmp0 = tmp1 + tmp2 + tmp3 -
2715 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2716 tmp13 = tmp10 + tmp11 + tmp12 -
2717 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2718 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2719 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2720 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2721 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2722 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2723 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2724 z2 += z4;
2725 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2726 tmp1 += z1;
2727 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2728 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2729 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2730 tmp12 += z2;
2731 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2732 tmp2 += z2;
2733 tmp3 += z2;
2734 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2735 tmp10 += z2;
2736 tmp11 += z2;
2737
2738 /* Final output stage */
2739
2740 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2741 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2742 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2743 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2744 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2745 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2746 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2747 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2748 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2749 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2750 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2751 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2752 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2753 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2754 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2755 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2756 }
2757
2758 /* Pass 2: process 16 rows from work array, store into output array. */
2759
2760 wsptr = workspace;
2761 for (ctr = 0; ctr < 16; ctr++) {
2762 outptr = output_buf[ctr] + output_col;
2763
2764 /* Even part */
2765
2766 /* Add range center and fudge factor for final descale and range-limit. */
2767 tmp0 = (INT32) wsptr[0] +
2768 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2769 (ONE << (PASS1_BITS+2)));
2770 tmp0 <<= CONST_BITS;
2771
2772 z1 = (INT32) wsptr[4];
2773 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2774 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2775
2776 tmp10 = tmp0 + tmp1;
2777 tmp11 = tmp0 - tmp1;
2778 tmp12 = tmp0 + tmp2;
2779 tmp13 = tmp0 - tmp2;
2780
2781 z1 = (INT32) wsptr[2];
2782 z2 = (INT32) wsptr[6];
2783 z3 = z1 - z2;
2784 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2785 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2786
2787 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2788 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2789 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2790 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2791
2792 tmp20 = tmp10 + tmp0;
2793 tmp27 = tmp10 - tmp0;
2794 tmp21 = tmp12 + tmp1;
2795 tmp26 = tmp12 - tmp1;
2796 tmp22 = tmp13 + tmp2;
2797 tmp25 = tmp13 - tmp2;
2798 tmp23 = tmp11 + tmp3;
2799 tmp24 = tmp11 - tmp3;
2800
2801 /* Odd part */
2802
2803 z1 = (INT32) wsptr[1];
2804 z2 = (INT32) wsptr[3];
2805 z3 = (INT32) wsptr[5];
2806 z4 = (INT32) wsptr[7];
2807
2808 tmp11 = z1 + z3;
2809
2810 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2811 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2812 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2813 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2814 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2815 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2816 tmp0 = tmp1 + tmp2 + tmp3 -
2817 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2818 tmp13 = tmp10 + tmp11 + tmp12 -
2819 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2820 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2821 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2822 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2823 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2824 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2825 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2826 z2 += z4;
2827 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2828 tmp1 += z1;
2829 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2830 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2831 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2832 tmp12 += z2;
2833 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2834 tmp2 += z2;
2835 tmp3 += z2;
2836 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2837 tmp10 += z2;
2838 tmp11 += z2;
2839
2840 /* Final output stage */
2841
2842 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2843 CONST_BITS+PASS1_BITS+3)
2844 & RANGE_MASK];
2845 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2846 CONST_BITS+PASS1_BITS+3)
2847 & RANGE_MASK];
2848 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2849 CONST_BITS+PASS1_BITS+3)
2850 & RANGE_MASK];
2851 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2852 CONST_BITS+PASS1_BITS+3)
2853 & RANGE_MASK];
2854 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2855 CONST_BITS+PASS1_BITS+3)
2856 & RANGE_MASK];
2857 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2858 CONST_BITS+PASS1_BITS+3)
2859 & RANGE_MASK];
2860 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2861 CONST_BITS+PASS1_BITS+3)
2862 & RANGE_MASK];
2863 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2864 CONST_BITS+PASS1_BITS+3)
2865 & RANGE_MASK];
2866 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2867 CONST_BITS+PASS1_BITS+3)
2868 & RANGE_MASK];
2869 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2870 CONST_BITS+PASS1_BITS+3)
2871 & RANGE_MASK];
2872 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2873 CONST_BITS+PASS1_BITS+3)
2874 & RANGE_MASK];
2875 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2876 CONST_BITS+PASS1_BITS+3)
2877 & RANGE_MASK];
2878 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2879 CONST_BITS+PASS1_BITS+3)
2880 & RANGE_MASK];
2881 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2882 CONST_BITS+PASS1_BITS+3)
2883 & RANGE_MASK];
2884 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2885 CONST_BITS+PASS1_BITS+3)
2886 & RANGE_MASK];
2887 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2888 CONST_BITS+PASS1_BITS+3)
2889 & RANGE_MASK];
2890
2891 wsptr += 8; /* advance pointer to next row */
2892 }
2893 }
2894
2895
2896 /*
2897 * Perform dequantization and inverse DCT on one block of coefficients,
2898 * producing a 16x8 output block.
2899 *
2900 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2901 */
2902
2903 GLOBAL(void)
2904 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2905 JCOEFPTR coef_block,
2906 JSAMPARRAY output_buf, JDIMENSION output_col)
2907 {
2908 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2909 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2910 INT32 z1, z2, z3, z4;
2911 JCOEFPTR inptr;
2912 ISLOW_MULT_TYPE * quantptr;
2913 int * wsptr;
2914 JSAMPROW outptr;
2915 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2916 int ctr;
2917 int workspace[8*8]; /* buffers data between passes */
2918 SHIFT_TEMPS
2919
2920 /* Pass 1: process columns from input, store into work array.
2921 * Note results are scaled up by sqrt(8) compared to a true IDCT;
2922 * furthermore, we scale the results by 2**PASS1_BITS.
2923 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2924 */
2925
2926 inptr = coef_block;
2927 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2928 wsptr = workspace;
2929 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2930 /* Due to quantization, we will usually find that many of the input
2931 * coefficients are zero, especially the AC terms. We can exploit this
2932 * by short-circuiting the IDCT calculation for any column in which all
2933 * the AC terms are zero. In that case each output is equal to the
2934 * DC coefficient (with scale factor as needed).
2935 * With typical images and quantization tables, half or more of the
2936 * column DCT calculations can be simplified this way.
2937 */
2938
2939 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2940 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2941 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2942 inptr[DCTSIZE*7] == 0) {
2943 /* AC terms all zero */
2944 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2945 if (ctr == DCTSIZE)
2946 CLAMP_DC(dcval);
2947 dcval <<= PASS1_BITS;
2948
2949 wsptr[DCTSIZE*0] = dcval;
2950 wsptr[DCTSIZE*1] = dcval;
2951 wsptr[DCTSIZE*2] = dcval;
2952 wsptr[DCTSIZE*3] = dcval;
2953 wsptr[DCTSIZE*4] = dcval;
2954 wsptr[DCTSIZE*5] = dcval;
2955 wsptr[DCTSIZE*6] = dcval;
2956 wsptr[DCTSIZE*7] = dcval;
2957
2958 inptr++; /* advance pointers to next column */
2959 quantptr++;
2960 wsptr++;
2961 continue;
2962 }
2963
2964 /* Even part: reverse the even part of the forward DCT.
2965 * The rotator is c(-6).
2966 */
2967
2968 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2969 if (ctr == DCTSIZE)
2970 CLAMP_DC(z2);
2971 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2972 z2 <<= CONST_BITS;
2973 z3 <<= CONST_BITS;
2974 /* Add fudge factor here for final descale. */
2975 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2976
2977 tmp0 = z2 + z3;
2978 tmp1 = z2 - z3;
2979
2980 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2981 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2982
2983 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2984 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2985 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2986
2987 tmp10 = tmp0 + tmp2;
2988 tmp13 = tmp0 - tmp2;
2989 tmp11 = tmp1 + tmp3;
2990 tmp12 = tmp1 - tmp3;
2991
2992 /* Odd part per figure 8; the matrix is unitary and hence its
2993 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2994 */
2995
2996 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2997 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2998 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2999 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3000
3001 z2 = tmp0 + tmp2;
3002 z3 = tmp1 + tmp3;
3003
3004 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3005 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3006 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3007 z2 += z1;
3008 z3 += z1;
3009
3010 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3011 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3012 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3013 tmp0 += z1 + z2;
3014 tmp3 += z1 + z3;
3015
3016 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3017 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3018 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3019 tmp1 += z1 + z3;
3020 tmp2 += z1 + z2;
3021
3022 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3023
3024 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
3025 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
3026 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
3027 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
3028 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
3029 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
3030 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
3031 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
3032
3033 inptr++; /* advance pointers to next column */
3034 quantptr++;
3035 wsptr++;
3036 }
3037
3038 /* Pass 2: process 8 rows from work array, store into output array.
3039 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3040 */
3041
3042 wsptr = workspace;
3043 for (ctr = 0; ctr < 8; ctr++) {
3044 outptr = output_buf[ctr] + output_col;
3045
3046 /* Even part */
3047
3048 /* Add range center and fudge factor for final descale and range-limit. */
3049 tmp0 = (INT32) wsptr[0] +
3050 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3051 (ONE << (PASS1_BITS+2)));
3052 tmp0 <<= CONST_BITS;
3053
3054 z1 = (INT32) wsptr[4];
3055 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3056 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3057
3058 tmp10 = tmp0 + tmp1;
3059 tmp11 = tmp0 - tmp1;
3060 tmp12 = tmp0 + tmp2;
3061 tmp13 = tmp0 - tmp2;
3062
3063 z1 = (INT32) wsptr[2];
3064 z2 = (INT32) wsptr[6];
3065 z3 = z1 - z2;
3066 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3067 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3068
3069 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3070 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3071 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3072 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3073
3074 tmp20 = tmp10 + tmp0;
3075 tmp27 = tmp10 - tmp0;
3076 tmp21 = tmp12 + tmp1;
3077 tmp26 = tmp12 - tmp1;
3078 tmp22 = tmp13 + tmp2;
3079 tmp25 = tmp13 - tmp2;
3080 tmp23 = tmp11 + tmp3;
3081 tmp24 = tmp11 - tmp3;
3082
3083 /* Odd part */
3084
3085 z1 = (INT32) wsptr[1];
3086 z2 = (INT32) wsptr[3];
3087 z3 = (INT32) wsptr[5];
3088 z4 = (INT32) wsptr[7];
3089
3090 tmp11 = z1 + z3;
3091
3092 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3093 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3094 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3095 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3096 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3097 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3098 tmp0 = tmp1 + tmp2 + tmp3 -
3099 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3100 tmp13 = tmp10 + tmp11 + tmp12 -
3101 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3102 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3103 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3104 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3105 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3106 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3107 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3108 z2 += z4;
3109 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3110 tmp1 += z1;
3111 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3112 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3113 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3114 tmp12 += z2;
3115 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3116 tmp2 += z2;
3117 tmp3 += z2;
3118 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3119 tmp10 += z2;
3120 tmp11 += z2;
3121
3122 /* Final output stage */
3123
3124 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3125 CONST_BITS+PASS1_BITS+3)
3126 & RANGE_MASK];
3127 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3128 CONST_BITS+PASS1_BITS+3)
3129 & RANGE_MASK];
3130 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3131 CONST_BITS+PASS1_BITS+3)
3132 & RANGE_MASK];
3133 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3134 CONST_BITS+PASS1_BITS+3)
3135 & RANGE_MASK];
3136 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3137 CONST_BITS+PASS1_BITS+3)
3138 & RANGE_MASK];
3139 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3140 CONST_BITS+PASS1_BITS+3)
3141 & RANGE_MASK];
3142 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3143 CONST_BITS+PASS1_BITS+3)
3144 & RANGE_MASK];
3145 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3146 CONST_BITS+PASS1_BITS+3)
3147 & RANGE_MASK];
3148 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3149 CONST_BITS+PASS1_BITS+3)
3150 & RANGE_MASK];
3151 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3152 CONST_BITS+PASS1_BITS+3)
3153 & RANGE_MASK];
3154 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3155 CONST_BITS+PASS1_BITS+3)
3156 & RANGE_MASK];
3157 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3158 CONST_BITS+PASS1_BITS+3)
3159 & RANGE_MASK];
3160 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3161 CONST_BITS+PASS1_BITS+3)
3162 & RANGE_MASK];
3163 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3164 CONST_BITS+PASS1_BITS+3)
3165 & RANGE_MASK];
3166 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3167 CONST_BITS+PASS1_BITS+3)
3168 & RANGE_MASK];
3169 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3170 CONST_BITS+PASS1_BITS+3)
3171 & RANGE_MASK];
3172
3173 wsptr += 8; /* advance pointer to next row */
3174 }
3175 }
3176
3177
3178 /*
3179 * Perform dequantization and inverse DCT on one block of coefficients,
3180 * producing a 14x7 output block.
3181 *
3182 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3183 */
3184
3185 GLOBAL(void)
3186 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3187 JCOEFPTR coef_block,
3188 JSAMPARRAY output_buf, JDIMENSION output_col)
3189 {
3190 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3191 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3192 INT32 z1, z2, z3, z4;
3193 JCOEFPTR inptr;
3194 ISLOW_MULT_TYPE * quantptr;
3195 int * wsptr;
3196 JSAMPROW outptr;
3197 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3198 int ctr;
3199 int workspace[8*7]; /* buffers data between passes */
3200 SHIFT_TEMPS
3201
3202 /* Pass 1: process columns from input, store into work array.
3203 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3204 */
3205
3206 inptr = coef_block;
3207 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3208 wsptr = workspace;
3209 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3210 /* Even part */
3211
3212 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3213 if (ctr == 0)
3214 CLAMP_DC(tmp23);
3215 tmp23 <<= CONST_BITS;
3216 /* Add fudge factor here for final descale. */
3217 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3218
3219 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3220 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3221 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3222
3223 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3224 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3225 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3226 tmp10 = z1 + z3;
3227 z2 -= tmp10;
3228 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3229 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3230 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3231 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3232
3233 /* Odd part */
3234
3235 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3236 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3237 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3238
3239 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3240 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3241 tmp10 = tmp11 - tmp12;
3242 tmp11 += tmp12;
3243 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3244 tmp11 += tmp12;
3245 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3246 tmp10 += z2;
3247 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3248
3249 /* Final output stage */
3250
3251 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3252 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3253 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3254 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3255 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3256 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3257 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3258 }
3259
3260 /* Pass 2: process 7 rows from work array, store into output array.
3261 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3262 */
3263
3264 wsptr = workspace;
3265 for (ctr = 0; ctr < 7; ctr++) {
3266 outptr = output_buf[ctr] + output_col;
3267
3268 /* Even part */
3269
3270 /* Add range center and fudge factor for final descale and range-limit. */
3271 z1 = (INT32) wsptr[0] +
3272 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3273 (ONE << (PASS1_BITS+2)));
3274 z1 <<= CONST_BITS;
3275 z4 = (INT32) wsptr[4];
3276 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3277 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3278 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3279
3280 tmp10 = z1 + z2;
3281 tmp11 = z1 + z3;
3282 tmp12 = z1 - z4;
3283
3284 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3285
3286 z1 = (INT32) wsptr[2];
3287 z2 = (INT32) wsptr[6];
3288
3289 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3290
3291 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3292 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3293 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3294 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3295
3296 tmp20 = tmp10 + tmp13;
3297 tmp26 = tmp10 - tmp13;
3298 tmp21 = tmp11 + tmp14;
3299 tmp25 = tmp11 - tmp14;
3300 tmp22 = tmp12 + tmp15;
3301 tmp24 = tmp12 - tmp15;
3302
3303 /* Odd part */
3304
3305 z1 = (INT32) wsptr[1];
3306 z2 = (INT32) wsptr[3];
3307 z3 = (INT32) wsptr[5];
3308 z4 = (INT32) wsptr[7];
3309 z4 <<= CONST_BITS;
3310
3311 tmp14 = z1 + z3;
3312 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3313 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3314 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3315 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3316 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3317 z1 -= z2;
3318 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3319 tmp16 += tmp15;
3320 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3321 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3322 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3323 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3324 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3325 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3326
3327 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3328
3329 /* Final output stage */
3330
3331 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3332 CONST_BITS+PASS1_BITS+3)
3333 & RANGE_MASK];
3334 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3335 CONST_BITS+PASS1_BITS+3)
3336 & RANGE_MASK];
3337 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3338 CONST_BITS+PASS1_BITS+3)
3339 & RANGE_MASK];
3340 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3341 CONST_BITS+PASS1_BITS+3)
3342 & RANGE_MASK];
3343 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3344 CONST_BITS+PASS1_BITS+3)
3345 & RANGE_MASK];
3346 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3347 CONST_BITS+PASS1_BITS+3)
3348 & RANGE_MASK];
3349 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3350 CONST_BITS+PASS1_BITS+3)
3351 & RANGE_MASK];
3352 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3353 CONST_BITS+PASS1_BITS+3)
3354 & RANGE_MASK];
3355 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3356 CONST_BITS+PASS1_BITS+3)
3357 & RANGE_MASK];
3358 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3359 CONST_BITS+PASS1_BITS+3)
3360 & RANGE_MASK];
3361 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3362 CONST_BITS+PASS1_BITS+3)
3363 & RANGE_MASK];
3364 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3365 CONST_BITS+PASS1_BITS+3)
3366 & RANGE_MASK];
3367 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3368 CONST_BITS+PASS1_BITS+3)
3369 & RANGE_MASK];
3370 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3371 CONST_BITS+PASS1_BITS+3)
3372 & RANGE_MASK];
3373
3374 wsptr += 8; /* advance pointer to next row */
3375 }
3376 }
3377
3378
3379 /*
3380 * Perform dequantization and inverse DCT on one block of coefficients,
3381 * producing a 12x6 output block.
3382 *
3383 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3384 */
3385
3386 GLOBAL(void)
3387 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3388 JCOEFPTR coef_block,
3389 JSAMPARRAY output_buf, JDIMENSION output_col)
3390 {
3391 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3392 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3393 INT32 z1, z2, z3, z4;
3394 JCOEFPTR inptr;
3395 ISLOW_MULT_TYPE * quantptr;
3396 int * wsptr;
3397 JSAMPROW outptr;
3398 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3399 int ctr;
3400 int workspace[8*6]; /* buffers data between passes */
3401 SHIFT_TEMPS
3402
3403 /* Pass 1: process columns from input, store into work array.
3404 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3405 */
3406
3407 inptr = coef_block;
3408 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3409 wsptr = workspace;
3410 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3411 /* Even part */
3412
3413 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3414 if (ctr == 0)
3415 CLAMP_DC(tmp10);
3416 tmp10 <<= CONST_BITS;
3417 /* Add fudge factor here for final descale. */
3418 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3419 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3420 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3421 tmp11 = tmp10 + tmp20;
3422 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3423 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3424 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3425 tmp20 = tmp11 + tmp10;
3426 tmp22 = tmp11 - tmp10;
3427
3428 /* Odd part */
3429
3430 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3431 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3432 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3433 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3434 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3435 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3436 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3437
3438 /* Final output stage */
3439
3440 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3441 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3442 wsptr[8*1] = (int) (tmp21 + tmp11);
3443 wsptr[8*4] = (int) (tmp21 - tmp11);
3444 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3445 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3446 }
3447
3448 /* Pass 2: process 6 rows from work array, store into output array.
3449 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3450 */
3451
3452 wsptr = workspace;
3453 for (ctr = 0; ctr < 6; ctr++) {
3454 outptr = output_buf[ctr] + output_col;
3455
3456 /* Even part */
3457
3458 /* Add range center and fudge factor for final descale and range-limit. */
3459 z3 = (INT32) wsptr[0] +
3460 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3461 (ONE << (PASS1_BITS+2)));
3462 z3 <<= CONST_BITS;
3463
3464 z4 = (INT32) wsptr[4];
3465 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3466
3467 tmp10 = z3 + z4;
3468 tmp11 = z3 - z4;
3469
3470 z1 = (INT32) wsptr[2];
3471 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3472 z1 <<= CONST_BITS;
3473 z2 = (INT32) wsptr[6];
3474 z2 <<= CONST_BITS;
3475
3476 tmp12 = z1 - z2;
3477
3478 tmp21 = z3 + tmp12;
3479 tmp24 = z3 - tmp12;
3480
3481 tmp12 = z4 + z2;
3482
3483 tmp20 = tmp10 + tmp12;
3484 tmp25 = tmp10 - tmp12;
3485
3486 tmp12 = z4 - z1 - z2;
3487
3488 tmp22 = tmp11 + tmp12;
3489 tmp23 = tmp11 - tmp12;
3490
3491 /* Odd part */
3492
3493 z1 = (INT32) wsptr[1];
3494 z2 = (INT32) wsptr[3];
3495 z3 = (INT32) wsptr[5];
3496 z4 = (INT32) wsptr[7];
3497
3498 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3499 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3500
3501 tmp10 = z1 + z3;
3502 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3503 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3504 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3505 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3506 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3507 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3508 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3509 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3510
3511 z1 -= z4;
3512 z2 -= z3;
3513 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3514 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3515 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3516
3517 /* Final output stage */
3518
3519 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3520 CONST_BITS+PASS1_BITS+3)
3521 & RANGE_MASK];
3522 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3523 CONST_BITS+PASS1_BITS+3)
3524 & RANGE_MASK];
3525 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3526 CONST_BITS+PASS1_BITS+3)
3527 & RANGE_MASK];
3528 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3529 CONST_BITS+PASS1_BITS+3)
3530 & RANGE_MASK];
3531 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3532 CONST_BITS+PASS1_BITS+3)
3533 & RANGE_MASK];
3534 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3535 CONST_BITS+PASS1_BITS+3)
3536 & RANGE_MASK];
3537 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3538 CONST_BITS+PASS1_BITS+3)
3539 & RANGE_MASK];
3540 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3541 CONST_BITS+PASS1_BITS+3)
3542 & RANGE_MASK];
3543 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3544 CONST_BITS+PASS1_BITS+3)
3545 & RANGE_MASK];
3546 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3547 CONST_BITS+PASS1_BITS+3)
3548 & RANGE_MASK];
3549 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3550 CONST_BITS+PASS1_BITS+3)
3551 & RANGE_MASK];
3552 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3553 CONST_BITS+PASS1_BITS+3)
3554 & RANGE_MASK];
3555
3556 wsptr += 8; /* advance pointer to next row */
3557 }
3558 }
3559
3560
3561 /*
3562 * Perform dequantization and inverse DCT on one block of coefficients,
3563 * producing a 10x5 output block.
3564 *
3565 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
 *
 * cinfo:      only used to fetch the range-limit table via IDCT_range_limit().
 * compptr:    its dct_table is read as an array of ISLOW_MULT_TYPE multipliers.
 * coef_block: input coefficients; pass 1 reads rows 0..4 (row stride DCTSIZE)
 *             of 8 columns.
 * output_buf: output rows 0..4 are written, 10 samples each.
 * output_col: offset added to every output row pointer.
3566 */
3567
3568 GLOBAL(void)
3569 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3570 JCOEFPTR coef_block,
3571 JSAMPARRAY output_buf, JDIMENSION output_col)
3572 {
3573 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3574 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3575 INT32 z1, z2, z3, z4;
3576 JCOEFPTR inptr;
3577 ISLOW_MULT_TYPE * quantptr;
3578 int * wsptr;
3579 JSAMPROW outptr;
3580 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3581 int ctr;
3582 int workspace[8*5]; /* buffers data between passes */
3583 SHIFT_TEMPS
3584
3585 /* Pass 1: process columns from input, store into work array.
3586 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3587 */
3588
3589 inptr = coef_block;
3590 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3591 wsptr = workspace;
/* One iteration per column: inptr, quantptr and wsptr all step by one
 * element, and each iteration fills workspace[ctr + 8*k] for k = 0..4.
 */
3592 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3593 /* Even part */
3594
3595 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only the first column's row-0 term (the DC coefficient) goes through
 * CLAMP_DC, which is defined elsewhere in this file.
 */
3596 if (ctr == 0)
3597 CLAMP_DC(tmp12);
3598 tmp12 <<= CONST_BITS;
3599 /* Add fudge factor here for final descale. */
3600 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
/* The fudge is half of 2^(CONST_BITS-PASS1_BITS), the amount removed by the
 * RIGHT_SHIFTs at the end of this pass.  tmp12 feeds every output below, so
 * each of them carries the bias exactly once.
 */
3601 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3602 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3603 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3604 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3605 z3 = tmp12 + z2;
3606 tmp10 = z3 + z1;
3607 tmp11 = z3 - z1;
/* tmp12 becomes the centre row (8*2), which receives no odd-part term. */
3608 tmp12 -= z2 << 2;
3609
3610 /* Odd part */
3611
3612 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3613 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3614
3615 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3616 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3617 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3618
3619 /* Final output stage */
3620
/* Rows k and 4-k are the sum and difference of the same even/odd pair. */
3621 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3622 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3623 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3624 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3625 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3626 }
3627
3628 /* Pass 2: process 5 rows from work array, store into output array.
3629 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3630 */
3631
3632 wsptr = workspace;
/* Each iteration consumes one 8-element workspace row and emits 10 samples. */
3633 for (ctr = 0; ctr < 5; ctr++) {
3634 outptr = output_buf[ctr] + output_col;
3635
3636 /* Even part */
3637
3638 /* Add range center and fudge factor for final descale and range-limit. */
3639 z3 = (INT32) wsptr[0] +
3640 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3641 (ONE << (PASS1_BITS+2)));
3642 z3 <<= CONST_BITS;
/* After this shift the (ONE << (PASS1_BITS+2)) term equals half of the
 * 2^(CONST_BITS+PASS1_BITS+3) divisor applied in the output stage.
 */
3643 z4 = (INT32) wsptr[4];
3644 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3645 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3646 tmp10 = z3 + z1;
3647 tmp11 = z3 - z2;
3648
3649 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3650
/* z2 and z3 are reloaded here; their previous values are no longer needed. */
3651 z2 = (INT32) wsptr[2];
3652 z3 = (INT32) wsptr[6];
3653
3654 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3655 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3656 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3657
3658 tmp20 = tmp10 + tmp12;
3659 tmp24 = tmp10 - tmp12;
3660 tmp21 = tmp11 + tmp13;
3661 tmp23 = tmp11 - tmp13;
3662
3663 /* Odd part */
3664
/* tmp10..tmp14 are reused below for the odd part; the even part now lives
 * entirely in tmp20..tmp24.
 */
3665 z1 = (INT32) wsptr[1];
3666 z2 = (INT32) wsptr[3];
3667 z3 = (INT32) wsptr[5];
/* wsptr[5] enters the odd part without a MULTIPLY, so it is only
 * pre-shifted by CONST_BITS.
 */
3668 z3 <<= CONST_BITS;
3669 z4 = (INT32) wsptr[7];
3670
3671 tmp11 = z2 + z4;
3672 tmp13 = z2 - z4;
3673
3674 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3675
3676 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3677 z4 = z3 + tmp12;
3678
3679 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3680 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3681
3682 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3683 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3684
/* Must stay before the tmp13 assignment below: it reads the old tmp13. */
3685 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3686
3687 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3688 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3689
3690 /* Final output stage */
3691
/* Mirrored pairs: outptr[k] = even + odd, outptr[9-k] = even - odd for
 * k = 0..4.  Each value is shifted down, masked with RANGE_MASK and then
 * looked up in range_limit.
 */
3692 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3693 CONST_BITS+PASS1_BITS+3)
3694 & RANGE_MASK];
3695 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3696 CONST_BITS+PASS1_BITS+3)
3697 & RANGE_MASK];
3698 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3699 CONST_BITS+PASS1_BITS+3)
3700 & RANGE_MASK];
3701 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3702 CONST_BITS+PASS1_BITS+3)
3703 & RANGE_MASK];
3704 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3705 CONST_BITS+PASS1_BITS+3)
3706 & RANGE_MASK];
3707 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3708 CONST_BITS+PASS1_BITS+3)
3709 & RANGE_MASK];
3710 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3711 CONST_BITS+PASS1_BITS+3)
3712 & RANGE_MASK];
3713 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3714 CONST_BITS+PASS1_BITS+3)
3715 & RANGE_MASK];
3716 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3717 CONST_BITS+PASS1_BITS+3)
3718 & RANGE_MASK];
3719 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3720 CONST_BITS+PASS1_BITS+3)
3721 & RANGE_MASK];
3722
3723 wsptr += 8; /* advance pointer to next row */
3724 }
3725 }
3726
3727
3728 /*
3729 * Perform dequantization and inverse DCT on one block of coefficients,
3730 * producing an 8x4 output block.
3731 *
3732 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 *
 * cinfo:      only used to fetch the range-limit table via IDCT_range_limit().
 * compptr:    its dct_table is read as an array of ISLOW_MULT_TYPE multipliers.
 * coef_block: input coefficients; pass 1 reads rows 0..3 (row stride DCTSIZE)
 *             of 8 columns.
 * output_buf: output rows 0..3 are written, 8 samples each.
 * output_col: offset added to every output row pointer.
3733 */
3734
3735 GLOBAL(void)
3736 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3737 JCOEFPTR coef_block,
3738 JSAMPARRAY output_buf, JDIMENSION output_col)
3739 {
3740 INT32 tmp0, tmp1, tmp2, tmp3;
3741 INT32 tmp10, tmp11, tmp12, tmp13;
3742 INT32 z1, z2, z3;
3743 JCOEFPTR inptr;
3744 ISLOW_MULT_TYPE * quantptr;
3745 int * wsptr;
3746 JSAMPROW outptr;
3747 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3748 int ctr;
3749 int workspace[8*4]; /* buffers data between passes */
3750 SHIFT_TEMPS
3751
3752 /* Pass 1: process columns from input, store into work array.
3753 * 4-point IDCT kernel,
3754 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3755 */
3756
3757 inptr = coef_block;
3758 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3759 wsptr = workspace;
/* One iteration per column: inptr, quantptr and wsptr all step by one
 * element, and each iteration fills workspace[ctr + 8*k] for k = 0..3.
 */
3760 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3761 /* Even part */
3762
3763 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only the first column's row-0 term (the DC coefficient) goes through
 * CLAMP_DC, which is defined elsewhere in this file.
 */
3764 if (ctr == 0)
3765 CLAMP_DC(tmp0);
3766 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3767
/* The even part needs no multiplication, so it is scaled up by PASS1_BITS
 * directly rather than being descaled from CONST_BITS.
 */
3768 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3769 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3770
3771 /* Odd part */
3772 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3773
3774 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3775 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3776
3777 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3778 /* Add fudge factor here for final descale. */
3779 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
/* tmp0 and tmp2 are reused: from here on they hold the descaled odd part.
 * z1 carries the rounding term into both shifts.
 */
3780 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3781 CONST_BITS-PASS1_BITS);
3782 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3783 CONST_BITS-PASS1_BITS);
3784
3785 /* Final output stage */
3786
/* Both operands are already at PASS1_BITS scale, so no shift is needed. */
3787 wsptr[8*0] = (int) (tmp10 + tmp0);
3788 wsptr[8*3] = (int) (tmp10 - tmp0);
3789 wsptr[8*1] = (int) (tmp12 + tmp2);
3790 wsptr[8*2] = (int) (tmp12 - tmp2);
3791 }
3792
3793 /* Pass 2: process rows from work array, store into output array.
3794 * Note that we must descale the results by a factor of 8 == 2**3,
3795 * and also undo the PASS1_BITS scaling.
3796 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3797 */
3798
3799 wsptr = workspace;
/* Each iteration consumes one 8-element workspace row and emits 8 samples. */
3800 for (ctr = 0; ctr < 4; ctr++) {
3801 outptr = output_buf[ctr] + output_col;
3802
3803 /* Even part: reverse the even part of the forward DCT.
3804 * The rotator is c(-6).
3805 */
3806
3807 /* Add range center and fudge factor for final descale and range-limit. */
3808 z2 = (INT32) wsptr[0] +
3809 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3810 (ONE << (PASS1_BITS+2)));
3811 z3 = (INT32) wsptr[4];
3812
/* Shifting by CONST_BITS turns the (ONE << (PASS1_BITS+2)) term into half
 * of the 2^(CONST_BITS+PASS1_BITS+3) divisor applied in the output stage.
 */
3813 tmp0 = (z2 + z3) << CONST_BITS;
3814 tmp1 = (z2 - z3) << CONST_BITS;
3815
3816 z2 = (INT32) wsptr[2];
3817 z3 = (INT32) wsptr[6];
3818
3819 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3820 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3821 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3822
3823 tmp10 = tmp0 + tmp2;
3824 tmp13 = tmp0 - tmp2;
3825 tmp11 = tmp1 + tmp3;
3826 tmp12 = tmp1 - tmp3;
3827
3828 /* Odd part per figure 8; the matrix is unitary and hence its
3829 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3830 */
3831
/* tmp0..tmp3 are reloaded with the odd inputs; the even part now lives
 * entirely in tmp10..tmp13.
 */
3832 tmp0 = (INT32) wsptr[7];
3833 tmp1 = (INT32) wsptr[5];
3834 tmp2 = (INT32) wsptr[3];
3835 tmp3 = (INT32) wsptr[1];
3836
3837 z2 = tmp0 + tmp2;
3838 z3 = tmp1 + tmp3;
3839
3840 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3841 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3842 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3843 z2 += z1;
3844 z3 += z1;
3845
/* z1 must be formed from the unscaled tmp0 and tmp3 before they are
 * overwritten by the two MULTIPLYs that follow.
 */
3846 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3847 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3848 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3849 tmp0 += z1 + z2;
3850 tmp3 += z1 + z3;
3851
/* Same ordering constraint for tmp1 and tmp2. */
3852 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3853 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3854 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3855 tmp1 += z1 + z3;
3856 tmp2 += z1 + z2;
3857
3858 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3859
/* Mirrored pairs: outptr[k] and outptr[7-k] are the sum and difference of
 * tmp1k and tmp(3-k).  Each value is shifted down, masked with RANGE_MASK
 * and then looked up in range_limit.
 */
3860 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3861 CONST_BITS+PASS1_BITS+3)
3862 & RANGE_MASK];
3863 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3864 CONST_BITS+PASS1_BITS+3)
3865 & RANGE_MASK];
3866 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3867 CONST_BITS+PASS1_BITS+3)
3868 & RANGE_MASK];
3869 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3870 CONST_BITS+PASS1_BITS+3)
3871 & RANGE_MASK];
3872 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3873 CONST_BITS+PASS1_BITS+3)
3874 & RANGE_MASK];
3875 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3876 CONST_BITS+PASS1_BITS+3)
3877 & RANGE_MASK];
3878 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3879 CONST_BITS+PASS1_BITS+3)
3880 & RANGE_MASK];
3881 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3882 CONST_BITS+PASS1_BITS+3)
3883 & RANGE_MASK];
3884
/* NOTE(review): pass 1 stores rows with a literal stride of 8, so this
 * advance presumably relies on DCTSIZE == 8 -- confirm.
 */
3885 wsptr += DCTSIZE; /* advance pointer to next row */
3886 }
3887 }
3888
3889
3890 /*
3891 * Perform dequantization and inverse DCT on one block of coefficients,
3892 * producing a 6x3 output block.
3893 *
3894 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
 *
 * cinfo:      only used to fetch the range-limit table via IDCT_range_limit().
 * compptr:    its dct_table is read as an array of ISLOW_MULT_TYPE multipliers.
 * coef_block: input coefficients; pass 1 reads rows 0..2 (row stride DCTSIZE)
 *             of the first 6 columns.
 * output_buf: output rows 0..2 are written, 6 samples each.
 * output_col: offset added to every output row pointer.
3895 */
3896
3897 GLOBAL(void)
3898 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3899 JCOEFPTR coef_block,
3900 JSAMPARRAY output_buf, JDIMENSION output_col)
3901 {
3902 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3903 INT32 z1, z2, z3;
3904 JCOEFPTR inptr;
3905 ISLOW_MULT_TYPE * quantptr;
3906 int * wsptr;
3907 JSAMPROW outptr;
3908 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3909 int ctr;
3910 int workspace[6*3]; /* buffers data between passes */
3911 SHIFT_TEMPS
3912
3913 /* Pass 1: process columns from input, store into work array.
3914 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3915 */
3916
3917 inptr = coef_block;
3918 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3919 wsptr = workspace;
/* One iteration per column: inptr, quantptr and wsptr all step by one
 * element, and each iteration fills workspace[ctr + 6*k] for k = 0..2.
 * The workspace rows are therefore 6 ints wide, not 8.
 */
3920 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3921 /* Even part */
3922
3923 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only the first column's row-0 term (the DC coefficient) goes through
 * CLAMP_DC, which is defined elsewhere in this file.
 */
3924 if (ctr == 0)
3925 CLAMP_DC(tmp0);
3926 tmp0 <<= CONST_BITS;
3927 /* Add fudge factor here for final descale. */
3928 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
/* The fudge is half of 2^(CONST_BITS-PASS1_BITS), the amount removed by the
 * RIGHT_SHIFTs at the end of this pass; tmp0 feeds all three outputs.
 */
3929 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3930 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3931 tmp10 = tmp0 + tmp12;
/* tmp2 is reused for the centre row: tmp0 minus twice the c2 term. */
3932 tmp2 = tmp0 - tmp12 - tmp12;
3933
3934 /* Odd part */
3935
/* tmp12 and tmp0 are reused: raw row-1 coefficient, then its c1 product. */
3936 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3937 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3938
3939 /* Final output stage */
3940
3941 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3942 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3943 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3944 }
3945
3946 /* Pass 2: process 3 rows from work array, store into output array.
3947 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3948 */
3949
3950 wsptr = workspace;
/* Each iteration consumes one 6-element workspace row and emits 6 samples. */
3951 for (ctr = 0; ctr < 3; ctr++) {
3952 outptr = output_buf[ctr] + output_col;
3953
3954 /* Even part */
3955
3956 /* Add range center and fudge factor for final descale and range-limit. */
3957 tmp0 = (INT32) wsptr[0] +
3958 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3959 (ONE << (PASS1_BITS+2)));
3960 tmp0 <<= CONST_BITS;
/* After this shift the (ONE << (PASS1_BITS+2)) term equals half of the
 * 2^(CONST_BITS+PASS1_BITS+3) divisor applied in the output stage.
 */
3961 tmp2 = (INT32) wsptr[4];
3962 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3963 tmp1 = tmp0 + tmp10;
3964 tmp11 = tmp0 - tmp10 - tmp10;
/* tmp10 and tmp0 are overwritten several times below; the statement order
 * in this group must be preserved.
 */
3965 tmp10 = (INT32) wsptr[2];
3966 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3967 tmp10 = tmp1 + tmp0;
3968 tmp12 = tmp1 - tmp0;
3969
3970 /* Odd part */
3971
/* tmp0..tmp2 are reused for the odd part; the even part now lives entirely
 * in tmp10..tmp12.
 */
3972 z1 = (INT32) wsptr[1];
3973 z2 = (INT32) wsptr[3];
3974 z3 = (INT32) wsptr[5];
3975 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3976 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3977 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
/* tmp1 is replaced last because the two lines above still read it. */
3978 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3979
3980 /* Final output stage */
3981
/* Mirrored pairs: outptr[k] = tmp1k + tmpk, outptr[5-k] = tmp1k - tmpk for
 * k = 0..2.  Each value is shifted down, masked with RANGE_MASK and then
 * looked up in range_limit.
 */
3982 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3983 CONST_BITS+PASS1_BITS+3)
3984 & RANGE_MASK];
3985 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3986 CONST_BITS+PASS1_BITS+3)
3987 & RANGE_MASK];
3988 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3989 CONST_BITS+PASS1_BITS+3)
3990 & RANGE_MASK];
3991 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3992 CONST_BITS+PASS1_BITS+3)
3993 & RANGE_MASK];
3994 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3995 CONST_BITS+PASS1_BITS+3)
3996 & RANGE_MASK];
3997 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3998 CONST_BITS+PASS1_BITS+3)
3999 & RANGE_MASK];
4000
4001 wsptr += 6; /* advance pointer to next row */
4002 }
4003 }
4004
4005
4006 /*
4007 * Perform dequantization and inverse DCT on one block of coefficients,
4008 * producing a 4x2 output block.
4009 *
4010 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4011 */
4012
4013 GLOBAL(void)
4014 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4015 JCOEFPTR coef_block,
4016 JSAMPARRAY output_buf, JDIMENSION output_col)
4017 {
4018 INT32 tmp0, tmp2, tmp10, tmp12;
4019 INT32 z1, z2, z3;
4020 JCOEFPTR inptr;
4021 ISLOW_MULT_TYPE * quantptr;
4022 INT32 * wsptr;
4023 JSAMPROW outptr;
4024 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4025 int ctr;
4026 INT32 workspace[4*2]; /* buffers data between passes */
4027 SHIFT_TEMPS
4028
4029 /* Pass 1: process columns from input, store into work array. */
4030
4031 inptr = coef_block;
4032 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4033 wsptr = workspace;
4034 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
4035 /* Even part */
4036
4037 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4038 if (ctr == 0)
4039 CLAMP_DC(tmp10);
4040
4041 /* Odd part */
4042
4043 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4044
4045 /* Final output stage */
4046
4047 wsptr[4*0] = tmp10 + tmp0;
4048 wsptr[4*1] = tmp10 - tmp0;
4049 }
4050
4051 /* Pass 2: process 2 rows from work array, store into output array.
4052 * 4-point IDCT kernel,
4053 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4054 */
4055
4056 wsptr = workspace;
4057 for (ctr = 0; ctr < 2; ctr++) {
4058 outptr = output_buf[ctr] + output_col;
4059
4060 /* Even part */
4061
4062 /* Add range center and fudge factor for final descale and range-limit. */
4063 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4064 tmp2 = wsptr[2];
4065
4066 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4067 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4068
4069 /* Odd part */
4070 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4071
4072 z2 = wsptr[1];
4073 z3 = wsptr[3];
4074
4075 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4076 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4077 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4078
4079 /* Final output stage */
4080
4081 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4082 CONST_BITS+3)
4083 & RANGE_MASK];
4084 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4085 CONST_BITS+3)
4086 & RANGE_MASK];
4087 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4088 CONST_BITS+3)
4089 & RANGE_MASK];
4090 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4091 CONST_BITS+3)
4092 & RANGE_MASK];
4093
4094 wsptr += 4; /* advance pointer to next row */
4095 }
4096 }
4097
4098
4099 /*
4100 * Perform dequantization and inverse DCT on one block of coefficients,
4101 * producing a 2x1 output block.
4102 *
4103 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4104 */
4105
4106 GLOBAL(void)
4107 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4108 JCOEFPTR coef_block,
4109 JSAMPARRAY output_buf, JDIMENSION output_col)
4110 {
4111 DCTELEM tmp0, tmp1;
4112 ISLOW_MULT_TYPE * quantptr;
4113 JSAMPROW outptr;
4114 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4115 ISHIFT_TEMPS
4116
4117 /* Pass 1: empty. */
4118
4119 /* Pass 2: process 1 row from input, store into output array. */
4120
4121 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4122 outptr = output_buf[0] + output_col;
4123
4124 /* Even part */
4125
4126 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4127 CLAMP_DC(tmp0);
4128 /* Add range center and fudge factor for final descale and range-limit. */
4129 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4130
4131 /* Odd part */
4132
4133 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4134
4135 /* Final output stage */
4136
4137 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4138 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4139 }
4140
4141
4142 /*
4143 * Perform dequantization and inverse DCT on one block of coefficients,
4144 * producing an 8x16 output block.
4145 *
4146 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 *
 * cinfo:      only used to fetch the range-limit table via IDCT_range_limit().
 * compptr:    its dct_table is read as an array of ISLOW_MULT_TYPE multipliers.
 * coef_block: input coefficients; pass 1 reads rows 0..7 (row stride DCTSIZE)
 *             of 8 columns.
 * output_buf: output rows 0..15 are written, 8 samples each.
 * output_col: offset added to every output row pointer.
4147 */
4148
4149 GLOBAL(void)
4150 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4151 JCOEFPTR coef_block,
4152 JSAMPARRAY output_buf, JDIMENSION output_col)
4153 {
4154 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4155 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4156 INT32 z1, z2, z3, z4;
4157 JCOEFPTR inptr;
4158 ISLOW_MULT_TYPE * quantptr;
4159 int * wsptr;
4160 JSAMPROW outptr;
4161 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4162 int ctr;
4163 int workspace[8*16]; /* buffers data between passes */
4164 SHIFT_TEMPS
4165
4166 /* Pass 1: process columns from input, store into work array.
4167 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4168 */
4169
4170 inptr = coef_block;
4171 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4172 wsptr = workspace;
/* One iteration per column: inptr, quantptr and wsptr all step by one
 * element, and each iteration fills workspace[ctr + 8*k] for k = 0..15.
 */
4173 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4174 /* Even part */
4175
4176 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
/* Only the first column's row-0 term (the DC coefficient) goes through
 * CLAMP_DC, which is defined elsewhere in this file.
 */
4177 if (ctr == 0)
4178 CLAMP_DC(tmp0);
4179 tmp0 <<= CONST_BITS;
4180 /* Add fudge factor here for final descale. */
4181 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
/* The fudge is half of 2^(CONST_BITS-PASS1_BITS), the amount removed by the
 * RIGHT_SHIFTs at the end of this pass; tmp0 feeds tmp10..tmp13 and so
 * reaches every output exactly once.
 */
4182
4183 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4184 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4185 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4186
4187 tmp10 = tmp0 + tmp1;
4188 tmp11 = tmp0 - tmp1;
4189 tmp12 = tmp0 + tmp2;
4190 tmp13 = tmp0 - tmp2;
4191
4192 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4193 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4194 z3 = z1 - z2;
/* z4 must be computed first: the next line overwrites z3 with its product. */
4195 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4196 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4197
/* tmp0..tmp3 are reused here for the contribution of rows 2 and 6. */
4198 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4199 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4200 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4201 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4202
/* tmp20..tmp27 hold the complete even part for output rows 0..7; rows
 * 15..8 reuse them with the odd part subtracted.
 */
4203 tmp20 = tmp10 + tmp0;
4204 tmp27 = tmp10 - tmp0;
4205 tmp21 = tmp12 + tmp1;
4206 tmp26 = tmp12 - tmp1;
4207 tmp22 = tmp13 + tmp2;
4208 tmp25 = tmp13 - tmp2;
4209 tmp23 = tmp11 + tmp3;
4210 tmp24 = tmp11 - tmp3;
4211
4212 /* Odd part */
4213
/* From here tmp0..tmp3 and tmp10..tmp13 are free again and accumulate the
 * eight odd-part terms.  The statements below update them incrementally
 * while z1 and z2 are repeatedly overwritten, so their order matters.
 */
4214 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4215 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4216 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4217 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4218
4219 tmp11 = z1 + z3;
4220
4221 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4222 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4223 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4224 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4225 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4226 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4227 tmp0 = tmp1 + tmp2 + tmp3 -
4228 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4229 tmp13 = tmp10 + tmp11 + tmp12 -
4230 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
/* The row-1 input in z1 is no longer needed; z1 becomes a scratch product. */
4231 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4232 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4233 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4234 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4235 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4236 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
/* z2 now holds (row 3 + row 7); the plain row-3 value is gone after this. */
4237 z2 += z4;
4238 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4239 tmp1 += z1;
4240 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4241 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4242 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4243 tmp12 += z2;
4244 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4245 tmp2 += z2;
4246 tmp3 += z2;
4247 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4248 tmp10 += z2;
4249 tmp11 += z2;
4250
4251 /* Final output stage */
4252
/* Mirrored pairs: workspace rows k and 15-k are even + odd and even - odd.
 * Rows 0..3 pair tmp2k with tmpk, rows 4..7 pair tmp2k with tmp1(k-4).
 */
4253 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4254 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4255 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4256 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4257 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4258 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4259 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4260 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4261 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4262 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4263 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4264 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4265 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4266 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4267 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4268 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4269 }
4270
4271 /* Pass 2: process rows from work array, store into output array.
4272 * Note that we must descale the results by a factor of 8 == 2**3,
4273 * and also undo the PASS1_BITS scaling.
4274 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4275 */
4276
4277 wsptr = workspace;
/* Each iteration consumes one 8-element workspace row and emits 8 samples. */
4278 for (ctr = 0; ctr < 16; ctr++) {
4279 outptr = output_buf[ctr] + output_col;
4280
4281 /* Even part: reverse the even part of the forward DCT.
4282 * The rotator is c(-6).
4283 */
4284
4285 /* Add range center and fudge factor for final descale and range-limit. */
4286 z2 = (INT32) wsptr[0] +
4287 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4288 (ONE << (PASS1_BITS+2)));
4289 z3 = (INT32) wsptr[4];
4290
/* Shifting by CONST_BITS turns the (ONE << (PASS1_BITS+2)) term into half
 * of the 2^(CONST_BITS+PASS1_BITS+3) divisor applied in the output stage.
 */
4291 tmp0 = (z2 + z3) << CONST_BITS;
4292 tmp1 = (z2 - z3) << CONST_BITS;
4293
4294 z2 = (INT32) wsptr[2];
4295 z3 = (INT32) wsptr[6];
4296
4297 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4298 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4299 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4300
4301 tmp10 = tmp0 + tmp2;
4302 tmp13 = tmp0 - tmp2;
4303 tmp11 = tmp1 + tmp3;
4304 tmp12 = tmp1 - tmp3;
4305
4306 /* Odd part per figure 8; the matrix is unitary and hence its
4307 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4308 */
4309
/* tmp0..tmp3 are reloaded with the odd inputs; the even part now lives
 * entirely in tmp10..tmp13.
 */
4310 tmp0 = (INT32) wsptr[7];
4311 tmp1 = (INT32) wsptr[5];
4312 tmp2 = (INT32) wsptr[3];
4313 tmp3 = (INT32) wsptr[1];
4314
4315 z2 = tmp0 + tmp2;
4316 z3 = tmp1 + tmp3;
4317
4318 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4319 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4320 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4321 z2 += z1;
4322 z3 += z1;
4323
/* z1 must be formed from the unscaled tmp0 and tmp3 before they are
 * overwritten by the two MULTIPLYs that follow.
 */
4324 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4325 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4326 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4327 tmp0 += z1 + z2;
4328 tmp3 += z1 + z3;
4329
/* Same ordering constraint for tmp1 and tmp2. */
4330 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4331 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4332 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4333 tmp1 += z1 + z3;
4334 tmp2 += z1 + z2;
4335
4336 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4337
/* Mirrored pairs: outptr[k] and outptr[7-k] are the sum and difference of
 * tmp1k and tmp(3-k).  Each value is shifted down, masked with RANGE_MASK
 * and then looked up in range_limit.
 */
4338 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4339 CONST_BITS+PASS1_BITS+3)
4340 & RANGE_MASK];
4341 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4342 CONST_BITS+PASS1_BITS+3)
4343 & RANGE_MASK];
4344 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4345 CONST_BITS+PASS1_BITS+3)
4346 & RANGE_MASK];
4347 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4348 CONST_BITS+PASS1_BITS+3)
4349 & RANGE_MASK];
4350 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4351 CONST_BITS+PASS1_BITS+3)
4352 & RANGE_MASK];
4353 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4354 CONST_BITS+PASS1_BITS+3)
4355 & RANGE_MASK];
4356 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4357 CONST_BITS+PASS1_BITS+3)
4358 & RANGE_MASK];
4359 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4360 CONST_BITS+PASS1_BITS+3)
4361 & RANGE_MASK];
4362
/* NOTE(review): pass 1 stores rows with a literal stride of 8, so this
 * advance presumably relies on DCTSIZE == 8 -- confirm.
 */
4363 wsptr += DCTSIZE; /* advance pointer to next row */
4364 }
4365 }
4366
4367
4368 /*
4369 * Perform dequantization and inverse DCT on one block of coefficients,
4370 * producing a 7x14 output block.
4371 *
4372 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4373 */
4374
4375 GLOBAL(void)
4376 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4377 JCOEFPTR coef_block,
4378 JSAMPARRAY output_buf, JDIMENSION output_col)
4379 {
4380 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4381 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4382 INT32 z1, z2, z3, z4;
4383 JCOEFPTR inptr;
4384 ISLOW_MULT_TYPE * quantptr;
4385 int * wsptr;
4386 JSAMPROW outptr;
4387 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4388 int ctr;
4389 int workspace[7*14]; /* buffers data between passes */
4390 SHIFT_TEMPS
4391
4392 /* Pass 1: process columns from input, store into work array.
4393 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4394 */
4395
4396 inptr = coef_block;
4397 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4398 wsptr = workspace;
4399 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4400 /* Even part */
4401
4402 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4403 if (ctr == 0)
4404 CLAMP_DC(z1);
4405 z1 <<= CONST_BITS;
4406 /* Add fudge factor here for final descale. */
4407 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4408 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4409 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4410 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4411 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4412
4413 tmp10 = z1 + z2;
4414 tmp11 = z1 + z3;
4415 tmp12 = z1 - z4;
4416
4417 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4418 CONST_BITS-PASS1_BITS);
4419
4420 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4421 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4422
4423 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4424
4425 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4426 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4427 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4428 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4429
4430 tmp20 = tmp10 + tmp13;
4431 tmp26 = tmp10 - tmp13;
4432 tmp21 = tmp11 + tmp14;
4433 tmp25 = tmp11 - tmp14;
4434 tmp22 = tmp12 + tmp15;
4435 tmp24 = tmp12 - tmp15;
4436
4437 /* Odd part */
4438
4439 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4440 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4441 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4442 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4443 tmp13 = z4 << CONST_BITS;
4444
4445 tmp14 = z1 + z3;
4446 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4447 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4448 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4449 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4450 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4451 z1 -= z2;
4452 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4453 tmp16 += tmp15;
4454 z1 += z4;
4455 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4456 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4457 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4458 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4459 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4460 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4461
4462 tmp13 = (z1 - z3) << PASS1_BITS;
4463
4464 /* Final output stage */
4465
4466 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4467 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4468 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4469 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4470 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4471 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4472 wsptr[7*3] = (int) (tmp23 + tmp13);
4473 wsptr[7*10] = (int) (tmp23 - tmp13);
4474 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4475 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4476 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4477 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4478 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4479 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4480 }
4481
4482 /* Pass 2: process 14 rows from work array, store into output array.
4483 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4484 */
4485
4486 wsptr = workspace;
4487 for (ctr = 0; ctr < 14; ctr++) {
4488 outptr = output_buf[ctr] + output_col;
4489
4490 /* Even part */
4491
4492 /* Add range center and fudge factor for final descale and range-limit. */
4493 tmp23 = (INT32) wsptr[0] +
4494 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4495 (ONE << (PASS1_BITS+2)));
4496 tmp23 <<= CONST_BITS;
4497
4498 z1 = (INT32) wsptr[2];
4499 z2 = (INT32) wsptr[4];
4500 z3 = (INT32) wsptr[6];
4501
4502 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4503 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4504 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4505 tmp10 = z1 + z3;
4506 z2 -= tmp10;
4507 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4508 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4509 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4510 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4511
4512 /* Odd part */
4513
4514 z1 = (INT32) wsptr[1];
4515 z2 = (INT32) wsptr[3];
4516 z3 = (INT32) wsptr[5];
4517
4518 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4519 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4520 tmp10 = tmp11 - tmp12;
4521 tmp11 += tmp12;
4522 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4523 tmp11 += tmp12;
4524 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4525 tmp10 += z2;
4526 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4527
4528 /* Final output stage */
4529
4530 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4531 CONST_BITS+PASS1_BITS+3)
4532 & RANGE_MASK];
4533 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4534 CONST_BITS+PASS1_BITS+3)
4535 & RANGE_MASK];
4536 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4537 CONST_BITS+PASS1_BITS+3)
4538 & RANGE_MASK];
4539 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4540 CONST_BITS+PASS1_BITS+3)
4541 & RANGE_MASK];
4542 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4543 CONST_BITS+PASS1_BITS+3)
4544 & RANGE_MASK];
4545 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4546 CONST_BITS+PASS1_BITS+3)
4547 & RANGE_MASK];
4548 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4549 CONST_BITS+PASS1_BITS+3)
4550 & RANGE_MASK];
4551
4552 wsptr += 7; /* advance pointer to next row */
4553 }
4554 }
4555
4556
4557 /*
4558 * Perform dequantization and inverse DCT on one block of coefficients,
4559 * producing a 6x12 output block.
4560 *
4561 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4562 */
4563
4564 GLOBAL(void)
4565 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4566 JCOEFPTR coef_block,
4567 JSAMPARRAY output_buf, JDIMENSION output_col)
4568 {
4569 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4570 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4571 INT32 z1, z2, z3, z4;
4572 JCOEFPTR inptr;
4573 ISLOW_MULT_TYPE * quantptr;
4574 int * wsptr;
4575 JSAMPROW outptr;
4576 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4577 int ctr;
4578 int workspace[6*12]; /* buffers data between passes */
4579 SHIFT_TEMPS
4580
4581 /* Pass 1: process columns from input, store into work array.
4582 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4583 */
4584
4585 inptr = coef_block;
4586 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4587 wsptr = workspace;
4588 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4589 /* Even part */
4590
4591 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4592 if (ctr == 0)
4593 CLAMP_DC(z3);
4594 z3 <<= CONST_BITS;
4595 /* Add fudge factor here for final descale. */
4596 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4597
4598 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4599 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4600
4601 tmp10 = z3 + z4;
4602 tmp11 = z3 - z4;
4603
4604 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4605 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4606 z1 <<= CONST_BITS;
4607 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4608 z2 <<= CONST_BITS;
4609
4610 tmp12 = z1 - z2;
4611
4612 tmp21 = z3 + tmp12;
4613 tmp24 = z3 - tmp12;
4614
4615 tmp12 = z4 + z2;
4616
4617 tmp20 = tmp10 + tmp12;
4618 tmp25 = tmp10 - tmp12;
4619
4620 tmp12 = z4 - z1 - z2;
4621
4622 tmp22 = tmp11 + tmp12;
4623 tmp23 = tmp11 - tmp12;
4624
4625 /* Odd part */
4626
4627 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4628 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4629 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4630 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4631
4632 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4633 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4634
4635 tmp10 = z1 + z3;
4636 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4637 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4638 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4639 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4640 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4641 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4642 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4643 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4644
4645 z1 -= z4;
4646 z2 -= z3;
4647 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4648 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4649 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4650
4651 /* Final output stage */
4652
4653 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4654 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4655 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4656 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4657 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4658 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4659 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4660 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4661 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4662 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4663 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4664 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4665 }
4666
4667 /* Pass 2: process 12 rows from work array, store into output array.
4668 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4669 */
4670
4671 wsptr = workspace;
4672 for (ctr = 0; ctr < 12; ctr++) {
4673 outptr = output_buf[ctr] + output_col;
4674
4675 /* Even part */
4676
4677 /* Add range center and fudge factor for final descale and range-limit. */
4678 tmp10 = (INT32) wsptr[0] +
4679 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4680 (ONE << (PASS1_BITS+2)));
4681 tmp10 <<= CONST_BITS;
4682 tmp12 = (INT32) wsptr[4];
4683 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4684 tmp11 = tmp10 + tmp20;
4685 tmp21 = tmp10 - tmp20 - tmp20;
4686 tmp20 = (INT32) wsptr[2];
4687 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4688 tmp20 = tmp11 + tmp10;
4689 tmp22 = tmp11 - tmp10;
4690
4691 /* Odd part */
4692
4693 z1 = (INT32) wsptr[1];
4694 z2 = (INT32) wsptr[3];
4695 z3 = (INT32) wsptr[5];
4696 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4697 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4698 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4699 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4700
4701 /* Final output stage */
4702
4703 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4704 CONST_BITS+PASS1_BITS+3)
4705 & RANGE_MASK];
4706 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4707 CONST_BITS+PASS1_BITS+3)
4708 & RANGE_MASK];
4709 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4710 CONST_BITS+PASS1_BITS+3)
4711 & RANGE_MASK];
4712 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4713 CONST_BITS+PASS1_BITS+3)
4714 & RANGE_MASK];
4715 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4716 CONST_BITS+PASS1_BITS+3)
4717 & RANGE_MASK];
4718 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4719 CONST_BITS+PASS1_BITS+3)
4720 & RANGE_MASK];
4721
4722 wsptr += 6; /* advance pointer to next row */
4723 }
4724 }
4725
4726
4727 /*
4728 * Perform dequantization and inverse DCT on one block of coefficients,
4729 * producing a 5x10 output block.
4730 *
4731 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4732 */
4733
4734 GLOBAL(void)
4735 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4736 JCOEFPTR coef_block,
4737 JSAMPARRAY output_buf, JDIMENSION output_col)
4738 {
4739 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4740 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4741 INT32 z1, z2, z3, z4, z5;
4742 JCOEFPTR inptr;
4743 ISLOW_MULT_TYPE * quantptr;
4744 int * wsptr;
4745 JSAMPROW outptr;
4746 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4747 int ctr;
4748 int workspace[5*10]; /* buffers data between passes */
4749 SHIFT_TEMPS
4750
4751 /* Pass 1: process columns from input, store into work array.
4752 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4753 */
4754
4755 inptr = coef_block;
4756 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4757 wsptr = workspace;
4758 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4759 /* Even part */
4760
4761 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4762 if (ctr == 0)
4763 CLAMP_DC(z3);
4764 z3 <<= CONST_BITS;
4765 /* Add fudge factor here for final descale. */
4766 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4767 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4768 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4769 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4770 tmp10 = z3 + z1;
4771 tmp11 = z3 - z2;
4772
4773 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4774 CONST_BITS-PASS1_BITS);
4775
4776 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4777 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4778
4779 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4780 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4781 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4782
4783 tmp20 = tmp10 + tmp12;
4784 tmp24 = tmp10 - tmp12;
4785 tmp21 = tmp11 + tmp13;
4786 tmp23 = tmp11 - tmp13;
4787
4788 /* Odd part */
4789
4790 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4791 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4792 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4793 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4794
4795 tmp11 = z2 + z4;
4796 tmp13 = z2 - z4;
4797
4798 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4799 z5 = z3 << CONST_BITS;
4800
4801 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4802 z4 = z5 + tmp12;
4803
4804 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4805 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4806
4807 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4808 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4809
4810 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4811
4812 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4813 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4814
4815 /* Final output stage */
4816
4817 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4818 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4819 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4820 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4821 wsptr[5*2] = (int) (tmp22 + tmp12);
4822 wsptr[5*7] = (int) (tmp22 - tmp12);
4823 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4824 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4825 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4826 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4827 }
4828
4829 /* Pass 2: process 10 rows from work array, store into output array.
4830 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4831 */
4832
4833 wsptr = workspace;
4834 for (ctr = 0; ctr < 10; ctr++) {
4835 outptr = output_buf[ctr] + output_col;
4836
4837 /* Even part */
4838
4839 /* Add range center and fudge factor for final descale and range-limit. */
4840 tmp12 = (INT32) wsptr[0] +
4841 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4842 (ONE << (PASS1_BITS+2)));
4843 tmp12 <<= CONST_BITS;
4844 tmp13 = (INT32) wsptr[2];
4845 tmp14 = (INT32) wsptr[4];
4846 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4847 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4848 z3 = tmp12 + z2;
4849 tmp10 = z3 + z1;
4850 tmp11 = z3 - z1;
4851 tmp12 -= z2 << 2;
4852
4853 /* Odd part */
4854
4855 z2 = (INT32) wsptr[1];
4856 z3 = (INT32) wsptr[3];
4857
4858 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4859 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4860 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4861
4862 /* Final output stage */
4863
4864 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4865 CONST_BITS+PASS1_BITS+3)
4866 & RANGE_MASK];
4867 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4868 CONST_BITS+PASS1_BITS+3)
4869 & RANGE_MASK];
4870 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4871 CONST_BITS+PASS1_BITS+3)
4872 & RANGE_MASK];
4873 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4874 CONST_BITS+PASS1_BITS+3)
4875 & RANGE_MASK];
4876 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4877 CONST_BITS+PASS1_BITS+3)
4878 & RANGE_MASK];
4879
4880 wsptr += 5; /* advance pointer to next row */
4881 }
4882 }
4883
4884
4885 /*
4886 * Perform dequantization and inverse DCT on one block of coefficients,
4887 * producing a 4x8 output block.
4888 *
4889 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4890 */
4891
4892 GLOBAL(void)
4893 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4894 JCOEFPTR coef_block,
4895 JSAMPARRAY output_buf, JDIMENSION output_col)
4896 {
4897 INT32 tmp0, tmp1, tmp2, tmp3;
4898 INT32 tmp10, tmp11, tmp12, tmp13;
4899 INT32 z1, z2, z3;
4900 JCOEFPTR inptr;
4901 ISLOW_MULT_TYPE * quantptr;
4902 int * wsptr;
4903 JSAMPROW outptr;
4904 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4905 int ctr;
4906 int workspace[4*8]; /* buffers data between passes */
4907 SHIFT_TEMPS
4908
4909 /* Pass 1: process columns from input, store into work array.
4910 * Note results are scaled up by sqrt(8) compared to a true IDCT;
4911 * furthermore, we scale the results by 2**PASS1_BITS.
4912 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4913 */
4914
4915 inptr = coef_block;
4916 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4917 wsptr = workspace;
4918 for (ctr = 4; ctr > 0; ctr--) {
4919 /* Due to quantization, we will usually find that many of the input
4920 * coefficients are zero, especially the AC terms. We can exploit this
4921 * by short-circuiting the IDCT calculation for any column in which all
4922 * the AC terms are zero. In that case each output is equal to the
4923 * DC coefficient (with scale factor as needed).
4924 * With typical images and quantization tables, half or more of the
4925 * column DCT calculations can be simplified this way.
4926 */
4927
4928 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4929 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4930 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4931 inptr[DCTSIZE*7] == 0) {
4932 /* AC terms all zero */
4933 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4934 if (ctr == 4)
4935 CLAMP_DC(dcval);
4936 dcval <<= PASS1_BITS;
4937 wsptr[4*0] = dcval;
4938 wsptr[4*1] = dcval;
4939 wsptr[4*2] = dcval;
4940 wsptr[4*3] = dcval;
4941 wsptr[4*4] = dcval;
4942 wsptr[4*5] = dcval;
4943 wsptr[4*6] = dcval;
4944 wsptr[4*7] = dcval;
4945
4946 inptr++; /* advance pointers to next column */
4947 quantptr++;
4948 wsptr++;
4949 continue;
4950 }
4951
4952 /* Even part: reverse the even part of the forward DCT.
4953 * The rotator is c(-6).
4954 */
4955
4956 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4957 if (ctr == 4)
4958 CLAMP_DC(z2);
4959 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4960 z2 <<= CONST_BITS;
4961 z3 <<= CONST_BITS;
4962 /* Add fudge factor here for final descale. */
4963 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4964
4965 tmp0 = z2 + z3;
4966 tmp1 = z2 - z3;
4967
4968 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4969 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4970
4971 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4972 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4973 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4974
4975 tmp10 = tmp0 + tmp2;
4976 tmp13 = tmp0 - tmp2;
4977 tmp11 = tmp1 + tmp3;
4978 tmp12 = tmp1 - tmp3;
4979
4980 /* Odd part per figure 8; the matrix is unitary and hence its
4981 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4982 */
4983
4984 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4985 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4986 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4987 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4988
4989 z2 = tmp0 + tmp2;
4990 z3 = tmp1 + tmp3;
4991
4992 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4993 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4994 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4995 z2 += z1;
4996 z3 += z1;
4997
4998 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4999 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
5000 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
5001 tmp0 += z1 + z2;
5002 tmp3 += z1 + z3;
5003
5004 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
5005 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
5006 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
5007 tmp1 += z1 + z3;
5008 tmp2 += z1 + z2;
5009
5010 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
5011
5012 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
5013 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
5014 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
5015 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
5016 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
5017 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
5018 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
5019 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
5020
5021 inptr++; /* advance pointers to next column */
5022 quantptr++;
5023 wsptr++;
5024 }
5025
5026 /* Pass 2: process 8 rows from work array, store into output array.
5027 * 4-point IDCT kernel,
5028 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5029 */
5030
5031 wsptr = workspace;
5032 for (ctr = 0; ctr < 8; ctr++) {
5033 outptr = output_buf[ctr] + output_col;
5034
5035 /* Even part */
5036
5037 /* Add range center and fudge factor for final descale and range-limit. */
5038 tmp0 = (INT32) wsptr[0] +
5039 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5040 (ONE << (PASS1_BITS+2)));
5041 tmp2 = (INT32) wsptr[2];
5042
5043 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5044 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5045
5046 /* Odd part */
5047 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5048
5049 z2 = (INT32) wsptr[1];
5050 z3 = (INT32) wsptr[3];
5051
5052 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5053 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5054 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5055
5056 /* Final output stage */
5057
5058 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5059 CONST_BITS+PASS1_BITS+3)
5060 & RANGE_MASK];
5061 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5062 CONST_BITS+PASS1_BITS+3)
5063 & RANGE_MASK];
5064 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
5065 CONST_BITS+PASS1_BITS+3)
5066 & RANGE_MASK];
5067 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
5068 CONST_BITS+PASS1_BITS+3)
5069 & RANGE_MASK];
5070
5071 wsptr += 4; /* advance pointer to next row */
5072 }
5073 }
5074
5075
5076 /*
5077 * Perform dequantization and inverse DCT on one block of coefficients,
5078 * producing a 3x6 output block.
5079 *
5080 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5081 */
5082
5083 GLOBAL(void)
5084 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5085 JCOEFPTR coef_block,
5086 JSAMPARRAY output_buf, JDIMENSION output_col)
5087 {
5088 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5089 INT32 z1, z2, z3;
5090 JCOEFPTR inptr;
5091 ISLOW_MULT_TYPE * quantptr;
5092 int * wsptr;
5093 JSAMPROW outptr;
5094 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5095 int ctr;
5096 int workspace[3*6]; /* buffers data between passes */
5097 SHIFT_TEMPS
5098
5099 /* Pass 1: process columns from input, store into work array.
5100 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5101 */
5102
5103 inptr = coef_block;
5104 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5105 wsptr = workspace;
5106 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5107 /* Even part */
5108
5109 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5110 if (ctr == 0)
5111 CLAMP_DC(tmp0);
5112 tmp0 <<= CONST_BITS;
5113 /* Add fudge factor here for final descale. */
5114 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5115 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5116 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5117 tmp1 = tmp0 + tmp10;
5118 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5119 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5120 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5121 tmp10 = tmp1 + tmp0;
5122 tmp12 = tmp1 - tmp0;
5123
5124 /* Odd part */
5125
5126 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5127 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5128 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5129 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5130 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5131 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5132 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5133
5134 /* Final output stage */
5135
5136 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5137 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5138 wsptr[3*1] = (int) (tmp11 + tmp1);
5139 wsptr[3*4] = (int) (tmp11 - tmp1);
5140 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5141 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5142 }
5143
5144 /* Pass 2: process 6 rows from work array, store into output array.
5145 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5146 */
5147
5148 wsptr = workspace;
5149 for (ctr = 0; ctr < 6; ctr++) {
5150 outptr = output_buf[ctr] + output_col;
5151
5152 /* Even part */
5153
5154 /* Add range center and fudge factor for final descale and range-limit. */
5155 tmp0 = (INT32) wsptr[0] +
5156 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5157 (ONE << (PASS1_BITS+2)));
5158 tmp0 <<= CONST_BITS;
5159 tmp2 = (INT32) wsptr[2];
5160 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5161 tmp10 = tmp0 + tmp12;
5162 tmp2 = tmp0 - tmp12 - tmp12;
5163
5164 /* Odd part */
5165
5166 tmp12 = (INT32) wsptr[1];
5167 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5168
5169 /* Final output stage */
5170
5171 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5172 CONST_BITS+PASS1_BITS+3)
5173 & RANGE_MASK];
5174 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5175 CONST_BITS+PASS1_BITS+3)
5176 & RANGE_MASK];
5177 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5178 CONST_BITS+PASS1_BITS+3)
5179 & RANGE_MASK];
5180
5181 wsptr += 3; /* advance pointer to next row */
5182 }
5183 }
5184
5185
5186 /*
5187 * Perform dequantization and inverse DCT on one block of coefficients,
5188 * producing a 2x4 output block.
5189 *
5190 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5191 */
5192
5193 GLOBAL(void)
5194 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5195 JCOEFPTR coef_block,
5196 JSAMPARRAY output_buf, JDIMENSION output_col)
5197 {
5198 INT32 tmp0, tmp2, tmp10, tmp12;
5199 INT32 z1, z2, z3;
5200 JCOEFPTR inptr;
5201 ISLOW_MULT_TYPE * quantptr;
5202 INT32 * wsptr;
5203 JSAMPROW outptr;
5204 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5205 int ctr;
5206 INT32 workspace[2*4]; /* buffers data between passes */
5207 SHIFT_TEMPS
5208
5209 /* Pass 1: process columns from input, store into work array.
5210 * 4-point IDCT kernel,
5211 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5212 */
5213
5214 inptr = coef_block;
5215 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5216 wsptr = workspace;
5217 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5218 /* Even part */
5219
5220 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5221 if (ctr == 0)
5222 CLAMP_DC(tmp0);
5223 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5224
5225 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5226 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5227
5228 /* Odd part */
5229 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5230
5231 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5232 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5233
5234 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5235 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5236 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5237
5238 /* Final output stage */
5239
5240 wsptr[2*0] = tmp10 + tmp0;
5241 wsptr[2*3] = tmp10 - tmp0;
5242 wsptr[2*1] = tmp12 + tmp2;
5243 wsptr[2*2] = tmp12 - tmp2;
5244 }
5245
5246 /* Pass 2: process 4 rows from work array, store into output array. */
5247
5248 wsptr = workspace;
5249 for (ctr = 0; ctr < 4; ctr++) {
5250 outptr = output_buf[ctr] + output_col;
5251
5252 /* Even part */
5253
5254 /* Add range center and fudge factor for final descale and range-limit. */
5255 tmp10 = wsptr[0] +
5256 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5257 (ONE << (CONST_BITS+2)));
5258
5259 /* Odd part */
5260
5261 tmp0 = wsptr[1];
5262
5263 /* Final output stage */
5264
5265 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5266 & RANGE_MASK];
5267 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5268 & RANGE_MASK];
5269
5270 wsptr += 2; /* advance pointer to next row */
5271 }
5272 }
5273
5274
5275 /*
5276 * Perform dequantization and inverse DCT on one block of coefficients,
5277 * producing a 1x2 output block.
5278 *
5279 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5280 */
5281
5282 GLOBAL(void)
5283 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5284 JCOEFPTR coef_block,
5285 JSAMPARRAY output_buf, JDIMENSION output_col)
5286 {
5287 DCTELEM tmp0, tmp1;
5288 ISLOW_MULT_TYPE * quantptr;
5289 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5290 ISHIFT_TEMPS
5291
5292 /* Process 1 column from input, store into output array. */
5293
5294 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5295
5296 /* Even part */
5297
5298 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5299 CLAMP_DC(tmp0);
5300 /* Add range center and fudge factor for final descale and range-limit. */
5301 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5302
5303 /* Odd part */
5304
5305 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5306
5307 /* Final output stage */
5308
5309 output_buf[0][output_col] =
5310 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5311 output_buf[1][output_col] =
5312 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5313 }
5314
5315 #endif /* IDCT_SCALING_SUPPORTED */
5316 #endif /* DCT_ISLOW_SUPPORTED */