Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/libjpeg/jidctint.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * jidctint.c | |
| 3 * | |
| 4 * Copyright (C) 1991-1998, Thomas G. Lane. | |
| 5 * Modification developed 2002-2018 by Guido Vollbeding. | |
| 6 * This file is part of the Independent JPEG Group's software. | |
| 7 * For conditions of distribution and use, see the accompanying README file. | |
| 8 * | |
| 9 * This file contains a slow-but-accurate integer implementation of the | |
| 10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine | |
| 11 * must also perform dequantization of the input coefficients. | |
| 12 * | |
| 13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT | |
| 14 * on each row (or vice versa, but it's more convenient to emit a row at | |
| 15 * a time). Direct algorithms are also available, but they are much more | |
| 16 * complex and seem not to be any faster when reduced to code. | |
| 17 * | |
| 18 * This implementation is based on an algorithm described in | |
| 19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
| 20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
| 21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
| 22 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
| 23 * We use their alternate method with 12 multiplies and 32 adds. | |
| 24 * The advantage of this method is that no data path contains more than one | |
| 25 * multiplication; this allows a very simple and accurate implementation in | |
| 26 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
| 27 * | |
| 28 * We also provide IDCT routines with various output sample block sizes for | |
| 29 * direct resolution reduction or enlargement and for direct resolving the | |
| 30 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN | |
| 31 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block. | |
| 32 * | |
| 33 * For N<8 we simply take the corresponding low-frequency coefficients of | |
| 34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block | |
| 35 * to yield the downscaled outputs. | |
| 36 * This can be seen as direct low-pass downsampling from the DCT domain | |
| 37 * point of view rather than the usual spatial domain point of view, | |
| 38 * yielding significant computational savings and results at least | |
| 39 * as good as common bilinear (averaging) spatial downsampling. | |
| 40 * | |
| 41 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as | |
| 42 * lower frequencies and higher frequencies assumed to be zero. | |
| 43 * It turns out that the computational effort is similar to the 8x8 IDCT | |
| 44 * regarding the output size. | |
| 45 * Furthermore, the scaling and descaling is the same for all IDCT sizes. | |
| 46 * | |
| 47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases | |
| 48 * since there would be too many additional constants to pre-calculate. | |
| 49 */ | |
| 50 | |
| 51 #define JPEG_INTERNALS | |
| 52 #include "jinclude.h" | |
| 53 #include "jpeglib.h" | |
| 54 #include "jdct.h" /* Private declarations for DCT subsystem */ | |
| 55 | |
| 56 #ifdef DCT_ISLOW_SUPPORTED | |
| 57 | |
| 58 | |
| 59 /* | |
| 60 * This module is specialized to the case DCTSIZE = 8. | |
| 61 */ | |
| 62 | |
| 63 #if DCTSIZE != 8 | |
| 64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ | |
| 65 #endif | |
| 66 | |
| 67 | |
| 68 /* | |
| 69 * The poop on this scaling stuff is as follows: | |
| 70 * | |
| 71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
| 72 * larger than the true IDCT outputs. The final outputs are therefore | |
| 73 * a factor of N larger than desired; since N=8 this can be cured by | |
| 74 * a simple right shift at the end of the algorithm. The advantage of | |
| 75 * this arrangement is that we save two multiplications per 1-D IDCT, | |
| 76 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
| 77 * | |
| 78 * We have to do addition and subtraction of the integer inputs, which | |
| 79 * is no problem, and multiplication by fractional constants, which is | |
| 80 * a problem to do in integer arithmetic. We multiply all the constants | |
| 81 * by CONST_SCALE and convert them to integer constants (thus retaining | |
| 82 * CONST_BITS bits of precision in the constants). After doing a | |
| 83 * multiplication we have to divide the product by CONST_SCALE, with proper | |
| 84 * rounding, to produce the correct output. This division can be done | |
| 85 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
| 86 * as long as possible so that partial sums can be added together with | |
| 87 * full fractional precision. | |
| 88 * | |
| 89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
| 90 * they are represented to better-than-integral precision. These outputs | |
| 91 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
| 92 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
| 93 * intermediate INT32 array would be needed.) | |
| 94 * | |
| 95 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
| 96 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
| 97 * shows that the values given below are the most effective. | |
| 98 */ | |
| 99 | |
/* Fixed-point scaling parameters.
 * CONST_BITS: bits of precision kept in the integer multiplier constants
 *             (the same value is used for every sample depth).
 * PASS1_BITS: extra bits by which the pass-1 outputs are scaled up.
 */
#define CONST_BITS  13

#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
#endif
| 107 | |
/* Multiplier constants for the 8-point kernel.
 *
 * For the usual CONST_BITS == 13 the values are written out as integer
 * literals, because some C compilers fail to reduce "FIX(constant)" at
 * compile time and would otherwise perform useless floating-point work
 * at run time.  For any other CONST_BITS the FIX() macro is used; add
 * pre-calculated values here if that matters for your compiler.
 */

#if CONST_BITS != 13
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#else
#define FIX_0_298631336  ((INT32)  2446)   /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)   /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)   /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)   /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)   /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)   /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)  /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)  /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)  /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)  /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)  /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)  /* FIX(3.072711026) */
#endif
| 142 | |
| 143 | |
/* Clamp DC value to acceptable range for bug 697186.
 *
 * dcval must be a modifiable lvalue; it is limited in place to
 * [-1024, 1023].  The argument is evaluated more than once, so it must
 * not have side effects.
 *
 * The body is wrapped in do { } while (0) so that "CLAMP_DC(x);" is a
 * single statement and stays correct inside an unbraced if/else.
 * NOTE(review): the bounds are fixed constants and are not scaled with
 * BITS_IN_JSAMPLE -- confirm this is intended for non-8-bit builds.
 */
#define CLAMP_DC(dcval) \
  do { \
    if ((dcval) < -1024) \
      (dcval) = -1024; \
    else if ((dcval) > 1023) \
      (dcval) = 1023; \
  } while (0)
| 152 | |
/* MULTIPLY(var,c): product of an INT32 variable and an INT32 constant,
 * yielding an INT32 result.
 * With 8-bit samples and the recommended scaling, neither operand needs
 * more than 16 bits, so the 16x16->32 bit multiply MULTIPLY16C16 is used.
 * Any other sample depth (e.g. 12-bit) needs a full 32-bit multiplication.
 */

#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(var,c)  ((var) * (c))
#else
#define MULTIPLY(var,c)  MULTIPLY16C16(var,c)
#endif
| 165 | |
| 166 | |
/* DEQUANTIZE(coefficient, multiplier): scale one coefficient by its
 * multiplier-table entry.  The coefficient is first converted to
 * ISLOW_MULT_TYPE.  In this module both inputs and the result are
 * 16 bits or less, so either an int or a short multiply will work.
 */

#define DEQUANTIZE(coefficient,multiplier) \
  (((ISLOW_MULT_TYPE) (coefficient)) * (multiplier))
| 173 | |
| 174 | |
| 175 /* | |
| 176 * Perform dequantization and inverse DCT on one block of coefficients. | |
| 177 * | |
| 178 * Optimized algorithm with 12 multiplications in the 1-D kernel. | |
| 179 * cK represents sqrt(2) * cos(K*pi/16). | |
| 180 */ | |
| 181 | |
| 182 GLOBAL(void) | |
| 183 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 184 JCOEFPTR coef_block, | |
| 185 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 186 { | |
| 187 INT32 tmp0, tmp1, tmp2, tmp3; | |
| 188 INT32 tmp10, tmp11, tmp12, tmp13; | |
| 189 INT32 z1, z2, z3; | |
| 190 JCOEFPTR inptr; | |
| 191 ISLOW_MULT_TYPE * quantptr; | |
| 192 int * wsptr; | |
| 193 JSAMPROW outptr; | |
| 194 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 195 int ctr; | |
| 196 int workspace[DCTSIZE2]; /* buffers data between passes */ | |
| 197 SHIFT_TEMPS | |
| 198 | |
| 199 /* Pass 1: process columns from input, store into work array. | |
| 200 * Note results are scaled up by sqrt(8) compared to a true IDCT; | |
| 201 * furthermore, we scale the results by 2**PASS1_BITS. | |
| 202 */ | |
| 203 | |
| 204 inptr = coef_block; | |
| 205 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 206 wsptr = workspace; | |
| 207 for (ctr = DCTSIZE; ctr > 0; ctr--) { | |
| 208 /* Due to quantization, we will usually find that many of the input | |
| 209 * coefficients are zero, especially the AC terms. We can exploit this | |
| 210 * by short-circuiting the IDCT calculation for any column in which all | |
| 211 * the AC terms are zero. In that case each output is equal to the | |
| 212 * DC coefficient (with scale factor as needed). | |
| 213 * With typical images and quantization tables, half or more of the | |
| 214 * column DCT calculations can be simplified this way. | |
| 215 */ | |
| 216 | |
| 217 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && | |
| 218 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && | |
| 219 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && | |
| 220 inptr[DCTSIZE*7] == 0) { | |
| 221 /* AC terms all zero */ | |
| 222 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 223 if (ctr == DCTSIZE) | |
| 224 CLAMP_DC(dcval); | |
| 225 dcval <<= PASS1_BITS; | |
| 226 wsptr[DCTSIZE*0] = dcval; | |
| 227 wsptr[DCTSIZE*1] = dcval; | |
| 228 wsptr[DCTSIZE*2] = dcval; | |
| 229 wsptr[DCTSIZE*3] = dcval; | |
| 230 wsptr[DCTSIZE*4] = dcval; | |
| 231 wsptr[DCTSIZE*5] = dcval; | |
| 232 wsptr[DCTSIZE*6] = dcval; | |
| 233 wsptr[DCTSIZE*7] = dcval; | |
| 234 | |
| 235 inptr++; /* advance pointers to next column */ | |
| 236 quantptr++; | |
| 237 wsptr++; | |
| 238 continue; | |
| 239 } | |
| 240 | |
| 241 /* Even part: reverse the even part of the forward DCT. | |
| 242 * The rotator is c(-6). | |
| 243 */ | |
| 244 | |
| 245 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 246 if (ctr == DCTSIZE) | |
| 247 CLAMP_DC(z2); | |
| 248 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 249 z2 <<= CONST_BITS; | |
| 250 z3 <<= CONST_BITS; | |
| 251 /* Add fudge factor here for final descale. */ | |
| 252 z2 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 253 | |
| 254 tmp0 = z2 + z3; | |
| 255 tmp1 = z2 - z3; | |
| 256 | |
| 257 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 258 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 259 | |
| 260 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 261 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 262 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 263 | |
| 264 tmp10 = tmp0 + tmp2; | |
| 265 tmp13 = tmp0 - tmp2; | |
| 266 tmp11 = tmp1 + tmp3; | |
| 267 tmp12 = tmp1 - tmp3; | |
| 268 | |
| 269 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 270 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 271 */ | |
| 272 | |
| 273 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 274 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 275 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 276 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 277 | |
| 278 z2 = tmp0 + tmp2; | |
| 279 z3 = tmp1 + tmp3; | |
| 280 | |
| 281 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 282 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 283 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 284 z2 += z1; | |
| 285 z3 += z1; | |
| 286 | |
| 287 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 288 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 289 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 290 tmp0 += z1 + z2; | |
| 291 tmp3 += z1 + z3; | |
| 292 | |
| 293 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 294 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 295 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 296 tmp1 += z1 + z3; | |
| 297 tmp2 += z1 + z2; | |
| 298 | |
| 299 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 300 | |
| 301 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
| 302 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
| 303 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
| 304 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
| 305 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
| 306 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
| 307 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
| 308 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
| 309 | |
| 310 inptr++; /* advance pointers to next column */ | |
| 311 quantptr++; | |
| 312 wsptr++; | |
| 313 } | |
| 314 | |
| 315 /* Pass 2: process rows from work array, store into output array. | |
| 316 * Note that we must descale the results by a factor of 8 == 2**3, | |
| 317 * and also undo the PASS1_BITS scaling. | |
| 318 */ | |
| 319 | |
| 320 wsptr = workspace; | |
| 321 for (ctr = 0; ctr < DCTSIZE; ctr++) { | |
| 322 outptr = output_buf[ctr] + output_col; | |
| 323 | |
| 324 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 325 z2 = (INT32) wsptr[0] + | |
| 326 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 327 (ONE << (PASS1_BITS+2))); | |
| 328 | |
| 329 /* Rows of zeroes can be exploited in the same way as we did with columns. | |
| 330 * However, the column calculation has created many nonzero AC terms, so | |
| 331 * the simplification applies less often (typically 5% to 10% of the time). | |
| 332 * On machines with very fast multiplication, it's possible that the | |
| 333 * test takes more time than it's worth. In that case this section | |
| 334 * may be commented out. | |
| 335 */ | |
| 336 | |
| 337 #ifndef NO_ZERO_ROW_TEST | |
| 338 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && | |
| 339 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { | |
| 340 /* AC terms all zero */ | |
| 341 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3) | |
| 342 & RANGE_MASK]; | |
| 343 | |
| 344 outptr[0] = dcval; | |
| 345 outptr[1] = dcval; | |
| 346 outptr[2] = dcval; | |
| 347 outptr[3] = dcval; | |
| 348 outptr[4] = dcval; | |
| 349 outptr[5] = dcval; | |
| 350 outptr[6] = dcval; | |
| 351 outptr[7] = dcval; | |
| 352 | |
| 353 wsptr += DCTSIZE; /* advance pointer to next row */ | |
| 354 continue; | |
| 355 } | |
| 356 #endif | |
| 357 | |
| 358 /* Even part: reverse the even part of the forward DCT. | |
| 359 * The rotator is c(-6). | |
| 360 */ | |
| 361 | |
| 362 z3 = (INT32) wsptr[4]; | |
| 363 | |
| 364 tmp0 = (z2 + z3) << CONST_BITS; | |
| 365 tmp1 = (z2 - z3) << CONST_BITS; | |
| 366 | |
| 367 z2 = (INT32) wsptr[2]; | |
| 368 z3 = (INT32) wsptr[6]; | |
| 369 | |
| 370 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 371 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 372 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 373 | |
| 374 tmp10 = tmp0 + tmp2; | |
| 375 tmp13 = tmp0 - tmp2; | |
| 376 tmp11 = tmp1 + tmp3; | |
| 377 tmp12 = tmp1 - tmp3; | |
| 378 | |
| 379 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 380 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 381 */ | |
| 382 | |
| 383 tmp0 = (INT32) wsptr[7]; | |
| 384 tmp1 = (INT32) wsptr[5]; | |
| 385 tmp2 = (INT32) wsptr[3]; | |
| 386 tmp3 = (INT32) wsptr[1]; | |
| 387 | |
| 388 z2 = tmp0 + tmp2; | |
| 389 z3 = tmp1 + tmp3; | |
| 390 | |
| 391 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 392 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 393 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 394 z2 += z1; | |
| 395 z3 += z1; | |
| 396 | |
| 397 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 398 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 399 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 400 tmp0 += z1 + z2; | |
| 401 tmp3 += z1 + z3; | |
| 402 | |
| 403 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 404 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 405 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 406 tmp1 += z1 + z3; | |
| 407 tmp2 += z1 + z2; | |
| 408 | |
| 409 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 410 | |
| 411 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, | |
| 412 CONST_BITS+PASS1_BITS+3) | |
| 413 & RANGE_MASK]; | |
| 414 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, | |
| 415 CONST_BITS+PASS1_BITS+3) | |
| 416 & RANGE_MASK]; | |
| 417 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, | |
| 418 CONST_BITS+PASS1_BITS+3) | |
| 419 & RANGE_MASK]; | |
| 420 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, | |
| 421 CONST_BITS+PASS1_BITS+3) | |
| 422 & RANGE_MASK]; | |
| 423 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, | |
| 424 CONST_BITS+PASS1_BITS+3) | |
| 425 & RANGE_MASK]; | |
| 426 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, | |
| 427 CONST_BITS+PASS1_BITS+3) | |
| 428 & RANGE_MASK]; | |
| 429 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, | |
| 430 CONST_BITS+PASS1_BITS+3) | |
| 431 & RANGE_MASK]; | |
| 432 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, | |
| 433 CONST_BITS+PASS1_BITS+3) | |
| 434 & RANGE_MASK]; | |
| 435 | |
| 436 wsptr += DCTSIZE; /* advance pointer to next row */ | |
| 437 } | |
| 438 } | |
| 439 | |
| 440 #ifdef IDCT_SCALING_SUPPORTED | |
| 441 | |
| 442 | |
| 443 /* | |
| 444 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 445 * producing a reduced-size 7x7 output block. | |
| 446 * | |
| 447 * Optimized algorithm with 12 multiplications in the 1-D kernel. | |
| 448 * cK represents sqrt(2) * cos(K*pi/14). | |
| 449 */ | |
| 450 | |
| 451 GLOBAL(void) | |
| 452 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 453 JCOEFPTR coef_block, | |
| 454 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 455 { | |
| 456 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; | |
| 457 INT32 z1, z2, z3; | |
| 458 JCOEFPTR inptr; | |
| 459 ISLOW_MULT_TYPE * quantptr; | |
| 460 int * wsptr; | |
| 461 JSAMPROW outptr; | |
| 462 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 463 int ctr; | |
| 464 int workspace[7*7]; /* buffers data between passes */ | |
| 465 SHIFT_TEMPS | |
| 466 | |
| 467 /* Pass 1: process columns from input, store into work array. */ | |
| 468 | |
| 469 inptr = coef_block; | |
| 470 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 471 wsptr = workspace; | |
| 472 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { | |
| 473 /* Even part */ | |
| 474 | |
| 475 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 476 if (ctr == 0) | |
| 477 CLAMP_DC(tmp13); | |
| 478 tmp13 <<= CONST_BITS; | |
| 479 /* Add fudge factor here for final descale. */ | |
| 480 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 481 | |
| 482 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 483 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 484 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 485 | |
| 486 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ | |
| 487 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ | |
| 488 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ | |
| 489 tmp0 = z1 + z3; | |
| 490 z2 -= tmp0; | |
| 491 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ | |
| 492 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ | |
| 493 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ | |
| 494 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ | |
| 495 | |
| 496 /* Odd part */ | |
| 497 | |
| 498 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 499 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 500 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 501 | |
| 502 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | |
| 503 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | |
| 504 tmp0 = tmp1 - tmp2; | |
| 505 tmp1 += tmp2; | |
| 506 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ | |
| 507 tmp1 += tmp2; | |
| 508 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ | |
| 509 tmp0 += z2; | |
| 510 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ | |
| 511 | |
| 512 /* Final output stage */ | |
| 513 | |
| 514 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 515 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 516 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); | |
| 517 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); | |
| 518 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); | |
| 519 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); | |
| 520 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); | |
| 521 } | |
| 522 | |
| 523 /* Pass 2: process 7 rows from work array, store into output array. */ | |
| 524 | |
| 525 wsptr = workspace; | |
| 526 for (ctr = 0; ctr < 7; ctr++) { | |
| 527 outptr = output_buf[ctr] + output_col; | |
| 528 | |
| 529 /* Even part */ | |
| 530 | |
| 531 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 532 tmp13 = (INT32) wsptr[0] + | |
| 533 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 534 (ONE << (PASS1_BITS+2))); | |
| 535 tmp13 <<= CONST_BITS; | |
| 536 | |
| 537 z1 = (INT32) wsptr[2]; | |
| 538 z2 = (INT32) wsptr[4]; | |
| 539 z3 = (INT32) wsptr[6]; | |
| 540 | |
| 541 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ | |
| 542 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ | |
| 543 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ | |
| 544 tmp0 = z1 + z3; | |
| 545 z2 -= tmp0; | |
| 546 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ | |
| 547 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ | |
| 548 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ | |
| 549 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ | |
| 550 | |
| 551 /* Odd part */ | |
| 552 | |
| 553 z1 = (INT32) wsptr[1]; | |
| 554 z2 = (INT32) wsptr[3]; | |
| 555 z3 = (INT32) wsptr[5]; | |
| 556 | |
| 557 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | |
| 558 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | |
| 559 tmp0 = tmp1 - tmp2; | |
| 560 tmp1 += tmp2; | |
| 561 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ | |
| 562 tmp1 += tmp2; | |
| 563 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ | |
| 564 tmp0 += z2; | |
| 565 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ | |
| 566 | |
| 567 /* Final output stage */ | |
| 568 | |
| 569 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 570 CONST_BITS+PASS1_BITS+3) | |
| 571 & RANGE_MASK]; | |
| 572 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 573 CONST_BITS+PASS1_BITS+3) | |
| 574 & RANGE_MASK]; | |
| 575 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, | |
| 576 CONST_BITS+PASS1_BITS+3) | |
| 577 & RANGE_MASK]; | |
| 578 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, | |
| 579 CONST_BITS+PASS1_BITS+3) | |
| 580 & RANGE_MASK]; | |
| 581 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 582 CONST_BITS+PASS1_BITS+3) | |
| 583 & RANGE_MASK]; | |
| 584 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 585 CONST_BITS+PASS1_BITS+3) | |
| 586 & RANGE_MASK]; | |
| 587 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, | |
| 588 CONST_BITS+PASS1_BITS+3) | |
| 589 & RANGE_MASK]; | |
| 590 | |
| 591 wsptr += 7; /* advance pointer to next row */ | |
| 592 } | |
| 593 } | |
| 594 | |
| 595 | |
| 596 /* | |
| 597 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 598 * producing a reduced-size 6x6 output block. | |
| 599 * | |
| 600 * Optimized algorithm with 3 multiplications in the 1-D kernel. | |
| 601 * cK represents sqrt(2) * cos(K*pi/12). | |
| 602 */ | |
| 603 | |
| 604 GLOBAL(void) | |
| 605 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 606 JCOEFPTR coef_block, | |
| 607 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 608 { | |
| 609 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; | |
| 610 INT32 z1, z2, z3; | |
| 611 JCOEFPTR inptr; | |
| 612 ISLOW_MULT_TYPE * quantptr; | |
| 613 int * wsptr; | |
| 614 JSAMPROW outptr; | |
| 615 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 616 int ctr; | |
| 617 int workspace[6*6]; /* buffers data between passes */ | |
| 618 SHIFT_TEMPS | |
| 619 | |
| 620 /* Pass 1: process columns from input, store into work array. */ | |
| 621 | |
| 622 inptr = coef_block; | |
| 623 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 624 wsptr = workspace; | |
| 625 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { | |
| 626 /* Even part */ | |
| 627 | |
| 628 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 629 if (ctr == 0) | |
| 630 CLAMP_DC(tmp0); | |
| 631 tmp0 <<= CONST_BITS; | |
| 632 /* Add fudge factor here for final descale. */ | |
| 633 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 634 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 635 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ | |
| 636 tmp1 = tmp0 + tmp10; | |
| 637 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); | |
| 638 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 639 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ | |
| 640 tmp10 = tmp1 + tmp0; | |
| 641 tmp12 = tmp1 - tmp0; | |
| 642 | |
| 643 /* Odd part */ | |
| 644 | |
| 645 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 646 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 647 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 648 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 649 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); | |
| 650 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); | |
| 651 tmp1 = (z1 - z2 - z3) << PASS1_BITS; | |
| 652 | |
| 653 /* Final output stage */ | |
| 654 | |
| 655 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 656 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 657 wsptr[6*1] = (int) (tmp11 + tmp1); | |
| 658 wsptr[6*4] = (int) (tmp11 - tmp1); | |
| 659 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); | |
| 660 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); | |
| 661 } | |
| 662 | |
| 663 /* Pass 2: process 6 rows from work array, store into output array. */ | |
| 664 | |
| 665 wsptr = workspace; | |
| 666 for (ctr = 0; ctr < 6; ctr++) { | |
| 667 outptr = output_buf[ctr] + output_col; | |
| 668 | |
| 669 /* Even part */ | |
| 670 | |
| 671 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 672 tmp0 = (INT32) wsptr[0] + | |
| 673 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 674 (ONE << (PASS1_BITS+2))); | |
| 675 tmp0 <<= CONST_BITS; | |
| 676 tmp2 = (INT32) wsptr[4]; | |
| 677 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ | |
| 678 tmp1 = tmp0 + tmp10; | |
| 679 tmp11 = tmp0 - tmp10 - tmp10; | |
| 680 tmp10 = (INT32) wsptr[2]; | |
| 681 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ | |
| 682 tmp10 = tmp1 + tmp0; | |
| 683 tmp12 = tmp1 - tmp0; | |
| 684 | |
| 685 /* Odd part */ | |
| 686 | |
| 687 z1 = (INT32) wsptr[1]; | |
| 688 z2 = (INT32) wsptr[3]; | |
| 689 z3 = (INT32) wsptr[5]; | |
| 690 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 691 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); | |
| 692 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); | |
| 693 tmp1 = (z1 - z2 - z3) << CONST_BITS; | |
| 694 | |
| 695 /* Final output stage */ | |
| 696 | |
| 697 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 698 CONST_BITS+PASS1_BITS+3) | |
| 699 & RANGE_MASK]; | |
| 700 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 701 CONST_BITS+PASS1_BITS+3) | |
| 702 & RANGE_MASK]; | |
| 703 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, | |
| 704 CONST_BITS+PASS1_BITS+3) | |
| 705 & RANGE_MASK]; | |
| 706 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, | |
| 707 CONST_BITS+PASS1_BITS+3) | |
| 708 & RANGE_MASK]; | |
| 709 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 710 CONST_BITS+PASS1_BITS+3) | |
| 711 & RANGE_MASK]; | |
| 712 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 713 CONST_BITS+PASS1_BITS+3) | |
| 714 & RANGE_MASK]; | |
| 715 | |
| 716 wsptr += 6; /* advance pointer to next row */ | |
| 717 } | |
| 718 } | |
| 719 | |
| 720 | |
| 721 /* | |
| 722 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 723 * producing a reduced-size 5x5 output block: rows output_buf[0..4], 5 samples each starting at output_col. | |
| 724 * | |
| 725 * Optimized algorithm with 5 multiplications in the 1-D kernel. | |
| 726 * cK represents sqrt(2) * cos(K*pi/10). Only rows/columns 0..4 of coef_block (row stride DCTSIZE) are read, each passed through DEQUANTIZE with the matching compptr->dct_table entry. | |
| 727 */ | |
| 728 | |
| 729 GLOBAL(void) | |
| 730 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 731 JCOEFPTR coef_block, | |
| 732 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 733 { | |
| 734 INT32 tmp0, tmp1, tmp10, tmp11, tmp12; /* tmp1x: even-part terms; tmp0/tmp1: inputs 2 and 4 first, later the odd-part terms */ | |
| 735 INT32 z1, z2, z3; | |
| 736 JCOEFPTR inptr; | |
| 737 ISLOW_MULT_TYPE * quantptr; | |
| 738 int * wsptr; | |
| 739 JSAMPROW outptr; | |
| 740 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 741 int ctr; | |
| 742 int workspace[5*5]; /* buffers data between passes; pass 1 stores output r of column c at workspace[5*r + c] */ | |
| 743 SHIFT_TEMPS | |
| 744 | |
| 745 /* Pass 1: process columns from input, store into work array. Results are descaled by only CONST_BITS-PASS1_BITS bits. */ | |
| 746 | |
| 747 inptr = coef_block; | |
| 748 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 749 wsptr = workspace; | |
| 750 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { | |
| 751 /* Even part: input rows 0, 2 and 4 of this column */ | |
| 752 | |
| 753 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 754 if (ctr == 0) | |
| 755 CLAMP_DC(tmp12); /* only the DC coefficient (column 0, row 0); CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 756 tmp12 <<= CONST_BITS; | |
| 757 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS) when ONE is 1, so the RIGHT_SHIFT at the end of this pass rounds rather than truncates (assuming an arithmetic shift). */ | |
| 758 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 759 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 760 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 761 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ | |
| 762 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ | |
| 763 z3 = tmp12 + z2; | |
| 764 tmp10 = z3 + z1; | |
| 765 tmp11 = z3 - z1; | |
| 766 tmp12 -= z2 << 2; /* tmp12 becomes the middle (r = 2) output. NOTE(review): z2 may be negative; ISO C leaves left shifts of negative signed values undefined - confirm INT32 signedness and compiler behaviour */ | |
| 767 | |
| 768 /* Odd part: input rows 1 and 3; z2/z3 are reused for the raw inputs */ | |
| 769 | |
| 770 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 771 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 772 | |
| 773 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ | |
| 774 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ | |
| 775 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ | |
| 776 | |
| 777 /* Final output stage: outputs r and 4-r are the sum and difference of an even and an odd term; the middle output (r = 2) has no odd contribution */ | |
| 778 | |
| 779 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 780 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 781 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); | |
| 782 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); | |
| 783 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); | |
| 784 } | |
| 785 | |
| 786 /* Pass 2: process 5 rows from work array, store into output array. Same kernel as pass 1, but fed from workspace and descaled by CONST_BITS+PASS1_BITS+3. */ | |
| 787 | |
| 788 wsptr = workspace; | |
| 789 for (ctr = 0; ctr < 5; ctr++) { | |
| 790 outptr = output_buf[ctr] + output_col; | |
| 791 | |
| 792 /* Even part: workspace entries 0, 2 and 4 of this row */ | |
| 793 | |
| 794 /* Add range center and fudge factor for final descale and range-limit: both are pre-scaled so that, after the shift by CONST_BITS below, they line up with the final CONST_BITS+PASS1_BITS+3 descale. */ | |
| 795 tmp12 = (INT32) wsptr[0] + | |
| 796 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 797 (ONE << (PASS1_BITS+2))); | |
| 798 tmp12 <<= CONST_BITS; | |
| 799 tmp0 = (INT32) wsptr[2]; | |
| 800 tmp1 = (INT32) wsptr[4]; | |
| 801 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ | |
| 802 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ | |
| 803 z3 = tmp12 + z2; | |
| 804 tmp10 = z3 + z1; | |
| 805 tmp11 = z3 - z1; | |
| 806 tmp12 -= z2 << 2; /* tmp12 becomes the middle (index 2) output of this row */ | |
| 807 | |
| 808 /* Odd part: workspace entries 1 and 3 */ | |
| 809 | |
| 810 z2 = (INT32) wsptr[1]; | |
| 811 z3 = (INT32) wsptr[3]; | |
| 812 | |
| 813 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ | |
| 814 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ | |
| 815 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ | |
| 816 | |
| 817 /* Final output stage: descale, mask with RANGE_MASK and look the sample up in range_limit */ | |
| 818 | |
| 819 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 820 CONST_BITS+PASS1_BITS+3) | |
| 821 & RANGE_MASK]; | |
| 822 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 823 CONST_BITS+PASS1_BITS+3) | |
| 824 & RANGE_MASK]; | |
| 825 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, | |
| 826 CONST_BITS+PASS1_BITS+3) | |
| 827 & RANGE_MASK]; | |
| 828 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, | |
| 829 CONST_BITS+PASS1_BITS+3) | |
| 830 & RANGE_MASK]; | |
| 831 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, | |
| 832 CONST_BITS+PASS1_BITS+3) | |
| 833 & RANGE_MASK]; | |
| 834 | |
| 835 wsptr += 5; /* advance pointer to next row */ | |
| 836 } | |
| 837 } | |
| 838 | |
| 839 | |
| 840 /* | |
| 841 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 842 * producing a reduced-size 4x4 output block: rows output_buf[0..3], 4 samples each starting at output_col. | |
| 843 * | |
| 844 * Optimized algorithm with 3 multiplications in the 1-D kernel. | |
| 845 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. Only rows/columns 0..3 of coef_block (row stride DCTSIZE) are read, each passed through DEQUANTIZE with the matching compptr->dct_table entry. | |
| 846 */ | |
| 847 | |
| 848 GLOBAL(void) | |
| 849 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 850 JCOEFPTR coef_block, | |
| 851 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 852 { | |
| 853 INT32 tmp0, tmp2, tmp10, tmp12; /* tmp0/tmp2: even inputs first, later the odd-part terms; tmp10/tmp12: even-part terms */ | |
| 854 INT32 z1, z2, z3; | |
| 855 JCOEFPTR inptr; | |
| 856 ISLOW_MULT_TYPE * quantptr; | |
| 857 int * wsptr; | |
| 858 JSAMPROW outptr; | |
| 859 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 860 int ctr; | |
| 861 int workspace[4*4]; /* buffers data between passes; pass 1 stores output r of column c at workspace[4*r + c] */ | |
| 862 SHIFT_TEMPS | |
| 863 | |
| 864 /* Pass 1: process columns from input, store into work array. */ | |
| 865 | |
| 866 inptr = coef_block; | |
| 867 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 868 wsptr = workspace; | |
| 869 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { | |
| 870 /* Even part: input rows 0 and 2 of this column */ | |
| 871 | |
| 872 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 873 if (ctr == 0) | |
| 874 CLAMP_DC(tmp0); /* only the DC coefficient (column 0, row 0); CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 875 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 876 | |
| 877 tmp10 = (tmp0 + tmp2) << PASS1_BITS; /* no multiply in the even part: scaled by PASS1_BITS directly so it can be added to the already-descaled odd terms below */ | |
| 878 tmp12 = (tmp0 - tmp2) << PASS1_BITS; /* NOTE(review): operand may be negative; ISO C leaves left shifts of negative signed values undefined - confirm INT32 signedness and compiler behaviour */ | |
| 879 | |
| 880 /* Odd part: input rows 1 and 3 */ | |
| 881 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 882 | |
| 883 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 884 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 885 | |
| 886 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 887 /* Add fudge factor here for final descale; z1 feeds both tmp0 and tmp2, so this one addition covers both RIGHT_SHIFTs below. */ | |
| 888 z1 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 889 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ | |
| 890 CONST_BITS-PASS1_BITS); | |
| 891 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ | |
| 892 CONST_BITS-PASS1_BITS); | |
| 893 | |
| 894 /* Final output stage: both operands are already descaled, so plain sums and differences are stored */ | |
| 895 | |
| 896 wsptr[4*0] = (int) (tmp10 + tmp0); | |
| 897 wsptr[4*3] = (int) (tmp10 - tmp0); | |
| 898 wsptr[4*1] = (int) (tmp12 + tmp2); | |
| 899 wsptr[4*2] = (int) (tmp12 - tmp2); | |
| 900 } | |
| 901 | |
| 902 /* Pass 2: process 4 rows from work array, store into output array. */ | |
| 903 | |
| 904 wsptr = workspace; | |
| 905 for (ctr = 0; ctr < 4; ctr++) { | |
| 906 outptr = output_buf[ctr] + output_col; | |
| 907 | |
| 908 /* Even part: workspace entries 0 and 2 of this row */ | |
| 909 | |
| 910 /* Add range center and fudge factor for final descale and range-limit; the bias is added to tmp0 only, which is part of both tmp10 and tmp12 and hence of every output. */ | |
| 911 tmp0 = (INT32) wsptr[0] + | |
| 912 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 913 (ONE << (PASS1_BITS+2))); | |
| 914 tmp2 = (INT32) wsptr[2]; | |
| 915 | |
| 916 tmp10 = (tmp0 + tmp2) << CONST_BITS; /* shifted by CONST_BITS so the final CONST_BITS+PASS1_BITS+3 descale applies to even and odd terms alike */ | |
| 917 tmp12 = (tmp0 - tmp2) << CONST_BITS; | |
| 918 | |
| 919 /* Odd part: workspace entries 1 and 3 */ | |
| 920 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 921 | |
| 922 z2 = (INT32) wsptr[1]; | |
| 923 z3 = (INT32) wsptr[3]; | |
| 924 | |
| 925 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 926 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 927 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 928 | |
| 929 /* Final output stage: descale, mask with RANGE_MASK and look the sample up in range_limit */ | |
| 930 | |
| 931 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 932 CONST_BITS+PASS1_BITS+3) | |
| 933 & RANGE_MASK]; | |
| 934 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 935 CONST_BITS+PASS1_BITS+3) | |
| 936 & RANGE_MASK]; | |
| 937 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 938 CONST_BITS+PASS1_BITS+3) | |
| 939 & RANGE_MASK]; | |
| 940 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 941 CONST_BITS+PASS1_BITS+3) | |
| 942 & RANGE_MASK]; | |
| 943 | |
| 944 wsptr += 4; /* advance pointer to next row */ | |
| 945 } | |
| 946 } | |
| 947 | |
| 948 | |
| 949 /* | |
| 950 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 951 * producing a reduced-size 3x3 output block: rows output_buf[0..2], 3 samples each starting at output_col. | |
| 952 * | |
| 953 * Optimized algorithm with 2 multiplications in the 1-D kernel. | |
| 954 * cK represents sqrt(2) * cos(K*pi/6). Only rows/columns 0..2 of coef_block (row stride DCTSIZE) are read, each passed through DEQUANTIZE with the matching compptr->dct_table entry. | |
| 955 */ | |
| 956 | |
| 957 GLOBAL(void) | |
| 958 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 959 JCOEFPTR coef_block, | |
| 960 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 961 { | |
| 962 INT32 tmp0, tmp2, tmp10, tmp12; /* all four are reused: see the even/odd parts below for what each holds at a given point */ | |
| 963 JCOEFPTR inptr; | |
| 964 ISLOW_MULT_TYPE * quantptr; | |
| 965 int * wsptr; | |
| 966 JSAMPROW outptr; | |
| 967 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 968 int ctr; | |
| 969 int workspace[3*3]; /* buffers data between passes; pass 1 stores output r of column c at workspace[3*r + c] */ | |
| 970 SHIFT_TEMPS | |
| 971 | |
| 972 /* Pass 1: process columns from input, store into work array. Results are descaled by only CONST_BITS-PASS1_BITS bits. */ | |
| 973 | |
| 974 inptr = coef_block; | |
| 975 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 976 wsptr = workspace; | |
| 977 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { | |
| 978 /* Even part: input rows 0 and 2 of this column */ | |
| 979 | |
| 980 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 981 if (ctr == 0) | |
| 982 CLAMP_DC(tmp0); /* only the DC coefficient (column 0, row 0); CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 983 tmp0 <<= CONST_BITS; /* NOTE(review): tmp0 may be negative; ISO C leaves left shifts of negative signed values undefined - confirm INT32 signedness and compiler behaviour */ | |
| 984 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS) when ONE is 1, so the RIGHT_SHIFT at the end of this pass rounds rather than truncates (assuming an arithmetic shift). */ | |
| 985 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 986 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 987 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ | |
| 988 tmp10 = tmp0 + tmp12; | |
| 989 tmp2 = tmp0 - tmp12 - tmp12; /* tmp2 now holds the middle (r = 1) output, not the raw input any more */ | |
| 990 | |
| 991 /* Odd part: input row 1; tmp12 and tmp0 are recycled for the raw input and its product */ | |
| 992 | |
| 993 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 994 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ | |
| 995 | |
| 996 /* Final output stage: outputs 0 and 2 are even term plus/minus odd term; the middle output has no odd contribution */ | |
| 997 | |
| 998 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 999 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 1000 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); | |
| 1001 } | |
| 1002 | |
| 1003 /* Pass 2: process 3 rows from work array, store into output array. Same kernel as pass 1, but fed from workspace and descaled by CONST_BITS+PASS1_BITS+3. */ | |
| 1004 | |
| 1005 wsptr = workspace; | |
| 1006 for (ctr = 0; ctr < 3; ctr++) { | |
| 1007 outptr = output_buf[ctr] + output_col; | |
| 1008 | |
| 1009 /* Even part: workspace entries 0 and 2 of this row */ | |
| 1010 | |
| 1011 /* Add range center and fudge factor for final descale and range-limit: both are pre-scaled so that, after the shift by CONST_BITS below, they line up with the final CONST_BITS+PASS1_BITS+3 descale. */ | |
| 1012 tmp0 = (INT32) wsptr[0] + | |
| 1013 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 1014 (ONE << (PASS1_BITS+2))); | |
| 1015 tmp0 <<= CONST_BITS; | |
| 1016 tmp2 = (INT32) wsptr[2]; | |
| 1017 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ | |
| 1018 tmp10 = tmp0 + tmp12; | |
| 1019 tmp2 = tmp0 - tmp12 - tmp12; /* middle (index 1) output of this row */ | |
| 1020 | |
| 1021 /* Odd part: workspace entry 1 */ | |
| 1022 | |
| 1023 tmp12 = (INT32) wsptr[1]; | |
| 1024 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ | |
| 1025 | |
| 1026 /* Final output stage: descale, mask with RANGE_MASK and look the sample up in range_limit */ | |
| 1027 | |
| 1028 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 1029 CONST_BITS+PASS1_BITS+3) | |
| 1030 & RANGE_MASK]; | |
| 1031 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 1032 CONST_BITS+PASS1_BITS+3) | |
| 1033 & RANGE_MASK]; | |
| 1034 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, | |
| 1035 CONST_BITS+PASS1_BITS+3) | |
| 1036 & RANGE_MASK]; | |
| 1037 | |
| 1038 wsptr += 3; /* advance pointer to next row */ | |
| 1039 } | |
| 1040 } | |
| 1041 | |
| 1042 | |
| 1043 /* | |
| 1044 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1045 * producing a reduced-size 2x2 output block: output_buf[0] and output_buf[1], 2 samples each starting at output_col. | |
| 1046 * | |
| 1047 * Multiplication-less algorithm. Only coef_block[0], [1], [DCTSIZE] and [DCTSIZE+1] are read; no workspace array is used, both passes work on scalars. | |
| 1048 */ | |
| 1049 | |
| 1050 GLOBAL(void) | |
| 1051 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1052 JCOEFPTR coef_block, | |
| 1053 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1054 { | |
| 1055 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; /* tmp4/tmp5: per-column inputs; tmp0..tmp3: pass-1 results (tmp0,tmp2 from column 0; tmp1,tmp3 from column 1) */ | |
| 1056 ISLOW_MULT_TYPE * quantptr; | |
| 1057 JSAMPROW outptr; | |
| 1058 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 1059 ISHIFT_TEMPS | |
| 1060 | |
| 1061 /* Pass 1: process columns from input. */ | |
| 1062 | |
| 1063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1064 | |
| 1065 /* Column 0 */ | |
| 1066 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1067 CLAMP_DC(tmp4); /* applied to the DC coefficient only; CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 1068 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1069 /* Add range center and fudge factor for final descale and range-limit. tmp4 feeds tmp0 and tmp2, and every output below contains exactly one of them, so the bias reaches each sample once; (1 << 2) is half of the final shift-by-3 divisor. */ | |
| 1070 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); | |
| 1071 | |
| 1072 tmp0 = tmp4 + tmp5; | |
| 1073 tmp2 = tmp4 - tmp5; | |
| 1074 | |
| 1075 /* Column 1 (no bias added here: it is already carried by the column-0 terms) */ | |
| 1076 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]); | |
| 1077 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]); | |
| 1078 | |
| 1079 tmp1 = tmp4 + tmp5; | |
| 1080 tmp3 = tmp4 - tmp5; | |
| 1081 | |
| 1082 /* Pass 2: process 2 rows, store into output array. Each sample is descaled by 3 bits, masked with RANGE_MASK and looked up in range_limit. */ | |
| 1083 | |
| 1084 /* Row 0 */ | |
| 1085 outptr = output_buf[0] + output_col; | |
| 1086 | |
| 1087 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; | |
| 1088 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; | |
| 1089 | |
| 1090 /* Row 1 */ | |
| 1091 outptr = output_buf[1] + output_col; | |
| 1092 | |
| 1093 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; | |
| 1094 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; | |
| 1095 } | |
| 1096 | |
| 1097 | |
| 1098 /* | |
| 1099 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1100 * producing a reduced-size 1x1 output block: the single sample output_buf[0][output_col]. | |
| 1101 * | |
| 1102 * We hardly need an inverse DCT routine for this: just take the | |
| 1103 * average pixel value, which is one-eighth of the DC coefficient. Only coef_block[0] and the first compptr->dct_table entry are read. | |
| 1104 */ | |
| 1105 | |
| 1106 GLOBAL(void) | |
| 1107 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1108 JCOEFPTR coef_block, | |
| 1109 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1110 { | |
| 1111 DCTELEM dcval; | |
| 1112 ISLOW_MULT_TYPE * quantptr; | |
| 1113 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 1114 ISHIFT_TEMPS | |
| 1115 | |
| 1116 /* 1x1 is trivial: just take the DC coefficient divided by 8. */ | |
| 1117 | |
| 1118 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1119 | |
| 1120 dcval = DEQUANTIZE(coef_block[0], quantptr[0]); | |
| 1121 CLAMP_DC(dcval); /* CLAMP_DC is defined outside this chunk - presumably limits the DC range, confirm */ | |
| 1122 /* Add range center and fudge factor for descale and range-limit; RANGE_CENTER is pre-scaled by 3 bits and (1 << 2) is half of the shift-by-3 divisor used below. */ | |
| 1123 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); | |
| 1124 | |
| 1125 output_buf[0][output_col] = | |
| 1126 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK]; | |
| 1127 } | |
| 1128 | |
| 1129 | |
| 1130 /* | |
| 1131 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1132 * producing a 9x9 output block: rows output_buf[0..8], 9 samples each starting at output_col. | |
| 1133 * | |
| 1134 * Optimized algorithm with 10 multiplications in the 1-D kernel. | |
| 1135 * cK represents sqrt(2) * cos(K*pi/18). All 8 rows and 8 columns of coef_block are read; pass 1 expands each column to 9 values, pass 2 expands each of the 9 workspace rows to 9 samples. | |
| 1136 */ | |
| 1137 | |
| 1138 GLOBAL(void) | |
| 1139 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1140 JCOEFPTR coef_block, | |
| 1141 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1142 { | |
| 1143 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; /* tmp1x: even-part terms; tmp0..tmp3: scratch in the even part, then the odd-part terms */ | |
| 1144 INT32 z1, z2, z3, z4; | |
| 1145 JCOEFPTR inptr; | |
| 1146 ISLOW_MULT_TYPE * quantptr; | |
| 1147 int * wsptr; | |
| 1148 JSAMPROW outptr; | |
| 1149 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 1150 int ctr; | |
| 1151 int workspace[8*9]; /* buffers data between passes; 9 rows of 8, pass 1 stores output r of column c at workspace[8*r + c] */ | |
| 1152 SHIFT_TEMPS | |
| 1153 | |
| 1154 /* Pass 1: process columns from input, store into work array. Results are descaled by only CONST_BITS-PASS1_BITS bits. */ | |
| 1155 | |
| 1156 inptr = coef_block; | |
| 1157 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1158 wsptr = workspace; | |
| 1159 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 1160 /* Even part: input rows 0, 2, 4 and 6 of this column */ | |
| 1161 | |
| 1162 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1163 if (ctr == 0) | |
| 1164 CLAMP_DC(tmp0); /* only the DC coefficient (column 0, row 0); CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 1165 tmp0 <<= CONST_BITS; /* NOTE(review): tmp0 may be negative; ISO C leaves left shifts of negative signed values undefined - confirm INT32 signedness and compiler behaviour */ | |
| 1166 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS) when ONE is 1, so the RIGHT_SHIFT at the end of this pass rounds rather than truncates (assuming an arithmetic shift). */ | |
| 1167 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 1168 | |
| 1169 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 1170 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 1171 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 1172 | |
| 1173 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ | |
| 1174 tmp1 = tmp0 + tmp3; | |
| 1175 tmp2 = tmp0 - tmp3 - tmp3; | |
| 1176 | |
| 1177 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ | |
| 1178 tmp11 = tmp2 + tmp0; | |
| 1179 tmp14 = tmp2 - tmp0 - tmp0; /* tmp14 is the middle (r = 4) output; it gets no odd contribution */ | |
| 1180 | |
| 1181 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ | |
| 1182 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ | |
| 1183 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ | |
| 1184 | |
| 1185 tmp10 = tmp1 + tmp0 - tmp3; | |
| 1186 tmp12 = tmp1 - tmp0 + tmp2; | |
| 1187 tmp13 = tmp1 - tmp2 + tmp3; | |
| 1188 | |
| 1189 /* Odd part: input rows 1, 3, 5 and 7; tmp0..tmp3 are recycled for the odd terms */ | |
| 1190 | |
| 1191 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1192 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 1193 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 1194 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 1195 | |
| 1196 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ | |
| 1197 | |
| 1198 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ | |
| 1199 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ | |
| 1200 tmp0 = tmp2 + tmp3 - z2; | |
| 1201 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ | |
| 1202 tmp2 += z2 - tmp1; | |
| 1203 tmp3 += z2 + tmp1; | |
| 1204 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ | |
| 1205 | |
| 1206 /* Final output stage: outputs r and 8-r are the sum and difference of an even and an odd term */ | |
| 1207 | |
| 1208 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 1209 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 1210 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); | |
| 1211 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); | |
| 1212 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); | |
| 1213 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); | |
| 1214 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); | |
| 1215 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); | |
| 1216 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); | |
| 1217 } | |
| 1218 | |
| 1219 /* Pass 2: process 9 rows from work array, store into output array. Same kernel as pass 1, fed from the 8 workspace values of each row and descaled by CONST_BITS+PASS1_BITS+3. */ | |
| 1220 | |
| 1221 wsptr = workspace; | |
| 1222 for (ctr = 0; ctr < 9; ctr++) { | |
| 1223 outptr = output_buf[ctr] + output_col; | |
| 1224 | |
| 1225 /* Even part: workspace entries 0, 2, 4 and 6 of this row */ | |
| 1226 | |
| 1227 /* Add range center and fudge factor for final descale and range-limit: both are pre-scaled so that, after the shift by CONST_BITS below, they line up with the final CONST_BITS+PASS1_BITS+3 descale. */ | |
| 1228 tmp0 = (INT32) wsptr[0] + | |
| 1229 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 1230 (ONE << (PASS1_BITS+2))); | |
| 1231 tmp0 <<= CONST_BITS; | |
| 1232 | |
| 1233 z1 = (INT32) wsptr[2]; | |
| 1234 z2 = (INT32) wsptr[4]; | |
| 1235 z3 = (INT32) wsptr[6]; | |
| 1236 | |
| 1237 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ | |
| 1238 tmp1 = tmp0 + tmp3; | |
| 1239 tmp2 = tmp0 - tmp3 - tmp3; | |
| 1240 | |
| 1241 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ | |
| 1242 tmp11 = tmp2 + tmp0; | |
| 1243 tmp14 = tmp2 - tmp0 - tmp0; /* middle (index 4) output of this row */ | |
| 1244 | |
| 1245 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ | |
| 1246 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ | |
| 1247 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ | |
| 1248 | |
| 1249 tmp10 = tmp1 + tmp0 - tmp3; | |
| 1250 tmp12 = tmp1 - tmp0 + tmp2; | |
| 1251 tmp13 = tmp1 - tmp2 + tmp3; | |
| 1252 | |
| 1253 /* Odd part: workspace entries 1, 3, 5 and 7 */ | |
| 1254 | |
| 1255 z1 = (INT32) wsptr[1]; | |
| 1256 z2 = (INT32) wsptr[3]; | |
| 1257 z3 = (INT32) wsptr[5]; | |
| 1258 z4 = (INT32) wsptr[7]; | |
| 1259 | |
| 1260 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ | |
| 1261 | |
| 1262 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ | |
| 1263 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ | |
| 1264 tmp0 = tmp2 + tmp3 - z2; | |
| 1265 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ | |
| 1266 tmp2 += z2 - tmp1; | |
| 1267 tmp3 += z2 + tmp1; | |
| 1268 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ | |
| 1269 | |
| 1270 /* Final output stage: descale, mask with RANGE_MASK and look the sample up in range_limit */ | |
| 1271 | |
| 1272 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 1273 CONST_BITS+PASS1_BITS+3) | |
| 1274 & RANGE_MASK]; | |
| 1275 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 1276 CONST_BITS+PASS1_BITS+3) | |
| 1277 & RANGE_MASK]; | |
| 1278 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, | |
| 1279 CONST_BITS+PASS1_BITS+3) | |
| 1280 & RANGE_MASK]; | |
| 1281 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, | |
| 1282 CONST_BITS+PASS1_BITS+3) | |
| 1283 & RANGE_MASK]; | |
| 1284 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 1285 CONST_BITS+PASS1_BITS+3) | |
| 1286 & RANGE_MASK]; | |
| 1287 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 1288 CONST_BITS+PASS1_BITS+3) | |
| 1289 & RANGE_MASK]; | |
| 1290 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, | |
| 1291 CONST_BITS+PASS1_BITS+3) | |
| 1292 & RANGE_MASK]; | |
| 1293 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, | |
| 1294 CONST_BITS+PASS1_BITS+3) | |
| 1295 & RANGE_MASK]; | |
| 1296 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, | |
| 1297 CONST_BITS+PASS1_BITS+3) | |
| 1298 & RANGE_MASK]; | |
| 1299 | |
| 1300 wsptr += 8; /* advance pointer to next row */ | |
| 1301 } | |
| 1302 } | |
| 1303 | |
| 1304 | |
| 1305 /* | |
| 1306 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1307 * producing a 10x10 output block: rows output_buf[0..9], 10 samples each starting at output_col. | |
| 1308 * | |
| 1309 * Optimized algorithm with 12 multiplications in the 1-D kernel. | |
| 1310 * cK represents sqrt(2) * cos(K*pi/20). All 8 rows and 8 columns of coef_block are read; pass 1 expands each column to 10 values, pass 2 expands each of the 10 workspace rows to 10 samples. | |
| 1311 */ | |
| 1312 | |
| 1313 GLOBAL(void) | |
| 1314 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1315 JCOEFPTR coef_block, | |
| 1316 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1317 { | |
| 1318 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; /* scratch in the even part, then the five odd-part terms */ | |
| 1319 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; /* the five even-part terms */ | |
| 1320 INT32 z1, z2, z3, z4, z5; | |
| 1321 JCOEFPTR inptr; | |
| 1322 ISLOW_MULT_TYPE * quantptr; | |
| 1323 int * wsptr; | |
| 1324 JSAMPROW outptr; | |
| 1325 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* indexed below with (descaled value & RANGE_MASK) */ | |
| 1326 int ctr; | |
| 1327 int workspace[8*10]; /* buffers data between passes; 10 rows of 8, pass 1 stores output r of column c at workspace[8*r + c] */ | |
| 1328 SHIFT_TEMPS | |
| 1329 | |
| 1330 /* Pass 1: process columns from input, store into work array. */ | |
| 1331 | |
| 1332 inptr = coef_block; | |
| 1333 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1334 wsptr = workspace; | |
| 1335 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 1336 /* Even part: input rows 0, 2, 4 and 6 of this column */ | |
| 1337 | |
| 1338 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1339 if (ctr == 0) | |
| 1340 CLAMP_DC(z3); /* only the DC coefficient (column 0, row 0); CLAMP_DC is defined outside this chunk - presumably limits its range, confirm */ | |
| 1341 z3 <<= CONST_BITS; | |
| 1342 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS) when ONE is 1, so the RIGHT_SHIFTs of this pass round rather than truncate (assuming an arithmetic shift). */ | |
| 1343 z3 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 1344 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 1345 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ | |
| 1346 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ | |
| 1347 tmp10 = z3 + z1; | |
| 1348 tmp11 = z3 - z2; | |
| 1349 | |
| 1350 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ | |
| 1351 CONST_BITS-PASS1_BITS); /* descaled already here: its odd partner tmp12 is added below without a further shift */ | |
| 1352 | |
| 1353 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 1354 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 1355 | |
| 1356 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ | |
| 1357 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ | |
| 1358 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ | |
| 1359 | |
| 1360 tmp20 = tmp10 + tmp12; | |
| 1361 tmp24 = tmp10 - tmp12; | |
| 1362 tmp21 = tmp11 + tmp13; | |
| 1363 tmp23 = tmp11 - tmp13; | |
| 1364 | |
| 1365 /* Odd part: input rows 1, 3, 5 and 7; tmp10..tmp14 are recycled for the odd terms */ | |
| 1366 | |
| 1367 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1368 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 1369 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 1370 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 1371 | |
| 1372 tmp11 = z2 + z4; | |
| 1373 tmp13 = z2 - z4; | |
| 1374 | |
| 1375 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ | |
| 1376 z5 = z3 << CONST_BITS; /* scaled copy of z3; the unscaled z3 is still needed for tmp12 below */ | |
| 1377 | |
| 1378 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ | |
| 1379 z4 = z5 + tmp12; | |
| 1380 | |
| 1381 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ | |
| 1382 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ | |
| 1383 | |
| 1384 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ | |
| 1385 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); /* NOTE(review): tmp13 may be negative; ISO C leaves left shifts of negative signed values undefined - confirm INT32 signedness and compiler behaviour */ | |
| 1386 | |
| 1387 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* no multiply: scaled by PASS1_BITS directly, matching the already-descaled tmp22 */ | |
| 1388 | |
| 1389 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ | |
| 1390 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ | |
| 1391 | |
| 1392 /* Final output stage: outputs r and 9-r are the sum and difference of an even and an odd term; the tmp22/tmp12 pair is stored without RIGHT_SHIFT because both were scaled above */ | |
| 1393 | |
| 1394 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 1395 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 1396 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 1397 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 1398 wsptr[8*2] = (int) (tmp22 + tmp12); | |
| 1399 wsptr[8*7] = (int) (tmp22 - tmp12); | |
| 1400 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 1401 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 1402 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 1403 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 1404 } | |
| 1405 | |
| 1406 /* Pass 2: process 10 rows from work array, store into output array. Fed from the 8 workspace values of each row; every output is descaled by CONST_BITS+PASS1_BITS+3. */ | |
| 1407 | |
| 1408 wsptr = workspace; | |
| 1409 for (ctr = 0; ctr < 10; ctr++) { | |
| 1410 outptr = output_buf[ctr] + output_col; | |
| 1411 | |
| 1412 /* Even part: workspace entries 0, 2, 4 and 6 of this row */ | |
| 1413 | |
| 1414 /* Add range center and fudge factor for final descale and range-limit: both are pre-scaled so that, after the shift by CONST_BITS below, they line up with the final CONST_BITS+PASS1_BITS+3 descale. */ | |
| 1415 z3 = (INT32) wsptr[0] + | |
| 1416 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 1417 (ONE << (PASS1_BITS+2))); | |
| 1418 z3 <<= CONST_BITS; | |
| 1419 z4 = (INT32) wsptr[4]; | |
| 1420 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ | |
| 1421 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ | |
| 1422 tmp10 = z3 + z1; | |
| 1423 tmp11 = z3 - z2; | |
| 1424 | |
| 1425 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ | |
| 1426 | |
| 1427 z2 = (INT32) wsptr[2]; | |
| 1428 z3 = (INT32) wsptr[6]; | |
| 1429 | |
| 1430 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ | |
| 1431 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ | |
| 1432 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ | |
| 1433 | |
| 1434 tmp20 = tmp10 + tmp12; | |
| 1435 tmp24 = tmp10 - tmp12; | |
| 1436 tmp21 = tmp11 + tmp13; | |
| 1437 tmp23 = tmp11 - tmp13; | |
| 1438 | |
| 1439 /* Odd part: workspace entries 1, 3, 5 and 7 */ | |
| 1440 | |
| 1441 z1 = (INT32) wsptr[1]; | |
| 1442 z2 = (INT32) wsptr[3]; | |
| 1443 z3 = (INT32) wsptr[5]; | |
| 1444 z3 <<= CONST_BITS; /* unlike pass 1, z3 itself is scaled here; every later use expects the scaled value */ | |
| 1445 z4 = (INT32) wsptr[7]; | |
| 1446 | |
| 1447 tmp11 = z2 + z4; | |
| 1448 tmp13 = z2 - z4; | |
| 1449 | |
| 1450 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ | |
| 1451 | |
| 1452 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ | |
| 1453 z4 = z3 + tmp12; | |
| 1454 | |
| 1455 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ | |
| 1456 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ | |
| 1457 | |
| 1458 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ | |
| 1459 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); | |
| 1460 | |
| 1461 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; /* z3 already carries the CONST_BITS scaling, so only (z1 - tmp13) is shifted */ | |
| 1462 | |
| 1463 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ | |
| 1464 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ | |
| 1465 | |
| 1466 /* Final output stage: descale, mask with RANGE_MASK and look the sample up in range_limit */ | |
| 1467 | |
| 1468 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 1469 CONST_BITS+PASS1_BITS+3) | |
| 1470 & RANGE_MASK]; | |
| 1471 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 1472 CONST_BITS+PASS1_BITS+3) | |
| 1473 & RANGE_MASK]; | |
| 1474 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 1475 CONST_BITS+PASS1_BITS+3) | |
| 1476 & RANGE_MASK]; | |
| 1477 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 1478 CONST_BITS+PASS1_BITS+3) | |
| 1479 & RANGE_MASK]; | |
| 1480 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 1481 CONST_BITS+PASS1_BITS+3) | |
| 1482 & RANGE_MASK]; | |
| 1483 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 1484 CONST_BITS+PASS1_BITS+3) | |
| 1485 & RANGE_MASK]; | |
| 1486 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 1487 CONST_BITS+PASS1_BITS+3) | |
| 1488 & RANGE_MASK]; | |
| 1489 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 1490 CONST_BITS+PASS1_BITS+3) | |
| 1491 & RANGE_MASK]; | |
| 1492 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 1493 CONST_BITS+PASS1_BITS+3) | |
| 1494 & RANGE_MASK]; | |
| 1495 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 1496 CONST_BITS+PASS1_BITS+3) | |
| 1497 & RANGE_MASK]; | |
| 1498 | |
| 1499 wsptr += 8; /* advance pointer to next row */ | |
| 1500 } | |
| 1501 } | |
| 1502 | |
| 1503 | |
| 1504 /* | |
| 1505 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1506 * producing an 11x11 output block. | |
| 1507 * | |
| 1508 * Optimized algorithm with 24 multiplications in the 1-D kernel. | |
| 1509 * cK represents sqrt(2) * cos(K*pi/22). | |
| 1510 */ | |
| 1511 | |
| 1512 GLOBAL(void) /* Dequantizes one coefficient block and inverse-transforms it into 11x11 samples. */ | |
| 1513 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* cinfo feeds IDCT_range_limit(); compptr->dct_table holds the dequantization multipliers */ | |
| 1514 JCOEFPTR coef_block, /* input coefficients; element k of a column is at index DCTSIZE*k */ | |
| 1515 JSAMPARRAY output_buf, JDIMENSION output_col) /* writes output_buf[0..10][output_col .. output_col+10] */ | |
| 1516 { | |
| 1517 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; /* odd-part terms; tmp10 first carries the scaled DC term through the even part */ | |
| 1518 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; /* even-part terms */ | |
| 1519 INT32 z1, z2, z3, z4; /* input values and scratch */ | |
| 1520 JCOEFPTR inptr; | |
| 1521 ISLOW_MULT_TYPE * quantptr; | |
| 1522 int * wsptr; | |
| 1523 JSAMPROW outptr; | |
| 1524 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 1525 int ctr; | |
| 1526 int workspace[8*11]; /* buffers data between passes: 11 rows of 8 ints */ | |
| 1527 SHIFT_TEMPS | |
| 1528 | |
| 1529 /* Pass 1: process columns from input, store into work array. */ | |
| 1530 /* Each of the 8 columns reads 8 coefficients and writes 11 values, one per workspace row, descaled by CONST_BITS-PASS1_BITS only. */ | |
| 1531 inptr = coef_block; | |
| 1532 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1533 wsptr = workspace; | |
| 1534 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 1535 /* Even part */ | |
| 1536 | |
| 1537 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1538 if (ctr == 0) /* only column 0 holds coefficient 0 of the block (the DC term, going by the macro name) */ | |
| 1539 CLAMP_DC(tmp10); /* macro defined outside this excerpt -- presumably limits the DC value; confirm at its definition */ | |
| 1540 tmp10 <<= CONST_BITS; /* bring to the scale of the MULTIPLY() products (assumes FIX() scales by 2^CONST_BITS -- confirm) */ | |
| 1541 /* Add fudge factor here for final descale. */ | |
| 1542 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the pass-1 divisor 1 << (CONST_BITS-PASS1_BITS) */ | |
| 1543 | |
| 1544 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 1545 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 1546 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 1547 | |
| 1548 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ | |
| 1549 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ | |
| 1550 z4 = z1 + z3; | |
| 1551 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ | |
| 1552 z4 -= z2; /* z4 = z1 + z3 - z2 from here on */ | |
| 1553 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ /* intermediate: folded into tmp20..tmp24, then recomputed below */ | |
| 1554 tmp21 = tmp20 + tmp23 + tmp25 - | |
| 1555 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ | |
| 1556 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ | |
| 1557 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ | |
| 1558 tmp24 += tmp25; | |
| 1559 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ | |
| 1560 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ | |
| 1561 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ | |
| 1562 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ /* final tmp25: centre output (row 5) */ | |
| 1563 | |
| 1564 /* Odd part */ | |
| 1565 | |
| 1566 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1567 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 1568 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 1569 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 1570 | |
| 1571 tmp11 = z1 + z2; | |
| 1572 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ | |
| 1573 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ | |
| 1574 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ | |
| 1575 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ | |
| 1576 tmp10 = tmp11 + tmp12 + tmp13 - /* tmp10 recycled: the DC term is not read again */ | |
| 1577 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ | |
| 1578 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ /* z1 recycled as scratch */ | |
| 1579 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ | |
| 1580 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ | |
| 1581 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ | |
| 1582 tmp11 += z1; | |
| 1583 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ | |
| 1584 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ | |
| 1585 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ | |
| 1586 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ | |
| 1587 | |
| 1588 /* Final output stage: row k = even + odd, row 10-k = even - odd, row 5 = even only */ | |
| 1589 | |
| 1590 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 1591 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 1592 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 1593 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 1594 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 1595 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 1596 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 1597 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 1598 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 1599 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 1600 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); | |
| 1601 } | |
| 1602 | |
| 1603 /* Pass 2: process 11 rows from work array, store into output array. */ | |
| 1604 /* Same 1-D kernel as pass 1, fed from 8 workspace ints per row instead of dequantized coefficients. */ | |
| 1605 wsptr = workspace; | |
| 1606 for (ctr = 0; ctr < 11; ctr++) { | |
| 1607 outptr = output_buf[ctr] + output_col; | |
| 1608 | |
| 1609 /* Even part */ | |
| 1610 | |
| 1611 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 1612 tmp10 = (INT32) wsptr[0] + | |
| 1613 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 1614 (ONE << (PASS1_BITS+2))); /* half of 1 << (PASS1_BITS+3) */ | |
| 1615 tmp10 <<= CONST_BITS; /* both offsets now sit at the scale removed by the final shift of CONST_BITS+PASS1_BITS+3 */ | |
| 1616 | |
| 1617 z1 = (INT32) wsptr[2]; | |
| 1618 z2 = (INT32) wsptr[4]; | |
| 1619 z3 = (INT32) wsptr[6]; | |
| 1620 | |
| 1621 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ | |
| 1622 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ | |
| 1623 z4 = z1 + z3; | |
| 1624 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ | |
| 1625 z4 -= z2; /* z4 = z1 + z3 - z2 from here on */ | |
| 1626 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ /* intermediate: folded into tmp20..tmp24, then recomputed below */ | |
| 1627 tmp21 = tmp20 + tmp23 + tmp25 - | |
| 1628 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ | |
| 1629 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ | |
| 1630 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ | |
| 1631 tmp24 += tmp25; | |
| 1632 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ | |
| 1633 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ | |
| 1634 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ | |
| 1635 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ /* final tmp25: centre output (column 5) */ | |
| 1636 | |
| 1637 /* Odd part */ | |
| 1638 | |
| 1639 z1 = (INT32) wsptr[1]; | |
| 1640 z2 = (INT32) wsptr[3]; | |
| 1641 z3 = (INT32) wsptr[5]; | |
| 1642 z4 = (INT32) wsptr[7]; | |
| 1643 | |
| 1644 tmp11 = z1 + z2; | |
| 1645 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ | |
| 1646 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ | |
| 1647 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ | |
| 1648 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ | |
| 1649 tmp10 = tmp11 + tmp12 + tmp13 - /* tmp10 recycled: the offset DC term is not read again */ | |
| 1650 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ | |
| 1651 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ /* z1 recycled as scratch */ | |
| 1652 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ | |
| 1653 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ | |
| 1654 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ | |
| 1655 tmp11 += z1; | |
| 1656 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ | |
| 1657 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ | |
| 1658 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ | |
| 1659 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ | |
| 1660 | |
| 1661 /* Final output stage: descale, mask the index with RANGE_MASK, look the sample up in range_limit; columns k and 10-k mirror as in pass 1 */ | |
| 1662 | |
| 1663 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 1664 CONST_BITS+PASS1_BITS+3) | |
| 1665 & RANGE_MASK]; | |
| 1666 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 1667 CONST_BITS+PASS1_BITS+3) | |
| 1668 & RANGE_MASK]; | |
| 1669 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 1670 CONST_BITS+PASS1_BITS+3) | |
| 1671 & RANGE_MASK]; | |
| 1672 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 1673 CONST_BITS+PASS1_BITS+3) | |
| 1674 & RANGE_MASK]; | |
| 1675 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 1676 CONST_BITS+PASS1_BITS+3) | |
| 1677 & RANGE_MASK]; | |
| 1678 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 1679 CONST_BITS+PASS1_BITS+3) | |
| 1680 & RANGE_MASK]; | |
| 1681 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 1682 CONST_BITS+PASS1_BITS+3) | |
| 1683 & RANGE_MASK]; | |
| 1684 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 1685 CONST_BITS+PASS1_BITS+3) | |
| 1686 & RANGE_MASK]; | |
| 1687 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 1688 CONST_BITS+PASS1_BITS+3) | |
| 1689 & RANGE_MASK]; | |
| 1690 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 1691 CONST_BITS+PASS1_BITS+3) | |
| 1692 & RANGE_MASK]; | |
| 1693 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, | |
| 1694 CONST_BITS+PASS1_BITS+3) | |
| 1695 & RANGE_MASK]; | |
| 1696 | |
| 1697 wsptr += 8; /* advance pointer to next row */ | |
| 1698 } | |
| 1699 } | |
| 1700 | |
| 1701 | |
| 1702 /* | |
| 1703 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1704 * producing a 12x12 output block. | |
| 1705 * | |
| 1706 * Optimized algorithm with 15 multiplications in the 1-D kernel. | |
| 1707 * cK represents sqrt(2) * cos(K*pi/24). | |
| * | |
| * Pass 1 applies the kernel to each of the 8 input columns (8 coefficients | |
| * in, 12 values out) and stores the results in a 12-row by 8-column | |
| * workspace, descaled by CONST_BITS-PASS1_BITS. Pass 2 applies the same | |
| * kernel to each workspace row, descales by CONST_BITS+PASS1_BITS+3, and | |
| * writes 12 samples per row through the range_limit table into | |
| * output_buf[0..11], starting at column output_col. | |
| 1708 */ | |
| 1709 | |
| 1710 GLOBAL(void) | |
| 1711 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1712 JCOEFPTR coef_block, | |
| 1713 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1714 { | |
| 1715 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part terms; tmp10..tmp12 also serve as even-part scratch */ | |
| 1716 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; /* even-part terms */ | |
| 1717 INT32 z1, z2, z3, z4; /* input values and scratch */ | |
| 1718 JCOEFPTR inptr; | |
| 1719 ISLOW_MULT_TYPE * quantptr; | |
| 1720 int * wsptr; | |
| 1721 JSAMPROW outptr; | |
| 1722 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 1723 int ctr; | |
| 1724 int workspace[8*12]; /* buffers data between passes: 12 rows of 8 ints */ | |
| 1725 SHIFT_TEMPS | |
| 1726 | |
| 1727 /* Pass 1: process columns from input, store into work array. */ | |
| 1728 | |
| 1729 inptr = coef_block; | |
| 1730 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1731 wsptr = workspace; | |
| 1732 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 1733 /* Even part */ | |
| 1734 | |
| 1735 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1736 if (ctr == 0) /* only column 0 holds coefficient 0 of the block (the DC term, going by the macro name) */ | |
| 1737 CLAMP_DC(z3); /* macro defined outside this excerpt -- presumably limits the DC value; confirm at its definition */ | |
| 1738 z3 <<= CONST_BITS; /* bring to the scale of the MULTIPLY() products (assumes FIX() scales by 2^CONST_BITS -- confirm) */ | |
| 1739 /* Add fudge factor here for final descale. */ | |
| 1740 z3 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the pass-1 divisor 1 << (CONST_BITS-PASS1_BITS) */ | |
| 1741 | |
| 1742 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 1743 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ | |
| 1744 | |
| 1745 tmp10 = z3 + z4; | |
| 1746 tmp11 = z3 - z4; | |
| 1747 | |
| 1748 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 1749 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ | |
| 1750 z1 <<= CONST_BITS; /* coefficient 2 with unit weight, at the same scale as z3 */ | |
| 1751 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 1752 z2 <<= CONST_BITS; /* likewise for coefficient 6 */ | |
| 1753 | |
| 1754 tmp12 = z1 - z2; /* tmp12 is scratch in the even part; it is reassigned twice more below */ | |
| 1755 | |
| 1756 tmp21 = z3 + tmp12; | |
| 1757 tmp24 = z3 - tmp12; | |
| 1758 | |
| 1759 tmp12 = z4 + z2; | |
| 1760 | |
| 1761 tmp20 = tmp10 + tmp12; | |
| 1762 tmp25 = tmp10 - tmp12; | |
| 1763 | |
| 1764 tmp12 = z4 - z1 - z2; | |
| 1765 | |
| 1766 tmp22 = tmp11 + tmp12; | |
| 1767 tmp23 = tmp11 - tmp12; | |
| 1768 | |
| 1769 /* Odd part */ | |
| 1770 | |
| 1771 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1772 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 1773 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 1774 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 1775 | |
| 1776 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ | |
| 1777 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ | |
| 1778 | |
| 1779 tmp10 = z1 + z3; | |
| 1780 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ | |
| 1781 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ | |
| 1782 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ | |
| 1783 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ | |
| 1784 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ | |
| 1785 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ | |
| 1786 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ | |
| 1787 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ | |
| 1788 | |
| 1789 z1 -= z4; /* z1 = input 1 - input 7 */ | |
| 1790 z2 -= z3; /* z2 = input 3 - input 5 */ | |
| 1791 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ /* z3 recycled as scratch */ | |
| 1792 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ /* tmp11/tmp14 reassigned: their earlier values are already folded into tmp10, tmp12, tmp13, tmp15 */ | |
| 1793 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ | |
| 1794 | |
| 1795 /* Final output stage: row k = even + odd, row 11-k = even - odd */ | |
| 1796 | |
| 1797 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 1798 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 1799 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 1800 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 1801 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 1802 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 1803 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 1804 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 1805 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 1806 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 1807 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 1808 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 1809 } | |
| 1810 | |
| 1811 /* Pass 2: process 12 rows from work array, store into output array. */ | |
| 1812 | |
| 1813 wsptr = workspace; | |
| 1814 for (ctr = 0; ctr < 12; ctr++) { | |
| 1815 outptr = output_buf[ctr] + output_col; | |
| 1816 | |
| 1817 /* Even part */ | |
| 1818 | |
| 1819 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 1820 z3 = (INT32) wsptr[0] + | |
| 1821 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 1822 (ONE << (PASS1_BITS+2))); /* half of 1 << (PASS1_BITS+3) */ | |
| 1823 z3 <<= CONST_BITS; /* both offsets now sit at the scale removed by the final shift of CONST_BITS+PASS1_BITS+3 */ | |
| 1824 | |
| 1825 z4 = (INT32) wsptr[4]; | |
| 1826 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ | |
| 1827 | |
| 1828 tmp10 = z3 + z4; | |
| 1829 tmp11 = z3 - z4; | |
| 1830 | |
| 1831 z1 = (INT32) wsptr[2]; | |
| 1832 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ | |
| 1833 z1 <<= CONST_BITS; /* element 2 with unit weight, at the same scale as z3 */ | |
| 1834 z2 = (INT32) wsptr[6]; | |
| 1835 z2 <<= CONST_BITS; /* likewise for element 6 */ | |
| 1836 | |
| 1837 tmp12 = z1 - z2; /* tmp12 is scratch in the even part; it is reassigned twice more below */ | |
| 1838 | |
| 1839 tmp21 = z3 + tmp12; | |
| 1840 tmp24 = z3 - tmp12; | |
| 1841 | |
| 1842 tmp12 = z4 + z2; | |
| 1843 | |
| 1844 tmp20 = tmp10 + tmp12; | |
| 1845 tmp25 = tmp10 - tmp12; | |
| 1846 | |
| 1847 tmp12 = z4 - z1 - z2; | |
| 1848 | |
| 1849 tmp22 = tmp11 + tmp12; | |
| 1850 tmp23 = tmp11 - tmp12; | |
| 1851 | |
| 1852 /* Odd part */ | |
| 1853 | |
| 1854 z1 = (INT32) wsptr[1]; | |
| 1855 z2 = (INT32) wsptr[3]; | |
| 1856 z3 = (INT32) wsptr[5]; | |
| 1857 z4 = (INT32) wsptr[7]; | |
| 1858 | |
| 1859 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ | |
| 1860 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ | |
| 1861 | |
| 1862 tmp10 = z1 + z3; | |
| 1863 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ | |
| 1864 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ | |
| 1865 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ | |
| 1866 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ | |
| 1867 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ | |
| 1868 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ | |
| 1869 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ | |
| 1870 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ | |
| 1871 | |
| 1872 z1 -= z4; /* z1 = element 1 - element 7 */ | |
| 1873 z2 -= z3; /* z2 = element 3 - element 5 */ | |
| 1874 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ /* z3 recycled as scratch */ | |
| 1875 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ /* tmp11/tmp14 reassigned: their earlier values are already folded into tmp10, tmp12, tmp13, tmp15 */ | |
| 1876 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ | |
| 1877 | |
| 1878 /* Final output stage: descale, mask the index with RANGE_MASK, look the sample up in range_limit; columns k and 11-k mirror */ | |
| 1879 | |
| 1880 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 1881 CONST_BITS+PASS1_BITS+3) | |
| 1882 & RANGE_MASK]; | |
| 1883 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 1884 CONST_BITS+PASS1_BITS+3) | |
| 1885 & RANGE_MASK]; | |
| 1886 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 1887 CONST_BITS+PASS1_BITS+3) | |
| 1888 & RANGE_MASK]; | |
| 1889 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 1890 CONST_BITS+PASS1_BITS+3) | |
| 1891 & RANGE_MASK]; | |
| 1892 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 1893 CONST_BITS+PASS1_BITS+3) | |
| 1894 & RANGE_MASK]; | |
| 1895 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 1896 CONST_BITS+PASS1_BITS+3) | |
| 1897 & RANGE_MASK]; | |
| 1898 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 1899 CONST_BITS+PASS1_BITS+3) | |
| 1900 & RANGE_MASK]; | |
| 1901 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 1902 CONST_BITS+PASS1_BITS+3) | |
| 1903 & RANGE_MASK]; | |
| 1904 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 1905 CONST_BITS+PASS1_BITS+3) | |
| 1906 & RANGE_MASK]; | |
| 1907 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 1908 CONST_BITS+PASS1_BITS+3) | |
| 1909 & RANGE_MASK]; | |
| 1910 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 1911 CONST_BITS+PASS1_BITS+3) | |
| 1912 & RANGE_MASK]; | |
| 1913 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 1914 CONST_BITS+PASS1_BITS+3) | |
| 1915 & RANGE_MASK]; | |
| 1916 | |
| 1917 wsptr += 8; /* advance pointer to next row */ | |
| 1918 } | |
| 1919 } | |
| 1920 | |
| 1921 | |
| 1922 /* | |
| 1923 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 1924 * producing a 13x13 output block. | |
| 1925 * | |
| 1926 * Optimized algorithm with 29 multiplications in the 1-D kernel. | |
| 1927 * cK represents sqrt(2) * cos(K*pi/26). | |
| * | |
| * Pass 1 applies the kernel to each of the 8 input columns (8 coefficients | |
| * in, 13 values out) and stores the results in a 13-row by 8-column | |
| * workspace, descaled by CONST_BITS-PASS1_BITS. Pass 2 applies the same | |
| * kernel to each workspace row, descales by CONST_BITS+PASS1_BITS+3, and | |
| * writes 13 samples per row through the range_limit table into | |
| * output_buf[0..12], starting at column output_col. | |
| 1928 */ | |
| 1929 | |
| 1930 GLOBAL(void) | |
| 1931 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 1932 JCOEFPTR coef_block, | |
| 1933 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 1934 { | |
| 1935 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part terms; tmp10..tmp13 also serve as even-part scratch */ | |
| 1936 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part terms; tmp26 is the centre output */ | |
| 1937 INT32 z1, z2, z3, z4; /* input values and scratch */ | |
| 1938 JCOEFPTR inptr; | |
| 1939 ISLOW_MULT_TYPE * quantptr; | |
| 1940 int * wsptr; | |
| 1941 JSAMPROW outptr; | |
| 1942 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 1943 int ctr; | |
| 1944 int workspace[8*13]; /* buffers data between passes: 13 rows of 8 ints */ | |
| 1945 SHIFT_TEMPS | |
| 1946 | |
| 1947 /* Pass 1: process columns from input, store into work array. */ | |
| 1948 | |
| 1949 inptr = coef_block; | |
| 1950 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 1951 wsptr = workspace; | |
| 1952 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 1953 /* Even part */ | |
| 1954 | |
| 1955 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 1956 if (ctr == 0) /* only column 0 holds coefficient 0 of the block (the DC term, going by the macro name) */ | |
| 1957 CLAMP_DC(z1); /* macro defined outside this excerpt -- presumably limits the DC value; confirm at its definition */ | |
| 1958 z1 <<= CONST_BITS; /* bring to the scale of the MULTIPLY() products (assumes FIX() scales by 2^CONST_BITS -- confirm) */ | |
| 1959 /* Add fudge factor here for final descale. */ | |
| 1960 z1 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the pass-1 divisor 1 << (CONST_BITS-PASS1_BITS) */ | |
| 1961 | |
| 1962 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 1963 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 1964 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 1965 | |
| 1966 tmp10 = z3 + z4; | |
| 1967 tmp11 = z3 - z4; | |
| 1968 | |
| 1969 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ /* tmp12/tmp13 are scratch, recomputed for each pair of even-part outputs */ | |
| 1970 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ | |
| 1971 | |
| 1972 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ | |
| 1973 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ | |
| 1974 | |
| 1975 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ | |
| 1976 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ | |
| 1977 | |
| 1978 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ | |
| 1979 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ | |
| 1980 | |
| 1981 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ | |
| 1982 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ /* note -z1: tmp13 is subtracted below, so the DC term still enters with a plus sign */ | |
| 1983 | |
| 1984 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ | |
| 1985 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ | |
| 1986 | |
| 1987 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ /* centre output (row 6) */ | |
| 1988 | |
| 1989 /* Odd part */ | |
| 1990 | |
| 1991 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 1992 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 1993 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 1994 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 1995 | |
| 1996 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ | |
| 1997 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ | |
| 1998 tmp15 = z1 + z4; /* kept unscaled: multiplied by c7 next and by c11 further down */ | |
| 1999 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ | |
| 2000 tmp10 = tmp11 + tmp12 + tmp13 - | |
| 2001 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ | |
| 2002 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ /* tmp14 is scratch until its final assignment further down */ | |
| 2003 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ | |
| 2004 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ | |
| 2005 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ | |
| 2006 tmp11 += tmp14; | |
| 2007 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ | |
| 2008 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ | |
| 2009 tmp12 += tmp14; | |
| 2010 tmp13 += tmp14; | |
| 2011 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ | |
| 2012 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ | |
| 2013 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ | |
| 2014 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ /* z1 recycled as scratch */ | |
| 2015 tmp14 += z1; | |
| 2016 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ | |
| 2017 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ | |
| 2018 | |
| 2019 /* Final output stage: row k = even + odd, row 12-k = even - odd, row 6 = even only */ | |
| 2020 | |
| 2021 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 2022 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 2023 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 2024 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 2025 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 2026 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 2027 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 2028 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 2029 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 2030 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 2031 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 2032 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 2033 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); | |
| 2034 } | |
| 2035 | |
| 2036 /* Pass 2: process 13 rows from work array, store into output array. */ | |
| 2037 | |
| 2038 wsptr = workspace; | |
| 2039 for (ctr = 0; ctr < 13; ctr++) { | |
| 2040 outptr = output_buf[ctr] + output_col; | |
| 2041 | |
| 2042 /* Even part */ | |
| 2043 | |
| 2044 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 2045 z1 = (INT32) wsptr[0] + | |
| 2046 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 2047 (ONE << (PASS1_BITS+2))); /* half of 1 << (PASS1_BITS+3) */ | |
| 2048 z1 <<= CONST_BITS; /* both offsets now sit at the scale removed by the final shift of CONST_BITS+PASS1_BITS+3 */ | |
| 2049 | |
| 2050 z2 = (INT32) wsptr[2]; | |
| 2051 z3 = (INT32) wsptr[4]; | |
| 2052 z4 = (INT32) wsptr[6]; | |
| 2053 | |
| 2054 tmp10 = z3 + z4; | |
| 2055 tmp11 = z3 - z4; | |
| 2056 | |
| 2057 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ /* tmp12/tmp13 are scratch, recomputed for each pair of even-part outputs */ | |
| 2058 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ | |
| 2059 | |
| 2060 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ | |
| 2061 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ | |
| 2062 | |
| 2063 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ | |
| 2064 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ | |
| 2065 | |
| 2066 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ | |
| 2067 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ | |
| 2068 | |
| 2069 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ | |
| 2070 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ /* note -z1: tmp13 is subtracted below, so the offset DC term still enters with a plus sign */ | |
| 2071 | |
| 2072 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ | |
| 2073 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ | |
| 2074 | |
| 2075 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ /* centre output (column 6) */ | |
| 2076 | |
| 2077 /* Odd part */ | |
| 2078 | |
| 2079 z1 = (INT32) wsptr[1]; | |
| 2080 z2 = (INT32) wsptr[3]; | |
| 2081 z3 = (INT32) wsptr[5]; | |
| 2082 z4 = (INT32) wsptr[7]; | |
| 2083 | |
| 2084 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ | |
| 2085 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ | |
| 2086 tmp15 = z1 + z4; /* kept unscaled: multiplied by c7 next and by c11 further down */ | |
| 2087 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ | |
| 2088 tmp10 = tmp11 + tmp12 + tmp13 - | |
| 2089 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ | |
| 2090 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ /* tmp14 is scratch until its final assignment further down */ | |
| 2091 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ | |
| 2092 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ | |
| 2093 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ | |
| 2094 tmp11 += tmp14; | |
| 2095 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ | |
| 2096 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ | |
| 2097 tmp12 += tmp14; | |
| 2098 tmp13 += tmp14; | |
| 2099 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ | |
| 2100 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ | |
| 2101 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ | |
| 2102 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ /* z1 recycled as scratch */ | |
| 2103 tmp14 += z1; | |
| 2104 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ | |
| 2105 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ | |
| 2106 | |
| 2107 /* Final output stage: descale, mask the index with RANGE_MASK, look the sample up in range_limit; columns k and 12-k mirror, column 6 is even-only */ | |
| 2108 | |
| 2109 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 2110 CONST_BITS+PASS1_BITS+3) | |
| 2111 & RANGE_MASK]; | |
| 2112 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 2113 CONST_BITS+PASS1_BITS+3) | |
| 2114 & RANGE_MASK]; | |
| 2115 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 2116 CONST_BITS+PASS1_BITS+3) | |
| 2117 & RANGE_MASK]; | |
| 2118 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 2119 CONST_BITS+PASS1_BITS+3) | |
| 2120 & RANGE_MASK]; | |
| 2121 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 2122 CONST_BITS+PASS1_BITS+3) | |
| 2123 & RANGE_MASK]; | |
| 2124 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 2125 CONST_BITS+PASS1_BITS+3) | |
| 2126 & RANGE_MASK]; | |
| 2127 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 2128 CONST_BITS+PASS1_BITS+3) | |
| 2129 & RANGE_MASK]; | |
| 2130 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 2131 CONST_BITS+PASS1_BITS+3) | |
| 2132 & RANGE_MASK]; | |
| 2133 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 2134 CONST_BITS+PASS1_BITS+3) | |
| 2135 & RANGE_MASK]; | |
| 2136 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 2137 CONST_BITS+PASS1_BITS+3) | |
| 2138 & RANGE_MASK]; | |
| 2139 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 2140 CONST_BITS+PASS1_BITS+3) | |
| 2141 & RANGE_MASK]; | |
| 2142 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 2143 CONST_BITS+PASS1_BITS+3) | |
| 2144 & RANGE_MASK]; | |
| 2145 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, | |
| 2146 CONST_BITS+PASS1_BITS+3) | |
| 2147 & RANGE_MASK]; | |
| 2148 | |
| 2149 wsptr += 8; /* advance pointer to next row */ | |
| 2150 } | |
| 2151 } | |
| 2152 | |
| 2153 | |
| 2154 /* | |
| 2155 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 2156 * producing a 14x14 output block. | |
| 2157 * | |
| 2158 * Optimized algorithm with 20 multiplications in the 1-D kernel. | |
| 2159 * cK represents sqrt(2) * cos(K*pi/28). | |
| 2160 */ | |
| 2161 | |
| 2162 GLOBAL(void) | |
| 2163 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 2164 JCOEFPTR coef_block, | |
| 2165 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 2166 { | |
| 2167 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | |
| 2168 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; | |
| 2169 INT32 z1, z2, z3, z4; | |
| 2170 JCOEFPTR inptr; | |
| 2171 ISLOW_MULT_TYPE * quantptr; | |
| 2172 int * wsptr; | |
| 2173 JSAMPROW outptr; | |
| 2174 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 2175 int ctr; | |
| 2176 int workspace[8*14]; /* buffers data between passes */ | |
| 2177 SHIFT_TEMPS | |
| 2178 | |
| 2179 /* Pass 1: process columns from input, store into work array. */ | |
| 2180 | |
| 2181 inptr = coef_block; | |
| 2182 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 2183 wsptr = workspace; | |
| 2184 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 2185 /* Even part */ | |
| 2186 | |
| 2187 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 2188 if (ctr == 0) | |
| 2189 CLAMP_DC(z1); | |
| 2190 z1 <<= CONST_BITS; | |
| 2191 /* Add fudge factor here for final descale. */ | |
| 2192 z1 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 2193 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 2194 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ | |
| 2195 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ | |
| 2196 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ | |
| 2197 | |
| 2198 tmp10 = z1 + z2; | |
| 2199 tmp11 = z1 + z3; | |
| 2200 tmp12 = z1 - z4; | |
| 2201 | |
| 2202 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ | |
| 2203 CONST_BITS-PASS1_BITS); | |
| 2204 | |
| 2205 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 2206 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 2207 | |
| 2208 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ | |
| 2209 | |
| 2210 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ | |
| 2211 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ | |
| 2212 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ | |
| 2213 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ | |
| 2214 | |
| 2215 tmp20 = tmp10 + tmp13; | |
| 2216 tmp26 = tmp10 - tmp13; | |
| 2217 tmp21 = tmp11 + tmp14; | |
| 2218 tmp25 = tmp11 - tmp14; | |
| 2219 tmp22 = tmp12 + tmp15; | |
| 2220 tmp24 = tmp12 - tmp15; | |
| 2221 | |
| 2222 /* Odd part */ | |
| 2223 | |
| 2224 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 2225 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 2226 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 2227 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 2228 tmp13 = z4 << CONST_BITS; | |
| 2229 | |
| 2230 tmp14 = z1 + z3; | |
| 2231 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ | |
| 2232 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ | |
| 2233 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ | |
| 2234 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ | |
| 2235 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ | |
| 2236 z1 -= z2; | |
| 2237 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ | |
| 2238 tmp16 += tmp15; | |
| 2239 z1 += z4; | |
| 2240 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ | |
| 2241 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ | |
| 2242 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ | |
| 2243 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ | |
| 2244 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ | |
| 2245 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ | |
| 2246 | |
| 2247 tmp13 = (z1 - z3) << PASS1_BITS; | |
| 2248 | |
| 2249 /* Final output stage */ | |
| 2250 | |
| 2251 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 2252 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 2253 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 2254 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 2255 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 2256 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 2257 wsptr[8*3] = (int) (tmp23 + tmp13); | |
| 2258 wsptr[8*10] = (int) (tmp23 - tmp13); | |
| 2259 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 2260 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 2261 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 2262 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 2263 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); | |
| 2264 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); | |
| 2265 } | |
| 2266 | |
| 2267 /* Pass 2: process 14 rows from work array, store into output array. */ | |
| 2268 | |
| 2269 wsptr = workspace; | |
| 2270 for (ctr = 0; ctr < 14; ctr++) { | |
| 2271 outptr = output_buf[ctr] + output_col; | |
| 2272 | |
| 2273 /* Even part */ | |
| 2274 | |
| 2275 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 2276 z1 = (INT32) wsptr[0] + | |
| 2277 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 2278 (ONE << (PASS1_BITS+2))); | |
| 2279 z1 <<= CONST_BITS; | |
| 2280 z4 = (INT32) wsptr[4]; | |
| 2281 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ | |
| 2282 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ | |
| 2283 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ | |
| 2284 | |
| 2285 tmp10 = z1 + z2; | |
| 2286 tmp11 = z1 + z3; | |
| 2287 tmp12 = z1 - z4; | |
| 2288 | |
| 2289 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ | |
| 2290 | |
| 2291 z1 = (INT32) wsptr[2]; | |
| 2292 z2 = (INT32) wsptr[6]; | |
| 2293 | |
| 2294 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ | |
| 2295 | |
| 2296 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ | |
| 2297 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ | |
| 2298 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ | |
| 2299 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ | |
| 2300 | |
| 2301 tmp20 = tmp10 + tmp13; | |
| 2302 tmp26 = tmp10 - tmp13; | |
| 2303 tmp21 = tmp11 + tmp14; | |
| 2304 tmp25 = tmp11 - tmp14; | |
| 2305 tmp22 = tmp12 + tmp15; | |
| 2306 tmp24 = tmp12 - tmp15; | |
| 2307 | |
| 2308 /* Odd part */ | |
| 2309 | |
| 2310 z1 = (INT32) wsptr[1]; | |
| 2311 z2 = (INT32) wsptr[3]; | |
| 2312 z3 = (INT32) wsptr[5]; | |
| 2313 z4 = (INT32) wsptr[7]; | |
| 2314 z4 <<= CONST_BITS; | |
| 2315 | |
| 2316 tmp14 = z1 + z3; | |
| 2317 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ | |
| 2318 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ | |
| 2319 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ | |
| 2320 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ | |
| 2321 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ | |
| 2322 z1 -= z2; | |
| 2323 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ | |
| 2324 tmp16 += tmp15; | |
| 2325 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ | |
| 2326 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ | |
| 2327 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ | |
| 2328 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ | |
| 2329 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ | |
| 2330 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ | |
| 2331 | |
| 2332 tmp13 = ((z1 - z3) << CONST_BITS) + z4; | |
| 2333 | |
| 2334 /* Final output stage */ | |
| 2335 | |
| 2336 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 2337 CONST_BITS+PASS1_BITS+3) | |
| 2338 & RANGE_MASK]; | |
| 2339 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 2340 CONST_BITS+PASS1_BITS+3) | |
| 2341 & RANGE_MASK]; | |
| 2342 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 2343 CONST_BITS+PASS1_BITS+3) | |
| 2344 & RANGE_MASK]; | |
| 2345 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 2346 CONST_BITS+PASS1_BITS+3) | |
| 2347 & RANGE_MASK]; | |
| 2348 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 2349 CONST_BITS+PASS1_BITS+3) | |
| 2350 & RANGE_MASK]; | |
| 2351 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 2352 CONST_BITS+PASS1_BITS+3) | |
| 2353 & RANGE_MASK]; | |
| 2354 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 2355 CONST_BITS+PASS1_BITS+3) | |
| 2356 & RANGE_MASK]; | |
| 2357 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 2358 CONST_BITS+PASS1_BITS+3) | |
| 2359 & RANGE_MASK]; | |
| 2360 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 2361 CONST_BITS+PASS1_BITS+3) | |
| 2362 & RANGE_MASK]; | |
| 2363 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 2364 CONST_BITS+PASS1_BITS+3) | |
| 2365 & RANGE_MASK]; | |
| 2366 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 2367 CONST_BITS+PASS1_BITS+3) | |
| 2368 & RANGE_MASK]; | |
| 2369 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 2370 CONST_BITS+PASS1_BITS+3) | |
| 2371 & RANGE_MASK]; | |
| 2372 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, | |
| 2373 CONST_BITS+PASS1_BITS+3) | |
| 2374 & RANGE_MASK]; | |
| 2375 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, | |
| 2376 CONST_BITS+PASS1_BITS+3) | |
| 2377 & RANGE_MASK]; | |
| 2378 | |
| 2379 wsptr += 8; /* advance pointer to next row */ | |
| 2380 } | |
| 2381 } | |
| 2382 | |
| 2383 | |
| 2384 /* | |
| 2385 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 2386 * producing a 15x15 output block. | |
| 2387 * Pass 1 applies the 15-point kernel (with dequantization) to each of the 8 input columns and stores 15 values per column in the workspace; pass 2 applies the same kernel to each of the 15 workspace rows and emits 15 range-limited samples per row. | |
| 2388 * Optimized algorithm with 22 multiplications in the 1-D kernel. | |
| 2389 * cK represents sqrt(2) * cos(K*pi/30). | |
| 2390 */ | |
| 2391 /* Parameters: cinfo supplies the sample range-limit table; compptr->dct_table supplies the values passed to DEQUANTIZE alongside each coefficient; coef_block is the input coefficient block; rows output_buf[0..14] each receive 15 samples starting at column output_col. */ | |
| 2392 GLOBAL(void) | |
| 2393 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 2394 JCOEFPTR coef_block, | |
| 2395 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 2396 { | |
| 2397 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | |
| 2398 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; | |
| 2399 INT32 z1, z2, z3, z4; | |
| 2400 JCOEFPTR inptr; | |
| 2401 ISLOW_MULT_TYPE * quantptr; | |
| 2402 int * wsptr; | |
| 2403 JSAMPROW outptr; | |
| 2404 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 2405 int ctr; | |
| 2406 int workspace[8*15]; /* buffers data between passes: 15 rows of 8 ints */ | |
| 2407 SHIFT_TEMPS | |
| 2408 | |
| 2409 /* Pass 1: process columns from input, store into work array. */ | |
| 2410 /* Each iteration handles one column: inptr, quantptr and wsptr advance by one element, and the 15 results are stored at wsptr[8*0] .. wsptr[8*14] (row stride 8). */ | |
| 2411 inptr = coef_block; | |
| 2412 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 2413 wsptr = workspace; | |
| 2414 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 2415 /* Even part */ | |
| 2416 | |
| 2417 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 2418 if (ctr == 0) /* first column only: z1 is derived from coef_block[0] here; CLAMP_DC is defined elsewhere in the file */ | |
| 2419 CLAMP_DC(z1); | |
| 2420 z1 <<= CONST_BITS; | |
| 2421 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS), so the RIGHT_SHIFT in the output stage rounds rather than truncates. */ | |
| 2422 z1 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 2423 | |
| 2424 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 2425 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 2426 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 2427 | |
| 2428 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ | |
| 2429 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ | |
| 2430 | |
| 2431 tmp12 = z1 - tmp10; | |
| 2432 tmp13 = z1 + tmp11; | |
| 2433 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ | |
| 2434 | |
| 2435 z4 = z2 - z3; | |
| 2436 z3 += z2; | |
| 2437 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ | |
| 2438 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ | |
| 2439 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ | |
| 2440 | |
| 2441 tmp20 = tmp13 + tmp10 + tmp11; | |
| 2442 tmp23 = tmp12 - tmp10 + tmp11 + z2; | |
| 2443 | |
| 2444 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ | |
| 2445 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ | |
| 2446 | |
| 2447 tmp25 = tmp13 - tmp10 - tmp11; | |
| 2448 tmp26 = tmp12 + tmp10 - tmp11 - z2; | |
| 2449 | |
| 2450 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ | |
| 2451 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ | |
| 2452 | |
| 2453 tmp21 = tmp12 + tmp10 + tmp11; | |
| 2454 tmp24 = tmp13 - tmp10 + tmp11; | |
| 2455 tmp11 += tmp11; | |
| 2456 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ | |
| 2457 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ | |
| 2458 /* tmp20..tmp27 now hold the even-part terms; tmp27 is used on its own for the middle output (index 7). */ | |
| 2459 /* Odd part */ | |
| 2460 /* Inputs 1, 3, 5 and 7 of the column produce tmp10..tmp16; z3 keeps input 5 times c5 while z4 is reloaded with input 7. */ | |
| 2461 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 2462 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 2463 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 2464 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ | |
| 2465 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 2466 | |
| 2467 tmp13 = z2 - z4; | |
| 2468 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ | |
| 2469 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ | |
| 2470 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ | |
| 2471 | |
| 2472 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ | |
| 2473 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ | |
| 2474 z2 = z1 - z4; | |
| 2475 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ | |
| 2476 | |
| 2477 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ | |
| 2478 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ | |
| 2479 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ | |
| 2480 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ | |
| 2481 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ | |
| 2482 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ | |
| 2483 | |
| 2484 /* Final output stage */ | |
| 2485 /* Outputs k and 14-k are (even + odd) and (even - odd) for k = 0..6; every value is descaled by CONST_BITS-PASS1_BITS, which leaves the workspace scaled up by 2**PASS1_BITS. */ | |
| 2486 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 2487 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 2488 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 2489 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 2490 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 2491 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 2492 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 2493 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 2494 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 2495 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 2496 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 2497 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 2498 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); | |
| 2499 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); | |
| 2500 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); | |
| 2501 } | |
| 2502 | |
| 2503 /* Pass 2: process 15 rows from work array, store into output array. */ | |
| 2504 /* Each iteration reads the 8 workspace values wsptr[0..7] of one row and writes 15 samples to outptr[0..14]. */ | |
| 2505 wsptr = workspace; | |
| 2506 for (ctr = 0; ctr < 15; ctr++) { | |
| 2507 outptr = output_buf[ctr] + output_col; | |
| 2508 | |
| 2509 /* Even part */ | |
| 2510 | |
| 2511 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 2512 z1 = (INT32) wsptr[0] + | |
| 2513 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 2514 (ONE << (PASS1_BITS+2))); | |
| 2515 z1 <<= CONST_BITS; | |
| 2516 /* After this shift the RANGE_CENTER term is RANGE_CENTER << (CONST_BITS+PASS1_BITS+3) and the fudge term is half of 2^(CONST_BITS+PASS1_BITS+3), i.e. both match the shift count of the final RIGHT_SHIFT. */ | |
| 2517 z2 = (INT32) wsptr[2]; | |
| 2518 z3 = (INT32) wsptr[4]; | |
| 2519 z4 = (INT32) wsptr[6]; | |
| 2520 | |
| 2521 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ | |
| 2522 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ | |
| 2523 | |
| 2524 tmp12 = z1 - tmp10; | |
| 2525 tmp13 = z1 + tmp11; | |
| 2526 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ | |
| 2527 | |
| 2528 z4 = z2 - z3; | |
| 2529 z3 += z2; | |
| 2530 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ | |
| 2531 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ | |
| 2532 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ | |
| 2533 | |
| 2534 tmp20 = tmp13 + tmp10 + tmp11; | |
| 2535 tmp23 = tmp12 - tmp10 + tmp11 + z2; | |
| 2536 | |
| 2537 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ | |
| 2538 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ | |
| 2539 | |
| 2540 tmp25 = tmp13 - tmp10 - tmp11; | |
| 2541 tmp26 = tmp12 + tmp10 - tmp11 - z2; | |
| 2542 | |
| 2543 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ | |
| 2544 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ | |
| 2545 | |
| 2546 tmp21 = tmp12 + tmp10 + tmp11; | |
| 2547 tmp24 = tmp13 - tmp10 + tmp11; | |
| 2548 tmp11 += tmp11; | |
| 2549 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ | |
| 2550 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ | |
| 2551 /* Even part done: tmp20..tmp27 are formed with the same constants as in pass 1. */ | |
| 2552 /* Odd part */ | |
| 2553 | |
| 2554 z1 = (INT32) wsptr[1]; | |
| 2555 z2 = (INT32) wsptr[3]; | |
| 2556 z4 = (INT32) wsptr[5]; | |
| 2557 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ | |
| 2558 z4 = (INT32) wsptr[7]; | |
| 2559 | |
| 2560 tmp13 = z2 - z4; | |
| 2561 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ | |
| 2562 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ | |
| 2563 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ | |
| 2564 | |
| 2565 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ | |
| 2566 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ | |
| 2567 z2 = z1 - z4; | |
| 2568 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ | |
| 2569 | |
| 2570 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ | |
| 2571 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ | |
| 2572 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ | |
| 2573 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ | |
| 2574 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ | |
| 2575 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ | |
| 2576 | |
| 2577 /* Final output stage */ | |
| 2578 /* Each sum is descaled by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index into range_limit; outputs k and 14-k pair up as even +/- odd, with tmp27 alone at index 7. */ | |
| 2579 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 2580 CONST_BITS+PASS1_BITS+3) | |
| 2581 & RANGE_MASK]; | |
| 2582 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 2583 CONST_BITS+PASS1_BITS+3) | |
| 2584 & RANGE_MASK]; | |
| 2585 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 2586 CONST_BITS+PASS1_BITS+3) | |
| 2587 & RANGE_MASK]; | |
| 2588 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 2589 CONST_BITS+PASS1_BITS+3) | |
| 2590 & RANGE_MASK]; | |
| 2591 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 2592 CONST_BITS+PASS1_BITS+3) | |
| 2593 & RANGE_MASK]; | |
| 2594 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 2595 CONST_BITS+PASS1_BITS+3) | |
| 2596 & RANGE_MASK]; | |
| 2597 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 2598 CONST_BITS+PASS1_BITS+3) | |
| 2599 & RANGE_MASK]; | |
| 2600 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 2601 CONST_BITS+PASS1_BITS+3) | |
| 2602 & RANGE_MASK]; | |
| 2603 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 2604 CONST_BITS+PASS1_BITS+3) | |
| 2605 & RANGE_MASK]; | |
| 2606 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 2607 CONST_BITS+PASS1_BITS+3) | |
| 2608 & RANGE_MASK]; | |
| 2609 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 2610 CONST_BITS+PASS1_BITS+3) | |
| 2611 & RANGE_MASK]; | |
| 2612 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 2613 CONST_BITS+PASS1_BITS+3) | |
| 2614 & RANGE_MASK]; | |
| 2615 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, | |
| 2616 CONST_BITS+PASS1_BITS+3) | |
| 2617 & RANGE_MASK]; | |
| 2618 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, | |
| 2619 CONST_BITS+PASS1_BITS+3) | |
| 2620 & RANGE_MASK]; | |
| 2621 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, | |
| 2622 CONST_BITS+PASS1_BITS+3) | |
| 2623 & RANGE_MASK]; | |
| 2624 | |
| 2625 wsptr += 8; /* advance pointer to next row */ | |
| 2626 } | |
| 2627 } | |
| 2628 | |
| 2629 | |
| 2630 /* | |
| 2631 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 2632 * producing a 16x16 output block. | |
| 2633 * Pass 1 applies the 16-point kernel (with dequantization) to each of the 8 input columns and stores 16 values per column in the workspace; pass 2 applies the same kernel to each of the 16 workspace rows and emits 16 range-limited samples per row. | |
| 2634 * Optimized algorithm with 28 multiplications in the 1-D kernel. | |
| 2635 * cK represents sqrt(2) * cos(K*pi/32). In the even part, "cK[16] = cJ[8]" marks a 16-point constant equal to sqrt(2) * cos(J*pi/16). | |
| 2636 */ | |
| 2637 /* Parameters: cinfo supplies the sample range-limit table; compptr->dct_table supplies the values passed to DEQUANTIZE alongside each coefficient; coef_block is the input coefficient block; rows output_buf[0..15] each receive 16 samples starting at column output_col. */ | |
| 2638 GLOBAL(void) | |
| 2639 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 2640 JCOEFPTR coef_block, | |
| 2641 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 2642 { | |
| 2643 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; | |
| 2644 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; | |
| 2645 INT32 z1, z2, z3, z4; | |
| 2646 JCOEFPTR inptr; | |
| 2647 ISLOW_MULT_TYPE * quantptr; | |
| 2648 int * wsptr; | |
| 2649 JSAMPROW outptr; | |
| 2650 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 2651 int ctr; | |
| 2652 int workspace[8*16]; /* buffers data between passes: 16 rows of 8 ints */ | |
| 2653 SHIFT_TEMPS | |
| 2654 | |
| 2655 /* Pass 1: process columns from input, store into work array. */ | |
| 2656 /* Each iteration handles one column: inptr, quantptr and wsptr advance by one element, and the 16 results are stored at wsptr[8*0] .. wsptr[8*15] (row stride 8). */ | |
| 2657 inptr = coef_block; | |
| 2658 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 2659 wsptr = workspace; | |
| 2660 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 2661 /* Even part */ | |
| 2662 | |
| 2663 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 2664 if (ctr == 0) /* first column only: tmp0 is derived from coef_block[0] here; CLAMP_DC is defined elsewhere in the file */ | |
| 2665 CLAMP_DC(tmp0); | |
| 2666 tmp0 <<= CONST_BITS; | |
| 2667 /* Add fudge factor here for final descale: half of 2^(CONST_BITS-PASS1_BITS), so the RIGHT_SHIFT in the output stage rounds rather than truncates. */ | |
| 2668 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 2669 | |
| 2670 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 2671 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ | |
| 2672 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ | |
| 2673 | |
| 2674 tmp10 = tmp0 + tmp1; | |
| 2675 tmp11 = tmp0 - tmp1; | |
| 2676 tmp12 = tmp0 + tmp2; | |
| 2677 tmp13 = tmp0 - tmp2; | |
| 2678 | |
| 2679 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 2680 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 2681 z3 = z1 - z2; | |
| 2682 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ | |
| 2683 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ | |
| 2684 | |
| 2685 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ | |
| 2686 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ | |
| 2687 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ | |
| 2688 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ | |
| 2689 | |
| 2690 tmp20 = tmp10 + tmp0; | |
| 2691 tmp27 = tmp10 - tmp0; | |
| 2692 tmp21 = tmp12 + tmp1; | |
| 2693 tmp26 = tmp12 - tmp1; | |
| 2694 tmp22 = tmp13 + tmp2; | |
| 2695 tmp25 = tmp13 - tmp2; | |
| 2696 tmp23 = tmp11 + tmp3; | |
| 2697 tmp24 = tmp11 - tmp3; | |
| 2698 /* tmp20..tmp27 now hold the even-part terms; tmp0..tmp3 and tmp10..tmp13 are overwritten below with the odd-part terms. */ | |
| 2699 /* Odd part */ | |
| 2700 | |
| 2701 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 2702 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 2703 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 2704 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 2705 | |
| 2706 tmp11 = z1 + z3; | |
| 2707 | |
| 2708 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ | |
| 2709 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ | |
| 2710 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ | |
| 2711 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ | |
| 2712 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ | |
| 2713 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ | |
| 2714 tmp0 = tmp1 + tmp2 + tmp3 - | |
| 2715 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ | |
| 2716 tmp13 = tmp10 + tmp11 + tmp12 - | |
| 2717 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ | |
| 2718 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ | |
| 2719 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ | |
| 2720 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ | |
| 2721 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ | |
| 2722 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ | |
| 2723 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ | |
| 2724 z2 += z4; | |
| 2725 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ | |
| 2726 tmp1 += z1; | |
| 2727 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ | |
| 2728 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ | |
| 2729 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ | |
| 2730 tmp12 += z2; | |
| 2731 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ | |
| 2732 tmp2 += z2; | |
| 2733 tmp3 += z2; | |
| 2734 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ | |
| 2735 tmp10 += z2; | |
| 2736 tmp11 += z2; | |
| 2737 | |
| 2738 /* Final output stage */ | |
| 2739 /* Outputs k and 15-k are (even + odd) and (even - odd) for k = 0..7; every value is descaled by CONST_BITS-PASS1_BITS, which leaves the workspace scaled up by 2**PASS1_BITS. */ | |
| 2740 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); | |
| 2741 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); | |
| 2742 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); | |
| 2743 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); | |
| 2744 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); | |
| 2745 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); | |
| 2746 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); | |
| 2747 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); | |
| 2748 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); | |
| 2749 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); | |
| 2750 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); | |
| 2751 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); | |
| 2752 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); | |
| 2753 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); | |
| 2754 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); | |
| 2755 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); | |
| 2756 } | |
| 2757 | |
| 2758 /* Pass 2: process 16 rows from work array, store into output array. */ | |
| 2759 /* Each iteration reads the 8 workspace values wsptr[0..7] of one row and writes 16 samples to outptr[0..15]. */ | |
| 2760 wsptr = workspace; | |
| 2761 for (ctr = 0; ctr < 16; ctr++) { | |
| 2762 outptr = output_buf[ctr] + output_col; | |
| 2763 | |
| 2764 /* Even part */ | |
| 2765 | |
| 2766 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 2767 tmp0 = (INT32) wsptr[0] + | |
| 2768 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 2769 (ONE << (PASS1_BITS+2))); | |
| 2770 tmp0 <<= CONST_BITS; | |
| 2771 /* After this shift the RANGE_CENTER term is RANGE_CENTER << (CONST_BITS+PASS1_BITS+3) and the fudge term is half of 2^(CONST_BITS+PASS1_BITS+3), i.e. both match the shift count of the final RIGHT_SHIFT. */ | |
| 2772 z1 = (INT32) wsptr[4]; | |
| 2773 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ | |
| 2774 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ | |
| 2775 | |
| 2776 tmp10 = tmp0 + tmp1; | |
| 2777 tmp11 = tmp0 - tmp1; | |
| 2778 tmp12 = tmp0 + tmp2; | |
| 2779 tmp13 = tmp0 - tmp2; | |
| 2780 | |
| 2781 z1 = (INT32) wsptr[2]; | |
| 2782 z2 = (INT32) wsptr[6]; | |
| 2783 z3 = z1 - z2; | |
| 2784 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ | |
| 2785 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ | |
| 2786 | |
| 2787 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ | |
| 2788 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ | |
| 2789 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ | |
| 2790 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ | |
| 2791 | |
| 2792 tmp20 = tmp10 + tmp0; | |
| 2793 tmp27 = tmp10 - tmp0; | |
| 2794 tmp21 = tmp12 + tmp1; | |
| 2795 tmp26 = tmp12 - tmp1; | |
| 2796 tmp22 = tmp13 + tmp2; | |
| 2797 tmp25 = tmp13 - tmp2; | |
| 2798 tmp23 = tmp11 + tmp3; | |
| 2799 tmp24 = tmp11 - tmp3; | |
| 2800 /* Even part done: tmp20..tmp27 are formed with the same constants as in pass 1; tmp0..tmp3 and tmp10..tmp13 are overwritten below. */ | |
| 2801 /* Odd part */ | |
| 2802 | |
| 2803 z1 = (INT32) wsptr[1]; | |
| 2804 z2 = (INT32) wsptr[3]; | |
| 2805 z3 = (INT32) wsptr[5]; | |
| 2806 z4 = (INT32) wsptr[7]; | |
| 2807 | |
| 2808 tmp11 = z1 + z3; | |
| 2809 | |
| 2810 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ | |
| 2811 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ | |
| 2812 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ | |
| 2813 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ | |
| 2814 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ | |
| 2815 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ | |
| 2816 tmp0 = tmp1 + tmp2 + tmp3 - | |
| 2817 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ | |
| 2818 tmp13 = tmp10 + tmp11 + tmp12 - | |
| 2819 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ | |
| 2820 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ | |
| 2821 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ | |
| 2822 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ | |
| 2823 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ | |
| 2824 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ | |
| 2825 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ | |
| 2826 z2 += z4; | |
| 2827 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ | |
| 2828 tmp1 += z1; | |
| 2829 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ | |
| 2830 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ | |
| 2831 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ | |
| 2832 tmp12 += z2; | |
| 2833 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ | |
| 2834 tmp2 += z2; | |
| 2835 tmp3 += z2; | |
| 2836 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ | |
| 2837 tmp10 += z2; | |
| 2838 tmp11 += z2; | |
| 2839 | |
| 2840 /* Final output stage */ | |
| 2841 /* Each sum is descaled by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index into range_limit; outputs k and 15-k pair up as even +/- odd. */ | |
| 2842 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, | |
| 2843 CONST_BITS+PASS1_BITS+3) | |
| 2844 & RANGE_MASK]; | |
| 2845 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, | |
| 2846 CONST_BITS+PASS1_BITS+3) | |
| 2847 & RANGE_MASK]; | |
| 2848 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, | |
| 2849 CONST_BITS+PASS1_BITS+3) | |
| 2850 & RANGE_MASK]; | |
| 2851 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, | |
| 2852 CONST_BITS+PASS1_BITS+3) | |
| 2853 & RANGE_MASK]; | |
| 2854 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, | |
| 2855 CONST_BITS+PASS1_BITS+3) | |
| 2856 & RANGE_MASK]; | |
| 2857 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, | |
| 2858 CONST_BITS+PASS1_BITS+3) | |
| 2859 & RANGE_MASK]; | |
| 2860 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, | |
| 2861 CONST_BITS+PASS1_BITS+3) | |
| 2862 & RANGE_MASK]; | |
| 2863 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, | |
| 2864 CONST_BITS+PASS1_BITS+3) | |
| 2865 & RANGE_MASK]; | |
| 2866 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, | |
| 2867 CONST_BITS+PASS1_BITS+3) | |
| 2868 & RANGE_MASK]; | |
| 2869 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, | |
| 2870 CONST_BITS+PASS1_BITS+3) | |
| 2871 & RANGE_MASK]; | |
| 2872 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, | |
| 2873 CONST_BITS+PASS1_BITS+3) | |
| 2874 & RANGE_MASK]; | |
| 2875 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, | |
| 2876 CONST_BITS+PASS1_BITS+3) | |
| 2877 & RANGE_MASK]; | |
| 2878 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, | |
| 2879 CONST_BITS+PASS1_BITS+3) | |
| 2880 & RANGE_MASK]; | |
| 2881 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, | |
| 2882 CONST_BITS+PASS1_BITS+3) | |
| 2883 & RANGE_MASK]; | |
| 2884 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, | |
| 2885 CONST_BITS+PASS1_BITS+3) | |
| 2886 & RANGE_MASK]; | |
| 2887 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, | |
| 2888 CONST_BITS+PASS1_BITS+3) | |
| 2889 & RANGE_MASK]; | |
| 2890 | |
| 2891 wsptr += 8; /* advance pointer to next row */ | |
| 2892 } | |
| 2893 } | |
| 2894 | |
| 2895 | |
| 2896 /* | |
| 2897 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 2898 * producing a 16x8 output block. | |
| 2899 * | |
| 2900 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows). | |
| 2901 */ | |
| 2902 | |
| 2903 GLOBAL(void) | |
| 2904 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 2905 JCOEFPTR coef_block, | |
| 2906 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 2907 { | |
| 2908 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; | |
| 2909 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; | |
| 2910 INT32 z1, z2, z3, z4; | |
| 2911 JCOEFPTR inptr; | |
| 2912 ISLOW_MULT_TYPE * quantptr; | |
| 2913 int * wsptr; | |
| 2914 JSAMPROW outptr; | |
| 2915 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 2916 int ctr; | |
| 2917 int workspace[8*8]; /* buffers data between passes */ | |
| 2918 SHIFT_TEMPS | |
| 2919 | |
| 2920 /* Pass 1: process columns from input, store into work array. | |
| 2921 * Note results are scaled up by sqrt(8) compared to a true IDCT; | |
| 2922 * furthermore, we scale the results by 2**PASS1_BITS. | |
| 2923 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | |
| 2924 */ | |
| 2925 | |
| 2926 inptr = coef_block; | |
| 2927 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 2928 wsptr = workspace; | |
| 2929 for (ctr = DCTSIZE; ctr > 0; ctr--) { | |
| 2930 /* Due to quantization, we will usually find that many of the input | |
| 2931 * coefficients are zero, especially the AC terms. We can exploit this | |
| 2932 * by short-circuiting the IDCT calculation for any column in which all | |
| 2933 * the AC terms are zero. In that case each output is equal to the | |
| 2934 * DC coefficient (with scale factor as needed). | |
| 2935 * With typical images and quantization tables, half or more of the | |
| 2936 * column DCT calculations can be simplified this way. | |
| 2937 */ | |
| 2938 | |
| 2939 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && | |
| 2940 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && | |
| 2941 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && | |
| 2942 inptr[DCTSIZE*7] == 0) { | |
| 2943 /* AC terms all zero */ | |
| 2944 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 2945 if (ctr == DCTSIZE) | |
| 2946 CLAMP_DC(dcval); | |
| 2947 dcval <<= PASS1_BITS; | |
| 2948 | |
| 2949 wsptr[DCTSIZE*0] = dcval; | |
| 2950 wsptr[DCTSIZE*1] = dcval; | |
| 2951 wsptr[DCTSIZE*2] = dcval; | |
| 2952 wsptr[DCTSIZE*3] = dcval; | |
| 2953 wsptr[DCTSIZE*4] = dcval; | |
| 2954 wsptr[DCTSIZE*5] = dcval; | |
| 2955 wsptr[DCTSIZE*6] = dcval; | |
| 2956 wsptr[DCTSIZE*7] = dcval; | |
| 2957 | |
| 2958 inptr++; /* advance pointers to next column */ | |
| 2959 quantptr++; | |
| 2960 wsptr++; | |
| 2961 continue; | |
| 2962 } | |
| 2963 | |
| 2964 /* Even part: reverse the even part of the forward DCT. | |
| 2965 * The rotator is c(-6). | |
| 2966 */ | |
| 2967 | |
| 2968 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 2969 if (ctr == DCTSIZE) | |
| 2970 CLAMP_DC(z2); | |
| 2971 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 2972 z2 <<= CONST_BITS; | |
| 2973 z3 <<= CONST_BITS; | |
| 2974 /* Add fudge factor here for final descale. */ | |
| 2975 z2 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 2976 | |
| 2977 tmp0 = z2 + z3; | |
| 2978 tmp1 = z2 - z3; | |
| 2979 | |
| 2980 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 2981 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 2982 | |
| 2983 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 2984 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 2985 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 2986 | |
| 2987 tmp10 = tmp0 + tmp2; | |
| 2988 tmp13 = tmp0 - tmp2; | |
| 2989 tmp11 = tmp1 + tmp3; | |
| 2990 tmp12 = tmp1 - tmp3; | |
| 2991 | |
| 2992 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 2993 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 2994 */ | |
| 2995 | |
| 2996 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 2997 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 2998 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 2999 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3000 | |
| 3001 z2 = tmp0 + tmp2; | |
| 3002 z3 = tmp1 + tmp3; | |
| 3003 | |
| 3004 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 3005 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 3006 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 3007 z2 += z1; | |
| 3008 z3 += z1; | |
| 3009 | |
| 3010 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 3011 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 3012 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 3013 tmp0 += z1 + z2; | |
| 3014 tmp3 += z1 + z3; | |
| 3015 | |
| 3016 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 3017 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 3018 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 3019 tmp1 += z1 + z3; | |
| 3020 tmp2 += z1 + z2; | |
| 3021 | |
| 3022 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
| 3023 | |
| 3024 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
| 3025 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
| 3026 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
| 3027 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
| 3028 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
| 3029 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
| 3030 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
| 3031 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
| 3032 | |
| 3033 inptr++; /* advance pointers to next column */ | |
| 3034 quantptr++; | |
| 3035 wsptr++; | |
| 3036 } | |
| 3037 | |
| 3038 /* Pass 2: process 8 rows from work array, store into output array. | |
| 3039 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). | |
| 3040 */ | |
| 3041 | |
| 3042 wsptr = workspace; | |
| 3043 for (ctr = 0; ctr < 8; ctr++) { | |
| 3044 outptr = output_buf[ctr] + output_col; | |
| 3045 | |
| 3046 /* Even part */ | |
| 3047 | |
| 3048 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3049 tmp0 = (INT32) wsptr[0] + | |
| 3050 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3051 (ONE << (PASS1_BITS+2))); | |
| 3052 tmp0 <<= CONST_BITS; | |
| 3053 | |
| 3054 z1 = (INT32) wsptr[4]; | |
| 3055 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ | |
| 3056 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ | |
| 3057 | |
| 3058 tmp10 = tmp0 + tmp1; | |
| 3059 tmp11 = tmp0 - tmp1; | |
| 3060 tmp12 = tmp0 + tmp2; | |
| 3061 tmp13 = tmp0 - tmp2; | |
| 3062 | |
| 3063 z1 = (INT32) wsptr[2]; | |
| 3064 z2 = (INT32) wsptr[6]; | |
| 3065 z3 = z1 - z2; | |
| 3066 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ | |
| 3067 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ | |
| 3068 | |
| 3069 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ | |
| 3070 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ | |
| 3071 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ | |
| 3072 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ | |
| 3073 | |
| 3074 tmp20 = tmp10 + tmp0; | |
| 3075 tmp27 = tmp10 - tmp0; | |
| 3076 tmp21 = tmp12 + tmp1; | |
| 3077 tmp26 = tmp12 - tmp1; | |
| 3078 tmp22 = tmp13 + tmp2; | |
| 3079 tmp25 = tmp13 - tmp2; | |
| 3080 tmp23 = tmp11 + tmp3; | |
| 3081 tmp24 = tmp11 - tmp3; | |
| 3082 | |
| 3083 /* Odd part */ | |
| 3084 | |
| 3085 z1 = (INT32) wsptr[1]; | |
| 3086 z2 = (INT32) wsptr[3]; | |
| 3087 z3 = (INT32) wsptr[5]; | |
| 3088 z4 = (INT32) wsptr[7]; | |
| 3089 | |
| 3090 tmp11 = z1 + z3; | |
| 3091 | |
| 3092 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ | |
| 3093 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ | |
| 3094 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ | |
| 3095 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ | |
| 3096 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ | |
| 3097 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ | |
| 3098 tmp0 = tmp1 + tmp2 + tmp3 - | |
| 3099 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ | |
| 3100 tmp13 = tmp10 + tmp11 + tmp12 - | |
| 3101 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ | |
| 3102 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ | |
| 3103 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ | |
| 3104 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ | |
| 3105 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ | |
| 3106 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ | |
| 3107 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ | |
| 3108 z2 += z4; | |
| 3109 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ | |
| 3110 tmp1 += z1; | |
| 3111 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ | |
| 3112 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ | |
| 3113 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ | |
| 3114 tmp12 += z2; | |
| 3115 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ | |
| 3116 tmp2 += z2; | |
| 3117 tmp3 += z2; | |
| 3118 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ | |
| 3119 tmp10 += z2; | |
| 3120 tmp11 += z2; | |
| 3121 | |
| 3122 /* Final output stage */ | |
| 3123 | |
| 3124 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, | |
| 3125 CONST_BITS+PASS1_BITS+3) | |
| 3126 & RANGE_MASK]; | |
| 3127 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, | |
| 3128 CONST_BITS+PASS1_BITS+3) | |
| 3129 & RANGE_MASK]; | |
| 3130 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, | |
| 3131 CONST_BITS+PASS1_BITS+3) | |
| 3132 & RANGE_MASK]; | |
| 3133 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, | |
| 3134 CONST_BITS+PASS1_BITS+3) | |
| 3135 & RANGE_MASK]; | |
| 3136 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, | |
| 3137 CONST_BITS+PASS1_BITS+3) | |
| 3138 & RANGE_MASK]; | |
| 3139 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, | |
| 3140 CONST_BITS+PASS1_BITS+3) | |
| 3141 & RANGE_MASK]; | |
| 3142 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, | |
| 3143 CONST_BITS+PASS1_BITS+3) | |
| 3144 & RANGE_MASK]; | |
| 3145 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, | |
| 3146 CONST_BITS+PASS1_BITS+3) | |
| 3147 & RANGE_MASK]; | |
| 3148 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, | |
| 3149 CONST_BITS+PASS1_BITS+3) | |
| 3150 & RANGE_MASK]; | |
| 3151 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, | |
| 3152 CONST_BITS+PASS1_BITS+3) | |
| 3153 & RANGE_MASK]; | |
| 3154 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, | |
| 3155 CONST_BITS+PASS1_BITS+3) | |
| 3156 & RANGE_MASK]; | |
| 3157 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, | |
| 3158 CONST_BITS+PASS1_BITS+3) | |
| 3159 & RANGE_MASK]; | |
| 3160 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, | |
| 3161 CONST_BITS+PASS1_BITS+3) | |
| 3162 & RANGE_MASK]; | |
| 3163 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, | |
| 3164 CONST_BITS+PASS1_BITS+3) | |
| 3165 & RANGE_MASK]; | |
| 3166 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, | |
| 3167 CONST_BITS+PASS1_BITS+3) | |
| 3168 & RANGE_MASK]; | |
| 3169 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, | |
| 3170 CONST_BITS+PASS1_BITS+3) | |
| 3171 & RANGE_MASK]; | |
| 3172 | |
| 3173 wsptr += 8; /* advance pointer to next row */ | |
| 3174 } | |
| 3175 } | |
| 3176 | |
| 3177 | |
| 3178 /* | |
| 3179 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 3180 * producing a 14x7 output block (14 samples per row, 7 rows). | |
| 3181 * Output row r (0..6) is written to output_buf[r][output_col .. output_col+13]. | |
| 3182 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows). | |
| 3183 */ | |
| 3184 | |
| 3185 GLOBAL(void) | |
| 3186 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 3187 JCOEFPTR coef_block, | |
| 3188 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 3189 { | |
| 3190 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | |
| 3191 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; | |
| 3192 INT32 z1, z2, z3, z4; | |
| 3193 JCOEFPTR inptr; | |
| 3194 ISLOW_MULT_TYPE * quantptr; | |
| 3195 int * wsptr; | |
| 3196 JSAMPROW outptr; | |
| 3197 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 3198 int ctr; | |
| 3199 int workspace[8*7]; /* 7 rows of 8 ints: written by pass 1, read by pass 2 */ | |
| 3200 SHIFT_TEMPS | |
| 3201 | |
| 3202 /* Pass 1: process columns from input, store into work array. | |
| 3203 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). | |
| 3204 */ | |
| 3205 /* Only input rows 0..6 are read; each iteration fills one column of the 8-wide workspace. */ | |
| 3206 inptr = coef_block; | |
| 3207 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 3208 wsptr = workspace; | |
| 3209 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 3210 /* Even part */ | |
| 3211 | |
| 3212 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 3213 if (ctr == 0) /* first column only, i.e. the value taken from coef_block[0] */ | |
| 3214 CLAMP_DC(tmp23); | |
| 3215 tmp23 <<= CONST_BITS; | |
| 3216 /* Add fudge factor here for final descale. */ | |
| 3217 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 3218 | |
| 3219 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 3220 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 3221 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 3222 | |
| 3223 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ | |
| 3224 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ | |
| 3225 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ | |
| 3226 tmp10 = z1 + z3; | |
| 3227 z2 -= tmp10; | |
| 3228 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ | |
| 3229 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ | |
| 3230 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ | |
| 3231 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ | |
| 3232 | |
| 3233 /* Odd part */ | |
| 3234 | |
| 3235 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3236 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 3237 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 3238 | |
| 3239 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | |
| 3240 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | |
| 3241 tmp10 = tmp11 - tmp12; | |
| 3242 tmp11 += tmp12; | |
| 3243 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ | |
| 3244 tmp11 += tmp12; | |
| 3245 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ | |
| 3246 tmp10 += z2; | |
| 3247 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ | |
| 3248 | |
| 3249 /* Final output stage */ | |
| 3250 | |
| 3251 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 3252 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 3253 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 3254 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 3255 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 3256 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 3257 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS); /* middle row: even part only, no odd-part term */ | |
| 3258 } | |
| 3259 | |
| 3260 /* Pass 2: process 7 rows from work array, store into output array. | |
| 3261 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). | |
| 3262 */ | |
| 3263 | |
| 3264 wsptr = workspace; | |
| 3265 for (ctr = 0; ctr < 7; ctr++) { | |
| 3266 outptr = output_buf[ctr] + output_col; | |
| 3267 | |
| 3268 /* Even part */ | |
| 3269 | |
| 3270 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3271 z1 = (INT32) wsptr[0] + | |
| 3272 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3273 (ONE << (PASS1_BITS+2))); | |
| 3274 z1 <<= CONST_BITS; | |
| 3275 z4 = (INT32) wsptr[4]; | |
| 3276 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ | |
| 3277 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ | |
| 3278 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ | |
| 3279 | |
| 3280 tmp10 = z1 + z2; | |
| 3281 tmp11 = z1 + z3; | |
| 3282 tmp12 = z1 - z4; | |
| 3283 | |
| 3284 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ | |
| 3285 | |
| 3286 z1 = (INT32) wsptr[2]; | |
| 3287 z2 = (INT32) wsptr[6]; | |
| 3288 | |
| 3289 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ | |
| 3290 | |
| 3291 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ | |
| 3292 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ | |
| 3293 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ | |
| 3294 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ | |
| 3295 | |
| 3296 tmp20 = tmp10 + tmp13; | |
| 3297 tmp26 = tmp10 - tmp13; | |
| 3298 tmp21 = tmp11 + tmp14; | |
| 3299 tmp25 = tmp11 - tmp14; | |
| 3300 tmp22 = tmp12 + tmp15; | |
| 3301 tmp24 = tmp12 - tmp15; | |
| 3302 | |
| 3303 /* Odd part */ | |
| 3304 | |
| 3305 z1 = (INT32) wsptr[1]; | |
| 3306 z2 = (INT32) wsptr[3]; | |
| 3307 z3 = (INT32) wsptr[5]; | |
| 3308 z4 = (INT32) wsptr[7]; | |
| 3309 z4 <<= CONST_BITS; /* pre-shifted so it can be added directly to the MULTIPLY() terms below */ | |
| 3310 | |
| 3311 tmp14 = z1 + z3; | |
| 3312 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ | |
| 3313 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ | |
| 3314 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ | |
| 3315 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ | |
| 3316 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ | |
| 3317 z1 -= z2; | |
| 3318 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ | |
| 3319 tmp16 += tmp15; | |
| 3320 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ | |
| 3321 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ | |
| 3322 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ | |
| 3323 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ | |
| 3324 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ | |
| 3325 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ | |
| 3326 | |
| 3327 tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* z1 is wsptr[1]-wsptr[3] here (see "z1 -= z2" above) */ | |
| 3328 | |
| 3329 /* Final output stage */ | |
| 3330 | |
| 3331 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 3332 CONST_BITS+PASS1_BITS+3) | |
| 3333 & RANGE_MASK]; | |
| 3334 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 3335 CONST_BITS+PASS1_BITS+3) | |
| 3336 & RANGE_MASK]; | |
| 3337 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 3338 CONST_BITS+PASS1_BITS+3) | |
| 3339 & RANGE_MASK]; | |
| 3340 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 3341 CONST_BITS+PASS1_BITS+3) | |
| 3342 & RANGE_MASK]; | |
| 3343 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 3344 CONST_BITS+PASS1_BITS+3) | |
| 3345 & RANGE_MASK]; | |
| 3346 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 3347 CONST_BITS+PASS1_BITS+3) | |
| 3348 & RANGE_MASK]; | |
| 3349 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 3350 CONST_BITS+PASS1_BITS+3) | |
| 3351 & RANGE_MASK]; | |
| 3352 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 3353 CONST_BITS+PASS1_BITS+3) | |
| 3354 & RANGE_MASK]; | |
| 3355 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 3356 CONST_BITS+PASS1_BITS+3) | |
| 3357 & RANGE_MASK]; | |
| 3358 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 3359 CONST_BITS+PASS1_BITS+3) | |
| 3360 & RANGE_MASK]; | |
| 3361 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 3362 CONST_BITS+PASS1_BITS+3) | |
| 3363 & RANGE_MASK]; | |
| 3364 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 3365 CONST_BITS+PASS1_BITS+3) | |
| 3366 & RANGE_MASK]; | |
| 3367 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, | |
| 3368 CONST_BITS+PASS1_BITS+3) | |
| 3369 & RANGE_MASK]; | |
| 3370 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, | |
| 3371 CONST_BITS+PASS1_BITS+3) | |
| 3372 & RANGE_MASK]; | |
| 3373 | |
| 3374 wsptr += 8; /* advance pointer to next row */ | |
| 3375 } | |
| 3376 } | |
| 3377 | |
| 3378 | |
| 3379 /* | |
| 3380 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 3381 * producing a 12x6 output block (12 samples per row, 6 rows). | |
| 3382 * Output row r (0..5) is written to output_buf[r][output_col .. output_col+11]. | |
| 3383 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows). | |
| 3384 */ | |
| 3385 | |
| 3386 GLOBAL(void) | |
| 3387 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 3388 JCOEFPTR coef_block, | |
| 3389 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 3390 { | |
| 3391 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | |
| 3392 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; | |
| 3393 INT32 z1, z2, z3, z4; | |
| 3394 JCOEFPTR inptr; | |
| 3395 ISLOW_MULT_TYPE * quantptr; | |
| 3396 int * wsptr; | |
| 3397 JSAMPROW outptr; | |
| 3398 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 3399 int ctr; | |
| 3400 int workspace[8*6]; /* 6 rows of 8 ints: written by pass 1, read by pass 2 */ | |
| 3401 SHIFT_TEMPS | |
| 3402 | |
| 3403 /* Pass 1: process columns from input, store into work array. | |
| 3404 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). | |
| 3405 */ | |
| 3406 /* Only input rows 0..5 are read; each iteration fills one column of the 8-wide workspace. */ | |
| 3407 inptr = coef_block; | |
| 3408 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 3409 wsptr = workspace; | |
| 3410 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 3411 /* Even part */ | |
| 3412 | |
| 3413 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 3414 if (ctr == 0) /* first column only, i.e. the value taken from coef_block[0] */ | |
| 3415 CLAMP_DC(tmp10); | |
| 3416 tmp10 <<= CONST_BITS; | |
| 3417 /* Add fudge factor here for final descale. */ | |
| 3418 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 3419 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 3420 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ | |
| 3421 tmp11 = tmp10 + tmp20; | |
| 3422 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); /* descaled early: paired below with tmp11, which is only shifted by PASS1_BITS */ | |
| 3423 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 3424 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ | |
| 3425 tmp20 = tmp11 + tmp10; | |
| 3426 tmp22 = tmp11 - tmp10; | |
| 3427 | |
| 3428 /* Odd part */ | |
| 3429 | |
| 3430 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3431 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 3432 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 3433 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 3434 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); | |
| 3435 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); | |
| 3436 tmp11 = (z1 - z2 - z3) << PASS1_BITS; /* no multiply involved; stored below without a further RIGHT_SHIFT */ | |
| 3437 | |
| 3438 /* Final output stage */ | |
| 3439 | |
| 3440 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 3441 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 3442 wsptr[8*1] = (int) (tmp21 + tmp11); | |
| 3443 wsptr[8*4] = (int) (tmp21 - tmp11); | |
| 3444 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 3445 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 3446 } | |
| 3447 | |
| 3448 /* Pass 2: process 6 rows from work array, store into output array. | |
| 3449 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). | |
| 3450 */ | |
| 3451 | |
| 3452 wsptr = workspace; | |
| 3453 for (ctr = 0; ctr < 6; ctr++) { | |
| 3454 outptr = output_buf[ctr] + output_col; | |
| 3455 | |
| 3456 /* Even part */ | |
| 3457 | |
| 3458 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3459 z3 = (INT32) wsptr[0] + | |
| 3460 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3461 (ONE << (PASS1_BITS+2))); | |
| 3462 z3 <<= CONST_BITS; | |
| 3463 | |
| 3464 z4 = (INT32) wsptr[4]; | |
| 3465 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ | |
| 3466 | |
| 3467 tmp10 = z3 + z4; | |
| 3468 tmp11 = z3 - z4; | |
| 3469 | |
| 3470 z1 = (INT32) wsptr[2]; | |
| 3471 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ | |
| 3472 z1 <<= CONST_BITS; | |
| 3473 z2 = (INT32) wsptr[6]; | |
| 3474 z2 <<= CONST_BITS; | |
| 3475 | |
| 3476 tmp12 = z1 - z2; | |
| 3477 | |
| 3478 tmp21 = z3 + tmp12; | |
| 3479 tmp24 = z3 - tmp12; | |
| 3480 | |
| 3481 tmp12 = z4 + z2; | |
| 3482 | |
| 3483 tmp20 = tmp10 + tmp12; | |
| 3484 tmp25 = tmp10 - tmp12; | |
| 3485 | |
| 3486 tmp12 = z4 - z1 - z2; | |
| 3487 | |
| 3488 tmp22 = tmp11 + tmp12; | |
| 3489 tmp23 = tmp11 - tmp12; | |
| 3490 | |
| 3491 /* Odd part */ | |
| 3492 | |
| 3493 z1 = (INT32) wsptr[1]; | |
| 3494 z2 = (INT32) wsptr[3]; | |
| 3495 z3 = (INT32) wsptr[5]; | |
| 3496 z4 = (INT32) wsptr[7]; | |
| 3497 | |
| 3498 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ | |
| 3499 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ | |
| 3500 | |
| 3501 tmp10 = z1 + z3; | |
| 3502 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ | |
| 3503 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ | |
| 3504 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ | |
| 3505 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ | |
| 3506 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ | |
| 3507 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ | |
| 3508 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ | |
| 3509 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ | |
| 3510 /* tmp11 and tmp14 are recomputed below from the differences z1-z4 and z2-z3 */ | |
| 3511 z1 -= z4; | |
| 3512 z2 -= z3; | |
| 3513 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ | |
| 3514 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ | |
| 3515 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ | |
| 3516 | |
| 3517 /* Final output stage */ | |
| 3518 | |
| 3519 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 3520 CONST_BITS+PASS1_BITS+3) | |
| 3521 & RANGE_MASK]; | |
| 3522 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 3523 CONST_BITS+PASS1_BITS+3) | |
| 3524 & RANGE_MASK]; | |
| 3525 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 3526 CONST_BITS+PASS1_BITS+3) | |
| 3527 & RANGE_MASK]; | |
| 3528 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 3529 CONST_BITS+PASS1_BITS+3) | |
| 3530 & RANGE_MASK]; | |
| 3531 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 3532 CONST_BITS+PASS1_BITS+3) | |
| 3533 & RANGE_MASK]; | |
| 3534 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 3535 CONST_BITS+PASS1_BITS+3) | |
| 3536 & RANGE_MASK]; | |
| 3537 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 3538 CONST_BITS+PASS1_BITS+3) | |
| 3539 & RANGE_MASK]; | |
| 3540 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 3541 CONST_BITS+PASS1_BITS+3) | |
| 3542 & RANGE_MASK]; | |
| 3543 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 3544 CONST_BITS+PASS1_BITS+3) | |
| 3545 & RANGE_MASK]; | |
| 3546 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 3547 CONST_BITS+PASS1_BITS+3) | |
| 3548 & RANGE_MASK]; | |
| 3549 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, | |
| 3550 CONST_BITS+PASS1_BITS+3) | |
| 3551 & RANGE_MASK]; | |
| 3552 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, | |
| 3553 CONST_BITS+PASS1_BITS+3) | |
| 3554 & RANGE_MASK]; | |
| 3555 | |
| 3556 wsptr += 8; /* advance pointer to next row */ | |
| 3557 } | |
| 3558 } | |
| 3559 | |
| 3560 | |
| 3561 /* | |
| 3562 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 3563 * producing a 10x5 output block (10 samples per row, 5 rows). | |
| 3564 * Output row r (0..4) is written to output_buf[r][output_col .. output_col+9]. | |
| 3565 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows). | |
| 3566 */ | |
| 3567 | |
| 3568 GLOBAL(void) | |
| 3569 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 3570 JCOEFPTR coef_block, | |
| 3571 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 3572 { | |
| 3573 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | |
| 3574 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; | |
| 3575 INT32 z1, z2, z3, z4; | |
| 3576 JCOEFPTR inptr; | |
| 3577 ISLOW_MULT_TYPE * quantptr; | |
| 3578 int * wsptr; | |
| 3579 JSAMPROW outptr; | |
| 3580 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 3581 int ctr; | |
| 3582 int workspace[8*5]; /* 5 rows of 8 ints: written by pass 1, read by pass 2 */ | |
| 3583 SHIFT_TEMPS | |
| 3584 | |
| 3585 /* Pass 1: process columns from input, store into work array. | |
| 3586 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). | |
| 3587 */ | |
| 3588 /* Only input rows 0..4 are read; each iteration fills one column of the 8-wide workspace. */ | |
| 3589 inptr = coef_block; | |
| 3590 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 3591 wsptr = workspace; | |
| 3592 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 3593 /* Even part */ | |
| 3594 | |
| 3595 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 3596 if (ctr == 0) /* first column only, i.e. the value taken from coef_block[0] */ | |
| 3597 CLAMP_DC(tmp12); | |
| 3598 tmp12 <<= CONST_BITS; | |
| 3599 /* Add fudge factor here for final descale. */ | |
| 3600 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 3601 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 3602 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 3603 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ | |
| 3604 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ | |
| 3605 z3 = tmp12 + z2; | |
| 3606 tmp10 = z3 + z1; | |
| 3607 tmp11 = z3 - z1; | |
| 3608 tmp12 -= z2 << 2; /* from here on tmp12 is only used for the middle row, wsptr[8*2] */ | |
| 3609 | |
| 3610 /* Odd part */ | |
| 3611 | |
| 3612 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3613 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 3614 | |
| 3615 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ | |
| 3616 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ | |
| 3617 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ | |
| 3618 | |
| 3619 /* Final output stage */ | |
| 3620 | |
| 3621 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS); | |
| 3622 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS); | |
| 3623 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS); | |
| 3624 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS); | |
| 3625 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); /* middle row: even part only, no odd-part term */ | |
| 3626 } | |
| 3627 | |
| 3628 /* Pass 2: process 5 rows from work array, store into output array. | |
| 3629 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). | |
| 3630 */ | |
| 3631 | |
| 3632 wsptr = workspace; | |
| 3633 for (ctr = 0; ctr < 5; ctr++) { | |
| 3634 outptr = output_buf[ctr] + output_col; | |
| 3635 | |
| 3636 /* Even part */ | |
| 3637 | |
| 3638 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3639 z3 = (INT32) wsptr[0] + | |
| 3640 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3641 (ONE << (PASS1_BITS+2))); | |
| 3642 z3 <<= CONST_BITS; | |
| 3643 z4 = (INT32) wsptr[4]; | |
| 3644 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ | |
| 3645 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ | |
| 3646 tmp10 = z3 + z1; | |
| 3647 tmp11 = z3 - z2; | |
| 3648 | |
| 3649 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ | |
| 3650 | |
| 3651 z2 = (INT32) wsptr[2]; | |
| 3652 z3 = (INT32) wsptr[6]; | |
| 3653 | |
| 3654 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ | |
| 3655 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ | |
| 3656 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ | |
| 3657 | |
| 3658 tmp20 = tmp10 + tmp12; | |
| 3659 tmp24 = tmp10 - tmp12; | |
| 3660 tmp21 = tmp11 + tmp13; | |
| 3661 tmp23 = tmp11 - tmp13; | |
| 3662 | |
| 3663 /* Odd part */ | |
| 3664 | |
| 3665 z1 = (INT32) wsptr[1]; | |
| 3666 z2 = (INT32) wsptr[3]; | |
| 3667 z3 = (INT32) wsptr[5]; | |
| 3668 z3 <<= CONST_BITS; /* pre-shifted so it can be added directly to the MULTIPLY() terms below */ | |
| 3669 z4 = (INT32) wsptr[7]; | |
| 3670 | |
| 3671 tmp11 = z2 + z4; | |
| 3672 tmp13 = z2 - z4; | |
| 3673 | |
| 3674 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ | |
| 3675 | |
| 3676 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ | |
| 3677 z4 = z3 + tmp12; | |
| 3678 | |
| 3679 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ | |
| 3680 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ | |
| 3681 | |
| 3682 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ | |
| 3683 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); /* last term is tmp13/2 at the same CONST_BITS scale as z3 */ | |
| 3684 | |
| 3685 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; | |
| 3686 | |
| 3687 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ | |
| 3688 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ | |
| 3689 | |
| 3690 /* Final output stage */ | |
| 3691 | |
| 3692 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 3693 CONST_BITS+PASS1_BITS+3) | |
| 3694 & RANGE_MASK]; | |
| 3695 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 3696 CONST_BITS+PASS1_BITS+3) | |
| 3697 & RANGE_MASK]; | |
| 3698 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 3699 CONST_BITS+PASS1_BITS+3) | |
| 3700 & RANGE_MASK]; | |
| 3701 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 3702 CONST_BITS+PASS1_BITS+3) | |
| 3703 & RANGE_MASK]; | |
| 3704 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 3705 CONST_BITS+PASS1_BITS+3) | |
| 3706 & RANGE_MASK]; | |
| 3707 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 3708 CONST_BITS+PASS1_BITS+3) | |
| 3709 & RANGE_MASK]; | |
| 3710 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, | |
| 3711 CONST_BITS+PASS1_BITS+3) | |
| 3712 & RANGE_MASK]; | |
| 3713 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, | |
| 3714 CONST_BITS+PASS1_BITS+3) | |
| 3715 & RANGE_MASK]; | |
| 3716 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, | |
| 3717 CONST_BITS+PASS1_BITS+3) | |
| 3718 & RANGE_MASK]; | |
| 3719 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, | |
| 3720 CONST_BITS+PASS1_BITS+3) | |
| 3721 & RANGE_MASK]; | |
| 3722 | |
| 3723 wsptr += 8; /* advance pointer to next row */ | |
| 3724 } | |
| 3725 } | |
| 3726 | |
| 3727 | |
| 3728 /* | |
| 3729 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 3730 * producing an 8x4 output block. | |
| 3731 * | |
| 3732 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). | |
| 3733 */ | |
| 3734 | |
| 3735 GLOBAL(void) | |
| 3736 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 3737 JCOEFPTR coef_block, | |
| 3738 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 3739 { | |
| 3740 INT32 tmp0, tmp1, tmp2, tmp3; | |
| 3741 INT32 tmp10, tmp11, tmp12, tmp13; | |
| 3742 INT32 z1, z2, z3; | |
| 3743 JCOEFPTR inptr; | |
| 3744 ISLOW_MULT_TYPE * quantptr; | |
| 3745 int * wsptr; | |
| 3746 JSAMPROW outptr; | |
| 3747 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 3748 int ctr; | |
| 3749 int workspace[8*4]; /* buffers data between passes: 4 rows of 8 ints */ | |
| 3750 SHIFT_TEMPS | |
| 3751 | |
| 3752 /* Pass 1: process columns from input, store into work array. | |
| 3753 * 4-point IDCT kernel, | |
| 3754 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. | |
| * | |
| * Each of the 8 columns reads coefficient rows 0..3 only (row stride | |
| * DCTSIZE in coef_block and in compptr->dct_table) and writes 4 | |
| * workspace entries (row stride 8). CLAMP_DC is applied only when | |
| * ctr == 0, i.e. to the value derived from coef_block[0]. | |
| * Results are left scaled up by PASS1_BITS; pass 2 removes that scaling. | |
| 3755 */ | |
| 3756 | |
| 3757 inptr = coef_block; | |
| 3758 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 3759 wsptr = workspace; | |
| 3760 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 3761 /* Even part: coefficient rows 0 and 2, pre-scaled by PASS1_BITS */ | |
| 3762 | |
| 3763 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 3764 if (ctr == 0) | |
| 3765 CLAMP_DC(tmp0); | |
| 3766 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 3767 | |
| 3768 tmp10 = (tmp0 + tmp2) << PASS1_BITS; | |
| 3769 tmp12 = (tmp0 - tmp2) << PASS1_BITS; | |
| 3770 | |
| 3771 /* Odd part: coefficient rows 1 and 3 */ | |
| 3772 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 3773 | |
| 3774 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3775 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 3776 | |
| 3777 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 3778 /* Add fudge factor here for final descale (half of the shift step, for rounding). */ | |
| 3779 z1 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 3780 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ | |
| 3781 CONST_BITS-PASS1_BITS); | |
| 3782 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ | |
| 3783 CONST_BITS-PASS1_BITS); | |
| 3784 | |
| 3785 /* Final output stage: butterfly into workspace rows 0..3 of this column */ | |
| 3786 | |
| 3787 wsptr[8*0] = (int) (tmp10 + tmp0); | |
| 3788 wsptr[8*3] = (int) (tmp10 - tmp0); | |
| 3789 wsptr[8*1] = (int) (tmp12 + tmp2); | |
| 3790 wsptr[8*2] = (int) (tmp12 - tmp2); | |
| 3791 } | |
| 3792 | |
| 3793 /* Pass 2: process rows from work array, store into output array. | |
| 3794 * Note that we must descale the results by a factor of 8 == 2**3, | |
| 3795 * and also undo the PASS1_BITS scaling. | |
| 3796 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | |
| * | |
| * Each of the 4 workspace rows yields 8 samples, written to | |
| * output_buf[ctr] starting at column output_col. | |
| 3797 */ | |
| 3798 | |
| 3799 wsptr = workspace; | |
| 3800 for (ctr = 0; ctr < 4; ctr++) { | |
| 3801 outptr = output_buf[ctr] + output_col; | |
| 3802 | |
| 3803 /* Even part: reverse the even part of the forward DCT. | |
| 3804 * The rotator is c(-6). | |
| 3805 */ | |
| 3806 | |
| 3807 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3808 z2 = (INT32) wsptr[0] + | |
| 3809 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3810 (ONE << (PASS1_BITS+2))); | |
| 3811 z3 = (INT32) wsptr[4]; | |
| 3812 | |
| 3813 tmp0 = (z2 + z3) << CONST_BITS; | |
| 3814 tmp1 = (z2 - z3) << CONST_BITS; | |
| 3815 | |
| 3816 z2 = (INT32) wsptr[2]; | |
| 3817 z3 = (INT32) wsptr[6]; | |
| 3818 | |
| 3819 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 3820 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 3821 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 3822 | |
| 3823 tmp10 = tmp0 + tmp2; | |
| 3824 tmp13 = tmp0 - tmp2; | |
| 3825 tmp11 = tmp1 + tmp3; | |
| 3826 tmp12 = tmp1 - tmp3; | |
| 3827 | |
| 3828 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 3829 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| * Note that tmp0..tmp3 are reused here: the even-part results now | |
| * live in tmp10..tmp13. | |
| 3830 */ | |
| 3831 | |
| 3832 tmp0 = (INT32) wsptr[7]; | |
| 3833 tmp1 = (INT32) wsptr[5]; | |
| 3834 tmp2 = (INT32) wsptr[3]; | |
| 3835 tmp3 = (INT32) wsptr[1]; | |
| 3836 | |
| 3837 z2 = tmp0 + tmp2; | |
| 3838 z3 = tmp1 + tmp3; | |
| 3839 | |
| 3840 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 3841 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 3842 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 3843 z2 += z1; | |
| 3844 z3 += z1; | |
| 3845 | |
| 3846 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 3847 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 3848 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 3849 tmp0 += z1 + z2; | |
| 3850 tmp3 += z1 + z3; | |
| 3851 | |
| 3852 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 3853 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 3854 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 3855 tmp1 += z1 + z3; | |
| 3856 tmp2 += z1 + z2; | |
| 3857 | |
| 3858 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3. Each sum or difference is descaled, masked with RANGE_MASK and mapped through range_limit[]. */ | |
| 3859 | |
| 3860 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, | |
| 3861 CONST_BITS+PASS1_BITS+3) | |
| 3862 & RANGE_MASK]; | |
| 3863 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, | |
| 3864 CONST_BITS+PASS1_BITS+3) | |
| 3865 & RANGE_MASK]; | |
| 3866 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, | |
| 3867 CONST_BITS+PASS1_BITS+3) | |
| 3868 & RANGE_MASK]; | |
| 3869 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, | |
| 3870 CONST_BITS+PASS1_BITS+3) | |
| 3871 & RANGE_MASK]; | |
| 3872 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, | |
| 3873 CONST_BITS+PASS1_BITS+3) | |
| 3874 & RANGE_MASK]; | |
| 3875 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, | |
| 3876 CONST_BITS+PASS1_BITS+3) | |
| 3877 & RANGE_MASK]; | |
| 3878 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, | |
| 3879 CONST_BITS+PASS1_BITS+3) | |
| 3880 & RANGE_MASK]; | |
| 3881 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, | |
| 3882 CONST_BITS+PASS1_BITS+3) | |
| 3883 & RANGE_MASK]; | |
| 3884 | |
| 3885 wsptr += DCTSIZE; /* advance pointer to next row */ | |
| 3886 } | |
| 3887 } | |
| 3888 | |
| 3889 | |
| 3890 /* | |
| 3891 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 3892 * producing a 6x3 output block. | |
| 3893 * | |
| 3894 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). | |
| * | |
| * Only coefficient rows 0..2 of the first 6 columns of coef_block are | |
| * read (row stride DCTSIZE); each is combined with the matching entry | |
| * of compptr->dct_table by DEQUANTIZE. Six samples are written to each | |
| * of output_buf[0..2], starting at column output_col, after mapping | |
| * through the table returned by IDCT_range_limit(cinfo). | |
| 3895 */ | |
| 3896 | |
| 3897 GLOBAL(void) | |
| 3898 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 3899 JCOEFPTR coef_block, | |
| 3900 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 3901 { | |
| 3902 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; | |
| 3903 INT32 z1, z2, z3; | |
| 3904 JCOEFPTR inptr; | |
| 3905 ISLOW_MULT_TYPE * quantptr; | |
| 3906 int * wsptr; | |
| 3907 JSAMPROW outptr; | |
| 3908 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 3909 int ctr; | |
| 3910 int workspace[6*3]; /* buffers data between passes: 3 rows of 6 ints */ | |
| 3911 SHIFT_TEMPS | |
| 3912 | |
| 3913 /* Pass 1: process columns from input, store into work array. | |
| 3914 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). | |
| * CLAMP_DC is applied only when ctr == 0, i.e. to the value derived | |
| * from coef_block[0]. Workspace values are descaled by | |
| * CONST_BITS-PASS1_BITS, so they stay scaled up by PASS1_BITS. | |
| 3915 */ | |
| 3916 | |
| 3917 inptr = coef_block; | |
| 3918 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 3919 wsptr = workspace; | |
| 3920 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { | |
| 3921 /* Even part: coefficient rows 0 and 2 */ | |
| 3922 | |
| 3923 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 3924 if (ctr == 0) | |
| 3925 CLAMP_DC(tmp0); | |
| 3926 tmp0 <<= CONST_BITS; | |
| 3927 /* Add fudge factor here for final descale (half of the shift step, for rounding). */ | |
| 3928 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 3929 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 3930 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ | |
| 3931 tmp10 = tmp0 + tmp12; | |
| 3932 tmp2 = tmp0 - tmp12 - tmp12; | |
| 3933 | |
| 3934 /* Odd part: coefficient row 1 (tmp12 and tmp0 are reused here) */ | |
| 3935 | |
| 3936 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 3937 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ | |
| 3938 | |
| 3939 /* Final output stage: workspace rows 0..2 of this column (row stride 6) */ | |
| 3940 | |
| 3941 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 3942 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 3943 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); | |
| 3944 } | |
| 3945 | |
| 3946 /* Pass 2: process 3 rows from work array, store into output array. | |
| 3947 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). | |
| * The final shift of CONST_BITS+PASS1_BITS+3 removes the pass 1 | |
| * scaling together with the constant scaling of this pass. | |
| 3948 */ | |
| 3949 | |
| 3950 wsptr = workspace; | |
| 3951 for (ctr = 0; ctr < 3; ctr++) { | |
| 3952 outptr = output_buf[ctr] + output_col; | |
| 3953 | |
| 3954 /* Even part: workspace columns 0, 2 and 4 */ | |
| 3955 | |
| 3956 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 3957 tmp0 = (INT32) wsptr[0] + | |
| 3958 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 3959 (ONE << (PASS1_BITS+2))); | |
| 3960 tmp0 <<= CONST_BITS; | |
| 3961 tmp2 = (INT32) wsptr[4]; | |
| 3962 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ | |
| 3963 tmp1 = tmp0 + tmp10; | |
| 3964 tmp11 = tmp0 - tmp10 - tmp10; | |
| 3965 tmp10 = (INT32) wsptr[2]; | |
| 3966 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ | |
| 3967 tmp10 = tmp1 + tmp0; | |
| 3968 tmp12 = tmp1 - tmp0; | |
| 3969 | |
| 3970 /* Odd part: workspace columns 1, 3 and 5 (tmp0..tmp2 are reused) */ | |
| 3971 | |
| 3972 z1 = (INT32) wsptr[1]; | |
| 3973 z2 = (INT32) wsptr[3]; | |
| 3974 z3 = (INT32) wsptr[5]; | |
| 3975 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 3976 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); | |
| 3977 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); | |
| 3978 tmp1 = (z1 - z2 - z3) << CONST_BITS; | |
| 3979 | |
| 3980 /* Final output stage: descale, mask with RANGE_MASK, map through range_limit[] */ | |
| 3981 | |
| 3982 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 3983 CONST_BITS+PASS1_BITS+3) | |
| 3984 & RANGE_MASK]; | |
| 3985 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 3986 CONST_BITS+PASS1_BITS+3) | |
| 3987 & RANGE_MASK]; | |
| 3988 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, | |
| 3989 CONST_BITS+PASS1_BITS+3) | |
| 3990 & RANGE_MASK]; | |
| 3991 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, | |
| 3992 CONST_BITS+PASS1_BITS+3) | |
| 3993 & RANGE_MASK]; | |
| 3994 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 3995 CONST_BITS+PASS1_BITS+3) | |
| 3996 & RANGE_MASK]; | |
| 3997 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 3998 CONST_BITS+PASS1_BITS+3) | |
| 3999 & RANGE_MASK]; | |
| 4000 | |
| 4001 wsptr += 6; /* advance pointer to next row */ | |
| 4002 } | |
| 4003 } | |
| 4004 | |
| 4005 | |
| 4006 /* | |
| 4007 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4008 * producing a 4x2 output block. | |
| 4009 * | |
| 4010 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). | |
| * | |
| * Only coefficient rows 0..1 of the first 4 columns of coef_block are | |
| * read (row stride DCTSIZE); each is combined with the matching entry | |
| * of compptr->dct_table by DEQUANTIZE. Four samples are written to each | |
| * of output_buf[0..1], starting at column output_col, after mapping | |
| * through the table returned by IDCT_range_limit(cinfo). | |
| * | |
| * Unlike the larger kernels in this file, pass 1 here applies no | |
| * PASS1_BITS scaling, so pass 2 shifts by CONST_BITS+3 only and the | |
| * workspace is kept in INT32. | |
| 4011 */ | |
| 4012 | |
| 4013 GLOBAL(void) | |
| 4014 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4015 JCOEFPTR coef_block, | |
| 4016 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4017 { | |
| 4018 INT32 tmp0, tmp2, tmp10, tmp12; | |
| 4019 INT32 z1, z2, z3; | |
| 4020 JCOEFPTR inptr; | |
| 4021 ISLOW_MULT_TYPE * quantptr; | |
| 4022 INT32 * wsptr; | |
| 4023 JSAMPROW outptr; | |
| 4024 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4025 int ctr; | |
| 4026 INT32 workspace[4*2]; /* buffers data between passes: 2 rows of 4 values */ | |
| 4027 SHIFT_TEMPS | |
| 4028 | |
| 4029 /* Pass 1: process columns from input, store into work array. The 2-point kernel is a plain sum/difference; no multiplies or scaling. */ | |
| 4030 | |
| 4031 inptr = coef_block; | |
| 4032 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4033 wsptr = workspace; | |
| 4034 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { | |
| 4035 /* Even part: coefficient row 0; CLAMP_DC only for ctr == 0 (coef_block[0]) */ | |
| 4036 | |
| 4037 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4038 if (ctr == 0) | |
| 4039 CLAMP_DC(tmp10); | |
| 4040 | |
| 4041 /* Odd part: coefficient row 1 */ | |
| 4042 | |
| 4043 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4044 | |
| 4045 /* Final output stage: workspace rows 0 and 1 of this column (row stride 4) */ | |
| 4046 | |
| 4047 wsptr[4*0] = tmp10 + tmp0; | |
| 4048 wsptr[4*1] = tmp10 - tmp0; | |
| 4049 } | |
| 4050 | |
| 4051 /* Pass 2: process 2 rows from work array, store into output array. | |
| 4052 * 4-point IDCT kernel, | |
| 4053 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. | |
| 4054 */ | |
| 4055 | |
| 4056 wsptr = workspace; | |
| 4057 for (ctr = 0; ctr < 2; ctr++) { | |
| 4058 outptr = output_buf[ctr] + output_col; | |
| 4059 | |
| 4060 /* Even part: workspace columns 0 and 2 */ | |
| 4061 | |
| 4062 /* Add range center and fudge factor for final descale and range-limit. Shifts are 3 and 2 (no PASS1_BITS term) because pass 1 did not scale. */ | |
| 4063 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2)); | |
| 4064 tmp2 = wsptr[2]; | |
| 4065 | |
| 4066 tmp10 = (tmp0 + tmp2) << CONST_BITS; | |
| 4067 tmp12 = (tmp0 - tmp2) << CONST_BITS; | |
| 4068 | |
| 4069 /* Odd part: workspace columns 1 and 3 (tmp0 and tmp2 are reused) */ | |
| 4070 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 4071 | |
| 4072 z2 = wsptr[1]; | |
| 4073 z3 = wsptr[3]; | |
| 4074 | |
| 4075 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 4076 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 4077 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 4078 | |
| 4079 /* Final output stage: descale, mask with RANGE_MASK, map through range_limit[] */ | |
| 4080 | |
| 4081 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 4082 CONST_BITS+3) | |
| 4083 & RANGE_MASK]; | |
| 4084 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 4085 CONST_BITS+3) | |
| 4086 & RANGE_MASK]; | |
| 4087 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 4088 CONST_BITS+3) | |
| 4089 & RANGE_MASK]; | |
| 4090 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 4091 CONST_BITS+3) | |
| 4092 & RANGE_MASK]; | |
| 4093 | |
| 4094 wsptr += 4; /* advance pointer to next row */ | |
| 4095 } | |
| 4096 } | |
| 4097 | |
| 4098 | |
| 4099 /* | |
| 4100 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4101 * producing a 2x1 output block. | |
| 4102 * | |
| 4103 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). | |
| * | |
| * Only coef_block[0] and coef_block[1] are read, each combined with | |
| * the matching entry of compptr->dct_table by DEQUANTIZE. Two samples | |
| * are written to output_buf[0] at columns output_col and output_col+1, | |
| * after mapping through the table returned by IDCT_range_limit(cinfo). | |
| * No workspace is needed; arithmetic is done in DCTELEM. | |
| 4104 */ | |
| 4105 | |
| 4106 GLOBAL(void) | |
| 4107 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4108 JCOEFPTR coef_block, | |
| 4109 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4110 { | |
| 4111 DCTELEM tmp0, tmp1; | |
| 4112 ISLOW_MULT_TYPE * quantptr; | |
| 4113 JSAMPROW outptr; | |
| 4114 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4115 ISHIFT_TEMPS | |
| 4116 | |
| 4117 /* Pass 1: empty (a 1-point column transform leaves the values unchanged). */ | |
| 4118 | |
| 4119 /* Pass 2: process 1 row from input, store into output array. */ | |
| 4120 | |
| 4121 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4122 outptr = output_buf[0] + output_col; | |
| 4123 | |
| 4124 /* Even part: coefficient 0, always passed through CLAMP_DC */ | |
| 4125 | |
| 4126 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]); | |
| 4127 CLAMP_DC(tmp0); | |
| 4128 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 4129 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); | |
| 4130 | |
| 4131 /* Odd part: coefficient 1 */ | |
| 4132 | |
| 4133 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]); | |
| 4134 | |
| 4135 /* Final output stage: sum/difference, descale by 3 bits, mask, range-limit */ | |
| 4136 | |
| 4137 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; | |
| 4138 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; | |
| 4139 } | |
| 4140 | |
| 4141 | |
| 4142 /* | |
| 4143 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4144 * producing an 8x16 output block. | |
| 4145 * | |
| 4146 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). | |
| * | |
| * Coefficient rows 0..7 of all 8 columns of coef_block are read (row | |
| * stride DCTSIZE); each is combined with the matching entry of | |
| * compptr->dct_table by DEQUANTIZE. Eight samples are written to each | |
| * of output_buf[0..15], starting at column output_col, after mapping | |
| * through the table returned by IDCT_range_limit(cinfo). | |
| 4147 */ | |
| 4148 | |
| 4149 GLOBAL(void) | |
| 4150 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4151 JCOEFPTR coef_block, | |
| 4152 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4153 { | |
| 4154 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; | |
| 4155 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; | |
| 4156 INT32 z1, z2, z3, z4; | |
| 4157 JCOEFPTR inptr; | |
| 4158 ISLOW_MULT_TYPE * quantptr; | |
| 4159 int * wsptr; | |
| 4160 JSAMPROW outptr; | |
| 4161 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4162 int ctr; | |
| 4163 int workspace[8*16]; /* buffers data between passes: 16 rows of 8 ints */ | |
| 4164 SHIFT_TEMPS | |
| 4165 | |
| 4166 /* Pass 1: process columns from input, store into work array. | |
| 4167 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). | |
| * Each of the 8 columns expands its 8 input coefficients into 16 | |
| * workspace entries (row stride 8). CLAMP_DC is applied only when | |
| * ctr == 0, i.e. to the value derived from coef_block[0]. | |
| 4168 */ | |
| 4169 | |
| 4170 inptr = coef_block; | |
| 4171 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4172 wsptr = workspace; | |
| 4173 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { | |
| 4174 /* Even part: coefficient rows 0, 2, 4 and 6; results land in tmp20..tmp27 */ | |
| 4175 | |
| 4176 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4177 if (ctr == 0) | |
| 4178 CLAMP_DC(tmp0); | |
| 4179 tmp0 <<= CONST_BITS; | |
| 4180 /* Add fudge factor here for final descale (half of the shift step, for rounding). */ | |
| 4181 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 4182 | |
| 4183 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 4184 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ | |
| 4185 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ | |
| 4186 | |
| 4187 tmp10 = tmp0 + tmp1; | |
| 4188 tmp11 = tmp0 - tmp1; | |
| 4189 tmp12 = tmp0 + tmp2; | |
| 4190 tmp13 = tmp0 - tmp2; | |
| 4191 | |
| 4192 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 4193 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 4194 z3 = z1 - z2; | |
| 4195 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ | |
| 4196 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ | |
| 4197 | |
| 4198 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ | |
| 4199 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ | |
| 4200 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ | |
| 4201 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ | |
| 4202 | |
| 4203 tmp20 = tmp10 + tmp0; | |
| 4204 tmp27 = tmp10 - tmp0; | |
| 4205 tmp21 = tmp12 + tmp1; | |
| 4206 tmp26 = tmp12 - tmp1; | |
| 4207 tmp22 = tmp13 + tmp2; | |
| 4208 tmp25 = tmp13 - tmp2; | |
| 4209 tmp23 = tmp11 + tmp3; | |
| 4210 tmp24 = tmp11 - tmp3; | |
| 4211 | |
| 4212 /* Odd part: coefficient rows 1, 3, 5 and 7; tmp0..tmp3 and tmp10..tmp13 are reused for the eight odd terms */ | |
| 4213 | |
| 4214 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4215 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 4216 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 4217 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 4218 | |
| 4219 tmp11 = z1 + z3; | |
| 4220 | |
| 4221 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ | |
| 4222 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ | |
| 4223 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ | |
| 4224 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ | |
| 4225 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ | |
| 4226 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ | |
| 4227 tmp0 = tmp1 + tmp2 + tmp3 - | |
| 4228 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ | |
| 4229 tmp13 = tmp10 + tmp11 + tmp12 - | |
| 4230 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ | |
| 4231 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ | |
| 4232 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ | |
| 4233 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ | |
| 4234 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ | |
| 4235 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ | |
| 4236 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ | |
| 4237 z2 += z4; | |
| 4238 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ | |
| 4239 tmp1 += z1; | |
| 4240 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ | |
| 4241 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ | |
| 4242 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ | |
| 4243 tmp12 += z2; | |
| 4244 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ | |
| 4245 tmp2 += z2; | |
| 4246 tmp3 += z2; | |
| 4247 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ | |
| 4248 tmp10 += z2; | |
| 4249 tmp11 += z2; | |
| 4250 | |
| 4251 /* Final output stage: even term +/- odd term gives workspace rows k and 15-k */ | |
| 4252 | |
| 4253 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); | |
| 4254 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); | |
| 4255 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); | |
| 4256 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); | |
| 4257 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); | |
| 4258 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); | |
| 4259 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); | |
| 4260 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); | |
| 4261 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); | |
| 4262 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); | |
| 4263 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); | |
| 4264 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); | |
| 4265 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); | |
| 4266 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); | |
| 4267 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); | |
| 4268 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); | |
| 4269 } | |
| 4270 | |
| 4271 /* Pass 2: process rows from work array, store into output array. | |
| 4272 * Note that we must descale the results by a factor of 8 == 2**3, | |
| 4273 * and also undo the PASS1_BITS scaling. | |
| 4274 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | |
| * | |
| * Each of the 16 workspace rows yields 8 samples, written to | |
| * output_buf[ctr] starting at column output_col. | |
| 4275 */ | |
| 4276 | |
| 4277 wsptr = workspace; | |
| 4278 for (ctr = 0; ctr < 16; ctr++) { | |
| 4279 outptr = output_buf[ctr] + output_col; | |
| 4280 | |
| 4281 /* Even part: reverse the even part of the forward DCT. | |
| 4282 * The rotator is c(-6). | |
| 4283 */ | |
| 4284 | |
| 4285 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 4286 z2 = (INT32) wsptr[0] + | |
| 4287 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 4288 (ONE << (PASS1_BITS+2))); | |
| 4289 z3 = (INT32) wsptr[4]; | |
| 4290 | |
| 4291 tmp0 = (z2 + z3) << CONST_BITS; | |
| 4292 tmp1 = (z2 - z3) << CONST_BITS; | |
| 4293 | |
| 4294 z2 = (INT32) wsptr[2]; | |
| 4295 z3 = (INT32) wsptr[6]; | |
| 4296 | |
| 4297 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 4298 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 4299 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 4300 | |
| 4301 tmp10 = tmp0 + tmp2; | |
| 4302 tmp13 = tmp0 - tmp2; | |
| 4303 tmp11 = tmp1 + tmp3; | |
| 4304 tmp12 = tmp1 - tmp3; | |
| 4305 | |
| 4306 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 4307 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| * Note that tmp0..tmp3 are reused here: the even-part results now | |
| * live in tmp10..tmp13. | |
| 4308 */ | |
| 4309 | |
| 4310 tmp0 = (INT32) wsptr[7]; | |
| 4311 tmp1 = (INT32) wsptr[5]; | |
| 4312 tmp2 = (INT32) wsptr[3]; | |
| 4313 tmp3 = (INT32) wsptr[1]; | |
| 4314 | |
| 4315 z2 = tmp0 + tmp2; | |
| 4316 z3 = tmp1 + tmp3; | |
| 4317 | |
| 4318 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 4319 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 4320 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 4321 z2 += z1; | |
| 4322 z3 += z1; | |
| 4323 | |
| 4324 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 4325 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 4326 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 4327 tmp0 += z1 + z2; | |
| 4328 tmp3 += z1 + z3; | |
| 4329 | |
| 4330 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 4331 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 4332 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 4333 tmp1 += z1 + z3; | |
| 4334 tmp2 += z1 + z2; | |
| 4335 | |
| 4336 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3. Each sum or difference is descaled, masked with RANGE_MASK and mapped through range_limit[]. */ | |
| 4337 | |
| 4338 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, | |
| 4339 CONST_BITS+PASS1_BITS+3) | |
| 4340 & RANGE_MASK]; | |
| 4341 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, | |
| 4342 CONST_BITS+PASS1_BITS+3) | |
| 4343 & RANGE_MASK]; | |
| 4344 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, | |
| 4345 CONST_BITS+PASS1_BITS+3) | |
| 4346 & RANGE_MASK]; | |
| 4347 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, | |
| 4348 CONST_BITS+PASS1_BITS+3) | |
| 4349 & RANGE_MASK]; | |
| 4350 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, | |
| 4351 CONST_BITS+PASS1_BITS+3) | |
| 4352 & RANGE_MASK]; | |
| 4353 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, | |
| 4354 CONST_BITS+PASS1_BITS+3) | |
| 4355 & RANGE_MASK]; | |
| 4356 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, | |
| 4357 CONST_BITS+PASS1_BITS+3) | |
| 4358 & RANGE_MASK]; | |
| 4359 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, | |
| 4360 CONST_BITS+PASS1_BITS+3) | |
| 4361 & RANGE_MASK]; | |
| 4362 | |
| 4363 wsptr += DCTSIZE; /* advance pointer to next row */ | |
| 4364 } | |
| 4365 } | |
| 4366 | |
| 4367 | |
| 4368 /* | |
| 4369 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4370 * producing a 7x14 output block. | |
| 4371 * | |
| 4372 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows). | |
| * | |
| * Coefficient rows 0..7 of the first 7 columns of coef_block are read | |
| * (row stride DCTSIZE); each is combined with the matching entry of | |
| * compptr->dct_table by DEQUANTIZE. Seven samples are written to each | |
| * of output_buf[0..13], starting at column output_col, after mapping | |
| * through the table returned by IDCT_range_limit(cinfo). | |
| 4373 */ | |
| 4374 | |
| 4375 GLOBAL(void) | |
| 4376 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4377 JCOEFPTR coef_block, | |
| 4378 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4379 { | |
| 4380 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | |
| 4381 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; | |
| 4382 INT32 z1, z2, z3, z4; | |
| 4383 JCOEFPTR inptr; | |
| 4384 ISLOW_MULT_TYPE * quantptr; | |
| 4385 int * wsptr; | |
| 4386 JSAMPROW outptr; | |
| 4387 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4388 int ctr; | |
| 4389 int workspace[7*14]; /* buffers data between passes: 14 rows of 7 ints */ | |
| 4390 SHIFT_TEMPS | |
| 4391 | |
| 4392 /* Pass 1: process columns from input, store into work array. | |
| 4393 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). | |
| * Each of the 7 columns expands its 8 input coefficients into 14 | |
| * workspace entries (row stride 7). CLAMP_DC is applied only when | |
| * ctr == 0, i.e. to the value derived from coef_block[0]. | |
| 4394 */ | |
| 4395 | |
| 4396 inptr = coef_block; | |
| 4397 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4398 wsptr = workspace; | |
| 4399 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { | |
| 4400 /* Even part: coefficient rows 0, 2, 4 and 6; results land in tmp20..tmp26 */ | |
| 4401 | |
| 4402 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4403 if (ctr == 0) | |
| 4404 CLAMP_DC(z1); | |
| 4405 z1 <<= CONST_BITS; | |
| 4406 /* Add fudge factor here for final descale (half of the shift step, for rounding). */ | |
| 4407 z1 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 4408 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 4409 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ | |
| 4410 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ | |
| 4411 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ | |
| 4412 | |
| 4413 tmp10 = z1 + z2; | |
| 4414 tmp11 = z1 + z3; | |
| 4415 tmp12 = z1 - z4; | |
| 4416 | |
| 4417 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ | |
| 4418 CONST_BITS-PASS1_BITS); | |
| 4419 | |
| 4420 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 4421 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 4422 | |
| 4423 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ | |
| 4424 | |
| 4425 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ | |
| 4426 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ | |
| 4427 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ | |
| 4428 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ | |
| 4429 | |
| 4430 tmp20 = tmp10 + tmp13; | |
| 4431 tmp26 = tmp10 - tmp13; | |
| 4432 tmp21 = tmp11 + tmp14; | |
| 4433 tmp25 = tmp11 - tmp14; | |
| 4434 tmp22 = tmp12 + tmp15; | |
| 4435 tmp24 = tmp12 - tmp15; | |
| 4436 | |
| 4437 /* Odd part: coefficient rows 1, 3, 5 and 7; tmp10..tmp16 are reused for the odd terms */ | |
| 4438 | |
| 4439 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4440 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 4441 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 4442 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 4443 tmp13 = z4 << CONST_BITS; | |
| 4444 | |
| 4445 tmp14 = z1 + z3; | |
| 4446 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ | |
| 4447 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ | |
| 4448 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ | |
| 4449 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ | |
| 4450 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ | |
| 4451 z1 -= z2; | |
| 4452 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ | |
| 4453 tmp16 += tmp15; | |
| 4454 z1 += z4; | |
| 4455 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ | |
| 4456 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ | |
| 4457 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ | |
| 4458 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ | |
| 4459 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ | |
| 4460 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ | |
| 4461 | |
| 4462 tmp13 = (z1 - z3) << PASS1_BITS; | |
| 4463 | |
| 4464 /* Final output stage: even term +/- odd term gives workspace rows k and 13-k; rows 3 and 10 use tmp23/tmp13, which are already at PASS1_BITS scale and need no shift */ | |
| 4465 | |
| 4466 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 4467 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 4468 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 4469 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 4470 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 4471 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 4472 wsptr[7*3] = (int) (tmp23 + tmp13); | |
| 4473 wsptr[7*10] = (int) (tmp23 - tmp13); | |
| 4474 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 4475 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 4476 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 4477 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 4478 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); | |
| 4479 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); | |
| 4480 } | |
| 4481 | |
| 4482 /* Pass 2: process 14 rows from work array, store into output array. | |
| 4483 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). | |
| * The final shift of CONST_BITS+PASS1_BITS+3 removes the pass 1 | |
| * scaling together with the constant scaling of this pass. | |
| 4484 */ | |
| 4485 | |
| 4486 wsptr = workspace; | |
| 4487 for (ctr = 0; ctr < 14; ctr++) { | |
| 4488 outptr = output_buf[ctr] + output_col; | |
| 4489 | |
| 4490 /* Even part: workspace columns 0, 2, 4 and 6; results land in tmp20..tmp23 */ | |
| 4491 | |
| 4492 /* Add range center and fudge factor for final descale and range-limit. */ | |
| 4493 tmp23 = (INT32) wsptr[0] + | |
| 4494 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 4495 (ONE << (PASS1_BITS+2))); | |
| 4496 tmp23 <<= CONST_BITS; | |
| 4497 | |
| 4498 z1 = (INT32) wsptr[2]; | |
| 4499 z2 = (INT32) wsptr[4]; | |
| 4500 z3 = (INT32) wsptr[6]; | |
| 4501 | |
| 4502 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ | |
| 4503 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ | |
| 4504 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ | |
| 4505 tmp10 = z1 + z3; | |
| 4506 z2 -= tmp10; | |
| 4507 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ | |
| 4508 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ | |
| 4509 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ | |
| 4510 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ | |
| 4511 | |
| 4512 /* Odd part: workspace columns 1, 3 and 5; results land in tmp10..tmp12 */ | |
| 4513 | |
| 4514 z1 = (INT32) wsptr[1]; | |
| 4515 z2 = (INT32) wsptr[3]; | |
| 4516 z3 = (INT32) wsptr[5]; | |
| 4517 | |
| 4518 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | |
| 4519 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | |
| 4520 tmp10 = tmp11 - tmp12; | |
| 4521 tmp11 += tmp12; | |
| 4522 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ | |
| 4523 tmp11 += tmp12; | |
| 4524 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ | |
| 4525 tmp10 += z2; | |
| 4526 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ | |
| 4527 | |
| 4528 /* Final output stage: descale, mask with RANGE_MASK, map through range_limit[]; the middle sample outptr[3] comes from the even term tmp23 alone */ | |
| 4529 | |
| 4530 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 4531 CONST_BITS+PASS1_BITS+3) | |
| 4532 & RANGE_MASK]; | |
| 4533 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 4534 CONST_BITS+PASS1_BITS+3) | |
| 4535 & RANGE_MASK]; | |
| 4536 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 4537 CONST_BITS+PASS1_BITS+3) | |
| 4538 & RANGE_MASK]; | |
| 4539 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 4540 CONST_BITS+PASS1_BITS+3) | |
| 4541 & RANGE_MASK]; | |
| 4542 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 4543 CONST_BITS+PASS1_BITS+3) | |
| 4544 & RANGE_MASK]; | |
| 4545 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 4546 CONST_BITS+PASS1_BITS+3) | |
| 4547 & RANGE_MASK]; | |
| 4548 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23, | |
| 4549 CONST_BITS+PASS1_BITS+3) | |
| 4550 & RANGE_MASK]; | |
| 4551 | |
| 4552 wsptr += 7; /* advance pointer to next row */ | |
| 4553 } | |
| 4554 } | |
| 4555 | |
| 4556 | |
| 4557 /* | |
| 4558 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4559 * producing a 6x12 output block (6 samples per row, 12 rows). | |
| 4560 * Coefficients are read from coef_block and dequantized with the table at compptr->dct_table; samples are written to output_buf[0..11] starting at column output_col. | |
| 4561 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). | |
| 4562 */ | |
| 4563 | |
| 4564 GLOBAL(void) | |
| 4565 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4566 JCOEFPTR coef_block, | |
| 4567 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4568 { | |
| 4569 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | |
| 4570 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; | |
| 4571 INT32 z1, z2, z3, z4; | |
| 4572 JCOEFPTR inptr; | |
| 4573 ISLOW_MULT_TYPE * quantptr; | |
| 4574 int * wsptr; | |
| 4575 JSAMPROW outptr; | |
| 4576 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4577 int ctr; | |
| 4578 int workspace[6*12]; /* buffers data between passes: 12 rows of 6 ints */ | |
| 4579 SHIFT_TEMPS | |
| 4580 | |
| 4581 /* Pass 1: process columns from input, store into work array. | |
| 4582 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). | |
| 4583 * Only the first 6 columns of coef_block are read; each column yields 12 values stored with a stride of 6. */ | |
| 4584 | |
| 4585 inptr = coef_block; | |
| 4586 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4587 wsptr = workspace; | |
| 4588 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { | |
| 4589 /* Even part: column entries 0, 2, 4 and 6 */ | |
| 4590 | |
| 4591 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4592 if (ctr == 0) | |
| 4593 CLAMP_DC(z3); /* first column only: this entry is coef_block[0], the DC term */ | |
| 4594 z3 <<= CONST_BITS; | |
| 4595 /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS), carried by z3 into every output of this column. */ | |
| 4596 z3 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 4597 | |
| 4598 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 4599 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ | |
| 4600 | |
| 4601 tmp10 = z3 + z4; | |
| 4602 tmp11 = z3 - z4; | |
| 4603 | |
| 4604 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 4605 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ | |
| 4606 z1 <<= CONST_BITS; | |
| 4607 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 4608 z2 <<= CONST_BITS; | |
| 4609 | |
| 4610 tmp12 = z1 - z2; | |
| 4611 | |
| 4612 tmp21 = z3 + tmp12; | |
| 4613 tmp24 = z3 - tmp12; | |
| 4614 | |
| 4615 tmp12 = z4 + z2; | |
| 4616 | |
| 4617 tmp20 = tmp10 + tmp12; | |
| 4618 tmp25 = tmp10 - tmp12; | |
| 4619 | |
| 4620 tmp12 = z4 - z1 - z2; | |
| 4621 | |
| 4622 tmp22 = tmp11 + tmp12; | |
| 4623 tmp23 = tmp11 - tmp12; | |
| 4624 | |
| 4625 /* Odd part: column entries 1, 3, 5 and 7 */ | |
| 4626 | |
| 4627 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4628 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 4629 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 4630 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 4631 | |
| 4632 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ | |
| 4633 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ | |
| 4634 | |
| 4635 tmp10 = z1 + z3; | |
| 4636 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ | |
| 4637 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ | |
| 4638 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ | |
| 4639 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ | |
| 4640 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ | |
| 4641 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ | |
| 4642 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ | |
| 4643 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ | |
| 4644 | |
| 4645 z1 -= z4; /* z1, z2 and z3 are overwritten here to build the final tmp11 and tmp14 */ | |
| 4646 z2 -= z3; | |
| 4647 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ | |
| 4648 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ | |
| 4649 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ | |
| 4650 | |
| 4651 /* Final output stage: rows k and 11-k receive even+odd and even-odd, shifted down by CONST_BITS-PASS1_BITS */ | |
| 4652 | |
| 4653 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 4654 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 4655 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 4656 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 4657 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); | |
| 4658 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); | |
| 4659 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 4660 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 4661 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 4662 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 4663 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); | |
| 4664 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); | |
| 4665 } | |
| 4666 | |
| 4667 /* Pass 2: process 12 rows from work array, store into output array. | |
| 4668 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). | |
| 4669 * Each row of 6 workspace values produces the 6 samples outptr[0..5]. */ | |
| 4670 | |
| 4671 wsptr = workspace; | |
| 4672 for (ctr = 0; ctr < 12; ctr++) { | |
| 4673 outptr = output_buf[ctr] + output_col; | |
| 4674 | |
| 4675 /* Even part: workspace entries 0, 2 and 4 of the row */ | |
| 4676 | |
| 4677 /* Add range center and fudge factor for final descale and range-limit. The fudge factor is half of 2**(PASS1_BITS+3); after the shift up by CONST_BITS it is half the divisor of the final right shift. */ | |
| 4678 tmp10 = (INT32) wsptr[0] + | |
| 4679 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 4680 (ONE << (PASS1_BITS+2))); | |
| 4681 tmp10 <<= CONST_BITS; | |
| 4682 tmp12 = (INT32) wsptr[4]; | |
| 4683 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ | |
| 4684 tmp11 = tmp10 + tmp20; | |
| 4685 tmp21 = tmp10 - tmp20 - tmp20; | |
| 4686 tmp20 = (INT32) wsptr[2]; | |
| 4687 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ | |
| 4688 tmp20 = tmp11 + tmp10; | |
| 4689 tmp22 = tmp11 - tmp10; | |
| 4690 | |
| 4691 /* Odd part: workspace entries 1, 3 and 5 of the row */ | |
| 4692 | |
| 4693 z1 = (INT32) wsptr[1]; | |
| 4694 z2 = (INT32) wsptr[3]; | |
| 4695 z3 = (INT32) wsptr[5]; | |
| 4696 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 4697 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); | |
| 4698 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); | |
| 4699 tmp11 = (z1 - z2 - z3) << CONST_BITS; | |
| 4700 | |
| 4701 /* Final output stage: shift down, mask with RANGE_MASK, then look up the sample in range_limit */ | |
| 4702 | |
| 4703 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, | |
| 4704 CONST_BITS+PASS1_BITS+3) | |
| 4705 & RANGE_MASK]; | |
| 4706 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, | |
| 4707 CONST_BITS+PASS1_BITS+3) | |
| 4708 & RANGE_MASK]; | |
| 4709 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, | |
| 4710 CONST_BITS+PASS1_BITS+3) | |
| 4711 & RANGE_MASK]; | |
| 4712 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, | |
| 4713 CONST_BITS+PASS1_BITS+3) | |
| 4714 & RANGE_MASK]; | |
| 4715 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, | |
| 4716 CONST_BITS+PASS1_BITS+3) | |
| 4717 & RANGE_MASK]; | |
| 4718 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, | |
| 4719 CONST_BITS+PASS1_BITS+3) | |
| 4720 & RANGE_MASK]; | |
| 4721 | |
| 4722 wsptr += 6; /* advance pointer to next row */ | |
| 4723 } | |
| 4724 } | |
| 4725 | |
| 4726 | |
| 4727 /* | |
| 4728 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4729 * producing a 5x10 output block (5 samples per row, 10 rows). | |
| 4730 * Coefficients are read from coef_block and dequantized with the table at compptr->dct_table; samples are written to output_buf[0..9] starting at column output_col. | |
| 4731 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows). | |
| 4732 */ | |
| 4733 | |
| 4734 GLOBAL(void) | |
| 4735 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4736 JCOEFPTR coef_block, | |
| 4737 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4738 { | |
| 4739 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | |
| 4740 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; | |
| 4741 INT32 z1, z2, z3, z4, z5; | |
| 4742 JCOEFPTR inptr; | |
| 4743 ISLOW_MULT_TYPE * quantptr; | |
| 4744 int * wsptr; | |
| 4745 JSAMPROW outptr; | |
| 4746 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4747 int ctr; | |
| 4748 int workspace[5*10]; /* buffers data between passes: 10 rows of 5 ints */ | |
| 4749 SHIFT_TEMPS | |
| 4750 | |
| 4751 /* Pass 1: process columns from input, store into work array. | |
| 4752 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). | |
| 4753 * Only the first 5 columns of coef_block are read; each column yields 10 values stored with a stride of 5. */ | |
| 4754 | |
| 4755 inptr = coef_block; | |
| 4756 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4757 wsptr = workspace; | |
| 4758 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { | |
| 4759 /* Even part: column entries 0, 2, 4 and 6 */ | |
| 4760 | |
| 4761 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4762 if (ctr == 0) | |
| 4763 CLAMP_DC(z3); /* first column only: this entry is coef_block[0], the DC term */ | |
| 4764 z3 <<= CONST_BITS; | |
| 4765 /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS), carried by z3 into every even-part term. */ | |
| 4766 z3 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 4767 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 4768 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ | |
| 4769 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ | |
| 4770 tmp10 = z3 + z1; | |
| 4771 tmp11 = z3 - z2; | |
| 4772 | |
| 4773 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ | |
| 4774 CONST_BITS-PASS1_BITS); /* tmp22 is shifted down here already; rows 2 and 7 are stored without a further shift */ | |
| 4775 | |
| 4776 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 4777 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 4778 | |
| 4779 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ | |
| 4780 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ | |
| 4781 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ | |
| 4782 | |
| 4783 tmp20 = tmp10 + tmp12; | |
| 4784 tmp24 = tmp10 - tmp12; | |
| 4785 tmp21 = tmp11 + tmp13; | |
| 4786 tmp23 = tmp11 - tmp13; | |
| 4787 | |
| 4788 /* Odd part: column entries 1, 3, 5 and 7 */ | |
| 4789 | |
| 4790 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4791 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 4792 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 4793 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 4794 | |
| 4795 tmp11 = z2 + z4; | |
| 4796 tmp13 = z2 - z4; | |
| 4797 | |
| 4798 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ | |
| 4799 z5 = z3 << CONST_BITS; | |
| 4800 | |
| 4801 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ | |
| 4802 z4 = z5 + tmp12; | |
| 4803 | |
| 4804 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ | |
| 4805 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ | |
| 4806 | |
| 4807 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ | |
| 4808 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); | |
| 4809 | |
| 4810 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* scaled by 2**PASS1_BITS only, matching the already shifted tmp22 */ | |
| 4811 | |
| 4812 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ | |
| 4813 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ | |
| 4814 | |
| 4815 /* Final output stage: rows k and 9-k receive even+odd and even-odd; rows 2 and 7 skip the shift because tmp22 and tmp12 are already at the workspace scale */ | |
| 4816 | |
| 4817 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); | |
| 4818 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); | |
| 4819 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); | |
| 4820 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); | |
| 4821 wsptr[5*2] = (int) (tmp22 + tmp12); | |
| 4822 wsptr[5*7] = (int) (tmp22 - tmp12); | |
| 4823 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); | |
| 4824 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); | |
| 4825 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); | |
| 4826 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); | |
| 4827 } | |
| 4828 | |
| 4829 /* Pass 2: process 10 rows from work array, store into output array. | |
| 4830 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). | |
| 4831 * Each row of 5 workspace values produces the 5 samples outptr[0..4]. */ | |
| 4832 | |
| 4833 wsptr = workspace; | |
| 4834 for (ctr = 0; ctr < 10; ctr++) { | |
| 4835 outptr = output_buf[ctr] + output_col; | |
| 4836 | |
| 4837 /* Even part: workspace entries 0, 2 and 4 of the row */ | |
| 4838 | |
| 4839 /* Add range center and fudge factor for final descale and range-limit. The fudge factor is half of 2**(PASS1_BITS+3); after the shift up by CONST_BITS it is half the divisor of the final right shift. */ | |
| 4840 tmp12 = (INT32) wsptr[0] + | |
| 4841 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 4842 (ONE << (PASS1_BITS+2))); | |
| 4843 tmp12 <<= CONST_BITS; | |
| 4844 tmp13 = (INT32) wsptr[2]; | |
| 4845 tmp14 = (INT32) wsptr[4]; | |
| 4846 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ | |
| 4847 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ | |
| 4848 z3 = tmp12 + z2; | |
| 4849 tmp10 = z3 + z1; | |
| 4850 tmp11 = z3 - z1; | |
| 4851 tmp12 -= z2 << 2; /* tmp12 becomes the middle sample outptr[2], which gets no odd-part term below */ | |
| 4852 | |
| 4853 /* Odd part: workspace entries 1 and 3 of the row */ | |
| 4854 | |
| 4855 z2 = (INT32) wsptr[1]; | |
| 4856 z3 = (INT32) wsptr[3]; | |
| 4857 | |
| 4858 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ | |
| 4859 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ | |
| 4860 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ | |
| 4861 | |
| 4862 /* Final output stage: shift down, mask with RANGE_MASK, then look up the sample in range_limit */ | |
| 4863 | |
| 4864 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13, | |
| 4865 CONST_BITS+PASS1_BITS+3) | |
| 4866 & RANGE_MASK]; | |
| 4867 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13, | |
| 4868 CONST_BITS+PASS1_BITS+3) | |
| 4869 & RANGE_MASK]; | |
| 4870 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14, | |
| 4871 CONST_BITS+PASS1_BITS+3) | |
| 4872 & RANGE_MASK]; | |
| 4873 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14, | |
| 4874 CONST_BITS+PASS1_BITS+3) | |
| 4875 & RANGE_MASK]; | |
| 4876 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, | |
| 4877 CONST_BITS+PASS1_BITS+3) | |
| 4878 & RANGE_MASK]; | |
| 4879 | |
| 4880 wsptr += 5; /* advance pointer to next row */ | |
| 4881 } | |
| 4882 } | |
| 4883 | |
| 4884 | |
| 4885 /* | |
| 4886 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 4887 * producing a 4x8 output block (4 samples per row, 8 rows). | |
| 4888 * Coefficients are read from coef_block and dequantized with the table at compptr->dct_table; samples are written to output_buf[0..7] starting at column output_col. | |
| 4889 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). | |
| 4890 */ | |
| 4891 | |
| 4892 GLOBAL(void) | |
| 4893 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 4894 JCOEFPTR coef_block, | |
| 4895 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 4896 { | |
| 4897 INT32 tmp0, tmp1, tmp2, tmp3; | |
| 4898 INT32 tmp10, tmp11, tmp12, tmp13; | |
| 4899 INT32 z1, z2, z3; | |
| 4900 JCOEFPTR inptr; | |
| 4901 ISLOW_MULT_TYPE * quantptr; | |
| 4902 int * wsptr; | |
| 4903 JSAMPROW outptr; | |
| 4904 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 4905 int ctr; | |
| 4906 int workspace[4*8]; /* buffers data between passes: 8 rows of 4 ints */ | |
| 4907 SHIFT_TEMPS | |
| 4908 | |
| 4909 /* Pass 1: process columns from input, store into work array. | |
| 4910 * Note results are scaled up by sqrt(8) compared to a true IDCT; | |
| 4911 * furthermore, we scale the results by 2**PASS1_BITS. | |
| 4912 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | |
| 4913 * Only the first 4 columns of coef_block are read; ctr counts down from 4, so ctr == 4 marks the first column. */ | |
| 4914 | |
| 4915 inptr = coef_block; | |
| 4916 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 4917 wsptr = workspace; | |
| 4918 for (ctr = 4; ctr > 0; ctr--) { | |
| 4919 /* Due to quantization, we will usually find that many of the input | |
| 4920 * coefficients are zero, especially the AC terms. We can exploit this | |
| 4921 * by short-circuiting the IDCT calculation for any column in which all | |
| 4922 * the AC terms are zero. In that case each output is equal to the | |
| 4923 * DC coefficient (with scale factor as needed). | |
| 4924 * With typical images and quantization tables, half or more of the | |
| 4925 * column DCT calculations can be simplified this way. | |
| 4926 * The test below inspects the raw coefficients, before dequantization. */ | |
| 4927 | |
| 4928 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && | |
| 4929 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && | |
| 4930 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && | |
| 4931 inptr[DCTSIZE*7] == 0) { | |
| 4932 /* AC terms all zero: all 8 workspace rows of this column get the same value */ | |
| 4933 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4934 if (ctr == 4) | |
| 4935 CLAMP_DC(dcval); /* first column only: this entry is coef_block[0], the DC term */ | |
| 4936 dcval <<= PASS1_BITS; /* same 2**PASS1_BITS scale that the full path leaves after its final shift */ | |
| 4937 wsptr[4*0] = dcval; | |
| 4938 wsptr[4*1] = dcval; | |
| 4939 wsptr[4*2] = dcval; | |
| 4940 wsptr[4*3] = dcval; | |
| 4941 wsptr[4*4] = dcval; | |
| 4942 wsptr[4*5] = dcval; | |
| 4943 wsptr[4*6] = dcval; | |
| 4944 wsptr[4*7] = dcval; | |
| 4945 | |
| 4946 inptr++; /* advance pointers to next column */ | |
| 4947 quantptr++; | |
| 4948 wsptr++; | |
| 4949 continue; | |
| 4950 } | |
| 4951 | |
| 4952 /* Even part: reverse the even part of the forward DCT. | |
| 4953 * The rotator is c(-6). | |
| 4954 * Uses column entries 0, 2, 4 and 6. */ | |
| 4955 | |
| 4956 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 4957 if (ctr == 4) | |
| 4958 CLAMP_DC(z2); /* first column only, as in the shortcut path above */ | |
| 4959 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 4960 z2 <<= CONST_BITS; | |
| 4961 z3 <<= CONST_BITS; | |
| 4962 /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS), carried by z2 into every output of this column. */ | |
| 4963 z2 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 4964 | |
| 4965 tmp0 = z2 + z3; | |
| 4966 tmp1 = z2 - z3; | |
| 4967 | |
| 4968 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 4969 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | |
| 4970 | |
| 4971 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 4972 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 4973 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 4974 | |
| 4975 tmp10 = tmp0 + tmp2; | |
| 4976 tmp13 = tmp0 - tmp2; | |
| 4977 tmp11 = tmp1 + tmp3; | |
| 4978 tmp12 = tmp1 - tmp3; | |
| 4979 | |
| 4980 /* Odd part per figure 8; the matrix is unitary and hence its | |
| 4981 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
| 4982 * tmp0..tmp3 are reused here to hold column entries 7, 5, 3 and 1. */ | |
| 4983 | |
| 4984 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | |
| 4985 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 4986 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 4987 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 4988 | |
| 4989 z2 = tmp0 + tmp2; | |
| 4990 z3 = tmp1 + tmp3; | |
| 4991 | |
| 4992 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ | |
| 4993 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ | |
| 4994 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ | |
| 4995 z2 += z1; | |
| 4996 z3 += z1; | |
| 4997 | |
| 4998 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ | |
| 4999 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ | |
| 5000 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ | |
| 5001 tmp0 += z1 + z2; | |
| 5002 tmp3 += z1 + z3; | |
| 5003 | |
| 5004 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ | |
| 5005 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ | |
| 5006 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ | |
| 5007 tmp1 += z1 + z3; | |
| 5008 tmp2 += z1 + z2; | |
| 5009 | |
| 5010 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3; rows k and 7-k receive even+odd and even-odd */ | |
| 5011 | |
| 5012 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
| 5013 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
| 5014 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
| 5015 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
| 5016 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
| 5017 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
| 5018 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
| 5019 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
| 5020 | |
| 5021 inptr++; /* advance pointers to next column */ | |
| 5022 quantptr++; | |
| 5023 wsptr++; | |
| 5024 } | |
| 5025 | |
| 5026 /* Pass 2: process 8 rows from work array, store into output array. | |
| 5027 * 4-point IDCT kernel, | |
| 5028 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. | |
| 5029 * Each row of 4 workspace values produces the 4 samples outptr[0..3]. */ | |
| 5030 | |
| 5031 wsptr = workspace; | |
| 5032 for (ctr = 0; ctr < 8; ctr++) { | |
| 5033 outptr = output_buf[ctr] + output_col; | |
| 5034 | |
| 5035 /* Even part: workspace entries 0 and 2 of the row */ | |
| 5036 | |
| 5037 /* Add range center and fudge factor for final descale and range-limit. The fudge factor is half of 2**(PASS1_BITS+3); after the shift up by CONST_BITS it is half the divisor of the final right shift. */ | |
| 5038 tmp0 = (INT32) wsptr[0] + | |
| 5039 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 5040 (ONE << (PASS1_BITS+2))); | |
| 5041 tmp2 = (INT32) wsptr[2]; | |
| 5042 | |
| 5043 tmp10 = (tmp0 + tmp2) << CONST_BITS; | |
| 5044 tmp12 = (tmp0 - tmp2) << CONST_BITS; | |
| 5045 | |
| 5046 /* Odd part: workspace entries 1 and 3 of the row */ | |
| 5047 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 5048 | |
| 5049 z2 = (INT32) wsptr[1]; | |
| 5050 z3 = (INT32) wsptr[3]; | |
| 5051 | |
| 5052 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 5053 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 5054 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 5055 | |
| 5056 /* Final output stage: shift down, mask with RANGE_MASK, then look up the sample in range_limit */ | |
| 5057 | |
| 5058 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 5059 CONST_BITS+PASS1_BITS+3) | |
| 5060 & RANGE_MASK]; | |
| 5061 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 5062 CONST_BITS+PASS1_BITS+3) | |
| 5063 & RANGE_MASK]; | |
| 5064 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, | |
| 5065 CONST_BITS+PASS1_BITS+3) | |
| 5066 & RANGE_MASK]; | |
| 5067 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, | |
| 5068 CONST_BITS+PASS1_BITS+3) | |
| 5069 & RANGE_MASK]; | |
| 5070 | |
| 5071 wsptr += 4; /* advance pointer to next row */ | |
| 5072 } | |
| 5073 } | |
| 5074 | |
| 5075 | |
| 5076 /* | |
| 5077 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 5078 * producing a 3x6 output block (3 samples per row, 6 rows). | |
| 5079 * Coefficients are read from coef_block and dequantized with the table at compptr->dct_table; samples are written to output_buf[0..5] starting at column output_col. | |
| 5080 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows). | |
| 5081 */ | |
| 5082 | |
| 5083 GLOBAL(void) | |
| 5084 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 5085 JCOEFPTR coef_block, | |
| 5086 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 5087 { | |
| 5088 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; | |
| 5089 INT32 z1, z2, z3; | |
| 5090 JCOEFPTR inptr; | |
| 5091 ISLOW_MULT_TYPE * quantptr; | |
| 5092 int * wsptr; | |
| 5093 JSAMPROW outptr; | |
| 5094 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 5095 int ctr; | |
| 5096 int workspace[3*6]; /* buffers data between passes: 6 rows of 3 ints */ | |
| 5097 SHIFT_TEMPS | |
| 5098 | |
| 5099 /* Pass 1: process columns from input, store into work array. | |
| 5100 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). | |
| 5101 * Only the first 3 columns of coef_block are read; each column yields 6 values stored with a stride of 3. */ | |
| 5102 | |
| 5103 inptr = coef_block; | |
| 5104 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 5105 wsptr = workspace; | |
| 5106 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { | |
| 5107 /* Even part: column entries 0, 2 and 4 */ | |
| 5108 | |
| 5109 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 5110 if (ctr == 0) | |
| 5111 CLAMP_DC(tmp0); /* first column only: this entry is coef_block[0], the DC term */ | |
| 5112 tmp0 <<= CONST_BITS; | |
| 5113 /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS), carried by tmp0 into every even-part term. */ | |
| 5114 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); | |
| 5115 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | |
| 5116 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ | |
| 5117 tmp1 = tmp0 + tmp10; | |
| 5118 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); /* shifted down here already; rows 1 and 4 are stored without a further shift */ | |
| 5119 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 5120 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ | |
| 5121 tmp10 = tmp1 + tmp0; | |
| 5122 tmp12 = tmp1 - tmp0; | |
| 5123 | |
| 5124 /* Odd part: column entries 1, 3 and 5 */ | |
| 5125 | |
| 5126 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 5127 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 5128 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | |
| 5129 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ | |
| 5130 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); | |
| 5131 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); | |
| 5132 tmp1 = (z1 - z2 - z3) << PASS1_BITS; /* scaled by 2**PASS1_BITS only, matching the already shifted tmp11 */ | |
| 5133 | |
| 5134 /* Final output stage: rows k and 5-k receive even+odd and even-odd; rows 1 and 4 skip the shift because tmp11 and tmp1 are already at the workspace scale */ | |
| 5135 | |
| 5136 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); | |
| 5137 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); | |
| 5138 wsptr[3*1] = (int) (tmp11 + tmp1); | |
| 5139 wsptr[3*4] = (int) (tmp11 - tmp1); | |
| 5140 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); | |
| 5141 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); | |
| 5142 } | |
| 5143 | |
| 5144 /* Pass 2: process 6 rows from work array, store into output array. | |
| 5145 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). | |
| 5146 * Each row of 3 workspace values produces the 3 samples outptr[0..2]. */ | |
| 5147 | |
| 5148 wsptr = workspace; | |
| 5149 for (ctr = 0; ctr < 6; ctr++) { | |
| 5150 outptr = output_buf[ctr] + output_col; | |
| 5151 | |
| 5152 /* Even part: workspace entries 0 and 2 of the row */ | |
| 5153 | |
| 5154 /* Add range center and fudge factor for final descale and range-limit. The fudge factor is half of 2**(PASS1_BITS+3); after the shift up by CONST_BITS it is half the divisor of the final right shift. */ | |
| 5155 tmp0 = (INT32) wsptr[0] + | |
| 5156 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + | |
| 5157 (ONE << (PASS1_BITS+2))); | |
| 5158 tmp0 <<= CONST_BITS; | |
| 5159 tmp2 = (INT32) wsptr[2]; | |
| 5160 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ | |
| 5161 tmp10 = tmp0 + tmp12; | |
| 5162 tmp2 = tmp0 - tmp12 - tmp12; | |
| 5163 | |
| 5164 /* Odd part: workspace entry 1 of the row */ | |
| 5165 | |
| 5166 tmp12 = (INT32) wsptr[1]; | |
| 5167 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ | |
| 5168 | |
| 5169 /* Final output stage: the middle sample outptr[1] uses the even part only */ | |
| 5170 | |
| 5171 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, | |
| 5172 CONST_BITS+PASS1_BITS+3) | |
| 5173 & RANGE_MASK]; | |
| 5174 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, | |
| 5175 CONST_BITS+PASS1_BITS+3) | |
| 5176 & RANGE_MASK]; | |
| 5177 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, | |
| 5178 CONST_BITS+PASS1_BITS+3) | |
| 5179 & RANGE_MASK]; | |
| 5180 | |
| 5181 wsptr += 3; /* advance pointer to next row */ | |
| 5182 } | |
| 5183 } | |
| 5184 | |
| 5185 | |
| 5186 /* | |
| 5187 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 5188 * producing a 2x4 output block (2 samples per row, 4 rows). | |
| 5189 * Coefficients are read from coef_block and dequantized with the table at compptr->dct_table; samples are written to output_buf[0..3] starting at column output_col. | |
| 5190 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). | |
| 5191 */ | |
| 5192 | |
| 5193 GLOBAL(void) | |
| 5194 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 5195 JCOEFPTR coef_block, | |
| 5196 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 5197 { | |
| 5198 INT32 tmp0, tmp2, tmp10, tmp12; | |
| 5199 INT32 z1, z2, z3; | |
| 5200 JCOEFPTR inptr; | |
| 5201 ISLOW_MULT_TYPE * quantptr; | |
| 5202 INT32 * wsptr; | |
| 5203 JSAMPROW outptr; | |
| 5204 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 5205 int ctr; | |
| 5206 INT32 workspace[2*4]; /* buffers data between passes: 4 rows of 2 values, INT32 here because pass 1 stores them without shifting down */ | |
| 5207 SHIFT_TEMPS | |
| 5208 | |
| 5209 /* Pass 1: process columns from input, store into work array. | |
| 5210 * 4-point IDCT kernel, | |
| 5211 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. | |
| 5212 * Only the first 2 columns of coef_block are read; results keep the 2**CONST_BITS scale and no fudge factor is added in this pass. */ | |
| 5213 | |
| 5214 inptr = coef_block; | |
| 5215 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 5216 wsptr = workspace; | |
| 5217 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { | |
| 5218 /* Even part: column entries 0 and 2 */ | |
| 5219 | |
| 5220 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 5221 if (ctr == 0) | |
| 5222 CLAMP_DC(tmp0); /* first column only: this entry is coef_block[0], the DC term */ | |
| 5223 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | |
| 5224 | |
| 5225 tmp10 = (tmp0 + tmp2) << CONST_BITS; | |
| 5226 tmp12 = (tmp0 - tmp2) << CONST_BITS; | |
| 5227 | |
| 5228 /* Odd part: column entries 1 and 3 */ | |
| 5229 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ | |
| 5230 | |
| 5231 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 5232 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | |
| 5233 | |
| 5234 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ | |
| 5235 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ | |
| 5236 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ | |
| 5237 | |
| 5238 /* Final output stage: stored unshifted, rows k and 3-k receive even+odd and even-odd */ | |
| 5239 | |
| 5240 wsptr[2*0] = tmp10 + tmp0; | |
| 5241 wsptr[2*3] = tmp10 - tmp0; | |
| 5242 wsptr[2*1] = tmp12 + tmp2; | |
| 5243 wsptr[2*2] = tmp12 - tmp2; | |
| 5244 } | |
| 5245 | |
| 5246 /* Pass 2: process 4 rows from work array, store into output array. Each row is a 2-point sum and difference of its two workspace values. */ | |
| 5247 | |
| 5248 wsptr = workspace; | |
| 5249 for (ctr = 0; ctr < 4; ctr++) { | |
| 5250 outptr = output_buf[ctr] + output_col; | |
| 5251 | |
| 5252 /* Even part */ | |
| 5253 | |
| 5254 /* Add range center and fudge factor for final descale and range-limit. Both use CONST_BITS+3 here because the workspace still carries the 2**CONST_BITS scale; the fudge factor is half the divisor of the final right shift. */ | |
| 5255 tmp10 = wsptr[0] + | |
| 5256 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) + | |
| 5257 (ONE << (CONST_BITS+2))); | |
| 5258 | |
| 5259 /* Odd part */ | |
| 5260 | |
| 5261 tmp0 = wsptr[1]; | |
| 5262 | |
| 5263 /* Final output stage: shift down, mask with RANGE_MASK, then look up the sample in range_limit */ | |
| 5264 | |
| 5265 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3) | |
| 5266 & RANGE_MASK]; | |
| 5267 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3) | |
| 5268 & RANGE_MASK]; | |
| 5269 | |
| 5270 wsptr += 2; /* advance pointer to next row */ | |
| 5271 } | |
| 5272 } | |
| 5273 | |
| 5274 | |
| 5275 /* | |
| 5276 * Perform dequantization and inverse DCT on one block of coefficients, | |
| 5277 * producing a 1x2 output block (1 sample per row, 2 rows). | |
| 5278 * Only coef_block[DCTSIZE*0] and coef_block[DCTSIZE*1] are read, dequantized with the table at compptr->dct_table; the two samples go to output_buf[0] and output_buf[1] at column output_col. | |
| 5279 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows). | |
| 5280 */ | |
| 5281 | |
| 5282 GLOBAL(void) | |
| 5283 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, | |
| 5284 JCOEFPTR coef_block, | |
| 5285 JSAMPARRAY output_buf, JDIMENSION output_col) | |
| 5286 { | |
| 5287 DCTELEM tmp0, tmp1; | |
| 5288 ISLOW_MULT_TYPE * quantptr; | |
| 5289 JSAMPLE *range_limit = IDCT_range_limit(cinfo); | |
| 5290 ISHIFT_TEMPS | |
| 5291 | |
| 5292 /* Process 1 column from input, store into output array. No workspace is used: results are written straight to the output rows. */ | |
| 5293 | |
| 5294 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; | |
| 5295 | |
| 5296 /* Even part: the DC term */ | |
| 5297 | |
| 5298 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); | |
| 5299 CLAMP_DC(tmp0); | |
| 5300 /* Add range center and fudge factor for final descale and range-limit. The fudge factor 1 << 2 is half the divisor of the final right shift by 3. */ | |
| 5301 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); | |
| 5302 | |
| 5303 /* Odd part: the single entry one row below the DC term */ | |
| 5304 | |
| 5305 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); | |
| 5306 | |
| 5307 /* Final output stage: sum goes to row 0, difference to row 1, each masked with RANGE_MASK and mapped through range_limit */ | |
| 5308 | |
| 5309 output_buf[0][output_col] = | |
| 5310 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; | |
| 5311 output_buf[1][output_col] = | |
| 5312 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; | |
| 5313 } | |
| 5314 | |
| 5315 #endif /* IDCT_SCALING_SUPPORTED */ | |
| 5316 #endif /* DCT_ISLOW_SUPPORTED */ |
