Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | pj | 1 | /* |
2 | * jrevdct.c |
||
3 | * |
||
4 | * This file is part of the Independent JPEG Group's software. |
||
5 | * The IJG code is distributed under the terms reproduced here: |
||
6 | * |
||
7 | * LEGAL ISSUES |
||
8 | * ============ |
||
9 | * |
||
10 | * In plain English: |
||
11 | * |
||
12 | * 1. We don't promise that this software works. (But if you find any bugs, |
||
13 | * please let us know!) |
||
14 | * 2. You can use this software for whatever you want. You don't have to |
||
15 | * pay us. |
||
16 | * 3. You may not pretend that you wrote this software. If you use it in a |
||
17 | * program, you must acknowledge somewhere in your documentation that |
||
18 | * you've used the IJG code. |
||
19 | * |
||
20 | * In legalese: |
||
21 | * |
||
22 | * The authors make NO WARRANTY or representation, either express or implied, |
||
23 | * with respect to this software, its quality, accuracy, merchantability, or |
||
24 | * fitness for a particular purpose. This software is provided "AS IS", and |
||
25 | * you, its user, assume the entire risk as to its quality and accuracy. |
||
26 | * |
||
27 | * This software is copyright (C) 1991, 1992, Thomas G. Lane. |
||
28 | * All Rights Reserved except as specified below. |
||
29 | * |
||
30 | * Permission is hereby granted to use, copy, modify, and distribute this |
||
31 | * software (or portions thereof) for any purpose, without fee, subject to |
||
32 | * these conditions: |
||
33 | * (1) If any part of the source code for this software is distributed, then |
||
34 | * this copyright and no-warranty notice must be included unaltered; and any |
||
35 | * additions, deletions, or changes to the original files must be clearly |
||
36 | * indicated in accompanying documentation. |
||
37 | * (2) If only executable code is distributed, then the accompanying |
||
38 | * documentation must state that "this software is based in part on the |
||
39 | * work of the Independent JPEG Group". |
||
40 | * (3) Permission for use of this software is granted only if the user |
||
41 | * accepts full responsibility for any undesirable consequences; the authors |
||
42 | * accept NO LIABILITY for damages of any kind. |
||
43 | * |
||
44 | * These conditions apply to any software derived from or based on the IJG |
||
45 | * code, not just to the unmodified library. If you use our work, you ought |
||
46 | * to acknowledge us. |
||
47 | * |
||
48 | * Permission is NOT granted for the use of any IJG author's name or company |
||
49 | * name in advertising or publicity relating to this software or products |
||
50 | * derived from it. This software may be referred to only as |
||
51 | * "the Independent JPEG Group's software". |
||
52 | * |
||
53 | * We specifically permit and encourage the use of this software as the |
||
54 | * basis of commercial products, provided that all warranty or liability |
||
55 | * claims are assumed by the product vendor. |
||
56 | * |
||
57 | * |
||
58 | * ARCHIVE LOCATIONS |
||
59 | * ================= |
||
60 | * |
||
61 | * The "official" archive site for this software is ftp.uu.net (Internet |
||
62 | * address 192.48.96.9). The most recent released version can always be |
||
63 | * found there in directory graphics/jpeg. This particular version will |
||
64 | * be archived as graphics/jpeg/jpegsrc.v6a.tar.gz. If you are on the |
||
65 | * Internet, you can retrieve files from ftp.uu.net by standard anonymous |
||
66 | * FTP. If you don't have FTP access, UUNET's archives are also available |
||
67 | * via UUCP; contact help@uunet.uu.net for information on retrieving files |
||
68 | * that way. |
||
69 | * |
||
70 | * Numerous Internet sites maintain copies of the UUNET files. However, |
||
71 | * only ftp.uu.net is guaranteed to have the latest official version. |
||
72 | * |
||
73 | * You can also obtain this software in DOS-compatible "zip" archive |
||
74 | * format from the SimTel archives (ftp.coast.net:/SimTel/msdos/graphics/), |
||
75 | * or on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), |
||
76 | * library 12 "JPEG Tools". Again, these versions may sometimes lag behind |
||
77 | * the ftp.uu.net release. |
||
78 | * |
||
79 | * The JPEG FAQ (Frequently Asked Questions) article is a useful source of |
||
80 | * general information about JPEG. It is updated constantly and therefore |
||
81 | * is not included in this distribution. The FAQ is posted every two weeks |
||
82 | * to Usenet newsgroups comp.graphics.misc, news.answers, and other groups. |
||
83 | * You can always obtain the latest version from the news.answers archive |
||
84 | * at rtfm.mit.edu. By FTP, fetch /pub/usenet/news.answers/jpeg-faq/part1 |
||
85 | * and .../part2. If you don't have FTP, send e-mail to |
||
86 | * mail-server@rtfm.mit.edu with body |
||
87 | * send usenet/news.answers/jpeg-faq/part1 |
||
88 | * send usenet/news.answers/jpeg-faq/part2 |
||
89 | * |
||
90 | * ============== |
||
91 | * |
||
92 | * |
||
93 | * This file contains the basic inverse-DCT transformation subroutine. |
||
94 | * |
||
95 | * This implementation is based on an algorithm described in |
||
96 | * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT |
||
97 | * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, |
||
98 | * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. |
||
99 | * The primary algorithm described there uses 11 multiplies and 29 adds. |
||
100 | * We use their alternate method with 12 multiplies and 32 adds. |
||
101 | * The advantage of this method is that no data path contains more than one |
||
102 | * multiplication; this allows a very simple and accurate implementation in |
||
103 | * scaled fixed-point arithmetic, with a minimal number of shifts. |
||
104 | * |
||
105 | * |
||
106 | * CHANGES FOR BERKELEY MPEG |
||
107 | * ========================= |
||
108 | * |
||
109 | * This file has been altered to use the Berkeley MPEG header files, |
||
110 | * to add the capability to handle sparse DCT matrices efficiently, |
||
111 | * and to relabel the inverse DCT function as well as the file |
||
112 | * (formerly jidctint.c). |
||
113 | * |
||
114 | * I've made lots of modifications to attempt to take advantage of the |
||
115 | * sparse nature of the DCT matrices we're getting. Although the logic |
||
116 | * is cumbersome, it's straightforward and the resulting code is much |
||
117 | * faster. |
||
118 | * |
||
119 | * A better way to do this would be to pass in the DCT block as a sparse |
||
120 | * matrix, perhaps with the difference cases encoded. |
||
121 | */ |
||
122 | |||
123 | #include <string.h> |
||
124 | #include "video.h" |
||
125 | #include "proto.h" |
||
126 | |||
127 | #define GLOBAL /* a function referenced thru EXTERNs */ |
||
128 | |||
129 | /* We assume that right shift corresponds to signed division by 2 with |
||
130 | * rounding towards minus infinity. This is correct for typical "arithmetic |
||
131 | * shift" instructions that shift in copies of the sign bit. But some |
||
132 | * C compilers implement >> with an unsigned shift. For these machines you |
||
133 | * must define RIGHT_SHIFT_IS_UNSIGNED. |
||
134 | * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity. |
||
135 | * It is only applied with constant shift counts. SHIFT_TEMPS must be |
||
136 | * included in the variables of any routine using RIGHT_SHIFT. |
||
137 | */ |
||
138 | |||
139 | #ifdef RIGHT_SHIFT_IS_UNSIGNED |
||
140 | #define SHIFT_TEMPS INT32 shift_temp; |
||
141 | #define RIGHT_SHIFT(x,shft) \ |
||
142 | ((shift_temp = (x)) < 0 ? \ |
||
143 | (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \ |
||
144 | (shift_temp >> (shft))) |
||
145 | #else |
||
146 | #define SHIFT_TEMPS |
||
147 | #define RIGHT_SHIFT(x,shft) ((x) >> (shft)) |
||
148 | #endif |
||
149 | |||
150 | /* |
||
151 | * This routine is specialized to the case DCTSIZE = 8. |
||
152 | */ |
||
153 | |||
154 | #if DCTSIZE != 8 |
||
155 | Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
||
156 | #endif |
||
157 | |||
158 | |||
159 | /* |
||
160 | * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT |
||
161 | * on each column. Direct algorithms are also available, but they are |
||
162 | * much more complex and seem not to be any faster when reduced to code. |
||
163 | * |
||
164 | * The poop on this scaling stuff is as follows: |
||
165 | * |
||
166 | * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) |
||
167 | * larger than the true IDCT outputs. The final outputs are therefore |
||
168 | * a factor of N larger than desired; since N=8 this can be cured by |
||
169 | * a simple right shift at the end of the algorithm. The advantage of |
||
170 | * this arrangement is that we save two multiplications per 1-D IDCT, |
||
171 | * because the y0 and y4 inputs need not be divided by sqrt(N). |
||
172 | * |
||
173 | * We have to do addition and subtraction of the integer inputs, which |
||
174 | * is no problem, and multiplication by fractional constants, which is |
||
175 | * a problem to do in integer arithmetic. We multiply all the constants |
||
176 | * by CONST_SCALE and convert them to integer constants (thus retaining |
||
177 | * CONST_BITS bits of precision in the constants). After doing a |
||
178 | * multiplication we have to divide the product by CONST_SCALE, with proper |
||
179 | * rounding, to produce the correct output. This division can be done |
||
180 | * cheaply as a right shift of CONST_BITS bits. We postpone shifting |
||
181 | * as long as possible so that partial sums can be added together with |
||
182 | * full fractional precision. |
||
183 | * |
||
184 | * The outputs of the first pass are scaled up by PASS1_BITS bits so that |
||
185 | * they are represented to better-than-integral precision. These outputs |
||
186 | * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word |
||
187 | * with the recommended scaling. (To scale up 12-bit sample data further, an |
||
188 | * intermediate INT32 array would be needed.) |
||
189 | * |
||
190 | * To avoid overflow of the 32-bit intermediate results in pass 2, we must |
||
191 | * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis |
||
192 | * shows that the values given below are the most effective. |
||
193 | */ |
||
194 | |||
195 | #ifdef EIGHT_BIT_SAMPLES |
||
196 | #define PASS1_BITS 2 |
||
197 | #else |
||
198 | #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
||
199 | #endif |
||
200 | |||
201 | #define ONE ((INT32) 1) |
||
202 | |||
203 | #define CONST_SCALE (ONE << CONST_BITS) |
||
204 | |||
205 | /* Convert a positive real constant to an integer scaled by CONST_SCALE. |
||
206 | * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, |
||
207 | * you will pay a significant penalty in run time. In that case, figure |
||
208 | * the correct integer constant values and insert them by hand. |
||
209 | */ |
||
210 | |||
211 | #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) |
||
212 | |||
213 | /* When adding two opposite-signed fixes, the 0.5 cancels */ |
||
214 | #define FIX2(x) ((INT32) ((x) * CONST_SCALE)) |
||
215 | |||
216 | /* Descale and correctly round an INT32 value that's scaled by N bits. |
||
217 | * We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
||
218 | * the fudge factor is correct for either sign of X. |
||
219 | */ |
||
220 | |||
221 | #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) |
||
222 | |||
223 | /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. |
||
224 | * For 8-bit samples with the recommended scaling, all the variable |
||
225 | * and constant values involved are no more than 16 bits wide, so a |
||
226 | * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; |
||
227 | * this provides a useful speedup on many machines. |
||
228 | * There is no way to specify a 16x16->32 multiply in portable C, but |
||
229 | * some C compilers will do the right thing if you provide the correct |
||
230 | * combination of casts. |
||
231 | * NB: for 12-bit samples, a full 32-bit multiplication will be needed. |
||
232 | */ |
||
233 | |||
234 | #ifdef EIGHT_BIT_SAMPLES |
||
235 | #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
||
236 | #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) |
||
237 | #endif |
||
238 | #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
||
239 | #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) |
||
240 | #endif |
||
241 | #endif |
||
242 | |||
243 | #ifndef MULTIPLY /* default definition */ |
||
244 | #define MULTIPLY(var,const) ((var) * (const)) |
||
245 | #endif |
||
246 | |||
247 | #ifndef NO_SPARSE_DCT |
||
248 | #define SPARSE_SCALE_FACTOR 8 |
||
249 | #endif |
||
250 | |||
251 | /* Precomputed idct value arrays. */ |
||
252 | |||
253 | static DCTELEM PreIDCT[64][64]; |
||
254 | |||
255 | |||
256 | /* |
||
257 | *-------------------------------------------------------------- |
||
258 | * |
||
259 | * init_pre_idct -- |
||
260 | * |
||
261 | * Pre-computes singleton coefficient IDCT values. |
||
262 | * |
||
263 | * Results: |
||
264 | * None. |
||
265 | * |
||
266 | * Side effects: |
||
267 | * None. |
||
268 | * |
||
269 | *-------------------------------------------------------------- |
||
270 | */ |
||
271 | void |
||
272 | init_pre_idct() { |
||
273 | int i; |
||
274 | |||
275 | for (i=0; i<64; i++) { |
||
276 | memset((char *) PreIDCT[i], 0, 64*sizeof(DCTELEM)); |
||
277 | PreIDCT[i][i] = 1 << SPARSE_SCALE_FACTOR; |
||
278 | j_rev_dct(PreIDCT[i]); |
||
279 | } |
||
280 | } |
||
281 | |||
282 | #ifndef NO_SPARSE_DCT |
||
283 | |||
284 | |||
285 | |||
286 | /* |
||
287 | *-------------------------------------------------------------- |
||
288 | * |
||
289 | * j_rev_dct_sparse -- |
||
290 | * |
||
291 | * Performs the inverse DCT on one block of coefficients. |
||
292 | * |
||
293 | * Results: |
||
294 | * None. |
||
295 | * |
||
296 | * Side effects: |
||
297 | * None. |
||
298 | * |
||
299 | *-------------------------------------------------------------- |
||
300 | */ |
||
301 | |||
302 | void |
||
303 | j_rev_dct_sparse (data, pos) |
||
304 | DCTBLOCK data; |
||
305 | int pos; |
||
306 | { |
||
307 | short int val; |
||
308 | register int *dp; |
||
309 | register int v; |
||
310 | int quant; |
||
311 | |||
312 | #ifdef SPARSE_AC |
||
313 | register DCTELEM *dataptr; |
||
314 | DCTELEM *ndataptr; |
||
315 | int coeff, rr; |
||
316 | DCTBLOCK tmpdata, tmp2data; |
||
317 | DCTELEM *tmpdataptr, *tmp2dataptr; |
||
318 | int printFlag = 1; |
||
319 | #endif |
||
320 | |||
321 | |||
322 | /* If DC Coefficient. */ |
||
323 | |||
324 | if (pos == 0) { |
||
325 | dp = (int *)data; |
||
326 | v = *data; |
||
327 | quant = 8; |
||
328 | |||
329 | /* Compute 32 bit value to assign. This speeds things up a bit */ |
||
330 | if (v < 0) { |
||
331 | val = -v; |
||
332 | val += (quant >> 1); |
||
333 | val /= quant; |
||
334 | val = -val; |
||
335 | } |
||
336 | else { |
||
337 | val = (v + (quant >> 1)) / quant; |
||
338 | } |
||
339 | |||
340 | v = ((val & 0xffff) | (val << 16)); |
||
341 | |||
342 | dp[0] = v; dp[1] = v; dp[2] = v; dp[3] = v; |
||
343 | dp[4] = v; dp[5] = v; dp[6] = v; dp[7] = v; |
||
344 | dp[8] = v; dp[9] = v; dp[10] = v; dp[11] = v; |
||
345 | dp[12] = v; dp[13] = v; dp[14] = v; dp[15] = v; |
||
346 | dp[16] = v; dp[17] = v; dp[18] = v; dp[19] = v; |
||
347 | dp[20] = v; dp[21] = v; dp[22] = v; dp[23] = v; |
||
348 | dp[24] = v; dp[25] = v; dp[26] = v; dp[27] = v; |
||
349 | dp[28] = v; dp[29] = v; dp[30] = v; dp[31] = v; |
||
350 | |||
351 | return; |
||
352 | } |
||
353 | |||
354 | /* Some other coefficient. */ |
||
355 | |||
356 | #ifdef SPARSE_AC |
||
357 | dataptr = (DCTELEM *)data; |
||
358 | coeff = dataptr[pos]; |
||
359 | ndataptr = PreIDCT[pos]; |
||
360 | |||
361 | printf ("\n"); |
||
362 | printf ("COEFFICIENT = %3d, POSITION = %2d\n", coeff, pos); |
||
363 | |||
364 | for (v=0; v<64; v++) { |
||
365 | memcpy((char *) tmpdata, data, 64*sizeof(DCTELEM)); |
||
366 | } |
||
367 | tmpdataptr = (DCTELEM *)tmpdata; |
||
368 | |||
369 | for (v=0; v<64; v++) { |
||
370 | memcpy((char *) tmp2data, data, 64*sizeof(DCTELEM)); |
||
371 | } |
||
372 | tmp2dataptr = (DCTELEM *)tmp2data; |
||
373 | |||
374 | #ifdef DEBUG |
||
375 | printf ("original DCTBLOCK:\n"); |
||
376 | for (rr=0; rr<8; rr++) { |
||
377 | for (v=0; v<8; v++) { |
||
378 | if (dataptr[8*rr+v] != tmpdataptr[8*rr+v]) |
||
379 | fprintf(stderr, "Error in copy\n"); |
||
380 | printf ("%3d ", dataptr[8*rr+v]); |
||
381 | } |
||
382 | printf("\n"); |
||
383 | } |
||
384 | #endif |
||
385 | |||
386 | printf("\n"); |
||
387 | |||
388 | for (rr=0; rr<4; rr++) { |
||
389 | dataptr[0] = (ndataptr[0] * coeff) >> SPARSE_SCALE_FACTOR; |
||
390 | dataptr[1] = (ndataptr[1] * coeff) >> SPARSE_SCALE_FACTOR; |
||
391 | dataptr[2] = (ndataptr[2] * coeff) >> SPARSE_SCALE_FACTOR; |
||
392 | dataptr[3] = (ndataptr[3] * coeff) >> SPARSE_SCALE_FACTOR; |
||
393 | dataptr[4] = (ndataptr[4] * coeff) >> SPARSE_SCALE_FACTOR; |
||
394 | dataptr[5] = (ndataptr[5] * coeff) >> SPARSE_SCALE_FACTOR; |
||
395 | dataptr[6] = (ndataptr[6] * coeff) >> SPARSE_SCALE_FACTOR; |
||
396 | dataptr[7] = (ndataptr[7] * coeff) >> SPARSE_SCALE_FACTOR; |
||
397 | dataptr[8] = (ndataptr[8] * coeff) >> SPARSE_SCALE_FACTOR; |
||
398 | dataptr[9] = (ndataptr[9] * coeff) >> SPARSE_SCALE_FACTOR; |
||
399 | dataptr[10] = (ndataptr[10] * coeff) >> SPARSE_SCALE_FACTOR; |
||
400 | dataptr[11] = (ndataptr[11] * coeff) >> SPARSE_SCALE_FACTOR; |
||
401 | dataptr[12] = (ndataptr[12] * coeff) >> SPARSE_SCALE_FACTOR; |
||
402 | dataptr[13] = (ndataptr[13] * coeff) >> SPARSE_SCALE_FACTOR; |
||
403 | dataptr[14] = (ndataptr[14] * coeff) >> SPARSE_SCALE_FACTOR; |
||
404 | dataptr[15] = (ndataptr[15] * coeff) >> SPARSE_SCALE_FACTOR; |
||
405 | dataptr += 16; |
||
406 | ndataptr += 16; |
||
407 | } |
||
408 | |||
409 | dataptr = (DCTELEM *) data; |
||
410 | |||
411 | #ifdef DEBUG |
||
412 | printf ("sparse IDCT:\n"); |
||
413 | for (rr=0; rr<8; rr++) { |
||
414 | for (v=0; v<8; v++) { |
||
415 | printf ("%3d ", dataptr[8*rr+v]); |
||
416 | } |
||
417 | printf("\n"); |
||
418 | } |
||
419 | printf("\n"); |
||
420 | #endif /* DEBUG */ |
||
421 | #else /* NO_SPARSE_AC */ |
||
422 | #ifdef FLOATDCT |
||
423 | if (qualityFlag) |
||
424 | float_idct(data); |
||
425 | else |
||
426 | #endif /* FLOATDCT */ |
||
427 | j_rev_dct(data); |
||
428 | #endif /* SPARSE_AC */ |
||
429 | return; |
||
430 | |||
431 | } |
||
432 | |||
433 | #else |
||
434 | |||
435 | /* |
||
436 | *-------------------------------------------------------------- |
||
437 | * |
||
438 | * j_rev_dct_sparse -- |
||
439 | * |
||
440 | * Performs the original inverse DCT on one block of |
||
441 | * coefficients. |
||
442 | * |
||
443 | * Results: |
||
444 | * None. |
||
445 | * |
||
446 | * Side effects: |
||
447 | * None. |
||
448 | * |
||
449 | *-------------------------------------------------------------- |
||
450 | */ |
||
451 | void |
||
452 | j_rev_dct_sparse (data, pos) |
||
453 | DCTBLOCK data; |
||
454 | int pos; |
||
455 | { |
||
456 | j_rev_dct(data); |
||
457 | } |
||
458 | #endif /* SPARSE_DCT */ |
||
459 | |||
460 | |||
461 | #ifndef FIVE_DCT |
||
462 | |||
463 | #ifndef ORIG_DCT |
||
464 | |||
465 | |||
466 | /* |
||
467 | *-------------------------------------------------------------- |
||
468 | * |
||
469 | * j_rev_dct -- |
||
470 | * |
||
471 | * The inverse DCT function. |
||
472 | * |
||
473 | * Results: |
||
474 | * None. |
||
475 | * |
||
476 | * Side effects: |
||
477 | * None. |
||
478 | * |
||
479 | *-------------------------------------------------------------- |
||
480 | */ |
||
481 | void |
||
482 | j_rev_dct (data) |
||
483 | DCTBLOCK data; |
||
484 | { |
||
485 | INT32 tmp0, tmp1, tmp2, tmp3; |
||
486 | INT32 tmp10, tmp11, tmp12, tmp13; |
||
487 | INT32 z1, z2, z3, z4, z5; |
||
488 | INT32 d0, d1, d2, d3, d4, d5, d6, d7; |
||
489 | register DCTELEM *dataptr; |
||
490 | int rowctr; |
||
491 | SHIFT_TEMPS |
||
492 | |||
493 | /* Pass 1: process rows. */ |
||
494 | /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
||
495 | /* furthermore, we scale the results by 2**PASS1_BITS. */ |
||
496 | |||
497 | dataptr = data; |
||
498 | |||
499 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||
500 | /* Due to quantization, we will usually find that many of the input |
||
501 | * coefficients are zero, especially the AC terms. We can exploit this |
||
502 | * by short-circuiting the IDCT calculation for any row in which all |
||
503 | * the AC terms are zero. In that case each output is equal to the |
||
504 | * DC coefficient (with scale factor as needed). |
||
505 | * With typical images and quantization tables, half or more of the |
||
506 | * row DCT calculations can be simplified this way. |
||
507 | */ |
||
508 | |||
509 | register int *idataptr = (int*)dataptr; |
||
510 | d0 = dataptr[0]; |
||
511 | d1 = dataptr[1]; |
||
512 | if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { |
||
513 | /* AC terms all zero */ |
||
514 | if (d0) { |
||
515 | /* Compute a 32 bit value to assign. */ |
||
516 | DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); |
||
517 | register int v = (dcval & 0xffff) | (dcval << 16); |
||
518 | |||
519 | idataptr[0] = v; |
||
520 | idataptr[1] = v; |
||
521 | idataptr[2] = v; |
||
522 | idataptr[3] = v; |
||
523 | } |
||
524 | |||
525 | dataptr += DCTSIZE; /* advance pointer to next row */ |
||
526 | continue; |
||
527 | } |
||
528 | d2 = dataptr[2]; |
||
529 | d3 = dataptr[3]; |
||
530 | d4 = dataptr[4]; |
||
531 | d5 = dataptr[5]; |
||
532 | d6 = dataptr[6]; |
||
533 | d7 = dataptr[7]; |
||
534 | |||
535 | /* Even part: reverse the even part of the forward DCT. */ |
||
536 | /* The rotator is sqrt(2)*c(-6). */ |
||
537 | if (d6) { |
||
538 | if (d4) { |
||
539 | if (d2) { |
||
540 | if (d0) { |
||
541 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
||
542 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
543 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
544 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
545 | |||
546 | tmp0 = (d0 + d4) << CONST_BITS; |
||
547 | tmp1 = (d0 - d4) << CONST_BITS; |
||
548 | |||
549 | tmp10 = tmp0 + tmp3; |
||
550 | tmp13 = tmp0 - tmp3; |
||
551 | tmp11 = tmp1 + tmp2; |
||
552 | tmp12 = tmp1 - tmp2; |
||
553 | } else { |
||
554 | /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ |
||
555 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
556 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
557 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
558 | |||
559 | tmp0 = d4 << CONST_BITS; |
||
560 | |||
561 | tmp10 = tmp0 + tmp3; |
||
562 | tmp13 = tmp0 - tmp3; |
||
563 | tmp11 = tmp2 - tmp0; |
||
564 | tmp12 = -(tmp0 + tmp2); |
||
565 | } |
||
566 | } else { |
||
567 | if (d0) { |
||
568 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
||
569 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
570 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
571 | |||
572 | tmp0 = (d0 + d4) << CONST_BITS; |
||
573 | tmp1 = (d0 - d4) << CONST_BITS; |
||
574 | |||
575 | tmp10 = tmp0 + tmp3; |
||
576 | tmp13 = tmp0 - tmp3; |
||
577 | tmp11 = tmp1 + tmp2; |
||
578 | tmp12 = tmp1 - tmp2; |
||
579 | } else { |
||
580 | /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ |
||
581 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
582 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
583 | |||
584 | tmp0 = d4 << CONST_BITS; |
||
585 | |||
586 | tmp10 = tmp0 + tmp3; |
||
587 | tmp13 = tmp0 - tmp3; |
||
588 | tmp11 = tmp2 - tmp0; |
||
589 | tmp12 = -(tmp0 + tmp2); |
||
590 | } |
||
591 | } |
||
592 | } else { |
||
593 | if (d2) { |
||
594 | if (d0) { |
||
595 | /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ |
||
596 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
597 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
598 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
599 | |||
600 | tmp0 = d0 << CONST_BITS; |
||
601 | |||
602 | tmp10 = tmp0 + tmp3; |
||
603 | tmp13 = tmp0 - tmp3; |
||
604 | tmp11 = tmp0 + tmp2; |
||
605 | tmp12 = tmp0 - tmp2; |
||
606 | } else { |
||
607 | /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ |
||
608 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
609 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
610 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
611 | |||
612 | tmp10 = tmp3; |
||
613 | tmp13 = -tmp3; |
||
614 | tmp11 = tmp2; |
||
615 | tmp12 = -tmp2; |
||
616 | } |
||
617 | } else { |
||
618 | if (d0) { |
||
619 | /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ |
||
620 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
621 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
622 | |||
623 | tmp0 = d0 << CONST_BITS; |
||
624 | |||
625 | tmp10 = tmp0 + tmp3; |
||
626 | tmp13 = tmp0 - tmp3; |
||
627 | tmp11 = tmp0 + tmp2; |
||
628 | tmp12 = tmp0 - tmp2; |
||
629 | } else { |
||
630 | /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ |
||
631 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
632 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
633 | |||
634 | tmp10 = tmp3; |
||
635 | tmp13 = -tmp3; |
||
636 | tmp11 = tmp2; |
||
637 | tmp12 = -tmp2; |
||
638 | } |
||
639 | } |
||
640 | } |
||
641 | } else { |
||
642 | if (d4) { |
||
643 | if (d2) { |
||
644 | if (d0) { |
||
645 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
||
646 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
647 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
648 | |||
649 | tmp0 = (d0 + d4) << CONST_BITS; |
||
650 | tmp1 = (d0 - d4) << CONST_BITS; |
||
651 | |||
652 | tmp10 = tmp0 + tmp3; |
||
653 | tmp13 = tmp0 - tmp3; |
||
654 | tmp11 = tmp1 + tmp2; |
||
655 | tmp12 = tmp1 - tmp2; |
||
656 | } else { |
||
657 | /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ |
||
658 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
659 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
660 | |||
661 | tmp0 = d4 << CONST_BITS; |
||
662 | |||
663 | tmp10 = tmp0 + tmp3; |
||
664 | tmp13 = tmp0 - tmp3; |
||
665 | tmp11 = tmp2 - tmp0; |
||
666 | tmp12 = -(tmp0 + tmp2); |
||
667 | } |
||
668 | } else { |
||
669 | if (d0) { |
||
670 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
||
671 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||
672 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||
673 | } else { |
||
674 | /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ |
||
675 | tmp10 = tmp13 = d4 << CONST_BITS; |
||
676 | tmp11 = tmp12 = -tmp10; |
||
677 | } |
||
678 | } |
||
679 | } else { |
||
680 | if (d2) { |
||
681 | if (d0) { |
||
682 | /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ |
||
683 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
684 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
685 | |||
686 | tmp0 = d0 << CONST_BITS; |
||
687 | |||
688 | tmp10 = tmp0 + tmp3; |
||
689 | tmp13 = tmp0 - tmp3; |
||
690 | tmp11 = tmp0 + tmp2; |
||
691 | tmp12 = tmp0 - tmp2; |
||
692 | } else { |
||
693 | /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ |
||
694 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
695 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
696 | |||
697 | tmp10 = tmp3; |
||
698 | tmp13 = -tmp3; |
||
699 | tmp11 = tmp2; |
||
700 | tmp12 = -tmp2; |
||
701 | } |
||
702 | } else { |
||
703 | if (d0) { |
||
704 | /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ |
||
705 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||
706 | } else { |
||
707 | /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ |
||
708 | tmp10 = tmp13 = tmp11 = tmp12 = 0; |
||
709 | } |
||
710 | } |
||
711 | } |
||
712 | } |
||
713 | |||
714 | |||
715 | /* Odd part per figure 8; the matrix is unitary and hence its |
||
716 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
||
717 | */ |
||
718 | |||
719 | if (d7) { |
||
720 | if (d5) { |
||
721 | if (d3) { |
||
722 | if (d1) { |
||
723 | /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ |
||
724 | z1 = d7 + d1; |
||
725 | z2 = d5 + d3; |
||
726 | z3 = d7 + d3; |
||
727 | z4 = d5 + d1; |
||
728 | z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); |
||
729 | |||
730 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
731 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
732 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
733 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
734 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
735 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
736 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
737 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
738 | |||
739 | z3 += z5; |
||
740 | z4 += z5; |
||
741 | |||
742 | tmp0 += z1 + z3; |
||
743 | tmp1 += z2 + z4; |
||
744 | tmp2 += z2 + z3; |
||
745 | tmp3 += z1 + z4; |
||
746 | } else { |
||
747 | /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ |
||
748 | z2 = d5 + d3; |
||
749 | z3 = d7 + d3; |
||
750 | z5 = MULTIPLY(z3 + d5, FIX(1.175875602)); |
||
751 | |||
752 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
753 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
754 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
755 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
756 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
757 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
758 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
759 | |||
760 | z3 += z5; |
||
761 | z4 += z5; |
||
762 | |||
763 | tmp0 += z1 + z3; |
||
764 | tmp1 += z2 + z4; |
||
765 | tmp2 += z2 + z3; |
||
766 | tmp3 = z1 + z4; |
||
767 | } |
||
768 | } else { |
||
769 | if (d1) { |
||
770 | /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ |
||
771 | z1 = d7 + d1; |
||
772 | z4 = d5 + d1; |
||
773 | z5 = MULTIPLY(d7 + z4, FIX(1.175875602)); |
||
774 | |||
775 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
776 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
777 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
778 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
779 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
780 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
781 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
782 | |||
783 | z3 += z5; |
||
784 | z4 += z5; |
||
785 | |||
786 | tmp0 += z1 + z3; |
||
787 | tmp1 += z2 + z4; |
||
788 | tmp2 = z2 + z3; |
||
789 | tmp3 += z1 + z4; |
||
790 | } else { |
||
791 | /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ |
||
792 | z5 = MULTIPLY(d7 + d5, FIX(1.175875602)); |
||
793 | |||
794 | tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); |
||
795 | tmp1 = MULTIPLY(d5, - FIX2(0.509795578)); |
||
796 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
797 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
798 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
799 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
800 | |||
801 | z3 += z5; |
||
802 | z4 += z5; |
||
803 | |||
804 | tmp0 += z3; |
||
805 | tmp1 += z4; |
||
806 | tmp2 = z2 + z3; |
||
807 | tmp3 = z1 + z4; |
||
808 | } |
||
809 | } |
||
810 | } else { |
||
811 | if (d3) { |
||
812 | if (d1) { |
||
813 | /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ |
||
814 | z1 = d7 + d1; |
||
815 | z3 = d7 + d3; |
||
816 | z5 = MULTIPLY(z3 + d1, FIX(1.175875602)); |
||
817 | |||
818 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
819 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
820 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
821 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
822 | z2 = MULTIPLY(d3, - FIX(2.562915447)); |
||
823 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
824 | z4 = MULTIPLY(d1, - FIX(0.390180644)); |
||
825 | |||
826 | z3 += z5; |
||
827 | z4 += z5; |
||
828 | |||
829 | tmp0 += z1 + z3; |
||
830 | tmp1 = z2 + z4; |
||
831 | tmp2 += z2 + z3; |
||
832 | tmp3 += z1 + z4; |
||
833 | } else { |
||
834 | /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ |
||
835 | z3 = d7 + d3; |
||
836 | z5 = MULTIPLY(z3, FIX(1.175875602)); |
||
837 | |||
838 | tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); |
||
839 | tmp2 = MULTIPLY(d3, FIX(0.509795579)); |
||
840 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
841 | z2 = MULTIPLY(d3, - FIX(2.562915447)); |
||
842 | z3 = MULTIPLY(z3, - FIX2(0.785694958)); |
||
843 | |||
844 | tmp0 += z3; |
||
845 | tmp1 = z2 + z5; |
||
846 | tmp2 += z3; |
||
847 | tmp3 = z1 + z5; |
||
848 | } |
||
849 | } else { |
||
850 | if (d1) { |
||
851 | /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ |
||
852 | z1 = d7 + d1; |
||
853 | z5 = MULTIPLY(z1, FIX(1.175875602)); |
||
854 | |||
855 | tmp0 = MULTIPLY(d7, - FIX2(1.662939224)); |
||
856 | tmp3 = MULTIPLY(d1, FIX2(1.111140466)); |
||
857 | z1 = MULTIPLY(z1, FIX2(0.275899379)); |
||
858 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
859 | z4 = MULTIPLY(d1, - FIX(0.390180644)); |
||
860 | |||
861 | tmp0 += z1; |
||
862 | tmp1 = z4 + z5; |
||
863 | tmp2 = z3 + z5; |
||
864 | tmp3 += z1; |
||
865 | } else { |
||
866 | /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ |
||
867 | tmp0 = MULTIPLY(d7, - FIX2(1.387039845)); |
||
868 | tmp1 = MULTIPLY(d7, FIX(1.175875602)); |
||
869 | tmp2 = MULTIPLY(d7, - FIX2(0.785694958)); |
||
870 | tmp3 = MULTIPLY(d7, FIX2(0.275899379)); |
||
871 | } |
||
872 | } |
||
873 | } |
||
874 | } else { |
||
875 | if (d5) { |
||
876 | if (d3) { |
||
877 | if (d1) { |
||
878 | /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ |
||
879 | z2 = d5 + d3; |
||
880 | z4 = d5 + d1; |
||
881 | z5 = MULTIPLY(d3 + z4, FIX(1.175875602)); |
||
882 | |||
883 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
884 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
885 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
886 | z1 = MULTIPLY(d1, - FIX(0.899976223)); |
||
887 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
888 | z3 = MULTIPLY(d3, - FIX(1.961570560)); |
||
889 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
890 | |||
891 | z3 += z5; |
||
892 | z4 += z5; |
||
893 | |||
894 | tmp0 = z1 + z3; |
||
895 | tmp1 += z2 + z4; |
||
896 | tmp2 += z2 + z3; |
||
897 | tmp3 += z1 + z4; |
||
898 | } else { |
||
899 | /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ |
||
900 | z2 = d5 + d3; |
||
901 | z5 = MULTIPLY(z2, FIX(1.175875602)); |
||
902 | |||
903 | tmp1 = MULTIPLY(d5, FIX2(1.662939225)); |
||
904 | tmp2 = MULTIPLY(d3, FIX2(1.111140466)); |
||
905 | z2 = MULTIPLY(z2, - FIX2(1.387039845)); |
||
906 | z3 = MULTIPLY(d3, - FIX(1.961570560)); |
||
907 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
908 | |||
909 | tmp0 = z3 + z5; |
||
910 | tmp1 += z2; |
||
911 | tmp2 += z2; |
||
912 | tmp3 = z4 + z5; |
||
913 | } |
||
914 | } else { |
||
915 | if (d1) { |
||
916 | /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ |
||
917 | z4 = d5 + d1; |
||
918 | z5 = MULTIPLY(z4, FIX(1.175875602)); |
||
919 | |||
920 | tmp1 = MULTIPLY(d5, - FIX2(0.509795578)); |
||
921 | tmp3 = MULTIPLY(d1, FIX2(0.601344887)); |
||
922 | z1 = MULTIPLY(d1, - FIX(0.899976223)); |
||
923 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
924 | z4 = MULTIPLY(z4, FIX2(0.785694958)); |
||
925 | |||
926 | tmp0 = z1 + z5; |
||
927 | tmp1 += z4; |
||
928 | tmp2 = z2 + z5; |
||
929 | tmp3 += z4; |
||
930 | } else { |
||
931 | /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ |
||
932 | tmp0 = MULTIPLY(d5, FIX(1.175875602)); |
||
933 | tmp1 = MULTIPLY(d5, FIX2(0.275899380)); |
||
934 | tmp2 = MULTIPLY(d5, - FIX2(1.387039845)); |
||
935 | tmp3 = MULTIPLY(d5, FIX2(0.785694958)); |
||
936 | } |
||
937 | } |
||
938 | } else { |
||
939 | if (d3) { |
||
940 | if (d1) { |
||
941 | /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ |
||
942 | z5 = d3 + d1; |
||
943 | |||
944 | tmp2 = MULTIPLY(d3, - FIX(1.451774981)); |
||
945 | tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1)); |
||
946 | z1 = MULTIPLY(d1, FIX(1.061594337)); |
||
947 | z2 = MULTIPLY(d3, - FIX(2.172734803)); |
||
948 | z4 = MULTIPLY(z5, FIX(0.785694958)); |
||
949 | z5 = MULTIPLY(z5, FIX(1.175875602)); |
||
950 | |||
951 | tmp0 = z1 - z4; |
||
952 | tmp1 = z2 + z4; |
||
953 | tmp2 += z5; |
||
954 | tmp3 += z5; |
||
955 | } else { |
||
956 | /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ |
||
957 | tmp0 = MULTIPLY(d3, - FIX2(0.785694958)); |
||
958 | tmp1 = MULTIPLY(d3, - FIX2(1.387039845)); |
||
959 | tmp2 = MULTIPLY(d3, - FIX2(0.275899379)); |
||
960 | tmp3 = MULTIPLY(d3, FIX(1.175875602)); |
||
961 | } |
||
962 | } else { |
||
963 | if (d1) { |
||
964 | /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ |
||
965 | tmp0 = MULTIPLY(d1, FIX2(0.275899379)); |
||
966 | tmp1 = MULTIPLY(d1, FIX2(0.785694958)); |
||
967 | tmp2 = MULTIPLY(d1, FIX(1.175875602)); |
||
968 | tmp3 = MULTIPLY(d1, FIX2(1.387039845)); |
||
969 | } else { |
||
970 | /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ |
||
971 | tmp0 = tmp1 = tmp2 = tmp3 = 0; |
||
972 | } |
||
973 | } |
||
974 | } |
||
975 | } |
||
976 | |||
977 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
||
978 | |||
979 | dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
||
980 | dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
||
981 | dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
||
982 | dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
||
983 | dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
||
984 | dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
||
985 | dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
||
986 | dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
||
987 | |||
988 | dataptr += DCTSIZE; /* advance pointer to next row */ |
||
989 | } |
||
990 | |||
991 | /* Pass 2: process columns. */ |
||
992 | /* Note that we must descale the results by a factor of 8 == 2**3, */ |
||
993 | /* and also undo the PASS1_BITS scaling. */ |
||
994 | |||
995 | dataptr = data; |
||
996 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||
997 | /* Columns of zeroes can be exploited in the same way as we did with rows. |
||
998 | * However, the row calculation has created many nonzero AC terms, so the |
||
999 | * simplification applies less often (typically 5% to 10% of the time). |
||
1000 | * On machines with very fast multiplication, it's possible that the |
||
1001 | * test takes more time than it's worth. In that case this section |
||
1002 | * may be commented out. |
||
1003 | */ |
||
1004 | |||
1005 | d0 = dataptr[DCTSIZE*0]; |
||
1006 | d1 = dataptr[DCTSIZE*1]; |
||
1007 | d2 = dataptr[DCTSIZE*2]; |
||
1008 | d3 = dataptr[DCTSIZE*3]; |
||
1009 | d4 = dataptr[DCTSIZE*4]; |
||
1010 | d5 = dataptr[DCTSIZE*5]; |
||
1011 | d6 = dataptr[DCTSIZE*6]; |
||
1012 | d7 = dataptr[DCTSIZE*7]; |
||
1013 | |||
1014 | /* Even part: reverse the even part of the forward DCT. */ |
||
1015 | /* The rotator is sqrt(2)*c(-6). */ |
||
1016 | if (d6) { |
||
1017 | if (d4) { |
||
1018 | if (d2) { |
||
1019 | if (d0) { |
||
1020 | /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
||
1021 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
1022 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
1023 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
1024 | |||
1025 | tmp0 = (d0 + d4) << CONST_BITS; |
||
1026 | tmp1 = (d0 - d4) << CONST_BITS; |
||
1027 | |||
1028 | tmp10 = tmp0 + tmp3; |
||
1029 | tmp13 = tmp0 - tmp3; |
||
1030 | tmp11 = tmp1 + tmp2; |
||
1031 | tmp12 = tmp1 - tmp2; |
||
1032 | } else { |
||
1033 | /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ |
||
1034 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
1035 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
1036 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
1037 | |||
1038 | tmp0 = d4 << CONST_BITS; |
||
1039 | |||
1040 | tmp10 = tmp0 + tmp3; |
||
1041 | tmp13 = tmp0 - tmp3; |
||
1042 | tmp11 = tmp2 - tmp0; |
||
1043 | tmp12 = -(tmp0 + tmp2); |
||
1044 | } |
||
1045 | } else { |
||
1046 | if (d0) { |
||
1047 | /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
||
1048 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
1049 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
1050 | |||
1051 | tmp0 = (d0 + d4) << CONST_BITS; |
||
1052 | tmp1 = (d0 - d4) << CONST_BITS; |
||
1053 | |||
1054 | tmp10 = tmp0 + tmp3; |
||
1055 | tmp13 = tmp0 - tmp3; |
||
1056 | tmp11 = tmp1 + tmp2; |
||
1057 | tmp12 = tmp1 - tmp2; |
||
1058 | } else { |
||
1059 | /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ |
||
1060 | tmp2 = MULTIPLY(d6, -FIX2(1.306562965)); |
||
1061 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
1062 | |||
1063 | tmp0 = d4 << CONST_BITS; |
||
1064 | |||
1065 | tmp10 = tmp0 + tmp3; |
||
1066 | tmp13 = tmp0 - tmp3; |
||
1067 | tmp11 = tmp2 - tmp0; |
||
1068 | tmp12 = -(tmp0 + tmp2); |
||
1069 | } |
||
1070 | } |
||
1071 | } else { |
||
1072 | if (d2) { |
||
1073 | if (d0) { |
||
1074 | /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ |
||
1075 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
1076 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
1077 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
1078 | |||
1079 | tmp0 = d0 << CONST_BITS; |
||
1080 | |||
1081 | tmp10 = tmp0 + tmp3; |
||
1082 | tmp13 = tmp0 - tmp3; |
||
1083 | tmp11 = tmp0 + tmp2; |
||
1084 | tmp12 = tmp0 - tmp2; |
||
1085 | } else { |
||
1086 | /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ |
||
1087 | z1 = MULTIPLY(d2 + d6, FIX(0.541196100)); |
||
1088 | tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065)); |
||
1089 | tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865)); |
||
1090 | |||
1091 | tmp10 = tmp3; |
||
1092 | tmp13 = -tmp3; |
||
1093 | tmp11 = tmp2; |
||
1094 | tmp12 = -tmp2; |
||
1095 | } |
||
1096 | } else { |
||
1097 | if (d0) { |
||
1098 | /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ |
||
1099 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
1100 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
1101 | |||
1102 | tmp0 = d0 << CONST_BITS; |
||
1103 | |||
1104 | tmp10 = tmp0 + tmp3; |
||
1105 | tmp13 = tmp0 - tmp3; |
||
1106 | tmp11 = tmp0 + tmp2; |
||
1107 | tmp12 = tmp0 - tmp2; |
||
1108 | } else { |
||
1109 | /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ |
||
1110 | tmp2 = MULTIPLY(d6, - FIX2(1.306562965)); |
||
1111 | tmp3 = MULTIPLY(d6, FIX(0.541196100)); |
||
1112 | |||
1113 | tmp10 = tmp3; |
||
1114 | tmp13 = -tmp3; |
||
1115 | tmp11 = tmp2; |
||
1116 | tmp12 = -tmp2; |
||
1117 | } |
||
1118 | } |
||
1119 | } |
||
1120 | } else { |
||
1121 | if (d4) { |
||
1122 | if (d2) { |
||
1123 | if (d0) { |
||
1124 | /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
||
1125 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
1126 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
1127 | |||
1128 | tmp0 = (d0 + d4) << CONST_BITS; |
||
1129 | tmp1 = (d0 - d4) << CONST_BITS; |
||
1130 | |||
1131 | tmp10 = tmp0 + tmp3; |
||
1132 | tmp13 = tmp0 - tmp3; |
||
1133 | tmp11 = tmp1 + tmp2; |
||
1134 | tmp12 = tmp1 - tmp2; |
||
1135 | } else { |
||
1136 | /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ |
||
1137 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
1138 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
1139 | |||
1140 | tmp0 = d4 << CONST_BITS; |
||
1141 | |||
1142 | tmp10 = tmp0 + tmp3; |
||
1143 | tmp13 = tmp0 - tmp3; |
||
1144 | tmp11 = tmp2 - tmp0; |
||
1145 | tmp12 = -(tmp0 + tmp2); |
||
1146 | } |
||
1147 | } else { |
||
1148 | if (d0) { |
||
1149 | /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
||
1150 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||
1151 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||
1152 | } else { |
||
1153 | /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ |
||
1154 | tmp10 = tmp13 = d4 << CONST_BITS; |
||
1155 | tmp11 = tmp12 = -tmp10; |
||
1156 | } |
||
1157 | } |
||
1158 | } else { |
||
1159 | if (d2) { |
||
1160 | if (d0) { |
||
1161 | /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ |
||
1162 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
1163 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
1164 | |||
1165 | tmp0 = d0 << CONST_BITS; |
||
1166 | |||
1167 | tmp10 = tmp0 + tmp3; |
||
1168 | tmp13 = tmp0 - tmp3; |
||
1169 | tmp11 = tmp0 + tmp2; |
||
1170 | tmp12 = tmp0 - tmp2; |
||
1171 | } else { |
||
1172 | /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ |
||
1173 | tmp2 = MULTIPLY(d2, FIX(0.541196100)); |
||
1174 | tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5)); |
||
1175 | |||
1176 | tmp10 = tmp3; |
||
1177 | tmp13 = -tmp3; |
||
1178 | tmp11 = tmp2; |
||
1179 | tmp12 = -tmp2; |
||
1180 | } |
||
1181 | } else { |
||
1182 | if (d0) { |
||
1183 | /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ |
||
1184 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||
1185 | } else { |
||
1186 | /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ |
||
1187 | tmp10 = tmp13 = tmp11 = tmp12 = 0; |
||
1188 | } |
||
1189 | } |
||
1190 | } |
||
1191 | } |
||
1192 | |||
1193 | /* Odd part per figure 8; the matrix is unitary and hence its |
||
1194 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
||
1195 | */ |
||
1196 | if (d7) { |
||
1197 | if (d5) { |
||
1198 | if (d3) { |
||
1199 | if (d1) { |
||
1200 | /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ |
||
1201 | z1 = d7 + d1; |
||
1202 | z2 = d5 + d3; |
||
1203 | z3 = d7 + d3; |
||
1204 | z4 = d5 + d1; |
||
1205 | z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); |
||
1206 | |||
1207 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
1208 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
1209 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
1210 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
1211 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
1212 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
1213 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
1214 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
1215 | |||
1216 | z3 += z5; |
||
1217 | z4 += z5; |
||
1218 | |||
1219 | tmp0 += z1 + z3; |
||
1220 | tmp1 += z2 + z4; |
||
1221 | tmp2 += z2 + z3; |
||
1222 | tmp3 += z1 + z4; |
||
1223 | } else { |
||
1224 | /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ |
||
1225 | z2 = d5 + d3; |
||
1226 | z3 = d7 + d3; |
||
1227 | z5 = MULTIPLY(z3 + d5, FIX(1.175875602)); |
||
1228 | |||
1229 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
1230 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
1231 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
1232 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
1233 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
1234 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
1235 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
1236 | |||
1237 | z3 += z5; |
||
1238 | z4 += z5; |
||
1239 | |||
1240 | tmp0 += z1 + z3; |
||
1241 | tmp1 += z2 + z4; |
||
1242 | tmp2 += z2 + z3; |
||
1243 | tmp3 = z1 + z4; |
||
1244 | } |
||
1245 | } else { |
||
1246 | if (d1) { |
||
1247 | /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ |
||
1248 | z1 = d7 + d1; |
||
1249 | z4 = d5 + d1; |
||
1250 | z5 = MULTIPLY(d7 + z4, FIX(1.175875602)); |
||
1251 | |||
1252 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
1253 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
1254 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
1255 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
1256 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
1257 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
1258 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
1259 | |||
1260 | z3 += z5; |
||
1261 | z4 += z5; |
||
1262 | |||
1263 | tmp0 += z1 + z3; |
||
1264 | tmp1 += z2 + z4; |
||
1265 | tmp2 = z2 + z3; |
||
1266 | tmp3 += z1 + z4; |
||
1267 | } else { |
||
1268 | /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ |
||
1269 | z5 = MULTIPLY(d5 + d7, FIX(1.175875602)); |
||
1270 | |||
1271 | tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); |
||
1272 | tmp1 = MULTIPLY(d5, - FIX2(0.509795578)); |
||
1273 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
1274 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
1275 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
1276 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
1277 | |||
1278 | z3 += z5; |
||
1279 | z4 += z5; |
||
1280 | |||
1281 | tmp0 += z3; |
||
1282 | tmp1 += z4; |
||
1283 | tmp2 = z2 + z3; |
||
1284 | tmp3 = z1 + z4; |
||
1285 | } |
||
1286 | } |
||
1287 | } else { |
||
1288 | if (d3) { |
||
1289 | if (d1) { |
||
1290 | /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ |
||
1291 | z1 = d7 + d1; |
||
1292 | z3 = d7 + d3; |
||
1293 | z5 = MULTIPLY(z3 + d1, FIX(1.175875602)); |
||
1294 | |||
1295 | tmp0 = MULTIPLY(d7, FIX(0.298631336)); |
||
1296 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
1297 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
1298 | z1 = MULTIPLY(z1, - FIX(0.899976223)); |
||
1299 | z2 = MULTIPLY(d3, - FIX(2.562915447)); |
||
1300 | z3 = MULTIPLY(z3, - FIX(1.961570560)); |
||
1301 | z4 = MULTIPLY(d1, - FIX(0.390180644)); |
||
1302 | |||
1303 | z3 += z5; |
||
1304 | z4 += z5; |
||
1305 | |||
1306 | tmp0 += z1 + z3; |
||
1307 | tmp1 = z2 + z4; |
||
1308 | tmp2 += z2 + z3; |
||
1309 | tmp3 += z1 + z4; |
||
1310 | } else { |
||
1311 | /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ |
||
1312 | z3 = d7 + d3; |
||
1313 | z5 = MULTIPLY(z3, FIX(1.175875602)); |
||
1314 | |||
1315 | tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); |
||
1316 | z1 = MULTIPLY(d7, - FIX(0.899976223)); |
||
1317 | tmp2 = MULTIPLY(d3, FIX(0.509795579)); |
||
1318 | z2 = MULTIPLY(d3, - FIX(2.562915447)); |
||
1319 | z3 = MULTIPLY(z3, - FIX2(0.785694958)); |
||
1320 | |||
1321 | tmp0 += z3; |
||
1322 | tmp1 = z2 + z5; |
||
1323 | tmp2 += z3; |
||
1324 | tmp3 = z1 + z5; |
||
1325 | } |
||
1326 | } else { |
||
1327 | if (d1) { |
||
1328 | /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ |
||
1329 | z1 = d7 + d1; |
||
1330 | z5 = MULTIPLY(z1, FIX(1.175875602)); |
||
1331 | |||
1332 | tmp0 = MULTIPLY(d7, - FIX2(1.662939224)); |
||
1333 | tmp3 = MULTIPLY(d1, FIX2(1.111140466)); |
||
1334 | z1 = MULTIPLY(z1, FIX2(0.275899379)); |
||
1335 | z3 = MULTIPLY(d7, - FIX(1.961570560)); |
||
1336 | z4 = MULTIPLY(d1, - FIX(0.390180644)); |
||
1337 | |||
1338 | tmp0 += z1; |
||
1339 | tmp1 = z4 + z5; |
||
1340 | tmp2 = z3 + z5; |
||
1341 | tmp3 += z1; |
||
1342 | } else { |
||
1343 | /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ |
||
1344 | tmp0 = MULTIPLY(d7, - FIX2(1.387039845)); |
||
1345 | tmp1 = MULTIPLY(d7, FIX(1.175875602)); |
||
1346 | tmp2 = MULTIPLY(d7, - FIX2(0.785694958)); |
||
1347 | tmp3 = MULTIPLY(d7, FIX2(0.275899379)); |
||
1348 | } |
||
1349 | } |
||
1350 | } |
||
1351 | } else { |
||
1352 | if (d5) { |
||
1353 | if (d3) { |
||
1354 | if (d1) { |
||
1355 | /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ |
||
1356 | z2 = d5 + d3; |
||
1357 | z4 = d5 + d1; |
||
1358 | z5 = MULTIPLY(d3 + z4, FIX(1.175875602)); |
||
1359 | |||
1360 | tmp1 = MULTIPLY(d5, FIX(2.053119869)); |
||
1361 | tmp2 = MULTIPLY(d3, FIX(3.072711026)); |
||
1362 | tmp3 = MULTIPLY(d1, FIX(1.501321110)); |
||
1363 | z1 = MULTIPLY(d1, - FIX(0.899976223)); |
||
1364 | z2 = MULTIPLY(z2, - FIX(2.562915447)); |
||
1365 | z3 = MULTIPLY(d3, - FIX(1.961570560)); |
||
1366 | z4 = MULTIPLY(z4, - FIX(0.390180644)); |
||
1367 | |||
1368 | z3 += z5; |
||
1369 | z4 += z5; |
||
1370 | |||
1371 | tmp0 = z1 + z3; |
||
1372 | tmp1 += z2 + z4; |
||
1373 | tmp2 += z2 + z3; |
||
1374 | tmp3 += z1 + z4; |
||
1375 | } else { |
||
1376 | /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ |
||
1377 | z2 = d5 + d3; |
||
1378 | z5 = MULTIPLY(z2, FIX(1.175875602)); |
||
1379 | |||
1380 | tmp1 = MULTIPLY(d5, FIX2(1.662939225)); |
||
1381 | tmp2 = MULTIPLY(d3, FIX2(1.111140466)); |
||
1382 | z2 = MULTIPLY(z2, - FIX2(1.387039845)); |
||
1383 | z3 = MULTIPLY(d3, - FIX(1.961570560)); |
||
1384 | z4 = MULTIPLY(d5, - FIX(0.390180644)); |
||
1385 | |||
1386 | tmp0 = z3 + z5; |
||
1387 | tmp1 += z2; |
||
1388 | tmp2 += z2; |
||
1389 | tmp3 = z4 + z5; |
||
1390 | } |
||
1391 | } else { |
||
1392 | if (d1) { |
||
1393 | /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ |
||
1394 | z4 = d5 + d1; |
||
1395 | z5 = MULTIPLY(z4, FIX(1.175875602)); |
||
1396 | |||
1397 | tmp1 = MULTIPLY(d5, - FIX2(0.509795578)); |
||
1398 | tmp3 = MULTIPLY(d1, FIX2(0.601344887)); |
||
1399 | z1 = MULTIPLY(d1, - FIX(0.899976223)); |
||
1400 | z2 = MULTIPLY(d5, - FIX(2.562915447)); |
||
1401 | z4 = MULTIPLY(z4, FIX2(0.785694958)); |
||
1402 | |||
1403 | tmp0 = z1 + z5; |
||
1404 | tmp1 += z4; |
||
1405 | tmp2 = z2 + z5; |
||
1406 | tmp3 += z4; |
||
1407 | } else { |
||
1408 | /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ |
||
1409 | tmp0 = MULTIPLY(d5, FIX(1.175875602)); |
||
1410 | tmp1 = MULTIPLY(d5, FIX2(0.275899380)); |
||
1411 | tmp2 = MULTIPLY(d5, - FIX2(1.387039845)); |
||
1412 | tmp3 = MULTIPLY(d5, FIX2(0.785694958)); |
||
1413 | } |
||
1414 | } |
||
1415 | } else { |
||
1416 | if (d3) { |
||
1417 | if (d1) { |
||
1418 | /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ |
||
1419 | z5 = d3 + d1; |
||
1420 | |||
1421 | tmp2 = MULTIPLY(d3, - FIX(1.451774981)); |
||
1422 | tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1)); |
||
1423 | z1 = MULTIPLY(d1, FIX(1.061594337)); |
||
1424 | z2 = MULTIPLY(d3, - FIX(2.172734803)); |
||
1425 | z4 = MULTIPLY(z5, FIX(0.785694958)); |
||
1426 | z5 = MULTIPLY(z5, FIX(1.175875602)); |
||
1427 | |||
1428 | tmp0 = z1 - z4; |
||
1429 | tmp1 = z2 + z4; |
||
1430 | tmp2 += z5; |
||
1431 | tmp3 += z5; |
||
1432 | } else { |
||
1433 | /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ |
||
1434 | tmp0 = MULTIPLY(d3, - FIX2(0.785694958)); |
||
1435 | tmp1 = MULTIPLY(d3, - FIX2(1.387039845)); |
||
1436 | tmp2 = MULTIPLY(d3, - FIX2(0.275899379)); |
||
1437 | tmp3 = MULTIPLY(d3, FIX(1.175875602)); |
||
1438 | } |
||
1439 | } else { |
||
1440 | if (d1) { |
||
1441 | /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ |
||
1442 | tmp0 = MULTIPLY(d1, FIX2(0.275899379)); |
||
1443 | tmp1 = MULTIPLY(d1, FIX2(0.785694958)); |
||
1444 | tmp2 = MULTIPLY(d1, FIX(1.175875602)); |
||
1445 | tmp3 = MULTIPLY(d1, FIX2(1.387039845)); |
||
1446 | } else { |
||
1447 | /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ |
||
1448 | tmp0 = tmp1 = tmp2 = tmp3 = 0; |
||
1449 | } |
||
1450 | } |
||
1451 | } |
||
1452 | } |
||
1453 | |||
1454 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
||
1455 | |||
1456 | dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, |
||
1457 | CONST_BITS+PASS1_BITS+3); |
||
1458 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
||
1459 | CONST_BITS+PASS1_BITS+3); |
||
1460 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
||
1461 | CONST_BITS+PASS1_BITS+3); |
||
1462 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
||
1463 | CONST_BITS+PASS1_BITS+3); |
||
1464 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
||
1465 | CONST_BITS+PASS1_BITS+3); |
||
1466 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
||
1467 | CONST_BITS+PASS1_BITS+3); |
||
1468 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
||
1469 | CONST_BITS+PASS1_BITS+3); |
||
1470 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
||
1471 | CONST_BITS+PASS1_BITS+3); |
||
1472 | |||
1473 | dataptr++; /* advance pointer to next column */ |
||
1474 | } |
||
1475 | } |
||
1476 | |||
1477 | #else |
||
1478 | |||
1479 | |||
1480 | |||
1481 | /* |
||
1482 | *-------------------------------------------------------------- |
||
1483 | * |
||
1484 | * j_rev_dct -- |
||
1485 | * |
||
1486 | * The original inverse DCT function. |
||
1487 | * |
||
1488 | * Results: |
||
1489 | * None. |
||
1490 | * |
||
1491 | * Side effects: |
||
1492 | * None. |
||
1493 | * |
||
1494 | *-------------------------------------------------------------- |
||
1495 | */ |
||
1496 | void |
||
1497 | j_rev_dct (data) |
||
1498 | DCTBLOCK data; |
||
1499 | { |
||
1500 | INT32 tmp0, tmp1, tmp2, tmp3; |
||
1501 | INT32 tmp10, tmp11, tmp12, tmp13; |
||
1502 | INT32 z1, z2, z3, z4, z5; |
||
1503 | register DCTELEM *dataptr; |
||
1504 | int rowctr; |
||
1505 | SHIFT_TEMPS |
||
1506 | |||
1507 | /* Pass 1: process rows. */ |
||
1508 | /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
||
1509 | /* furthermore, we scale the results by 2**PASS1_BITS. */ |
||
1510 | |||
1511 | dataptr = data; |
||
1512 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||
1513 | /* Due to quantization, we will usually find that many of the input |
||
1514 | * coefficients are zero, especially the AC terms. We can exploit this |
||
1515 | * by short-circuiting the IDCT calculation for any row in which all |
||
1516 | * the AC terms are zero. In that case each output is equal to the |
||
1517 | * DC coefficient (with scale factor as needed). |
||
1518 | * With typical images and quantization tables, half or more of the |
||
1519 | * row DCT calculations can be simplified this way. |
||
1520 | */ |
||
1521 | |||
1522 | if ((dataptr[1] | dataptr[2] | dataptr[3] | dataptr[4] | |
||
1523 | dataptr[5] | dataptr[6] | dataptr[7]) == 0) { |
||
1524 | /* AC terms all zero */ |
||
1525 | DCTELEM dcval = (DCTELEM) (dataptr[0] << PASS1_BITS); |
||
1526 | |||
1527 | dataptr[0] = dcval; |
||
1528 | dataptr[1] = dcval; |
||
1529 | dataptr[2] = dcval; |
||
1530 | dataptr[3] = dcval; |
||
1531 | dataptr[4] = dcval; |
||
1532 | dataptr[5] = dcval; |
||
1533 | dataptr[6] = dcval; |
||
1534 | dataptr[7] = dcval; |
||
1535 | |||
1536 | dataptr += DCTSIZE; /* advance pointer to next row */ |
||
1537 | continue; |
||
1538 | } |
||
1539 | |||
1540 | /* Even part: reverse the even part of the forward DCT. */ |
||
1541 | /* The rotator is sqrt(2)*c(-6). */ |
||
1542 | |||
1543 | z2 = (INT32) dataptr[2]; |
||
1544 | z3 = (INT32) dataptr[6]; |
||
1545 | |||
1546 | z1 = MULTIPLY(z2 + z3, FIX(0.541196100)); |
||
1547 | tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065)); |
||
1548 | tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865)); |
||
1549 | |||
1550 | tmp0 = ((INT32) dataptr[0] + (INT32) dataptr[4]) << CONST_BITS; |
||
1551 | tmp1 = ((INT32) dataptr[0] - (INT32) dataptr[4]) << CONST_BITS; |
||
1552 | |||
1553 | tmp10 = tmp0 + tmp3; |
||
1554 | tmp13 = tmp0 - tmp3; |
||
1555 | tmp11 = tmp1 + tmp2; |
||
1556 | tmp12 = tmp1 - tmp2; |
||
1557 | |||
1558 | /* Odd part per figure 8; the matrix is unitary and hence its |
||
1559 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
||
1560 | */ |
||
1561 | |||
1562 | tmp0 = (INT32) dataptr[7]; |
||
1563 | tmp1 = (INT32) dataptr[5]; |
||
1564 | tmp2 = (INT32) dataptr[3]; |
||
1565 | tmp3 = (INT32) dataptr[1]; |
||
1566 | |||
1567 | z1 = tmp0 + tmp3; |
||
1568 | z2 = tmp1 + tmp2; |
||
1569 | z3 = tmp0 + tmp2; |
||
1570 | z4 = tmp1 + tmp3; |
||
1571 | z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */ |
||
1572 | |||
1573 | tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */ |
||
1574 | tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */ |
||
1575 | tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */ |
||
1576 | tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */ |
||
1577 | z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */ |
||
1578 | z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */ |
||
1579 | z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */ |
||
1580 | z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */ |
||
1581 | |||
1582 | z3 += z5; |
||
1583 | z4 += z5; |
||
1584 | |||
1585 | tmp0 += z1 + z3; |
||
1586 | tmp1 += z2 + z4; |
||
1587 | tmp2 += z2 + z3; |
||
1588 | tmp3 += z1 + z4; |
||
1589 | |||
1590 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
||
1591 | |||
1592 | dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
||
1593 | dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
||
1594 | dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
||
1595 | dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
||
1596 | dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
||
1597 | dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
||
1598 | dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
||
1599 | dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
||
1600 | |||
1601 | dataptr += DCTSIZE; /* advance pointer to next row */ |
||
1602 | } |
||
1603 | |||
1604 | /* Pass 2: process columns. */ |
||
1605 | /* Note that we must descale the results by a factor of 8 == 2**3, */ |
||
1606 | /* and also undo the PASS1_BITS scaling. */ |
||
1607 | |||
1608 | dataptr = data; |
||
1609 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||
1610 | /* Columns of zeroes can be exploited in the same way as we did with rows. |
||
1611 | * However, the row calculation has created many nonzero AC terms, so the |
||
1612 | * simplification applies less often (typically 5% to 10% of the time). |
||
1613 | * On machines with very fast multiplication, it's possible that the |
||
1614 | * test takes more time than it's worth. In that case this section |
||
1615 | * may be commented out. |
||
1616 | */ |
||
1617 | |||
1618 | #ifndef NO_ZERO_COLUMN_TEST |
||
1619 | if ((dataptr[DCTSIZE*1] | dataptr[DCTSIZE*2] | dataptr[DCTSIZE*3] | |
||
1620 | dataptr[DCTSIZE*4] | dataptr[DCTSIZE*5] | dataptr[DCTSIZE*6] | |
||
1621 | dataptr[DCTSIZE*7]) == 0) { |
||
1622 | /* AC terms all zero */ |
||
1623 | DCTELEM dcval = (DCTELEM) DESCALE((INT32) dataptr[0], PASS1_BITS+3); |
||
1624 | |||
1625 | dataptr[DCTSIZE*0] = dcval; |
||
1626 | dataptr[DCTSIZE*1] = dcval; |
||
1627 | dataptr[DCTSIZE*2] = dcval; |
||
1628 | dataptr[DCTSIZE*3] = dcval; |
||
1629 | dataptr[DCTSIZE*4] = dcval; |
||
1630 | dataptr[DCTSIZE*5] = dcval; |
||
1631 | dataptr[DCTSIZE*6] = dcval; |
||
1632 | dataptr[DCTSIZE*7] = dcval; |
||
1633 | |||
1634 | dataptr++; /* advance pointer to next column */ |
||
1635 | continue; |
||
1636 | } |
||
1637 | #endif |
||
1638 | |||
1639 | /* Even part: reverse the even part of the forward DCT. */ |
||
1640 | /* The rotator is sqrt(2)*c(-6). */ |
||
1641 | |||
1642 | z2 = (INT32) dataptr[DCTSIZE*2]; |
||
1643 | z3 = (INT32) dataptr[DCTSIZE*6]; |
||
1644 | |||
1645 | z1 = MULTIPLY(z2 + z3, FIX(0.541196100)); |
||
1646 | tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065)); |
||
1647 | tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865)); |
||
1648 | |||
1649 | tmp0 = ((INT32) dataptr[DCTSIZE*0] + (INT32) dataptr[DCTSIZE*4]) << CONST_BITS; |
||
1650 | tmp1 = ((INT32) dataptr[DCTSIZE*0] - (INT32) dataptr[DCTSIZE*4]) << CONST_BITS; |
||
1651 | |||
1652 | tmp10 = tmp0 + tmp3; |
||
1653 | tmp13 = tmp0 - tmp3; |
||
1654 | tmp11 = tmp1 + tmp2; |
||
1655 | tmp12 = tmp1 - tmp2; |
||
1656 | |||
1657 | /* Odd part per figure 8; the matrix is unitary and hence its |
||
1658 | * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
||
1659 | */ |
||
1660 | |||
1661 | tmp0 = (INT32) dataptr[DCTSIZE*7]; |
||
1662 | tmp1 = (INT32) dataptr[DCTSIZE*5]; |
||
1663 | tmp2 = (INT32) dataptr[DCTSIZE*3]; |
||
1664 | tmp3 = (INT32) dataptr[DCTSIZE*1]; |
||
1665 | |||
1666 | z1 = tmp0 + tmp3; |
||
1667 | z2 = tmp1 + tmp2; |
||
1668 | z3 = tmp0 + tmp2; |
||
1669 | z4 = tmp1 + tmp3; |
||
1670 | z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */ |
||
1671 | |||
1672 | tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */ |
||
1673 | tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */ |
||
1674 | tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */ |
||
1675 | tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */ |
||
1676 | z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */ |
||
1677 | z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */ |
||
1678 | z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */ |
||
1679 | z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */ |
||
1680 | |||
1681 | z3 += z5; |
||
1682 | z4 += z5; |
||
1683 | |||
1684 | tmp0 += z1 + z3; |
||
1685 | tmp1 += z2 + z4; |
||
1686 | tmp2 += z2 + z3; |
||
1687 | tmp3 += z1 + z4; |
||
1688 | |||
1689 | /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
||
1690 | |||
1691 | dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, |
||
1692 | CONST_BITS+PASS1_BITS+3); |
||
1693 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
||
1694 | CONST_BITS+PASS1_BITS+3); |
||
1695 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
||
1696 | CONST_BITS+PASS1_BITS+3); |
||
1697 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
||
1698 | CONST_BITS+PASS1_BITS+3); |
||
1699 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
||
1700 | CONST_BITS+PASS1_BITS+3); |
||
1701 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
||
1702 | CONST_BITS+PASS1_BITS+3); |
||
1703 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
||
1704 | CONST_BITS+PASS1_BITS+3); |
||
1705 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
||
1706 | CONST_BITS+PASS1_BITS+3); |
||
1707 | |||
1708 | dataptr++; /* advance pointer to next column */ |
||
1709 | } |
||
1710 | } |
||
1711 | |||
1712 | |||
1713 | #endif /* ORIG_DCT */ |
||
1714 | #endif /* FIVE_DCT */ |
||
1715 |