1 | /*
|
---|
2 | * Simple IDCT
|
---|
3 | *
|
---|
4 | * Copyright (c) 2001 Michael Niedermayer <[email protected]>
|
---|
5 | *
|
---|
6 | * This library is free software; you can redistribute it and/or
|
---|
7 | * modify it under the terms of the GNU Lesser General Public
|
---|
8 | * License as published by the Free Software Foundation; either
|
---|
9 | * version 2 of the License, or (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This library is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
14 | * Lesser General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU Lesser General Public
|
---|
17 | * License along with this library; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
19 | */
|
---|
20 |
|
---|
21 | /**
|
---|
22 | * @file simple_idct.c
|
---|
23 | * simpleidct in C.
|
---|
24 | */
|
---|
25 |
|
---|
/*
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
  written by Aaron Holtzman <[email protected]>)
*/
|
---|
30 | #include "avcodec.h"
|
---|
31 | #include "dsputil.h"
|
---|
32 | #include "simple_idct.h"
|
---|
33 |
|
---|
#if 0
/* Disabled alternative coefficient set, scaled by 2048 (see per-line formulas). */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* Active coefficient set: Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to int. */
#define W1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
// NOTE(review): the formula gives 16384 for i = 4; the value below is one less.
// Presumably deliberate -- confirm before "correcting" it.
#define W4 16383 //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867 //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520 //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
// Right shift applied to each output of the row pass.
#define ROW_SHIFT 11
// Right shift applied to each output of the column pass.
#define COL_SHIFT 20 // 6
#endif
|
---|
55 |
|
---|
/*
 * MUL16(rt, ra, rb): rt = ra * rb
 * MAC16(rt, ra, rb): rt += ra * rb
 *
 * Both variants expand WITHOUT a trailing semicolon, so a use such as
 * "MAC16(x, a, b);" is exactly one statement on every architecture and is
 * safe inside an unbraced if/else.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
|
---|
75 |
|
---|
76 | static inline void idctRowCondDC (DCTELEM * row)
|
---|
77 | {
|
---|
78 | int a0, a1, a2, a3, b0, b1, b2, b3;
|
---|
79 | #ifdef FAST_64BIT
|
---|
80 | uint64_t temp;
|
---|
81 | #else
|
---|
82 | uint32_t temp;
|
---|
83 | #endif
|
---|
84 |
|
---|
85 | #ifdef FAST_64BIT
|
---|
86 | #ifdef WORDS_BIGENDIAN
|
---|
87 | #define ROW0_MASK 0xffff000000000000LL
|
---|
88 | #else
|
---|
89 | #define ROW0_MASK 0xffffLL
|
---|
90 | #endif
|
---|
91 | if(sizeof(DCTELEM)==2){
|
---|
92 | if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
|
---|
93 | ((uint64_t *)row)[1]) == 0) {
|
---|
94 | temp = (row[0] << 3) & 0xffff;
|
---|
95 | temp += temp << 16;
|
---|
96 | temp += temp << 32;
|
---|
97 | ((uint64_t *)row)[0] = temp;
|
---|
98 | ((uint64_t *)row)[1] = temp;
|
---|
99 | return;
|
---|
100 | }
|
---|
101 | }else{
|
---|
102 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
|
---|
103 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
|
---|
104 | return;
|
---|
105 | }
|
---|
106 | }
|
---|
107 | #else
|
---|
108 | if(sizeof(DCTELEM)==2){
|
---|
109 | if (!(((uint32_t*)row)[1] |
|
---|
110 | ((uint32_t*)row)[2] |
|
---|
111 | ((uint32_t*)row)[3] |
|
---|
112 | row[1])) {
|
---|
113 | temp = (row[0] << 3) & 0xffff;
|
---|
114 | temp += temp << 16;
|
---|
115 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
|
---|
116 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
|
---|
117 | return;
|
---|
118 | }
|
---|
119 | }else{
|
---|
120 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
|
---|
121 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
|
---|
122 | return;
|
---|
123 | }
|
---|
124 | }
|
---|
125 | #endif
|
---|
126 |
|
---|
127 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
|
---|
128 | a1 = a0;
|
---|
129 | a2 = a0;
|
---|
130 | a3 = a0;
|
---|
131 |
|
---|
132 | /* no need to optimize : gcc does it */
|
---|
133 | a0 += W2 * row[2];
|
---|
134 | a1 += W6 * row[2];
|
---|
135 | a2 -= W6 * row[2];
|
---|
136 | a3 -= W2 * row[2];
|
---|
137 |
|
---|
138 | MUL16(b0, W1, row[1]);
|
---|
139 | MAC16(b0, W3, row[3]);
|
---|
140 | MUL16(b1, W3, row[1]);
|
---|
141 | MAC16(b1, -W7, row[3]);
|
---|
142 | MUL16(b2, W5, row[1]);
|
---|
143 | MAC16(b2, -W1, row[3]);
|
---|
144 | MUL16(b3, W7, row[1]);
|
---|
145 | MAC16(b3, -W5, row[3]);
|
---|
146 |
|
---|
147 | #ifdef FAST_64BIT
|
---|
148 | temp = ((uint64_t*)row)[1];
|
---|
149 | #else
|
---|
150 | temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
|
---|
151 | #endif
|
---|
152 | if (temp != 0) {
|
---|
153 | a0 += W4*row[4] + W6*row[6];
|
---|
154 | a1 += - W4*row[4] - W2*row[6];
|
---|
155 | a2 += - W4*row[4] + W2*row[6];
|
---|
156 | a3 += W4*row[4] - W6*row[6];
|
---|
157 |
|
---|
158 | MAC16(b0, W5, row[5]);
|
---|
159 | MAC16(b0, W7, row[7]);
|
---|
160 |
|
---|
161 | MAC16(b1, -W1, row[5]);
|
---|
162 | MAC16(b1, -W5, row[7]);
|
---|
163 |
|
---|
164 | MAC16(b2, W7, row[5]);
|
---|
165 | MAC16(b2, W3, row[7]);
|
---|
166 |
|
---|
167 | MAC16(b3, W3, row[5]);
|
---|
168 | MAC16(b3, -W1, row[7]);
|
---|
169 | }
|
---|
170 |
|
---|
171 | row[0] = (a0 + b0) >> ROW_SHIFT;
|
---|
172 | row[7] = (a0 - b0) >> ROW_SHIFT;
|
---|
173 | row[1] = (a1 + b1) >> ROW_SHIFT;
|
---|
174 | row[6] = (a1 - b1) >> ROW_SHIFT;
|
---|
175 | row[2] = (a2 + b2) >> ROW_SHIFT;
|
---|
176 | row[5] = (a2 - b2) >> ROW_SHIFT;
|
---|
177 | row[3] = (a3 + b3) >> ROW_SHIFT;
|
---|
178 | row[4] = (a3 - b3) >> ROW_SHIFT;
|
---|
179 | }
|
---|
180 |
|
---|
181 | static inline void idctSparseColPut (uint8_t *dest, int line_size,
|
---|
182 | DCTELEM * col)
|
---|
183 | {
|
---|
184 | int a0, a1, a2, a3, b0, b1, b2, b3;
|
---|
185 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
186 |
|
---|
187 | /* XXX: I did that only to give same values as previous code */
|
---|
188 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
|
---|
189 | a1 = a0;
|
---|
190 | a2 = a0;
|
---|
191 | a3 = a0;
|
---|
192 |
|
---|
193 | a0 += + W2*col[8*2];
|
---|
194 | a1 += + W6*col[8*2];
|
---|
195 | a2 += - W6*col[8*2];
|
---|
196 | a3 += - W2*col[8*2];
|
---|
197 |
|
---|
198 | MUL16(b0, W1, col[8*1]);
|
---|
199 | MUL16(b1, W3, col[8*1]);
|
---|
200 | MUL16(b2, W5, col[8*1]);
|
---|
201 | MUL16(b3, W7, col[8*1]);
|
---|
202 |
|
---|
203 | MAC16(b0, + W3, col[8*3]);
|
---|
204 | MAC16(b1, - W7, col[8*3]);
|
---|
205 | MAC16(b2, - W1, col[8*3]);
|
---|
206 | MAC16(b3, - W5, col[8*3]);
|
---|
207 |
|
---|
208 | if(col[8*4]){
|
---|
209 | a0 += + W4*col[8*4];
|
---|
210 | a1 += - W4*col[8*4];
|
---|
211 | a2 += - W4*col[8*4];
|
---|
212 | a3 += + W4*col[8*4];
|
---|
213 | }
|
---|
214 |
|
---|
215 | if (col[8*5]) {
|
---|
216 | MAC16(b0, + W5, col[8*5]);
|
---|
217 | MAC16(b1, - W1, col[8*5]);
|
---|
218 | MAC16(b2, + W7, col[8*5]);
|
---|
219 | MAC16(b3, + W3, col[8*5]);
|
---|
220 | }
|
---|
221 |
|
---|
222 | if(col[8*6]){
|
---|
223 | a0 += + W6*col[8*6];
|
---|
224 | a1 += - W2*col[8*6];
|
---|
225 | a2 += + W2*col[8*6];
|
---|
226 | a3 += - W6*col[8*6];
|
---|
227 | }
|
---|
228 |
|
---|
229 | if (col[8*7]) {
|
---|
230 | MAC16(b0, + W7, col[8*7]);
|
---|
231 | MAC16(b1, - W5, col[8*7]);
|
---|
232 | MAC16(b2, + W3, col[8*7]);
|
---|
233 | MAC16(b3, - W1, col[8*7]);
|
---|
234 | }
|
---|
235 |
|
---|
236 | dest[0] = cm[(a0 + b0) >> COL_SHIFT];
|
---|
237 | dest += line_size;
|
---|
238 | dest[0] = cm[(a1 + b1) >> COL_SHIFT];
|
---|
239 | dest += line_size;
|
---|
240 | dest[0] = cm[(a2 + b2) >> COL_SHIFT];
|
---|
241 | dest += line_size;
|
---|
242 | dest[0] = cm[(a3 + b3) >> COL_SHIFT];
|
---|
243 | dest += line_size;
|
---|
244 | dest[0] = cm[(a3 - b3) >> COL_SHIFT];
|
---|
245 | dest += line_size;
|
---|
246 | dest[0] = cm[(a2 - b2) >> COL_SHIFT];
|
---|
247 | dest += line_size;
|
---|
248 | dest[0] = cm[(a1 - b1) >> COL_SHIFT];
|
---|
249 | dest += line_size;
|
---|
250 | dest[0] = cm[(a0 - b0) >> COL_SHIFT];
|
---|
251 | }
|
---|
252 |
|
---|
253 | static inline void idctSparseColAdd (uint8_t *dest, int line_size,
|
---|
254 | DCTELEM * col)
|
---|
255 | {
|
---|
256 | int a0, a1, a2, a3, b0, b1, b2, b3;
|
---|
257 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
258 |
|
---|
259 | /* XXX: I did that only to give same values as previous code */
|
---|
260 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
|
---|
261 | a1 = a0;
|
---|
262 | a2 = a0;
|
---|
263 | a3 = a0;
|
---|
264 |
|
---|
265 | a0 += + W2*col[8*2];
|
---|
266 | a1 += + W6*col[8*2];
|
---|
267 | a2 += - W6*col[8*2];
|
---|
268 | a3 += - W2*col[8*2];
|
---|
269 |
|
---|
270 | MUL16(b0, W1, col[8*1]);
|
---|
271 | MUL16(b1, W3, col[8*1]);
|
---|
272 | MUL16(b2, W5, col[8*1]);
|
---|
273 | MUL16(b3, W7, col[8*1]);
|
---|
274 |
|
---|
275 | MAC16(b0, + W3, col[8*3]);
|
---|
276 | MAC16(b1, - W7, col[8*3]);
|
---|
277 | MAC16(b2, - W1, col[8*3]);
|
---|
278 | MAC16(b3, - W5, col[8*3]);
|
---|
279 |
|
---|
280 | if(col[8*4]){
|
---|
281 | a0 += + W4*col[8*4];
|
---|
282 | a1 += - W4*col[8*4];
|
---|
283 | a2 += - W4*col[8*4];
|
---|
284 | a3 += + W4*col[8*4];
|
---|
285 | }
|
---|
286 |
|
---|
287 | if (col[8*5]) {
|
---|
288 | MAC16(b0, + W5, col[8*5]);
|
---|
289 | MAC16(b1, - W1, col[8*5]);
|
---|
290 | MAC16(b2, + W7, col[8*5]);
|
---|
291 | MAC16(b3, + W3, col[8*5]);
|
---|
292 | }
|
---|
293 |
|
---|
294 | if(col[8*6]){
|
---|
295 | a0 += + W6*col[8*6];
|
---|
296 | a1 += - W2*col[8*6];
|
---|
297 | a2 += + W2*col[8*6];
|
---|
298 | a3 += - W6*col[8*6];
|
---|
299 | }
|
---|
300 |
|
---|
301 | if (col[8*7]) {
|
---|
302 | MAC16(b0, + W7, col[8*7]);
|
---|
303 | MAC16(b1, - W5, col[8*7]);
|
---|
304 | MAC16(b2, + W3, col[8*7]);
|
---|
305 | MAC16(b3, - W1, col[8*7]);
|
---|
306 | }
|
---|
307 |
|
---|
308 | dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
|
---|
309 | dest += line_size;
|
---|
310 | dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
|
---|
311 | dest += line_size;
|
---|
312 | dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
|
---|
313 | dest += line_size;
|
---|
314 | dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
|
---|
315 | dest += line_size;
|
---|
316 | dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
|
---|
317 | dest += line_size;
|
---|
318 | dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
|
---|
319 | dest += line_size;
|
---|
320 | dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
|
---|
321 | dest += line_size;
|
---|
322 | dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
|
---|
323 | }
|
---|
324 |
|
---|
325 | static inline void idctSparseCol (DCTELEM * col)
|
---|
326 | {
|
---|
327 | int a0, a1, a2, a3, b0, b1, b2, b3;
|
---|
328 |
|
---|
329 | /* XXX: I did that only to give same values as previous code */
|
---|
330 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
|
---|
331 | a1 = a0;
|
---|
332 | a2 = a0;
|
---|
333 | a3 = a0;
|
---|
334 |
|
---|
335 | a0 += + W2*col[8*2];
|
---|
336 | a1 += + W6*col[8*2];
|
---|
337 | a2 += - W6*col[8*2];
|
---|
338 | a3 += - W2*col[8*2];
|
---|
339 |
|
---|
340 | MUL16(b0, W1, col[8*1]);
|
---|
341 | MUL16(b1, W3, col[8*1]);
|
---|
342 | MUL16(b2, W5, col[8*1]);
|
---|
343 | MUL16(b3, W7, col[8*1]);
|
---|
344 |
|
---|
345 | MAC16(b0, + W3, col[8*3]);
|
---|
346 | MAC16(b1, - W7, col[8*3]);
|
---|
347 | MAC16(b2, - W1, col[8*3]);
|
---|
348 | MAC16(b3, - W5, col[8*3]);
|
---|
349 |
|
---|
350 | if(col[8*4]){
|
---|
351 | a0 += + W4*col[8*4];
|
---|
352 | a1 += - W4*col[8*4];
|
---|
353 | a2 += - W4*col[8*4];
|
---|
354 | a3 += + W4*col[8*4];
|
---|
355 | }
|
---|
356 |
|
---|
357 | if (col[8*5]) {
|
---|
358 | MAC16(b0, + W5, col[8*5]);
|
---|
359 | MAC16(b1, - W1, col[8*5]);
|
---|
360 | MAC16(b2, + W7, col[8*5]);
|
---|
361 | MAC16(b3, + W3, col[8*5]);
|
---|
362 | }
|
---|
363 |
|
---|
364 | if(col[8*6]){
|
---|
365 | a0 += + W6*col[8*6];
|
---|
366 | a1 += - W2*col[8*6];
|
---|
367 | a2 += + W2*col[8*6];
|
---|
368 | a3 += - W6*col[8*6];
|
---|
369 | }
|
---|
370 |
|
---|
371 | if (col[8*7]) {
|
---|
372 | MAC16(b0, + W7, col[8*7]);
|
---|
373 | MAC16(b1, - W5, col[8*7]);
|
---|
374 | MAC16(b2, + W3, col[8*7]);
|
---|
375 | MAC16(b3, - W1, col[8*7]);
|
---|
376 | }
|
---|
377 |
|
---|
378 | col[0 ] = ((a0 + b0) >> COL_SHIFT);
|
---|
379 | col[8 ] = ((a1 + b1) >> COL_SHIFT);
|
---|
380 | col[16] = ((a2 + b2) >> COL_SHIFT);
|
---|
381 | col[24] = ((a3 + b3) >> COL_SHIFT);
|
---|
382 | col[32] = ((a3 - b3) >> COL_SHIFT);
|
---|
383 | col[40] = ((a2 - b2) >> COL_SHIFT);
|
---|
384 | col[48] = ((a1 - b1) >> COL_SHIFT);
|
---|
385 | col[56] = ((a0 - b0) >> COL_SHIFT);
|
---|
386 | }
|
---|
387 |
|
---|
388 | void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
389 | {
|
---|
390 | int i;
|
---|
391 | for(i=0; i<8; i++)
|
---|
392 | idctRowCondDC(block + i*8);
|
---|
393 |
|
---|
394 | for(i=0; i<8; i++)
|
---|
395 | idctSparseColPut(dest + i, line_size, block + i);
|
---|
396 | }
|
---|
397 |
|
---|
398 | void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
399 | {
|
---|
400 | int i;
|
---|
401 | for(i=0; i<8; i++)
|
---|
402 | idctRowCondDC(block + i*8);
|
---|
403 |
|
---|
404 | for(i=0; i<8; i++)
|
---|
405 | idctSparseColAdd(dest + i, line_size, block + i);
|
---|
406 | }
|
---|
407 |
|
---|
408 | void simple_idct(DCTELEM *block)
|
---|
409 | {
|
---|
410 | int i;
|
---|
411 | for(i=0; i<8; i++)
|
---|
412 | idctRowCondDC(block + i*8);
|
---|
413 |
|
---|
414 | for(i=0; i<8; i++)
|
---|
415 | idctSparseCol(block + i);
|
---|
416 | }
|
---|
417 |
|
---|
/* 2x4x8 idct */

/* C_FIX(x) converts the real constant x to fixed point with CN_SHIFT
   fractional bits (adding 0.5 before the truncating cast to int).
   C1 and C2 are the two multipliers of the 4-point column transform;
   numerically C1 = cos(pi/8)/sqrt(2) and C2 = sin(pi/8)/sqrt(2). */
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
|
---|
428 |
|
---|
429 | static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
|
---|
430 | {
|
---|
431 | int c0, c1, c2, c3, a0, a1, a2, a3;
|
---|
432 | const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
433 |
|
---|
434 | a0 = col[8*0];
|
---|
435 | a1 = col[8*2];
|
---|
436 | a2 = col[8*4];
|
---|
437 | a3 = col[8*6];
|
---|
438 | c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
|
---|
439 | c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
|
---|
440 | c1 = a1 * C1 + a3 * C2;
|
---|
441 | c3 = a1 * C2 - a3 * C1;
|
---|
442 | dest[0] = cm[(c0 + c1) >> C_SHIFT];
|
---|
443 | dest += line_size;
|
---|
444 | dest[0] = cm[(c2 + c3) >> C_SHIFT];
|
---|
445 | dest += line_size;
|
---|
446 | dest[0] = cm[(c2 - c3) >> C_SHIFT];
|
---|
447 | dest += line_size;
|
---|
448 | dest[0] = cm[(c0 - c1) >> C_SHIFT];
|
---|
449 | }
|
---|
450 |
|
---|
/* In-place butterfly on elements k and 8 + k of the array that the
   caller's variable `ptr` points to:
       ptr[k]     <- ptr[k] + ptr[8 + k]
       ptr[8 + k] <- ptr[k] - ptr[8 + k]   (using the old values)
   Wrapped in do { } while (0) so that "BF(k);" is a single statement. */
#define BF(k) \
do {\
    int a0, a1;\
    a0 = ptr[(k)];\
    a1 = ptr[8 + (k)];\
    ptr[(k)] = a0 + a1;\
    ptr[8 + (k)] = a0 - a1;\
} while (0)
|
---|
459 |
|
---|
460 | /* only used by DV codec. The input must be interlaced. 128 is added
|
---|
461 | to the pixels before clamping to avoid systematic error
|
---|
462 | (1024*sqrt(2)) offset would be needed otherwise. */
|
---|
463 | /* XXX: I think a 1.0/sqrt(2) normalization should be needed to
|
---|
464 | compensate the extra butterfly stage - I don't have the full DV
|
---|
465 | specification */
|
---|
466 | void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
467 | {
|
---|
468 | int i;
|
---|
469 | DCTELEM *ptr;
|
---|
470 |
|
---|
471 | /* butterfly */
|
---|
472 | ptr = block;
|
---|
473 | for(i=0;i<4;i++) {
|
---|
474 | BF(0);
|
---|
475 | BF(1);
|
---|
476 | BF(2);
|
---|
477 | BF(3);
|
---|
478 | BF(4);
|
---|
479 | BF(5);
|
---|
480 | BF(6);
|
---|
481 | BF(7);
|
---|
482 | ptr += 2 * 8;
|
---|
483 | }
|
---|
484 |
|
---|
485 | /* IDCT8 on each line */
|
---|
486 | for(i=0; i<8; i++) {
|
---|
487 | idctRowCondDC(block + i*8);
|
---|
488 | }
|
---|
489 |
|
---|
490 | /* IDCT4 and store */
|
---|
491 | for(i=0;i<8;i++) {
|
---|
492 | idct4col(dest + i, 2 * line_size, block + i);
|
---|
493 | idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
|
---|
494 | }
|
---|
495 | }
|
---|
496 |
|
---|
/* 8x4 & 4x8 WMV2 IDCT */
/* Drop the 2-4-8 definitions above and redefine the column constants.
   Here C_FIX additionally multiplies by 1.414213562 (sqrt(2)), and a third
   constant C3 is introduced for the even part. */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* Right shift applied to the column results. */
#define C_SHIFT (4+1+12)
|
---|
509 | static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
|
---|
510 | {
|
---|
511 | int c0, c1, c2, c3, a0, a1, a2, a3;
|
---|
512 | const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
513 |
|
---|
514 | a0 = col[8*0];
|
---|
515 | a1 = col[8*1];
|
---|
516 | a2 = col[8*2];
|
---|
517 | a3 = col[8*3];
|
---|
518 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
|
---|
519 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
|
---|
520 | c1 = a1 * C1 + a3 * C2;
|
---|
521 | c3 = a1 * C2 - a3 * C1;
|
---|
522 | dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
|
---|
523 | dest += line_size;
|
---|
524 | dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
|
---|
525 | dest += line_size;
|
---|
526 | dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
|
---|
527 | dest += line_size;
|
---|
528 | dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
|
---|
529 | }
|
---|
530 |
|
---|
/* Constants for the 4-point row transform idct4row(): R_FIX(x) converts
   x * 1.414213562 (sqrt(2)) to fixed point with RN_SHIFT fractional bits
   (adding 0.5 before the truncating cast to int). */
#define RN_SHIFT 15
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* Right shift applied to the row results. */
#define R_SHIFT 11
|
---|
537 | static inline void idct4row(DCTELEM *row)
|
---|
538 | {
|
---|
539 | int c0, c1, c2, c3, a0, a1, a2, a3;
|
---|
540 | //const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
541 |
|
---|
542 | a0 = row[0];
|
---|
543 | a1 = row[1];
|
---|
544 | a2 = row[2];
|
---|
545 | a3 = row[3];
|
---|
546 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
|
---|
547 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
|
---|
548 | c1 = a1 * R1 + a3 * R2;
|
---|
549 | c3 = a1 * R2 - a3 * R1;
|
---|
550 | row[0]= (c0 + c1) >> R_SHIFT;
|
---|
551 | row[1]= (c2 + c3) >> R_SHIFT;
|
---|
552 | row[2]= (c2 - c3) >> R_SHIFT;
|
---|
553 | row[3]= (c0 - c1) >> R_SHIFT;
|
---|
554 | }
|
---|
555 |
|
---|
556 | void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
557 | {
|
---|
558 | int i;
|
---|
559 |
|
---|
560 | /* IDCT8 on each line */
|
---|
561 | for(i=0; i<4; i++) {
|
---|
562 | idctRowCondDC(block + i*8);
|
---|
563 | }
|
---|
564 |
|
---|
565 | /* IDCT4 and store */
|
---|
566 | for(i=0;i<8;i++) {
|
---|
567 | idct4col_add(dest + i, line_size, block + i);
|
---|
568 | }
|
---|
569 | }
|
---|
570 |
|
---|
571 | void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
572 | {
|
---|
573 | int i;
|
---|
574 |
|
---|
575 | /* IDCT4 on each line */
|
---|
576 | for(i=0; i<8; i++) {
|
---|
577 | idct4row(block + i*8);
|
---|
578 | }
|
---|
579 |
|
---|
580 | /* IDCT8 and store */
|
---|
581 | for(i=0; i<4; i++){
|
---|
582 | idctSparseColAdd(dest + i, line_size, block + i);
|
---|
583 | }
|
---|
584 | }
|
---|
585 |
|
---|