simple_idct.c@ 9441

Last change on this file since 9441 was 5776, checked in by vboxsync, 17 years ago
ffmpeg: exported to OSE
File size: 15.9 KB

Line
1	/*
2	* Simple IDCT
3	*
4	* Copyright (c) 2001 Michael Niedermayer <[email protected]>
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public
17	* License along with this library; if not, write to the Free Software
18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19	*/
20
21	/**
22	* @file simple_idct.c
23	* simpleidct in C.
24	*/
25
26	/*
27	based upon some outcommented c code from mpeg2dec (idct_mmx.c
28	written by Aaron Holtzman <[email protected]>)
29	*/
30	#include "avcodec.h"
31	#include "dsputil.h"
32	#include "simple_idct.h"
33
/*
 * Fixed-point IDCT coefficients.
 *
 * The disabled (#if 0) table is an 11-bit scaled variant kept for reference;
 * the active table is scaled by (1 << 14).  ROW_SHIFT and COL_SHIFT are the
 * right shifts applied after the row pass and the column pass respectively.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer. */
#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/* NOTE(review): the formula gives 16384 for i = 4; the table uses 16383,
 * presumably on purpose -- do not "correct" it without checking. */
#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif
55
/*
 * 16x16 -> 32 bit multiply helpers.
 *
 *   MUL16(rt, ra, rb)  stores ra * rb into rt
 *   MAC16(rt, ra, rb)  adds   ra * rb to   rt
 *
 * Both expand to a single expression/statement WITHOUT a trailing semicolon,
 * so the call site supplies the ';' and the macros are safe inside unbraced
 * if/else bodies.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
75
76	static inline void idctRowCondDC (DCTELEM * row)
77	{
78	int a0, a1, a2, a3, b0, b1, b2, b3;
79	#ifdef FAST_64BIT
80	uint64_t temp;
81	#else
82	uint32_t temp;
83	#endif
84
85	#ifdef FAST_64BIT
86	#ifdef WORDS_BIGENDIAN
87	#define ROW0_MASK 0xffff000000000000LL
88	#else
89	#define ROW0_MASK 0xffffLL
90	#endif
91	if(sizeof(DCTELEM)==2){
92	if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) \|
93	((uint64_t *)row)[1]) == 0) {
94	temp = (row[0] << 3) & 0xffff;
95	temp += temp << 16;
96	temp += temp << 32;
97	((uint64_t *)row)[0] = temp;
98	((uint64_t *)row)[1] = temp;
99	return;
100	}
101	}else{
102	if (!(row[1]\|row[2]\|row[3]\|row[4]\|row[5]\|row[6]\|row[7])) {
103	row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
104	return;
105	}
106	}
107	#else
108	if(sizeof(DCTELEM)==2){
109	if (!(((uint32_t*)row)[1] \|
110	((uint32_t*)row)[2] \|
111	((uint32_t*)row)[3] \|
112	row[1])) {
113	temp = (row[0] << 3) & 0xffff;
114	temp += temp << 16;
115	((uint32_t)row)[0]=((uint32_t)row)[1] =
116	((uint32_t)row)[2]=((uint32_t)row)[3] = temp;
117	return;
118	}
119	}else{
120	if (!(row[1]\|row[2]\|row[3]\|row[4]\|row[5]\|row[6]\|row[7])) {
121	row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
122	return;
123	}
124	}
125	#endif
126
127	a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
128	a1 = a0;
129	a2 = a0;
130	a3 = a0;
131
132	/* no need to optimize : gcc does it */
133	a0 += W2 * row[2];
134	a1 += W6 * row[2];
135	a2 -= W6 * row[2];
136	a3 -= W2 * row[2];
137
138	MUL16(b0, W1, row[1]);
139	MAC16(b0, W3, row[3]);
140	MUL16(b1, W3, row[1]);
141	MAC16(b1, -W7, row[3]);
142	MUL16(b2, W5, row[1]);
143	MAC16(b2, -W1, row[3]);
144	MUL16(b3, W7, row[1]);
145	MAC16(b3, -W5, row[3]);
146
147	#ifdef FAST_64BIT
148	temp = ((uint64_t*)row)[1];
149	#else
150	temp = ((uint32_t)row)[2] \| ((uint32_t)row)[3];
151	#endif
152	if (temp != 0) {
153	a0 += W4row[4] + W6row[6];
154	a1 += - W4row[4] - W2row[6];
155	a2 += - W4row[4] + W2row[6];
156	a3 += W4row[4] - W6row[6];
157
158	MAC16(b0, W5, row[5]);
159	MAC16(b0, W7, row[7]);
160
161	MAC16(b1, -W1, row[5]);
162	MAC16(b1, -W5, row[7]);
163
164	MAC16(b2, W7, row[5]);
165	MAC16(b2, W3, row[7]);
166
167	MAC16(b3, W3, row[5]);
168	MAC16(b3, -W1, row[7]);
169	}
170
171	row[0] = (a0 + b0) >> ROW_SHIFT;
172	row[7] = (a0 - b0) >> ROW_SHIFT;
173	row[1] = (a1 + b1) >> ROW_SHIFT;
174	row[6] = (a1 - b1) >> ROW_SHIFT;
175	row[2] = (a2 + b2) >> ROW_SHIFT;
176	row[5] = (a2 - b2) >> ROW_SHIFT;
177	row[3] = (a3 + b3) >> ROW_SHIFT;
178	row[4] = (a3 - b3) >> ROW_SHIFT;
179	}
180
181	static inline void idctSparseColPut (uint8_t *dest, int line_size,
182	DCTELEM * col)
183	{
184	int a0, a1, a2, a3, b0, b1, b2, b3;
185	uint8_t *cm = cropTbl + MAX_NEG_CROP;
186
187	/* XXX: I did that only to give same values as previous code */
188	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
189	a1 = a0;
190	a2 = a0;
191	a3 = a0;
192
193	a0 += + W2col[82];
194	a1 += + W6col[82];
195	a2 += - W6col[82];
196	a3 += - W2col[82];
197
198	MUL16(b0, W1, col[8*1]);
199	MUL16(b1, W3, col[8*1]);
200	MUL16(b2, W5, col[8*1]);
201	MUL16(b3, W7, col[8*1]);
202
203	MAC16(b0, + W3, col[8*3]);
204	MAC16(b1, - W7, col[8*3]);
205	MAC16(b2, - W1, col[8*3]);
206	MAC16(b3, - W5, col[8*3]);
207
208	if(col[8*4]){
209	a0 += + W4col[84];
210	a1 += - W4col[84];
211	a2 += - W4col[84];
212	a3 += + W4col[84];
213	}
214
215	if (col[8*5]) {
216	MAC16(b0, + W5, col[8*5]);
217	MAC16(b1, - W1, col[8*5]);
218	MAC16(b2, + W7, col[8*5]);
219	MAC16(b3, + W3, col[8*5]);
220	}
221
222	if(col[8*6]){
223	a0 += + W6col[86];
224	a1 += - W2col[86];
225	a2 += + W2col[86];
226	a3 += - W6col[86];
227	}
228
229	if (col[8*7]) {
230	MAC16(b0, + W7, col[8*7]);
231	MAC16(b1, - W5, col[8*7]);
232	MAC16(b2, + W3, col[8*7]);
233	MAC16(b3, - W1, col[8*7]);
234	}
235
236	dest[0] = cm[(a0 + b0) >> COL_SHIFT];
237	dest += line_size;
238	dest[0] = cm[(a1 + b1) >> COL_SHIFT];
239	dest += line_size;
240	dest[0] = cm[(a2 + b2) >> COL_SHIFT];
241	dest += line_size;
242	dest[0] = cm[(a3 + b3) >> COL_SHIFT];
243	dest += line_size;
244	dest[0] = cm[(a3 - b3) >> COL_SHIFT];
245	dest += line_size;
246	dest[0] = cm[(a2 - b2) >> COL_SHIFT];
247	dest += line_size;
248	dest[0] = cm[(a1 - b1) >> COL_SHIFT];
249	dest += line_size;
250	dest[0] = cm[(a0 - b0) >> COL_SHIFT];
251	}
252
253	static inline void idctSparseColAdd (uint8_t *dest, int line_size,
254	DCTELEM * col)
255	{
256	int a0, a1, a2, a3, b0, b1, b2, b3;
257	uint8_t *cm = cropTbl + MAX_NEG_CROP;
258
259	/* XXX: I did that only to give same values as previous code */
260	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
261	a1 = a0;
262	a2 = a0;
263	a3 = a0;
264
265	a0 += + W2col[82];
266	a1 += + W6col[82];
267	a2 += - W6col[82];
268	a3 += - W2col[82];
269
270	MUL16(b0, W1, col[8*1]);
271	MUL16(b1, W3, col[8*1]);
272	MUL16(b2, W5, col[8*1]);
273	MUL16(b3, W7, col[8*1]);
274
275	MAC16(b0, + W3, col[8*3]);
276	MAC16(b1, - W7, col[8*3]);
277	MAC16(b2, - W1, col[8*3]);
278	MAC16(b3, - W5, col[8*3]);
279
280	if(col[8*4]){
281	a0 += + W4col[84];
282	a1 += - W4col[84];
283	a2 += - W4col[84];
284	a3 += + W4col[84];
285	}
286
287	if (col[8*5]) {
288	MAC16(b0, + W5, col[8*5]);
289	MAC16(b1, - W1, col[8*5]);
290	MAC16(b2, + W7, col[8*5]);
291	MAC16(b3, + W3, col[8*5]);
292	}
293
294	if(col[8*6]){
295	a0 += + W6col[86];
296	a1 += - W2col[86];
297	a2 += + W2col[86];
298	a3 += - W6col[86];
299	}
300
301	if (col[8*7]) {
302	MAC16(b0, + W7, col[8*7]);
303	MAC16(b1, - W5, col[8*7]);
304	MAC16(b2, + W3, col[8*7]);
305	MAC16(b3, - W1, col[8*7]);
306	}
307
308	dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
309	dest += line_size;
310	dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
311	dest += line_size;
312	dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
313	dest += line_size;
314	dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
315	dest += line_size;
316	dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
317	dest += line_size;
318	dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
319	dest += line_size;
320	dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
321	dest += line_size;
322	dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
323	}
324
325	static inline void idctSparseCol (DCTELEM * col)
326	{
327	int a0, a1, a2, a3, b0, b1, b2, b3;
328
329	/* XXX: I did that only to give same values as previous code */
330	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
331	a1 = a0;
332	a2 = a0;
333	a3 = a0;
334
335	a0 += + W2col[82];
336	a1 += + W6col[82];
337	a2 += - W6col[82];
338	a3 += - W2col[82];
339
340	MUL16(b0, W1, col[8*1]);
341	MUL16(b1, W3, col[8*1]);
342	MUL16(b2, W5, col[8*1]);
343	MUL16(b3, W7, col[8*1]);
344
345	MAC16(b0, + W3, col[8*3]);
346	MAC16(b1, - W7, col[8*3]);
347	MAC16(b2, - W1, col[8*3]);
348	MAC16(b3, - W5, col[8*3]);
349
350	if(col[8*4]){
351	a0 += + W4col[84];
352	a1 += - W4col[84];
353	a2 += - W4col[84];
354	a3 += + W4col[84];
355	}
356
357	if (col[8*5]) {
358	MAC16(b0, + W5, col[8*5]);
359	MAC16(b1, - W1, col[8*5]);
360	MAC16(b2, + W7, col[8*5]);
361	MAC16(b3, + W3, col[8*5]);
362	}
363
364	if(col[8*6]){
365	a0 += + W6col[86];
366	a1 += - W2col[86];
367	a2 += + W2col[86];
368	a3 += - W6col[86];
369	}
370
371	if (col[8*7]) {
372	MAC16(b0, + W7, col[8*7]);
373	MAC16(b1, - W5, col[8*7]);
374	MAC16(b2, + W3, col[8*7]);
375	MAC16(b3, - W1, col[8*7]);
376	}
377
378	col[0 ] = ((a0 + b0) >> COL_SHIFT);
379	col[8 ] = ((a1 + b1) >> COL_SHIFT);
380	col[16] = ((a2 + b2) >> COL_SHIFT);
381	col[24] = ((a3 + b3) >> COL_SHIFT);
382	col[32] = ((a3 - b3) >> COL_SHIFT);
383	col[40] = ((a2 - b2) >> COL_SHIFT);
384	col[48] = ((a1 - b1) >> COL_SHIFT);
385	col[56] = ((a0 - b0) >> COL_SHIFT);
386	}
387
388	void simple_idct_put(uint8_t dest, int line_size, DCTELEM block)
389	{
390	int i;
391	for(i=0; i<8; i++)
392	idctRowCondDC(block + i*8);
393
394	for(i=0; i<8; i++)
395	idctSparseColPut(dest + i, line_size, block + i);
396	}
397
398	void simple_idct_add(uint8_t dest, int line_size, DCTELEM block)
399	{
400	int i;
401	for(i=0; i<8; i++)
402	idctRowCondDC(block + i*8);
403
404	for(i=0; i<8; i++)
405	idctSparseColAdd(dest + i, line_size, block + i);
406	}
407
408	void simple_idct(DCTELEM *block)
409	{
410	int i;
411	for(i=0; i<8; i++)
412	idctRowCondDC(block + i*8);
413
414	for(i=0; i<8; i++)
415	idctSparseCol(block + i);
416	}
417
/* 2x4x8 idct */

/* Fixed-point scale of the 4-point column constants: CN_SHIFT fractional
   bits, rounded to nearest by C_FIX. */
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
428
429	static inline void idct4col(uint8_t dest, int line_size, const DCTELEM col)
430	{
431	int c0, c1, c2, c3, a0, a1, a2, a3;
432	const uint8_t *cm = cropTbl + MAX_NEG_CROP;
433
434	a0 = col[8*0];
435	a1 = col[8*2];
436	a2 = col[8*4];
437	a3 = col[8*6];
438	c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
439	c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
440	c1 = a1 * C1 + a3 * C2;
441	c3 = a1 * C2 - a3 * C1;
442	dest[0] = cm[(c0 + c1) >> C_SHIFT];
443	dest += line_size;
444	dest[0] = cm[(c2 + c3) >> C_SHIFT];
445	dest += line_size;
446	dest[0] = cm[(c2 - c3) >> C_SHIFT];
447	dest += line_size;
448	dest[0] = cm[(c0 - c1) >> C_SHIFT];
449	}
450
/*
 * Butterfly on one column of two adjacent rows: replaces ptr[k] with the
 * sum and ptr[8 + k] with the difference of the two values.  Relies on a
 * variable named ptr being in scope at the point of use.  Wrapped in
 * do { } while (0) so that "BF(k);" is a single statement.
 */
#define BF(k) \
do {\
    int a0, a1;\
    a0 = ptr[k];\
    a1 = ptr[8 + (k)];\
    ptr[k] = a0 + a1;\
    ptr[8 + (k)] = a0 - a1;\
} while (0)
459
460	/* only used by DV codec. The input must be interlaced. 128 is added
461	to the pixels before clamping to avoid systematic error
462	(1024sqrt(2)) offset would be needed otherwise. /
463	/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
464	compensate the extra butterfly stage - I don't have the full DV
465	specification */
466	void simple_idct248_put(uint8_t dest, int line_size, DCTELEM block)
467	{
468	int i;
469	DCTELEM *ptr;
470
471	/* butterfly */
472	ptr = block;
473	for(i=0;i<4;i++) {
474	BF(0);
475	BF(1);
476	BF(2);
477	BF(3);
478	BF(4);
479	BF(5);
480	BF(6);
481	BF(7);
482	ptr += 2 * 8;
483	}
484
485	/* IDCT8 on each line */
486	for(i=0; i<8; i++) {
487	idctRowCondDC(block + i*8);
488	}
489
490	/* IDCT4 and store */
491	for(i=0;i<8;i++) {
492	idct4col(dest + i, 2 * line_size, block + i);
493	idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
494	}
495	}
496
/* 8x4 & 4x8 WMV2 IDCT */

/* Drop the constants of the previous section before redefining them. */
#undef C1
#undef C2
#undef C_FIX
#undef C_SHIFT
#undef CN_SHIFT

/* Column constants: CN_SHIFT fractional bits, with an extra factor of
   1.414213562 folded into every constant. */
#define CN_SHIFT 12
#define C_SHIFT (4+1+12)
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
509	static inline void idct4col_add(uint8_t dest, int line_size, const DCTELEM col)
510	{
511	int c0, c1, c2, c3, a0, a1, a2, a3;
512	const uint8_t *cm = cropTbl + MAX_NEG_CROP;
513
514	a0 = col[8*0];
515	a1 = col[8*1];
516	a2 = col[8*2];
517	a3 = col[8*3];
518	c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
519	c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
520	c1 = a1 * C1 + a3 * C2;
521	c3 = a1 * C2 - a3 * C1;
522	dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
523	dest += line_size;
524	dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
525	dest += line_size;
526	dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
527	dest += line_size;
528	dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
529	}
530
/* Row constants for the 4-point row IDCT: RN_SHIFT fractional bits, with an
   extra factor of 1.414213562 folded in; results are shifted right by
   R_SHIFT. */
#define RN_SHIFT 15
#define R_SHIFT 11
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
537	static inline void idct4row(DCTELEM *row)
538	{
539	int c0, c1, c2, c3, a0, a1, a2, a3;
540	//const uint8_t *cm = cropTbl + MAX_NEG_CROP;
541
542	a0 = row[0];
543	a1 = row[1];
544	a2 = row[2];
545	a3 = row[3];
546	c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
547	c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
548	c1 = a1 * R1 + a3 * R2;
549	c3 = a1 * R2 - a3 * R1;
550	row[0]= (c0 + c1) >> R_SHIFT;
551	row[1]= (c2 + c3) >> R_SHIFT;
552	row[2]= (c2 - c3) >> R_SHIFT;
553	row[3]= (c0 - c1) >> R_SHIFT;
554	}
555
556	void simple_idct84_add(uint8_t dest, int line_size, DCTELEM block)
557	{
558	int i;
559
560	/* IDCT8 on each line */
561	for(i=0; i<4; i++) {
562	idctRowCondDC(block + i*8);
563	}
564
565	/* IDCT4 and store */
566	for(i=0;i<8;i++) {
567	idct4col_add(dest + i, line_size, block + i);
568	}
569	}
570
571	void simple_idct48_add(uint8_t dest, int line_size, DCTELEM block)
572	{
573	int i;
574
575	/* IDCT4 on each line */
576	for(i=0; i<8; i++) {
577	idct4row(block + i*8);
578	}
579
580	/* IDCT8 and store */
581	for(i=0; i<4; i++){
582	idctSparseColAdd(dest + i, line_size, block + i);
583	}
584	}
585

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/simple_idct.c@ 9441

Download in other formats: