faandct.c@ 7164

Last change on this file since 7164 was 5776, checked in by vboxsync, 17 years ago
ffmpeg: exported to OSE
File size: 6.6 KB

Line
1	/*
2	* Floating point AAN DCT
3	* Copyright (c) 2003 Michael Niedermayer <[email protected]>
4	*
5	* This library is free software; you can redistribute it and/or
6	* modify it under the terms of the GNU Lesser General Public
7	* License as published by the Free Software Foundation; either
8	* version 2 of the License, or (at your option) any later version.
9	*
10	* This library is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	* Lesser General Public License for more details.
14	*
15	* You should have received a copy of the GNU Lesser General Public
16	* License along with this library; if not, write to the Free Software
17	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18	*
19	* this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
20	*/
21
22	/**
23	* @file faandct.c
24	* @brief
25	* Floating point AAN DCT
26	* @author Michael Niedermayer <[email protected]>
27	*/
28
29	#include "dsputil.h"
30	#include "faandct.h"
31
/* Working type for all intermediate DCT arithmetic in this file. */
#define FLOAT float

/*
 * SCALE(x) is the factor applied to output coefficient x before rounding:
 * the postscale[] entry when FAAN_POSTSCALE is defined, otherwise 1.
 */
#ifdef FAAN_POSTSCALE
#    define SCALE(x) postscale[x]
#else
#    define SCALE(x) 1
#endif

//numbers generated by simple c code (not as accurate as they could be)
/*
for(i=0; i<8; i++){
    printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
}
*/
// NOTE: B0 is 1.0, which is not the value the loop above prints for i=0.
#define B0 1.00000000000000000000
#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1


#define A1 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
#define A5 0.38268343236508977170 // cos(pi*6/16)
#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)

/*
 * Per-coefficient scale factors: postscale[8*r + c] == Br * Bc.
 * Only read, through SCALE(), when FAAN_POSTSCALE is defined.
 */
static const FLOAT postscale[64]={
    B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7,
    B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7,
    B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7,
    B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7,
    B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7,
    B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7,
    B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7,
    B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7,
};
70
71	static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
72	{
73	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
74	FLOAT tmp10, tmp11, tmp12, tmp13;
75	FLOAT z1, z2, z3, z4, z5, z11, z13;
76	int i;
77
78	for (i=0; i<8*8; i+=8) {
79	tmp0= data[0 + i] + data[7 + i];
80	tmp7= data[0 + i] - data[7 + i];
81	tmp1= data[1 + i] + data[6 + i];
82	tmp6= data[1 + i] - data[6 + i];
83	tmp2= data[2 + i] + data[5 + i];
84	tmp5= data[2 + i] - data[5 + i];
85	tmp3= data[3 + i] + data[4 + i];
86	tmp4= data[3 + i] - data[4 + i];
87
88	tmp10= tmp0 + tmp3;
89	tmp13= tmp0 - tmp3;
90	tmp11= tmp1 + tmp2;
91	tmp12= tmp1 - tmp2;
92
93	temp[0 + i]= tmp10 + tmp11;
94	temp[4 + i]= tmp10 - tmp11;
95
96	z1= (tmp12 + tmp13)*A1;
97	temp[2 + i]= tmp13 + z1;
98	temp[6 + i]= tmp13 - z1;
99
100	tmp10= tmp4 + tmp5;
101	tmp11= tmp5 + tmp6;
102	tmp12= tmp6 + tmp7;
103
104	z5= (tmp10 - tmp12) * A5;
105	z2= tmp10*A2 + z5;
106	z4= tmp12*A4 + z5;
107	z3= tmp11*A1;
108
109	z11= tmp7 + z3;
110	z13= tmp7 - z3;
111
112	temp[5 + i]= z13 + z2;
113	temp[3 + i]= z13 - z2;
114	temp[1 + i]= z11 + z4;
115	temp[7 + i]= z11 - z4;
116	}
117	}
118
119	void ff_faandct(DCTELEM * data)
120	{
121	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
122	FLOAT tmp10, tmp11, tmp12, tmp13;
123	FLOAT z1, z2, z3, z4, z5, z11, z13;
124	FLOAT temp[64];
125	int i;
126
127	emms_c();
128
129	row_fdct(temp, data);
130
131	for (i=0; i<8; i++) {
132	tmp0= temp[80 + i] + temp[87 + i];
133	tmp7= temp[80 + i] - temp[87 + i];
134	tmp1= temp[81 + i] + temp[86 + i];
135	tmp6= temp[81 + i] - temp[86 + i];
136	tmp2= temp[82 + i] + temp[85 + i];
137	tmp5= temp[82 + i] - temp[85 + i];
138	tmp3= temp[83 + i] + temp[84 + i];
139	tmp4= temp[83 + i] - temp[84 + i];
140
141	tmp10= tmp0 + tmp3;
142	tmp13= tmp0 - tmp3;
143	tmp11= tmp1 + tmp2;
144	tmp12= tmp1 - tmp2;
145
146	data[80 + i]= lrintf(SCALE(80 + i) * (tmp10 + tmp11));
147	data[84 + i]= lrintf(SCALE(84 + i) * (tmp10 - tmp11));
148
149	z1= (tmp12 + tmp13)* A1;
150	data[82 + i]= lrintf(SCALE(82 + i) * (tmp13 + z1));
151	data[86 + i]= lrintf(SCALE(86 + i) * (tmp13 - z1));
152
153	tmp10= tmp4 + tmp5;
154	tmp11= tmp5 + tmp6;
155	tmp12= tmp6 + tmp7;
156
157	z5= (tmp10 - tmp12) * A5;
158	z2= tmp10*A2 + z5;
159	z4= tmp12*A4 + z5;
160	z3= tmp11*A1;
161
162	z11= tmp7 + z3;
163	z13= tmp7 - z3;
164
165	data[85 + i]= lrintf(SCALE(85 + i) * (z13 + z2));
166	data[83 + i]= lrintf(SCALE(83 + i) * (z13 - z2));
167	data[81 + i]= lrintf(SCALE(81 + i) * (z11 + z4));
168	data[87 + i]= lrintf(SCALE(87 + i) * (z11 - z4));
169	}
170	}
171
172	void ff_faandct248(DCTELEM * data)
173	{
174	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
175	FLOAT tmp10, tmp11, tmp12, tmp13;
176	FLOAT z1;
177	FLOAT temp[64];
178	int i;
179
180	emms_c();
181
182	row_fdct(temp, data);
183
184	for (i=0; i<8; i++) {
185	tmp0 = temp[80 + i] + temp[81 + i];
186	tmp1 = temp[82 + i] + temp[83 + i];
187	tmp2 = temp[84 + i] + temp[85 + i];
188	tmp3 = temp[86 + i] + temp[87 + i];
189	tmp4 = temp[80 + i] - temp[81 + i];
190	tmp5 = temp[82 + i] - temp[83 + i];
191	tmp6 = temp[84 + i] - temp[85 + i];
192	tmp7 = temp[86 + i] - temp[87 + i];
193
194	tmp10 = tmp0 + tmp3;
195	tmp11 = tmp1 + tmp2;
196	tmp12 = tmp1 - tmp2;
197	tmp13 = tmp0 - tmp3;
198
199	data[80 + i] = lrintf(SCALE(80 + i) * (tmp10 + tmp11));
200	data[84 + i] = lrintf(SCALE(84 + i) * (tmp10 - tmp11));
201
202	z1 = (tmp12 + tmp13)* A1;
203	data[82 + i] = lrintf(SCALE(82 + i) * (tmp13 + z1));
204	data[86 + i] = lrintf(SCALE(86 + i) * (tmp13 - z1));
205
206	tmp10 = tmp4 + tmp7;
207	tmp11 = tmp5 + tmp6;
208	tmp12 = tmp5 - tmp6;
209	tmp13 = tmp4 - tmp7;
210
211	data[81 + i] = lrintf(SCALE(80 + i) * (tmp10 + tmp11));
212	data[85 + i] = lrintf(SCALE(84 + i) * (tmp10 - tmp11));
213
214	z1 = (tmp12 + tmp13)* A1;
215	data[83 + i] = lrintf(SCALE(82 + i) * (tmp13 + z1));
216	data[87 + i] = lrintf(SCALE(86 + i) * (tmp13 - z1));
217	}
218	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/faandct.c@ 7164

Download in other formats: