dsputil_h264_altivec.c@ 5776

Last change on this file since 5776 was 5776, checked in by vboxsync, 17 years ago
ffmpeg: exported to OSE
File size: 13.8 KB

Line
1	/*
2	* Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
3	*
4	* This library is free software; you can redistribute it and/or
5	* modify it under the terms of the GNU Lesser General Public
6	* License as published by the Free Software Foundation; either
7	* version 2 of the License, or (at your option) any later version.
8	*
9	* This library is distributed in the hope that it will be useful,
10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12	* Lesser General Public License for more details.
13	*
14	* You should have received a copy of the GNU Lesser General Public
15	* License along with this library; if not, write to the Free Software
16	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17	*/
18
19	#include "../dsputil.h"
20
21	#include "gcc_fixes.h"
22
23	#include "dsputil_altivec.h"
24
/*
 * Store operators handed to the template below as OP_U8_ALTIVEC(d, s, dst):
 *   PUT: d receives s unchanged (dst is ignored).
 *   AVG: d receives vec_avg(dst, s), i.e. s averaged with the vector passed
 *        as dst.
 */
25	#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
26	#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
27
/*
 * First inclusion of the template, configured for the "put" flavour.
 * The template itself is not visible here; presumably it defines one function
 * per PREFIX_*_altivec name and uses the matching PREFIX_*_num name as a
 * per-function identifier -- TODO confirm against
 * dsputil_h264_template_altivec.c.
 */
28	#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
29	#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
30	#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
31	#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
32	#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
33	#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
34	#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
35	#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
36	#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
37	#include "dsputil_h264_template_altivec.c"
/* Drop the "put" configuration so the template can be included again. */
38	#undef OP_U8_ALTIVEC
39	#undef PREFIX_h264_chroma_mc8_altivec
40	#undef PREFIX_h264_chroma_mc8_num
41	#undef PREFIX_h264_qpel16_h_lowpass_altivec
42	#undef PREFIX_h264_qpel16_h_lowpass_num
43	#undef PREFIX_h264_qpel16_v_lowpass_altivec
44	#undef PREFIX_h264_qpel16_v_lowpass_num
45	#undef PREFIX_h264_qpel16_hv_lowpass_altivec
46	#undef PREFIX_h264_qpel16_hv_lowpass_num
47
/* Second inclusion of the same template, configured for the "avg" flavour. */
48	#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
49	#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
50	#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
51	#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
52	#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
53	#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
54	#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
55	#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
56	#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
57	#include "dsputil_h264_template_altivec.c"
/* Leave no template configuration macros defined for the rest of the file. */
58	#undef OP_U8_ALTIVEC
59	#undef PREFIX_h264_chroma_mc8_altivec
60	#undef PREFIX_h264_chroma_mc8_num
61	#undef PREFIX_h264_qpel16_h_lowpass_altivec
62	#undef PREFIX_h264_qpel16_h_lowpass_num
63	#undef PREFIX_h264_qpel16_v_lowpass_altivec
64	#undef PREFIX_h264_qpel16_v_lowpass_num
65	#undef PREFIX_h264_qpel16_hv_lowpass_altivec
66	#undef PREFIX_h264_qpel16_hv_lowpass_num
67
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *     OPNAME h264_qpel SIZE _mcXY_ CODETYPE (uint8_t *dst, uint8_t *src, int stride)
 * for X, Y in 0..3 (presumably the horizontal and vertical quarter-sample
 * offsets -- the code below only fixes which filters each variant combines).
 *
 * Building blocks, all resolved by token pasting and defined elsewhere:
 *     pixels<SIZE>_<CODETYPE>            plain block copy/average ("P")
 *     h264_qpel<SIZE>_h_lowpass_<...>    horizontal filter        ("H")
 *     h264_qpel<SIZE>_v_lowpass_<...>    vertical filter          ("V")
 *     h264_qpel<SIZE>_hv_lowpass_<...>   combined filter          ("HV")
 *     pixels<SIZE>_l2_<CODETYPE>         combination of two blocks ("l2")
 *
 * What each variant computes:
 *     mc00: P(src)
 *     mc10: l2(src,          H(src))    mc20: H(src)    mc30: l2(src+1,      H(src))
 *     mc01: l2(src,          V(src))    mc02: V(src)    mc03: l2(src+stride, V(src))
 *     mc11: l2(H(src),        V(src))   mc31: l2(H(src),        V(src+1))
 *     mc13: l2(H(src+stride), V(src))   mc33: l2(H(src+stride), V(src+1))
 *     mc22: HV(src)
 *     mc21: l2(H(src),        HV(src))  mc23: l2(H(src+stride), HV(src))
 *     mc12: l2(V(src),        HV(src))  mc32: l2(V(src+1),      HV(src))
 *
 * Intermediate planes are always produced with the put_ filters; OPNAME
 * (put_ or avg_) is applied only by the final call that writes dst.
 *
 * Scratch layout: temp is declared in uint64_t units, so SIZE*SIZE/8 elements
 * hold one SIZE*SIZE byte plane and SIZE*SIZE/4 elements hold two. The hv
 * variants add SIZE*(SIZE+8)/4 elements, which start at int16_t index
 * SIZE*SIZE (byte offset 2*SIZE*SIZE), i.e. right after the two byte planes.
 * DECLARE_ALIGNED_16 presumably gives the buffer 16-byte alignment -- confirm
 * against its definition.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
    uint8_t * const half= (uint8_t*)temp;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
    uint8_t * const half= (uint8_t*)temp;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
    uint8_t * const half= (uint8_t*)temp;\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
    uint8_t * const half= (uint8_t*)temp;\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4]);\
    int16_t * const tmp= (int16_t*)temp;\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const halfH= (uint8_t*)temp;\
    uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const halfV= (uint8_t*)temp;\
    uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const halfV= (uint8_t*)temp;\
    uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
    int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}

191	static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
192	const uint8_t * src2, int dst_stride,
193	int src_stride1, int h)
194	{
195	int i;
196	vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
197
198	mask_ = vec_lvsl(0, src2);
199
200	for (i = 0; i < h; i++) {
201
202	tmp1 = vec_ld(i * src_stride1, src1);
203	mask = vec_lvsl(i * src_stride1, src1);
204	tmp2 = vec_ld(i * src_stride1 + 15, src1);
205
206	a = vec_perm(tmp1, tmp2, mask);
207
208	tmp1 = vec_ld(i * 16, src2);
209	tmp2 = vec_ld(i * 16 + 15, src2);
210
211	b = vec_perm(tmp1, tmp2, mask_);
212
213	tmp1 = vec_ld(0, dst);
214	mask = vec_lvsl(0, dst);
215	tmp2 = vec_ld(15, dst);
216
217	d = vec_avg(a, b);
218
219	edges = vec_perm(tmp2, tmp1, mask);
220
221	align = vec_lvsr(0, dst);
222
223	tmp1 = vec_perm(edges, d, align);
224	tmp2 = vec_perm(d, edges, align);
225
226	vec_st(tmp2, 15, dst);
227	vec_st(tmp1, 0 , dst);
228
229	dst += dst_stride;
230	}
231	}
232
233	static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
234	const uint8_t * src2, int dst_stride,
235	int src_stride1, int h)
236	{
237	int i;
238	vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
239
240	mask_ = vec_lvsl(0, src2);
241
242	for (i = 0; i < h; i++) {
243
244	tmp1 = vec_ld(i * src_stride1, src1);
245	mask = vec_lvsl(i * src_stride1, src1);
246	tmp2 = vec_ld(i * src_stride1 + 15, src1);
247
248	a = vec_perm(tmp1, tmp2, mask);
249
250	tmp1 = vec_ld(i * 16, src2);
251	tmp2 = vec_ld(i * 16 + 15, src2);
252
253	b = vec_perm(tmp1, tmp2, mask_);
254
255	tmp1 = vec_ld(0, dst);
256	mask = vec_lvsl(0, dst);
257	tmp2 = vec_ld(15, dst);
258
259	d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
260
261	edges = vec_perm(tmp2, tmp1, mask);
262
263	align = vec_lvsr(0, dst);
264
265	tmp1 = vec_perm(edges, d, align);
266	tmp2 = vec_perm(d, edges, align);
267
268	vec_st(tmp2, 15, dst);
269	vec_st(tmp1, 0 , dst);
270
271	dst += dst_stride;
272	}
273	}
274
275	/* Implemented but could be faster
276	#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
277	#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
278	*/
279
/*
 * Instantiate the sixteen put_h264_qpel16_mcXY_altivec and the sixteen
 * avg_h264_qpel16_mcXY_altivec functions from the H264_MC macro.
 */
280	H264_MC(put_, 16, altivec)
281	H264_MC(avg_, 16, altivec)
282
283	void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
284
285	#ifdef HAVE_ALTIVEC
286	if (has_altivec()) {
287	c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
288	c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
289
290	#define dspfunc(PFX, IDX, NUM) \
291	c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
292	c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
293	c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
294	c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
295	c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
296	c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
297	c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
298	c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
299	c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
300	c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
301	c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
302	c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
303	c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
304	c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
305	c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
306	c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
307
308	dspfunc(put_h264_qpel, 0, 16);
309	dspfunc(avg_h264_qpel, 0, 16);
310	#undef dspfunc
311
312	} else
313	#endif /* HAVE_ALTIVEC */
314	{
315	// Non-AltiVec PPC optimisations
316
317	// ... pending ...
318	}
319	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/ppc/dsputil_h264_altivec.c@ 5776

Download in other formats: