VirtualBox

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/ppc/dsputil_h264_altivec.c@ 5776

Last change on this file since 5776 was 5776, checked in by vboxsync, 17 years ago

ffmpeg: exported to OSE

File size: 13.8 KB
Line 
1/*
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "../dsputil.h"
20
21#include "gcc_fixes.h"
22
23#include "dsputil_altivec.h"
24
25#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
26#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
27
28#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
29#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
30#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
31#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
32#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
33#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
34#define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
35#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
36#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
37#include "dsputil_h264_template_altivec.c"
38#undef OP_U8_ALTIVEC
39#undef PREFIX_h264_chroma_mc8_altivec
40#undef PREFIX_h264_chroma_mc8_num
41#undef PREFIX_h264_qpel16_h_lowpass_altivec
42#undef PREFIX_h264_qpel16_h_lowpass_num
43#undef PREFIX_h264_qpel16_v_lowpass_altivec
44#undef PREFIX_h264_qpel16_v_lowpass_num
45#undef PREFIX_h264_qpel16_hv_lowpass_altivec
46#undef PREFIX_h264_qpel16_hv_lowpass_num
47
48#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
49#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
50#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
51#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
52#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
53#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
54#define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
55#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
56#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
57#include "dsputil_h264_template_altivec.c"
58#undef OP_U8_ALTIVEC
59#undef PREFIX_h264_chroma_mc8_altivec
60#undef PREFIX_h264_chroma_mc8_num
61#undef PREFIX_h264_qpel16_h_lowpass_altivec
62#undef PREFIX_h264_qpel16_h_lowpass_num
63#undef PREFIX_h264_qpel16_v_lowpass_altivec
64#undef PREFIX_h264_qpel16_v_lowpass_num
65#undef PREFIX_h264_qpel16_hv_lowpass_altivec
66#undef PREFIX_h264_qpel16_hv_lowpass_num
67
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *     OPNAME h264_qpel SIZE _mcXY_ CODETYPE   (X, Y in 0..3)
 * each taking (uint8_t *dst, uint8_t *src, int stride).
 * NOTE(review): X/Y are presumably the horizontal/vertical quarter-sample
 * offsets of H.264 motion compensation -- confirm against the generic
 * H264_MC in dsputil.c.
 *
 * What each generated function does, as visible here:
 *   mc00             forwards to OPNAME pixels SIZE _ CODETYPE.
 *   mc20, mc02, mc22 call the OPNAME h / v / hv lowpass filter straight
 *                    into dst.
 *   all others       build one or two SIZE x SIZE intermediates with the
 *                    put_ lowpass filters (intermediate stride = SIZE) and
 *                    combine two inputs into dst through
 *                    OPNAME pixels SIZE _l2_ CODETYPE.
 *
 * Scratch buffers are declared as uint64_t arrays through
 * DECLARE_ALIGNED_16, so an element count of SIZE*SIZE/8 holds SIZE*SIZE
 * bytes.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/8]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, hfilt, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/8]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src, SIZE, stride);\
    /* same as mc10, but paired with the source shifted one pixel right */\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, hfilt, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/8]);\
    uint8_t * const vfilt = (uint8_t*)scratch;\
\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, vfilt, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/8]);\
    uint8_t * const vfilt = (uint8_t*)scratch;\
\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src, SIZE, stride);\
    /* same as mc01, but paired with the source shifted one row down */\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, vfilt, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    /* two back-to-back SIZE*SIZE byte planes: [hfilt][vfilt] */\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/4]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
    uint8_t * const vfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, vfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/4]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
    uint8_t * const vfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, vfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/4]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
    uint8_t * const vfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, vfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*SIZE/4]);\
    uint8_t * const hfilt = (uint8_t*)scratch;\
    uint8_t * const vfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, vfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    /* the whole buffer is the 16-bit work area handed to the hv filter */\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*(SIZE+8)/4]);\
    int16_t * const work = (int16_t*)scratch;\
\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, work, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    /* layout: [hfilt: SIZE*SIZE bytes][hvfilt: SIZE*SIZE bytes][work: int16_t area] */\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const hfilt  = (uint8_t*)scratch;\
    uint8_t * const hvfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
    int16_t * const work   = ((int16_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(hvfilt, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, hvfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const hfilt  = (uint8_t*)scratch;\
    uint8_t * const hvfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
    int16_t * const work   = ((int16_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(hfilt, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(hvfilt, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, hfilt, hvfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    /* layout: [vfilt: SIZE*SIZE bytes][hvfilt: SIZE*SIZE bytes][work: int16_t area] */\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const vfilt  = (uint8_t*)scratch;\
    uint8_t * const hvfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
    int16_t * const work   = ((int16_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(hvfilt, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, vfilt, hvfilt, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride)\
{\
    DECLARE_ALIGNED_16(uint64_t, scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
    uint8_t * const vfilt  = (uint8_t*)scratch;\
    uint8_t * const hvfilt = ((uint8_t*)scratch) + SIZE*SIZE;\
    int16_t * const work   = ((int16_t*)scratch) + SIZE*SIZE;\
\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(vfilt, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(hvfilt, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, vfilt, hvfilt, stride, SIZE, SIZE);\
}

191static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
192 const uint8_t * src2, int dst_stride,
193 int src_stride1, int h)
194{
195 int i;
196 vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
197
198 mask_ = vec_lvsl(0, src2);
199
200 for (i = 0; i < h; i++) {
201
202 tmp1 = vec_ld(i * src_stride1, src1);
203 mask = vec_lvsl(i * src_stride1, src1);
204 tmp2 = vec_ld(i * src_stride1 + 15, src1);
205
206 a = vec_perm(tmp1, tmp2, mask);
207
208 tmp1 = vec_ld(i * 16, src2);
209 tmp2 = vec_ld(i * 16 + 15, src2);
210
211 b = vec_perm(tmp1, tmp2, mask_);
212
213 tmp1 = vec_ld(0, dst);
214 mask = vec_lvsl(0, dst);
215 tmp2 = vec_ld(15, dst);
216
217 d = vec_avg(a, b);
218
219 edges = vec_perm(tmp2, tmp1, mask);
220
221 align = vec_lvsr(0, dst);
222
223 tmp1 = vec_perm(edges, d, align);
224 tmp2 = vec_perm(d, edges, align);
225
226 vec_st(tmp2, 15, dst);
227 vec_st(tmp1, 0 , dst);
228
229 dst += dst_stride;
230 }
231}
232
233static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
234 const uint8_t * src2, int dst_stride,
235 int src_stride1, int h)
236{
237 int i;
238 vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;
239
240 mask_ = vec_lvsl(0, src2);
241
242 for (i = 0; i < h; i++) {
243
244 tmp1 = vec_ld(i * src_stride1, src1);
245 mask = vec_lvsl(i * src_stride1, src1);
246 tmp2 = vec_ld(i * src_stride1 + 15, src1);
247
248 a = vec_perm(tmp1, tmp2, mask);
249
250 tmp1 = vec_ld(i * 16, src2);
251 tmp2 = vec_ld(i * 16 + 15, src2);
252
253 b = vec_perm(tmp1, tmp2, mask_);
254
255 tmp1 = vec_ld(0, dst);
256 mask = vec_lvsl(0, dst);
257 tmp2 = vec_ld(15, dst);
258
259 d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
260
261 edges = vec_perm(tmp2, tmp1, mask);
262
263 align = vec_lvsr(0, dst);
264
265 tmp1 = vec_perm(edges, d, align);
266 tmp2 = vec_perm(d, edges, align);
267
268 vec_st(tmp2, 15, dst);
269 vec_st(tmp1, 0 , dst);
270
271 dst += dst_stride;
272 }
273}
274
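/* The AltiVec put/avg_pixels16_l2 helpers above are implemented but could be
 * faster. Disabled alternative: forward to put_pixels16_l2 / avg_pixels16_l2
 * (presumably the generic dsputil helpers -- confirm) with a src2 stride of 16:
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */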
279
280 H264_MC(put_, 16, altivec)
281 H264_MC(avg_, 16, altivec)
282
283void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
284
285#ifdef HAVE_ALTIVEC
286 if (has_altivec()) {
287 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
288 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
289
290#define dspfunc(PFX, IDX, NUM) \
291 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
292 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
293 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
294 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
295 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
296 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
297 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
298 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
299 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
300 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
301 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
302 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
303 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
304 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
305 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
306 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
307
308 dspfunc(put_h264_qpel, 0, 16);
309 dspfunc(avg_h264_qpel, 0, 16);
310#undef dspfunc
311
312 } else
313#endif /* HAVE_ALTIVEC */
314 {
315 // Non-AltiVec PPC optimisations
316
317 // ... pending ...
318 }
319}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette