1 | /*
|
---|
2 | * DSP utils
|
---|
3 | * Copyright (c) 2000, 2001 Fabrice Bellard.
|
---|
4 | * Copyright (c) 2002-2004 Michael Niedermayer <[email protected]>
|
---|
5 | *
|
---|
6 | * This library is free software; you can redistribute it and/or
|
---|
7 | * modify it under the terms of the GNU Lesser General Public
|
---|
8 | * License as published by the Free Software Foundation; either
|
---|
9 | * version 2 of the License, or (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This library is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
14 | * Lesser General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU Lesser General Public
|
---|
17 | * License along with this library; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
19 | *
|
---|
20 | * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <[email protected]>
|
---|
21 | */
|
---|
22 |
|
---|
23 | /**
|
---|
24 | * @file dsputil.c
|
---|
25 | * DSP utils
|
---|
26 | */
|
---|
27 |
|
---|
28 | #include "avcodec.h"
|
---|
29 | #include "dsputil.h"
|
---|
30 | #include "mpegvideo.h"
|
---|
31 | #include "simple_idct.h"
|
---|
32 | #include "faandct.h"
|
---|
33 | #include "snow.h"
|
---|
34 |
|
---|
/* snow.c */
/* Prototype of the spatial wavelet transform that w_c() in this file applies
 * to its block of pixel differences; the definition is not in this file. */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);

/* Lookup table used by the *_pixels_clamped*_c() functions, which access it
 * as cropTbl + MAX_NEG_CROP so that indices outside 0..255 still land inside
 * the array. Only zero-initialised here.
 * NOTE(review): presumably filled by an init routine outside this section - confirm. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Lookup table used by pix_norm1_c() and sse*_c(), which access it as
 * squareTbl + 256 and index it with pixel values and pixel differences
 * (-255..255). Only zero-initialised here.
 * NOTE(review): presumably holds the squares of its offset index - confirm. */
uint32_t squareTbl[512] = {0, };
|
---|
40 |
|
---|
/* Zigzag scan of an 8x8 block: entry i is the raster position (row*8 + column)
 * of the i-th coefficient in scan order. One row of the table per 8 entries. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
|
---|
51 |
|
---|
/* Specific zigzag scan for the 2-4-8 IDCT. NOTE: unlike the specification,
 * the two fields are interleaved here. Entries are raster positions
 * (row*8 + column) in an 8x8 block. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
|
---|
64 |
|
---|
/* Inverse of ff_zigzag_direct, not permuted, with 1 added to every entry;
 * used by the MMX quantizer. Declared 8-byte aligned and only zero-initialised
 * here.
 * NOTE(review): presumably filled by an init routine outside this section - confirm. */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
|
---|
67 |
|
---|
/* Alternate 8x8 scan order that begins along the first row (0, 1, 2, 3)
 * before moving down. Entries are raster positions (row*8 + column). */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
|
---|
78 |
|
---|
/* Alternate 8x8 scan order that begins down the first column (0, 8, 16, 24)
 * before moving right. Entries are raster positions (row*8 + column). */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
|
---|
89 |
|
---|
/* Reciprocal table for division by multiplication:
 *     ((uint64_t)a * inverse[b]) >> 32 == a / b
 * for all 0 <= a <= 65536 and 2 <= b <= 255. The product does not fit in
 * 32 bits, so it has to be evaluated in a 64-bit type.
 * The entries for b >= 2 appear to be 2^32 / b rounded up. inverse[1] is
 * 2^32 - 1 because 2^32 itself does not fit in a uint32_t, which is why
 * b == 1 is outside the guaranteed range; inverse[0] is a placeholder. */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
|
---|
125 |
|
---|
/* Input permutation for the simple_idct_mmx: 64 coefficient positions
 * (0x00..0x3F), each listed once, eight entries per table row. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
|
---|
137 |
|
---|
/**
 * Sum of all pixel values in a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return the sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
|
---|
159 |
|
---|
160 | static int pix_norm1_c(uint8_t * pix, int line_size)
|
---|
161 | {
|
---|
162 | int s, i, j;
|
---|
163 | uint32_t *sq = squareTbl + 256;
|
---|
164 |
|
---|
165 | s = 0;
|
---|
166 | for (i = 0; i < 16; i++) {
|
---|
167 | for (j = 0; j < 16; j += 8) {
|
---|
168 | #if 0
|
---|
169 | s += sq[pix[0]];
|
---|
170 | s += sq[pix[1]];
|
---|
171 | s += sq[pix[2]];
|
---|
172 | s += sq[pix[3]];
|
---|
173 | s += sq[pix[4]];
|
---|
174 | s += sq[pix[5]];
|
---|
175 | s += sq[pix[6]];
|
---|
176 | s += sq[pix[7]];
|
---|
177 | #else
|
---|
178 | #if LONG_MAX > 2147483647
|
---|
179 | register uint64_t x=*(uint64_t*)pix;
|
---|
180 | s += sq[x&0xff];
|
---|
181 | s += sq[(x>>8)&0xff];
|
---|
182 | s += sq[(x>>16)&0xff];
|
---|
183 | s += sq[(x>>24)&0xff];
|
---|
184 | s += sq[(x>>32)&0xff];
|
---|
185 | s += sq[(x>>40)&0xff];
|
---|
186 | s += sq[(x>>48)&0xff];
|
---|
187 | s += sq[(x>>56)&0xff];
|
---|
188 | #else
|
---|
189 | register uint32_t x=*(uint32_t*)pix;
|
---|
190 | s += sq[x&0xff];
|
---|
191 | s += sq[(x>>8)&0xff];
|
---|
192 | s += sq[(x>>16)&0xff];
|
---|
193 | s += sq[(x>>24)&0xff];
|
---|
194 | x=*(uint32_t*)(pix+4);
|
---|
195 | s += sq[x&0xff];
|
---|
196 | s += sq[(x>>8)&0xff];
|
---|
197 | s += sq[(x>>16)&0xff];
|
---|
198 | s += sq[(x>>24)&0xff];
|
---|
199 | #endif
|
---|
200 | #endif
|
---|
201 | pix += 8;
|
---|
202 | }
|
---|
203 | pix += line_size - 16;
|
---|
204 | }
|
---|
205 | return s;
|
---|
206 | }
|
---|
207 |
|
---|
/**
 * Apply bswap_32() to each of the first w 32-bit words of src and store the
 * results at the same index in dst.
 *
 * @param dst destination array, at least w words
 * @param src source array, at least w words
 * @param w   number of words to process; nothing is done when w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
|
---|
225 |
|
---|
226 | static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
---|
227 | {
|
---|
228 | int s, i;
|
---|
229 | uint32_t *sq = squareTbl + 256;
|
---|
230 |
|
---|
231 | s = 0;
|
---|
232 | for (i = 0; i < h; i++) {
|
---|
233 | s += sq[pix1[0] - pix2[0]];
|
---|
234 | s += sq[pix1[1] - pix2[1]];
|
---|
235 | s += sq[pix1[2] - pix2[2]];
|
---|
236 | s += sq[pix1[3] - pix2[3]];
|
---|
237 | pix1 += line_size;
|
---|
238 | pix2 += line_size;
|
---|
239 | }
|
---|
240 | return s;
|
---|
241 | }
|
---|
242 |
|
---|
243 | static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
---|
244 | {
|
---|
245 | int s, i;
|
---|
246 | uint32_t *sq = squareTbl + 256;
|
---|
247 |
|
---|
248 | s = 0;
|
---|
249 | for (i = 0; i < h; i++) {
|
---|
250 | s += sq[pix1[0] - pix2[0]];
|
---|
251 | s += sq[pix1[1] - pix2[1]];
|
---|
252 | s += sq[pix1[2] - pix2[2]];
|
---|
253 | s += sq[pix1[3] - pix2[3]];
|
---|
254 | s += sq[pix1[4] - pix2[4]];
|
---|
255 | s += sq[pix1[5] - pix2[5]];
|
---|
256 | s += sq[pix1[6] - pix2[6]];
|
---|
257 | s += sq[pix1[7] - pix2[7]];
|
---|
258 | pix1 += line_size;
|
---|
259 | pix2 += line_size;
|
---|
260 | }
|
---|
261 | return s;
|
---|
262 | }
|
---|
263 |
|
---|
264 | static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
---|
265 | {
|
---|
266 | int s, i;
|
---|
267 | uint32_t *sq = squareTbl + 256;
|
---|
268 |
|
---|
269 | s = 0;
|
---|
270 | for (i = 0; i < h; i++) {
|
---|
271 | s += sq[pix1[ 0] - pix2[ 0]];
|
---|
272 | s += sq[pix1[ 1] - pix2[ 1]];
|
---|
273 | s += sq[pix1[ 2] - pix2[ 2]];
|
---|
274 | s += sq[pix1[ 3] - pix2[ 3]];
|
---|
275 | s += sq[pix1[ 4] - pix2[ 4]];
|
---|
276 | s += sq[pix1[ 5] - pix2[ 5]];
|
---|
277 | s += sq[pix1[ 6] - pix2[ 6]];
|
---|
278 | s += sq[pix1[ 7] - pix2[ 7]];
|
---|
279 | s += sq[pix1[ 8] - pix2[ 8]];
|
---|
280 | s += sq[pix1[ 9] - pix2[ 9]];
|
---|
281 | s += sq[pix1[10] - pix2[10]];
|
---|
282 | s += sq[pix1[11] - pix2[11]];
|
---|
283 | s += sq[pix1[12] - pix2[12]];
|
---|
284 | s += sq[pix1[13] - pix2[13]];
|
---|
285 | s += sq[pix1[14] - pix2[14]];
|
---|
286 | s += sq[pix1[15] - pix2[15]];
|
---|
287 |
|
---|
288 | pix1 += line_size;
|
---|
289 | pix2 += line_size;
|
---|
290 | }
|
---|
291 | return s;
|
---|
292 | }
|
---|
293 |
|
---|
294 |
|
---|
295 | #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
|
---|
/**
 * Wavelet-domain distortion between two square blocks.
 *
 * The pixel differences, scaled by 16, are written into a 32-int-wide scratch
 * buffer and transformed in place by ff_spatial_dwt(). The absolute value of
 * every subband coefficient is then weighted with the scale[] entry for its
 * wavelet type, block size, decomposition level and orientation, and the
 * weighted sum is returned divided by 512.
 *
 * @param v         unused
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size row stride in bytes, shared by both blocks
 * @param w         block width; the wrappers in this file pass 8, 16 or 32
 * @param h         block height; must equal w
 * @param type      first index into scale[]: 0 selects the 9/7 rows,
 *                  1 the 5/3 rows; also forwarded to ff_spatial_dwt()
 * @return the weighted sum of absolute coefficients, shifted right by 9
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* Check the preconditions before touching tmp[]: the subband walk below
     * assumes a square block, and tmp[] only has room for 32 rows of 32. */
    assert(w==h);
    assert(w<=32);

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            /* Multiply instead of "<<4": the difference can be negative and
             * left-shifting a negative value is undefined behaviour. */
            tmp[32*i+j] = (pix1[j] - pix2[j]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    for(level=0; level<dec_count; level++){
        /* orientation 0 is only visited at level 0 */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    /* named "coef" so it no longer shadows parameter v */
                    int coef= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += ABS(coef);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
|
---|
364 |
|
---|
/* w_c() for 8-pixel-wide blocks with type 1 (the 5/3 rows of its scale table). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 8;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
368 |
|
---|
/* w_c() for 8-pixel-wide blocks with type 0 (the 9/7 rows of its scale table). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 8;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
372 |
|
---|
/* w_c() for 16-pixel-wide blocks with type 1 (the 5/3 rows of its scale table). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 16;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
376 |
|
---|
/* w_c() for 16-pixel-wide blocks with type 0 (the 9/7 rows of its scale table). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 16;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
380 |
|
---|
/* w_c() for 32-pixel-wide blocks with type 1 (the 5/3 rows of its scale table).
 * Not static: has external linkage, unlike the 8- and 16-wide variants. */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
384 |
|
---|
/* w_c() for 32-pixel-wide blocks with type 0 (the 9/7 rows of its scale table).
 * Not static: has external linkage, unlike the 8- and 16-wide variants. */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int width = 32;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
|
---|
388 | #endif
|
---|
389 |
|
---|
390 | static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
|
---|
391 | {
|
---|
392 | int i;
|
---|
393 |
|
---|
394 | /* read the pixels */
|
---|
395 | for(i=0;i<8;i++) {
|
---|
396 | block[0] = pixels[0];
|
---|
397 | block[1] = pixels[1];
|
---|
398 | block[2] = pixels[2];
|
---|
399 | block[3] = pixels[3];
|
---|
400 | block[4] = pixels[4];
|
---|
401 | block[5] = pixels[5];
|
---|
402 | block[6] = pixels[6];
|
---|
403 | block[7] = pixels[7];
|
---|
404 | pixels += line_size;
|
---|
405 | block += 8;
|
---|
406 | }
|
---|
407 | }
|
---|
408 |
|
---|
409 | static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
---|
410 | const uint8_t *s2, int stride){
|
---|
411 | int i;
|
---|
412 |
|
---|
413 | /* read the pixels */
|
---|
414 | for(i=0;i<8;i++) {
|
---|
415 | block[0] = s1[0] - s2[0];
|
---|
416 | block[1] = s1[1] - s2[1];
|
---|
417 | block[2] = s1[2] - s2[2];
|
---|
418 | block[3] = s1[3] - s2[3];
|
---|
419 | block[4] = s1[4] - s2[4];
|
---|
420 | block[5] = s1[5] - s2[5];
|
---|
421 | block[6] = s1[6] - s2[6];
|
---|
422 | block[7] = s1[7] - s2[7];
|
---|
423 | s1 += stride;
|
---|
424 | s2 += stride;
|
---|
425 | block += 8;
|
---|
426 | }
|
---|
427 | }
|
---|
428 |
|
---|
429 |
|
---|
430 | static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
431 | int line_size)
|
---|
432 | {
|
---|
433 | int i;
|
---|
434 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
435 |
|
---|
436 | /* read the pixels */
|
---|
437 | for(i=0;i<8;i++) {
|
---|
438 | pixels[0] = cm[block[0]];
|
---|
439 | pixels[1] = cm[block[1]];
|
---|
440 | pixels[2] = cm[block[2]];
|
---|
441 | pixels[3] = cm[block[3]];
|
---|
442 | pixels[4] = cm[block[4]];
|
---|
443 | pixels[5] = cm[block[5]];
|
---|
444 | pixels[6] = cm[block[6]];
|
---|
445 | pixels[7] = cm[block[7]];
|
---|
446 |
|
---|
447 | pixels += line_size;
|
---|
448 | block += 8;
|
---|
449 | }
|
---|
450 | }
|
---|
451 |
|
---|
452 | static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
453 | int line_size)
|
---|
454 | {
|
---|
455 | int i;
|
---|
456 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
457 |
|
---|
458 | /* read the pixels */
|
---|
459 | for(i=0;i<4;i++) {
|
---|
460 | pixels[0] = cm[block[0]];
|
---|
461 | pixels[1] = cm[block[1]];
|
---|
462 | pixels[2] = cm[block[2]];
|
---|
463 | pixels[3] = cm[block[3]];
|
---|
464 |
|
---|
465 | pixels += line_size;
|
---|
466 | block += 8;
|
---|
467 | }
|
---|
468 | }
|
---|
469 |
|
---|
470 | static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
471 | int line_size)
|
---|
472 | {
|
---|
473 | int i;
|
---|
474 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
475 |
|
---|
476 | /* read the pixels */
|
---|
477 | for(i=0;i<2;i++) {
|
---|
478 | pixels[0] = cm[block[0]];
|
---|
479 | pixels[1] = cm[block[1]];
|
---|
480 |
|
---|
481 | pixels += line_size;
|
---|
482 | block += 8;
|
---|
483 | }
|
---|
484 | }
|
---|
485 |
|
---|
486 | static void put_signed_pixels_clamped_c(const DCTELEM *block,
|
---|
487 | uint8_t *restrict pixels,
|
---|
488 | int line_size)
|
---|
489 | {
|
---|
490 | int i, j;
|
---|
491 |
|
---|
492 | for (i = 0; i < 8; i++) {
|
---|
493 | for (j = 0; j < 8; j++) {
|
---|
494 | if (*block < -128)
|
---|
495 | *pixels = 0;
|
---|
496 | else if (*block > 127)
|
---|
497 | *pixels = 255;
|
---|
498 | else
|
---|
499 | *pixels = (uint8_t)(*block + 128);
|
---|
500 | block++;
|
---|
501 | pixels++;
|
---|
502 | }
|
---|
503 | pixels += (line_size - 8);
|
---|
504 | }
|
---|
505 | }
|
---|
506 |
|
---|
507 | static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
508 | int line_size)
|
---|
509 | {
|
---|
510 | int i;
|
---|
511 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
512 |
|
---|
513 | /* read the pixels */
|
---|
514 | for(i=0;i<8;i++) {
|
---|
515 | pixels[0] = cm[pixels[0] + block[0]];
|
---|
516 | pixels[1] = cm[pixels[1] + block[1]];
|
---|
517 | pixels[2] = cm[pixels[2] + block[2]];
|
---|
518 | pixels[3] = cm[pixels[3] + block[3]];
|
---|
519 | pixels[4] = cm[pixels[4] + block[4]];
|
---|
520 | pixels[5] = cm[pixels[5] + block[5]];
|
---|
521 | pixels[6] = cm[pixels[6] + block[6]];
|
---|
522 | pixels[7] = cm[pixels[7] + block[7]];
|
---|
523 | pixels += line_size;
|
---|
524 | block += 8;
|
---|
525 | }
|
---|
526 | }
|
---|
527 |
|
---|
528 | static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
529 | int line_size)
|
---|
530 | {
|
---|
531 | int i;
|
---|
532 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
533 |
|
---|
534 | /* read the pixels */
|
---|
535 | for(i=0;i<4;i++) {
|
---|
536 | pixels[0] = cm[pixels[0] + block[0]];
|
---|
537 | pixels[1] = cm[pixels[1] + block[1]];
|
---|
538 | pixels[2] = cm[pixels[2] + block[2]];
|
---|
539 | pixels[3] = cm[pixels[3] + block[3]];
|
---|
540 | pixels += line_size;
|
---|
541 | block += 8;
|
---|
542 | }
|
---|
543 | }
|
---|
544 |
|
---|
545 | static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
---|
546 | int line_size)
|
---|
547 | {
|
---|
548 | int i;
|
---|
549 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
550 |
|
---|
551 | /* read the pixels */
|
---|
552 | for(i=0;i<2;i++) {
|
---|
553 | pixels[0] = cm[pixels[0] + block[0]];
|
---|
554 | pixels[1] = cm[pixels[1] + block[1]];
|
---|
555 | pixels += line_size;
|
---|
556 | block += 8;
|
---|
557 | }
|
---|
558 | }
|
---|
559 |
|
---|
560 | static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
|
---|
561 | {
|
---|
562 | int i;
|
---|
563 | for(i=0;i<8;i++) {
|
---|
564 | pixels[0] += block[0];
|
---|
565 | pixels[1] += block[1];
|
---|
566 | pixels[2] += block[2];
|
---|
567 | pixels[3] += block[3];
|
---|
568 | pixels[4] += block[4];
|
---|
569 | pixels[5] += block[5];
|
---|
570 | pixels[6] += block[6];
|
---|
571 | pixels[7] += block[7];
|
---|
572 | pixels += line_size;
|
---|
573 | block += 8;
|
---|
574 | }
|
---|
575 | }
|
---|
576 |
|
---|
577 | static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
|
---|
578 | {
|
---|
579 | int i;
|
---|
580 | for(i=0;i<4;i++) {
|
---|
581 | pixels[0] += block[0];
|
---|
582 | pixels[1] += block[1];
|
---|
583 | pixels[2] += block[2];
|
---|
584 | pixels[3] += block[3];
|
---|
585 | pixels += line_size;
|
---|
586 | block += 4;
|
---|
587 | }
|
---|
588 | }
|
---|
589 |
|
---|
590 | #if 0
|
---|
591 |
|
---|
/*
 * PIXOP2(OPNAME, OP): generator for 8-pixel-wide copy/average functions that
 * handle one row as a single 64-bit word. This copy sits in the "#if 0"
 * branch, so it is not compiled.
 *
 * OP(dst, value) stores "value" into the lvalue "dst".
 *
 * Packed per-byte averages used below, for 64-bit words a and b:
 *   (a&b) + (((a^b)&0xFE..FE)>>1)   average rounded down  (no_rnd variants)
 *   (a|b) - (((a^b)&0xFE..FE)>>1)   average rounded up
 * The xy2 variants average four neighbours. The low 2 bits of every byte are
 * summed in l0/l1 together with the rounding constant (0x02 per byte, or 0x01
 * for no_rnd), the upper 6 bits are summed pre-shifted in h0/h1, and the two
 * parts are recombined. Each loop iteration emits two output rows and reuses
 * the sums of the row shared between them.
 *
 * NOTE(review): the first function is named OPNAME ## _pixels, but the
 * CALL_2X_PIXELS line for _pixels16_c refers to OPNAME ## _pixels_c - confirm
 * before ever enabling this branch.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels );\
    const uint64_t b= LD64(pixels+1);\
    /* l0/h0: low-bit and high-bit sums of the first row, rounding included */\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels );\
        uint64_t b= LD64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels );\
        b= LD64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels );\
    const uint64_t b= LD64(pixels+1);\
    /* same as _pixels_xy2_c but with rounding constant 0x01 instead of 0x02 */\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels );\
        uint64_t b= LD64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels );\
        b= LD64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* Averaging store for the 64-bit variant: replaces a with the per-byte
 * average of a and b, rounded up. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
|
---|
732 | #else // 64 bit variant
|
---|
733 |
|
---|
734 | #define PIXOP2(OPNAME, OP) \
|
---|
735 | static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
736 | int i;\
|
---|
737 | for(i=0; i<h; i++){\
|
---|
738 | OP(*((uint16_t*)(block )), LD16(pixels ));\
|
---|
739 | pixels+=line_size;\
|
---|
740 | block +=line_size;\
|
---|
741 | }\
|
---|
742 | }\
|
---|
743 | static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
744 | int i;\
|
---|
745 | for(i=0; i<h; i++){\
|
---|
746 | OP(*((uint32_t*)(block )), LD32(pixels ));\
|
---|
747 | pixels+=line_size;\
|
---|
748 | block +=line_size;\
|
---|
749 | }\
|
---|
750 | }\
|
---|
751 | static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
752 | int i;\
|
---|
753 | for(i=0; i<h; i++){\
|
---|
754 | OP(*((uint32_t*)(block )), LD32(pixels ));\
|
---|
755 | OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
|
---|
756 | pixels+=line_size;\
|
---|
757 | block +=line_size;\
|
---|
758 | }\
|
---|
759 | }\
|
---|
760 | static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
761 | OPNAME ## _pixels8_c(block, pixels, line_size, h);\
|
---|
762 | }\
|
---|
763 | \
|
---|
764 | static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
765 | int src_stride1, int src_stride2, int h){\
|
---|
766 | int i;\
|
---|
767 | for(i=0; i<h; i++){\
|
---|
768 | uint32_t a,b;\
|
---|
769 | a= LD32(&src1[i*src_stride1 ]);\
|
---|
770 | b= LD32(&src2[i*src_stride2 ]);\
|
---|
771 | OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
|
---|
772 | a= LD32(&src1[i*src_stride1+4]);\
|
---|
773 | b= LD32(&src2[i*src_stride2+4]);\
|
---|
774 | OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
|
---|
775 | }\
|
---|
776 | }\
|
---|
777 | \
|
---|
778 | static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
779 | int src_stride1, int src_stride2, int h){\
|
---|
780 | int i;\
|
---|
781 | for(i=0; i<h; i++){\
|
---|
782 | uint32_t a,b;\
|
---|
783 | a= LD32(&src1[i*src_stride1 ]);\
|
---|
784 | b= LD32(&src2[i*src_stride2 ]);\
|
---|
785 | OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
---|
786 | a= LD32(&src1[i*src_stride1+4]);\
|
---|
787 | b= LD32(&src2[i*src_stride2+4]);\
|
---|
788 | OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
|
---|
789 | }\
|
---|
790 | }\
|
---|
791 | \
|
---|
792 | static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
793 | int src_stride1, int src_stride2, int h){\
|
---|
794 | int i;\
|
---|
795 | for(i=0; i<h; i++){\
|
---|
796 | uint32_t a,b;\
|
---|
797 | a= LD32(&src1[i*src_stride1 ]);\
|
---|
798 | b= LD32(&src2[i*src_stride2 ]);\
|
---|
799 | OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
---|
800 | }\
|
---|
801 | }\
|
---|
802 | \
|
---|
803 | static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
804 | int src_stride1, int src_stride2, int h){\
|
---|
805 | int i;\
|
---|
806 | for(i=0; i<h; i++){\
|
---|
807 | uint32_t a,b;\
|
---|
808 | a= LD16(&src1[i*src_stride1 ]);\
|
---|
809 | b= LD16(&src2[i*src_stride2 ]);\
|
---|
810 | OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
---|
811 | }\
|
---|
812 | }\
|
---|
813 | \
|
---|
814 | static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
815 | int src_stride1, int src_stride2, int h){\
|
---|
816 | OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
|
---|
817 | OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
|
---|
818 | }\
|
---|
819 | \
|
---|
820 | static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
---|
821 | int src_stride1, int src_stride2, int h){\
|
---|
822 | OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
|
---|
823 | OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
|
---|
824 | }\
|
---|
825 | \
|
---|
826 | static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
827 | OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
---|
828 | }\
|
---|
829 | \
|
---|
830 | static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
831 | OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
---|
832 | }\
|
---|
833 | \
|
---|
834 | static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
835 | OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
---|
836 | }\
|
---|
837 | \
|
---|
838 | static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
839 | OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
---|
840 | }\
|
---|
841 | \
|
---|
842 | static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
---|
843 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
---|
844 | int i;\
|
---|
845 | for(i=0; i<h; i++){\
|
---|
846 | uint32_t a, b, c, d, l0, l1, h0, h1;\
|
---|
847 | a= LD32(&src1[i*src_stride1]);\
|
---|
848 | b= LD32(&src2[i*src_stride2]);\
|
---|
849 | c= LD32(&src3[i*src_stride3]);\
|
---|
850 | d= LD32(&src4[i*src_stride4]);\
|
---|
851 | l0= (a&0x03030303UL)\
|
---|
852 | + (b&0x03030303UL)\
|
---|
853 | + 0x02020202UL;\
|
---|
854 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
855 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
856 | l1= (c&0x03030303UL)\
|
---|
857 | + (d&0x03030303UL);\
|
---|
858 | h1= ((c&0xFCFCFCFCUL)>>2)\
|
---|
859 | + ((d&0xFCFCFCFCUL)>>2);\
|
---|
860 | OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
861 | a= LD32(&src1[i*src_stride1+4]);\
|
---|
862 | b= LD32(&src2[i*src_stride2+4]);\
|
---|
863 | c= LD32(&src3[i*src_stride3+4]);\
|
---|
864 | d= LD32(&src4[i*src_stride4+4]);\
|
---|
865 | l0= (a&0x03030303UL)\
|
---|
866 | + (b&0x03030303UL)\
|
---|
867 | + 0x02020202UL;\
|
---|
868 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
869 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
870 | l1= (c&0x03030303UL)\
|
---|
871 | + (d&0x03030303UL);\
|
---|
872 | h1= ((c&0xFCFCFCFCUL)>>2)\
|
---|
873 | + ((d&0xFCFCFCFCUL)>>2);\
|
---|
874 | OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
875 | }\
|
---|
876 | }\
|
---|
877 | \
|
---|
878 | static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
879 | OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
---|
880 | }\
|
---|
881 | \
|
---|
882 | static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
883 | OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
---|
884 | }\
|
---|
885 | \
|
---|
886 | static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
887 | OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
---|
888 | }\
|
---|
889 | \
|
---|
890 | static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
---|
891 | OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
---|
892 | }\
|
---|
893 | \
|
---|
894 | static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
---|
895 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
---|
896 | int i;\
|
---|
897 | for(i=0; i<h; i++){\
|
---|
898 | uint32_t a, b, c, d, l0, l1, h0, h1;\
|
---|
899 | a= LD32(&src1[i*src_stride1]);\
|
---|
900 | b= LD32(&src2[i*src_stride2]);\
|
---|
901 | c= LD32(&src3[i*src_stride3]);\
|
---|
902 | d= LD32(&src4[i*src_stride4]);\
|
---|
903 | l0= (a&0x03030303UL)\
|
---|
904 | + (b&0x03030303UL)\
|
---|
905 | + 0x01010101UL;\
|
---|
906 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
907 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
908 | l1= (c&0x03030303UL)\
|
---|
909 | + (d&0x03030303UL);\
|
---|
910 | h1= ((c&0xFCFCFCFCUL)>>2)\
|
---|
911 | + ((d&0xFCFCFCFCUL)>>2);\
|
---|
912 | OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
913 | a= LD32(&src1[i*src_stride1+4]);\
|
---|
914 | b= LD32(&src2[i*src_stride2+4]);\
|
---|
915 | c= LD32(&src3[i*src_stride3+4]);\
|
---|
916 | d= LD32(&src4[i*src_stride4+4]);\
|
---|
917 | l0= (a&0x03030303UL)\
|
---|
918 | + (b&0x03030303UL)\
|
---|
919 | + 0x01010101UL;\
|
---|
920 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
921 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
922 | l1= (c&0x03030303UL)\
|
---|
923 | + (d&0x03030303UL);\
|
---|
924 | h1= ((c&0xFCFCFCFCUL)>>2)\
|
---|
925 | + ((d&0xFCFCFCFCUL)>>2);\
|
---|
926 | OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
927 | }\
|
---|
928 | }\
|
---|
929 | static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
---|
930 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
---|
931 | OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
---|
932 | OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
---|
933 | }\
|
---|
934 | static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
---|
935 | int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
---|
936 | OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
---|
937 | OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
---|
938 | }\
|
---|
939 | \
|
---|
940 | static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
---|
941 | {\
|
---|
942 | int i, a0, b0, a1, b1;\
|
---|
943 | a0= pixels[0];\
|
---|
944 | b0= pixels[1] + 2;\
|
---|
945 | a0 += b0;\
|
---|
946 | b0 += pixels[2];\
|
---|
947 | \
|
---|
948 | pixels+=line_size;\
|
---|
949 | for(i=0; i<h; i+=2){\
|
---|
950 | a1= pixels[0];\
|
---|
951 | b1= pixels[1];\
|
---|
952 | a1 += b1;\
|
---|
953 | b1 += pixels[2];\
|
---|
954 | \
|
---|
955 | block[0]= (a1+a0)>>2; /* FIXME non put */\
|
---|
956 | block[1]= (b1+b0)>>2;\
|
---|
957 | \
|
---|
958 | pixels+=line_size;\
|
---|
959 | block +=line_size;\
|
---|
960 | \
|
---|
961 | a0= pixels[0];\
|
---|
962 | b0= pixels[1] + 2;\
|
---|
963 | a0 += b0;\
|
---|
964 | b0 += pixels[2];\
|
---|
965 | \
|
---|
966 | block[0]= (a1+a0)>>2;\
|
---|
967 | block[1]= (b1+b0)>>2;\
|
---|
968 | pixels+=line_size;\
|
---|
969 | block +=line_size;\
|
---|
970 | }\
|
---|
971 | }\
|
---|
972 | \
|
---|
973 | static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
---|
974 | {\
|
---|
975 | int i;\
|
---|
976 | const uint32_t a= LD32(pixels );\
|
---|
977 | const uint32_t b= LD32(pixels+1);\
|
---|
978 | uint32_t l0= (a&0x03030303UL)\
|
---|
979 | + (b&0x03030303UL)\
|
---|
980 | + 0x02020202UL;\
|
---|
981 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
982 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
983 | uint32_t l1,h1;\
|
---|
984 | \
|
---|
985 | pixels+=line_size;\
|
---|
986 | for(i=0; i<h; i+=2){\
|
---|
987 | uint32_t a= LD32(pixels );\
|
---|
988 | uint32_t b= LD32(pixels+1);\
|
---|
989 | l1= (a&0x03030303UL)\
|
---|
990 | + (b&0x03030303UL);\
|
---|
991 | h1= ((a&0xFCFCFCFCUL)>>2)\
|
---|
992 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
993 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
994 | pixels+=line_size;\
|
---|
995 | block +=line_size;\
|
---|
996 | a= LD32(pixels );\
|
---|
997 | b= LD32(pixels+1);\
|
---|
998 | l0= (a&0x03030303UL)\
|
---|
999 | + (b&0x03030303UL)\
|
---|
1000 | + 0x02020202UL;\
|
---|
1001 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1002 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1003 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
1004 | pixels+=line_size;\
|
---|
1005 | block +=line_size;\
|
---|
1006 | }\
|
---|
1007 | }\
|
---|
1008 | \
|
---|
1009 | static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
---|
1010 | {\
|
---|
1011 | int j;\
|
---|
1012 | for(j=0; j<2; j++){\
|
---|
1013 | int i;\
|
---|
1014 | const uint32_t a= LD32(pixels );\
|
---|
1015 | const uint32_t b= LD32(pixels+1);\
|
---|
1016 | uint32_t l0= (a&0x03030303UL)\
|
---|
1017 | + (b&0x03030303UL)\
|
---|
1018 | + 0x02020202UL;\
|
---|
1019 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1020 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1021 | uint32_t l1,h1;\
|
---|
1022 | \
|
---|
1023 | pixels+=line_size;\
|
---|
1024 | for(i=0; i<h; i+=2){\
|
---|
1025 | uint32_t a= LD32(pixels );\
|
---|
1026 | uint32_t b= LD32(pixels+1);\
|
---|
1027 | l1= (a&0x03030303UL)\
|
---|
1028 | + (b&0x03030303UL);\
|
---|
1029 | h1= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1030 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1031 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
1032 | pixels+=line_size;\
|
---|
1033 | block +=line_size;\
|
---|
1034 | a= LD32(pixels );\
|
---|
1035 | b= LD32(pixels+1);\
|
---|
1036 | l0= (a&0x03030303UL)\
|
---|
1037 | + (b&0x03030303UL)\
|
---|
1038 | + 0x02020202UL;\
|
---|
1039 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1040 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1041 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
1042 | pixels+=line_size;\
|
---|
1043 | block +=line_size;\
|
---|
1044 | }\
|
---|
1045 | pixels+=4-line_size*(h+1);\
|
---|
1046 | block +=4-line_size*h;\
|
---|
1047 | }\
|
---|
1048 | }\
|
---|
1049 | \
|
---|
1050 | static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
---|
1051 | {\
|
---|
1052 | int j;\
|
---|
1053 | for(j=0; j<2; j++){\
|
---|
1054 | int i;\
|
---|
1055 | const uint32_t a= LD32(pixels );\
|
---|
1056 | const uint32_t b= LD32(pixels+1);\
|
---|
1057 | uint32_t l0= (a&0x03030303UL)\
|
---|
1058 | + (b&0x03030303UL)\
|
---|
1059 | + 0x01010101UL;\
|
---|
1060 | uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1061 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1062 | uint32_t l1,h1;\
|
---|
1063 | \
|
---|
1064 | pixels+=line_size;\
|
---|
1065 | for(i=0; i<h; i+=2){\
|
---|
1066 | uint32_t a= LD32(pixels );\
|
---|
1067 | uint32_t b= LD32(pixels+1);\
|
---|
1068 | l1= (a&0x03030303UL)\
|
---|
1069 | + (b&0x03030303UL);\
|
---|
1070 | h1= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1071 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1072 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
1073 | pixels+=line_size;\
|
---|
1074 | block +=line_size;\
|
---|
1075 | a= LD32(pixels );\
|
---|
1076 | b= LD32(pixels+1);\
|
---|
1077 | l0= (a&0x03030303UL)\
|
---|
1078 | + (b&0x03030303UL)\
|
---|
1079 | + 0x01010101UL;\
|
---|
1080 | h0= ((a&0xFCFCFCFCUL)>>2)\
|
---|
1081 | + ((b&0xFCFCFCFCUL)>>2);\
|
---|
1082 | OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
---|
1083 | pixels+=line_size;\
|
---|
1084 | block +=line_size;\
|
---|
1085 | }\
|
---|
1086 | pixels+=4-line_size*(h+1);\
|
---|
1087 | block +=4-line_size*h;\
|
---|
1088 | }\
|
---|
1089 | }\
|
---|
1090 | \
|
---|
1091 | CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
|
---|
1092 | CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
|
---|
1093 | CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
|
---|
1094 | CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
|
---|
1095 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
|
---|
1096 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
|
---|
1097 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
|
---|
1098 | CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
|
---|
1099 |
|
---|
1100 | #define op_avg(a, b) a = rnd_avg32(a, b)
|
---|
1101 | #endif
|
---|
1102 | #define op_put(a, b) a = b
|
---|
1103 |
|
---|
1104 | PIXOP2(avg, op_avg)
|
---|
1105 | PIXOP2(put, op_put)
|
---|
1106 | #undef op_avg
|
---|
1107 | #undef op_put
|
---|
1108 |
|
---|
/**
 * Rounded integer averages of two and of four values.
 * Every argument is parenthesised so that expression arguments
 * (for example avg2(x & 1, y)) group the way the caller wrote them.
 * Each argument is still evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
|
---|
1111 |
|
---|
/**
 * Single-stride front end for put_no_rnd_pixels16_l2().
 * The destination and both sources are walked with the same stride.
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    /* dst_stride, src_stride1 and src_stride2 are all the caller's stride */
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
|
---|
1115 |
|
---|
/**
 * Single-stride front end for put_no_rnd_pixels8_l2().
 * The destination and both sources are walked with the same stride.
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    /* dst_stride, src_stride1 and src_stride2 are all the caller's stride */
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
|
---|
1119 |
|
---|
/**
 * Bilinear interpolation of an 8 pixel wide, h rows high block.
 *
 * Each output pixel is a weighted sum of the source pixel, its right
 * neighbour, the pixel below and the pixel below-right. The four weights
 * are products of (16-x16 | x16) and (16-y16 | y16), so they add up to 256;
 * the sum plus rounder is shifted right by 8.
 *
 * @param dst     destination, advanced by stride per row
 * @param src     source; columns 0..8 of rows 0..h are read
 * @param stride  row pitch shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbours
 * @param y16     vertical weight of the lower neighbours
 * @param rounder constant added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = (     x16) * (16 - y16);
    const int w_bl = (16 - x16) * (     y16);
    const int w_br = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1] +
                        w_bl * src[stride + col] + w_br * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
|
---|
1142 |
|
---|
/**
 * Motion compensation with a per-pixel source position, for an 8 pixel
 * wide block of h rows.
 *
 * For every output pixel a source position (vx, vy) is tracked; it advances
 * by (dxx, dyx) per column and its row start (ox, oy) advances by (dxy, dyy)
 * per row. After dropping the low 16 bits, the lowest `shift` bits are the
 * interpolation fraction and the rest is the integer sample position.
 *
 * A position whose integer part lies in [0, width-2] x [0, height-2] is
 * interpolated bilinearly. If only one axis is in that range, the other is
 * clamped with clip() and interpolation runs along the in-range axis only.
 * If neither is in range, the clamped sample is copied unchanged.
 *
 * @param r      rounding constant added before the final >> (2*shift)
 * @param width  source width; decremented on entry and used as the clamp limit
 * @param height source height; decremented on entry and used as the clamp limit
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift;
    int row;

    width--;
    height--;

    for (row = 0; row < h; row++) {
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int x_inside, y_inside, index;

            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare rejects negative positions as well */
            x_inside = (unsigned)src_x < width;
            y_inside = (unsigned)src_y < height;

            if (x_inside && y_inside) {
                index = src_x + src_y * stride;
                dst[row * stride + col] =
                    ( ( src[index             ] * (s - frac_x)
                      + src[index          + 1] *      frac_x  ) * (s - frac_y)
                    + ( src[index + stride    ] * (s - frac_x)
                      + src[index + stride + 1] *      frac_x  ) *      frac_y
                    + r) >> (shift * 2);
            } else if (x_inside) {
                /* row out of range: clamp it, interpolate horizontally only */
                index = src_x + clip(src_y, 0, height) * stride;
                dst[row * stride + col] =
                    ( ( src[index    ] * (s - frac_x)
                      + src[index + 1] *      frac_x  ) * s
                    + r) >> (shift * 2);
            } else if (y_inside) {
                /* column out of range: clamp it, interpolate vertically only */
                index = clip(src_x, 0, width) + src_y * stride;
                dst[row * stride + col] =
                    ( ( src[index         ] * (s - frac_y)
                      + src[index + stride] *      frac_y  ) * s
                    + r) >> (shift * 2);
            } else {
                /* both out of range: copy the clamped sample */
                index = clip(src_x, 0, width) + clip(src_y, 0, height) * stride;
                dst[row * stride + col] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
|
---|
1200 |
|
---|
/**
 * Full-pel "put": forwards to the put_pixelsN_c routine matching width.
 * Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
|
---|
1209 |
|
---|
/**
 * Horizontal interpolation, weights 2:1 (current pixel : right neighbour).
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
|
---|
1220 |
|
---|
/**
 * Horizontal interpolation, weights 1:2 (current pixel : right neighbour).
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
|
---|
1231 |
|
---|
/**
 * Vertical interpolation, weights 2:1 (current pixel : pixel one row below).
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
|
---|
1242 |
|
---|
/**
 * Two-dimensional interpolation with weights 4,3 / 3,2 on the 2x2
 * neighbourhood (top-left, top-right / bottom-left, bottom-right).
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
|
---|
1253 |
|
---|
/**
 * Two-dimensional interpolation with weights 3,2 / 4,3 on the 2x2
 * neighbourhood (top-left, top-right / bottom-left, bottom-right).
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 2*src[col+1]
                          + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
|
---|
1264 |
|
---|
/**
 * Vertical interpolation, weights 1:2 (current pixel : pixel one row below).
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
|
---|
1275 |
|
---|
/**
 * Two-dimensional interpolation with weights 3,4 / 2,3 on the 2x2
 * neighbourhood (top-left, top-right / bottom-left, bottom-right).
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 4*src[col+1]
                          + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
|
---|
1286 |
|
---|
/**
 * Two-dimensional interpolation with weights 2,3 / 3,4 on the 2x2
 * neighbourhood (top-left, top-right / bottom-left, bottom-right).
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
|
---|
1297 |
|
---|
/**
 * Full-pel "avg": forwards to the avg_pixelsN_c routine matching width.
 * Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
|
---|
1306 |
|
---|
/**
 * Horizontal interpolation, weights 2:1 (current pixel : right neighbour),
 * then a rounded average with the value already stored in dst.
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1317 |
|
---|
/**
 * Horizontal interpolation, weights 1:2 (current pixel : right neighbour),
 * then a rounded average with the value already stored in dst.
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1328 |
|
---|
/**
 * Vertical interpolation, weights 2:1 (current pixel : pixel one row below),
 * then a rounded average with the value already stored in dst.
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1339 |
|
---|
/**
 * Two-dimensional interpolation with weights 4,3 / 3,2 on the 2x2
 * neighbourhood, then a rounded average with the value already in dst.
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(4*src[col]        + 3*src[col+1]
                                    + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1350 |
|
---|
/**
 * Two-dimensional interpolation with weights 3,2 / 4,3 on the 2x2
 * neighbourhood, then a rounded average with the value already in dst.
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col]        + 2*src[col+1]
                                    + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1361 |
|
---|
/**
 * Vertical interpolation, weights 1:2 (current pixel : pixel one row below),
 * then a rounded average with the value already stored in dst.
 * Multiplying by 683 and shifting right by 11 approximates division by 3.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1372 |
|
---|
/**
 * Two-dimensional interpolation with weights 3,4 / 2,3 on the 2x2
 * neighbourhood, then a rounded average with the value already in dst.
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col]        + 4*src[col+1]
                                    + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
1383 |
|
---|
/**
 * Two-dimensional interpolation with weights 2,3 / 3,4 on the 2x2
 * neighbourhood, then a rounded average with the value already in dst.
 * Multiplying by 2731 and shifting right by 15 approximates division by 12.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(2*src[col]        + 3*src[col+1]
                                    + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
|
---|
#if 0
/*
 * Disabled helper: TPEL_WIDTH(width) would emit fixed-width wrappers
 * put_tpel_pixels<width>_mcXY_c() around the generic put_tpel_pixels_mcXY_c()
 * routines. Each wrapper body is a plain call; a leading "void" would turn
 * it into a malformed declaration and break the build once this is enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
|
---|
1415 |
|
---|
/**
 * Emits one chroma interpolator OPNAME##h264_chroma_mc<W>_c producing W
 * pixels per row for h rows.
 *
 * x and y must be in 0..7 (asserted). The four weights A..D are products of
 * (8-x | x) and (8-y | y), so they add up to 64. The raw weighted sum of the
 * 2x2 neighbourhood is handed to OP, which is responsible for rounding,
 * scaling and storing into dst.
 */
#define H264_CHROMA_MC_FUNC(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(row=0; row<h; row++){\
        for(col=0; col<W; col++){\
            OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}

/** Emits the 2, 4 and 8 pixel wide chroma interpolators for one OP. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 2)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 4)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 8)

/* b is the weighted sum scaled by 64: round and shift it back to pixel range.
   op_avg additionally takes the rounded mean with the pixel already in a. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
|
---|
1486 |
|
---|
/**
 * Copies h rows from src to dst, one LD16/ST16 transfer per row.
 * dst advances by dstStride and src by srcStride after every row.
 * NOTE(review): LD16/ST16 are declared outside this chunk; presumably
 * 16-bit load/store helpers - confirm.
 */
static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST16(dst, LD16(src));
        dst += dstStride;
        src += srcStride;
    }
}
|
---|
1497 |
|
---|
/**
 * Copies h rows from src to dst, one LD32/ST32 transfer per row.
 * dst advances by dstStride and src by srcStride after every row.
 * NOTE(review): LD32/ST32 are declared outside this chunk; presumably
 * 32-bit load/store helpers - confirm.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
|
---|
1508 |
|
---|
/**
 * Copies h rows from src to dst using two LD32/ST32 transfers per row
 * (byte offsets 0 and 4).
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left, off;

    for (rows_left = h; rows_left > 0; rows_left--) {
        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst += dstStride;
        src += srcStride;
    }
}
|
---|
1520 |
|
---|
/**
 * Copies h rows from src to dst using four LD32/ST32 transfers per row
 * (byte offsets 0, 4, 8 and 12).
 * dst advances by dstStride and src by srcStride after every row.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left, off;

    for (rows_left = h; rows_left > 0; rows_left--) {
        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst += dstStride;
        src += srcStride;
    }
}
|
---|
1534 |
|
---|
/**
 * Copy h rows of 17 bytes from src to dst: four LD32/ST32 pairs
 * (byte offsets 0, 4, 8 and 12) followed by a plain copy of byte 16.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride added to dst after every row
 * @param srcStride added to src after every row
 * @param h         number of rows; nothing is copied when h <= 0
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        ST32(dst,      LD32(src));
        ST32(dst + 4,  LD32(src + 4));
        ST32(dst + 8,  LD32(src + 8));
        ST32(dst + 12, LD32(src + 12));
        dst[16] = src[16]; /* the odd 17th byte */
    }
}
|
---|
1549 |
|
---|
/**
 * Copy h rows of 9 bytes from src to dst: two LD32/ST32 pairs
 * (byte offsets 0 and 4) followed by a plain copy of byte 8.
 *
 * @param dst       destination of the first row
 * @param src       source of the first row
 * @param dstStride added to dst after every row
 * @param srcStride added to src after every row
 * @param h         number of rows; nothing is copied when h <= 0
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        ST32(dst,     LD32(src));
        ST32(dst + 4, LD32(src + 4));
        dst[8] = src[8]; /* the odd 9th byte */
    }
}
|
---|
1562 |
|
---|
1563 |
|
---|
/**
 * QPEL_MC - generate one family of C MPEG-4 qpel functions for 8x8 and 16x16
 * blocks.
 *
 * @param r      not referenced anywhere in the macro body
 * @param OPNAME prefix pasted onto every generated function name and onto the
 *               pixelsN_c / pixelsN_l2 / pixelsN_l4 helpers that write to dst
 * @param RND    infix pasted after "put" to select the helpers used for the
 *               intermediate passes into local scratch buffers
 * @param OP     store operator, used as OP(lvalue, sum); it may use the local
 *               pointer `cm` (cropTbl + MAX_NEG_CROP) that each lowpass
 *               function declares
 *
 * Lowpass filters: every output sample is
 *     (a+b)*20 - (c+d)*6 + (e+f)*3 - (g+h)
 * over four pairs of samples placed symmetrically around the output position.
 * Near the block edges the indices fold back inside the 9 (resp. 17) input
 * samples instead of reading outside them.  The *_h_* variants filter along a
 * row and take the row count h; the *_v_* variants filter along a column and
 * always process 8 (resp. 16) columns.
 *
 * mcXY functions: each combines the plain source block, the horizontally
 * filtered block (halfH), the vertically filtered block (halfV) and the block
 * filtered in both directions (halfHV) as spelled out in its body.  The
 * ff_..._old_c variants blend four planes with pixelsN_l4 (or halfV/halfHV
 * with pixelsN_l2); the static variants first blend the source into halfH in
 * place and then use pixelsN_l2 or the vertical filter.
 *
 * Scratch buffers: `full` is a local copy of the source with row stride 16
 * (9 rows of 9 bytes) or 24 (17 rows of 17 bytes); halfH holds 9x8 or 17x16
 * samples, halfV/halfHV/half hold 8x8 or 16x16.
 *
 * Only block comments are used inside the macro, and every line of it ends
 * in a backslash.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
/* 8-wide horizontal lowpass over h rows; each row reads src[0..8]. */\
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)\
{\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row;\
\
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {\
        OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4]));\
        OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5]));\
        OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6]));\
        OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7]));\
        OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8]));\
        OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8]));\
        OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7]));\
        OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6]));\
    }\
}\
\
/* 8-tall vertical lowpass over 8 columns; each column reads 9 source rows. */\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col;\
\
    for (col = 0; col < 8; col++, dst++, src++) {\
        const int s0 = src[0 * srcStride];\
        const int s1 = src[1 * srcStride];\
        const int s2 = src[2 * srcStride];\
        const int s3 = src[3 * srcStride];\
        const int s4 = src[4 * srcStride];\
        const int s5 = src[5 * srcStride];\
        const int s6 = src[6 * srcStride];\
        const int s7 = src[7 * srcStride];\
        const int s8 = src[8 * srcStride];\
\
        OP(dst[0 * dstStride], (s0 + s1) * 20 - (s0 + s2) * 6 + (s1 + s3) * 3 - (s2 + s4));\
        OP(dst[1 * dstStride], (s1 + s2) * 20 - (s0 + s3) * 6 + (s0 + s4) * 3 - (s1 + s5));\
        OP(dst[2 * dstStride], (s2 + s3) * 20 - (s1 + s4) * 6 + (s0 + s5) * 3 - (s0 + s6));\
        OP(dst[3 * dstStride], (s3 + s4) * 20 - (s2 + s5) * 6 + (s1 + s6) * 3 - (s0 + s7));\
        OP(dst[4 * dstStride], (s4 + s5) * 20 - (s3 + s6) * 6 + (s2 + s7) * 3 - (s1 + s8));\
        OP(dst[5 * dstStride], (s5 + s6) * 20 - (s4 + s7) * 6 + (s3 + s8) * 3 - (s2 + s8));\
        OP(dst[6 * dstStride], (s6 + s7) * 20 - (s5 + s8) * 6 + (s4 + s8) * 3 - (s3 + s7));\
        OP(dst[7 * dstStride], (s7 + s8) * 20 - (s6 + s8) * 6 + (s5 + s7) * 3 - (s4 + s6));\
    }\
}\
\
/* 16-wide horizontal lowpass over h rows; each row reads src[0..16]. */\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)\
{\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row;\
\
    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {\
        OP(dst[ 0], (src[ 0] + src[ 1]) * 20 - (src[ 0] + src[ 2]) * 6 + (src[ 1] + src[ 3]) * 3 - (src[ 2] + src[ 4]));\
        OP(dst[ 1], (src[ 1] + src[ 2]) * 20 - (src[ 0] + src[ 3]) * 6 + (src[ 0] + src[ 4]) * 3 - (src[ 1] + src[ 5]));\
        OP(dst[ 2], (src[ 2] + src[ 3]) * 20 - (src[ 1] + src[ 4]) * 6 + (src[ 0] + src[ 5]) * 3 - (src[ 0] + src[ 6]));\
        OP(dst[ 3], (src[ 3] + src[ 4]) * 20 - (src[ 2] + src[ 5]) * 6 + (src[ 1] + src[ 6]) * 3 - (src[ 0] + src[ 7]));\
        OP(dst[ 4], (src[ 4] + src[ 5]) * 20 - (src[ 3] + src[ 6]) * 6 + (src[ 2] + src[ 7]) * 3 - (src[ 1] + src[ 8]));\
        OP(dst[ 5], (src[ 5] + src[ 6]) * 20 - (src[ 4] + src[ 7]) * 6 + (src[ 3] + src[ 8]) * 3 - (src[ 2] + src[ 9]));\
        OP(dst[ 6], (src[ 6] + src[ 7]) * 20 - (src[ 5] + src[ 8]) * 6 + (src[ 4] + src[ 9]) * 3 - (src[ 3] + src[10]));\
        OP(dst[ 7], (src[ 7] + src[ 8]) * 20 - (src[ 6] + src[ 9]) * 6 + (src[ 5] + src[10]) * 3 - (src[ 4] + src[11]));\
        OP(dst[ 8], (src[ 8] + src[ 9]) * 20 - (src[ 7] + src[10]) * 6 + (src[ 6] + src[11]) * 3 - (src[ 5] + src[12]));\
        OP(dst[ 9], (src[ 9] + src[10]) * 20 - (src[ 8] + src[11]) * 6 + (src[ 7] + src[12]) * 3 - (src[ 6] + src[13]));\
        OP(dst[10], (src[10] + src[11]) * 20 - (src[ 9] + src[12]) * 6 + (src[ 8] + src[13]) * 3 - (src[ 7] + src[14]));\
        OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[ 9] + src[14]) * 3 - (src[ 8] + src[15]));\
        OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[ 9] + src[16]));\
        OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16]));\
        OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15]));\
        OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14]));\
    }\
}\
\
/* 16-tall vertical lowpass over 16 columns; each column reads 17 source rows. */\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col;\
\
    for (col = 0; col < 16; col++, dst++, src++) {\
        const int s0  = src[ 0 * srcStride];\
        const int s1  = src[ 1 * srcStride];\
        const int s2  = src[ 2 * srcStride];\
        const int s3  = src[ 3 * srcStride];\
        const int s4  = src[ 4 * srcStride];\
        const int s5  = src[ 5 * srcStride];\
        const int s6  = src[ 6 * srcStride];\
        const int s7  = src[ 7 * srcStride];\
        const int s8  = src[ 8 * srcStride];\
        const int s9  = src[ 9 * srcStride];\
        const int s10 = src[10 * srcStride];\
        const int s11 = src[11 * srcStride];\
        const int s12 = src[12 * srcStride];\
        const int s13 = src[13 * srcStride];\
        const int s14 = src[14 * srcStride];\
        const int s15 = src[15 * srcStride];\
        const int s16 = src[16 * srcStride];\
\
        OP(dst[ 0 * dstStride], (s0  + s1 ) * 20 - (s0  + s2 ) * 6 + (s1  + s3 ) * 3 - (s2  + s4 ));\
        OP(dst[ 1 * dstStride], (s1  + s2 ) * 20 - (s0  + s3 ) * 6 + (s0  + s4 ) * 3 - (s1  + s5 ));\
        OP(dst[ 2 * dstStride], (s2  + s3 ) * 20 - (s1  + s4 ) * 6 + (s0  + s5 ) * 3 - (s0  + s6 ));\
        OP(dst[ 3 * dstStride], (s3  + s4 ) * 20 - (s2  + s5 ) * 6 + (s1  + s6 ) * 3 - (s0  + s7 ));\
        OP(dst[ 4 * dstStride], (s4  + s5 ) * 20 - (s3  + s6 ) * 6 + (s2  + s7 ) * 3 - (s1  + s8 ));\
        OP(dst[ 5 * dstStride], (s5  + s6 ) * 20 - (s4  + s7 ) * 6 + (s3  + s8 ) * 3 - (s2  + s9 ));\
        OP(dst[ 6 * dstStride], (s6  + s7 ) * 20 - (s5  + s8 ) * 6 + (s4  + s9 ) * 3 - (s3  + s10));\
        OP(dst[ 7 * dstStride], (s7  + s8 ) * 20 - (s6  + s9 ) * 6 + (s5  + s10) * 3 - (s4  + s11));\
        OP(dst[ 8 * dstStride], (s8  + s9 ) * 20 - (s7  + s10) * 6 + (s6  + s11) * 3 - (s5  + s12));\
        OP(dst[ 9 * dstStride], (s9  + s10) * 20 - (s8  + s11) * 6 + (s7  + s12) * 3 - (s6  + s13));\
        OP(dst[10 * dstStride], (s10 + s11) * 20 - (s9  + s12) * 6 + (s8  + s13) * 3 - (s7  + s14));\
        OP(dst[11 * dstStride], (s11 + s12) * 20 - (s10 + s13) * 6 + (s9  + s14) * 3 - (s8  + s15));\
        OP(dst[12 * dstStride], (s12 + s13) * 20 - (s11 + s14) * 6 + (s10 + s15) * 3 - (s9  + s16));\
        OP(dst[13 * dstStride], (s13 + s14) * 20 - (s12 + s15) * 6 + (s11 + s16) * 3 - (s10 + s16));\
        OP(dst[14 * dstStride], (s14 + s15) * 20 - (s13 + s16) * 6 + (s12 + s16) * 3 - (s11 + s15));\
        OP(dst[15 * dstStride], (s15 + s16) * 20 - (s14 + s16) * 6 + (s13 + s15) * 3 - (s12 + s14));\
    }\
}\
\
/* ---------------------------- 8x8 blocks ---------------------------- */\
\
/* mc00: plain 8x8 block through pixels8_c. */\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
/* mc10: blend of src and its horizontal lowpass. */\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t half[64];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
/* mc20: horizontal lowpass written straight to dst. */\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
/* mc30: blend of src+1 and the horizontal lowpass of src. */\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t half[64];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src + 1, half, stride, stride, 8, 8);\
}\
\
/* mc01: blend of the copied source and its vertical lowpass. */\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t half[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
/* mc02: vertical lowpass of the copied source written straight to dst. */\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
/* mc03: blend of the copied source one row down and the vertical lowpass. */\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t half[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full + 16, half, stride, 16, 8, 8);\
}\
\
/* mc11 (old): four-way blend of full, halfH, halfV and halfHV. */\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
\
/* mc11: halfH is blended with full in place, then with its vertical lowpass. */\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
\
/* mc31 (old): as mc11 (old) but full and halfV are taken one column right. */\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full + 1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
\
/* mc31: as mc11 but halfH is blended with full+1. */\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
\
/* mc13 (old): as mc11 (old) but full and halfH are taken one row down. */\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full + 16, halfH + 8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
\
/* mc13: as mc11 but the final blend uses halfH one row down. */\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH + 8, halfHV, stride, 8, 8, 8);\
}\
\
/* mc33 (old): full one row down and one column right, halfH one row down. */\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full + 17, halfH + 8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
\
/* mc33: halfH blended with full+1, final blend uses halfH one row down. */\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full + 1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH + 8, halfHV, stride, 8, 8, 8);\
}\
\
/* mc21: blend of halfH and halfHV; no source copy is made. */\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
\
/* mc23: as mc21 but halfH is taken one row down. */\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH + 8, halfHV, stride, 8, 8, 8);\
}\
\
/* mc12 (old): blend of halfV and halfHV. */\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
\
/* mc12: halfH blended with full in place, then vertical lowpass into dst. */\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
/* mc32 (old): as mc12 (old) but halfV is computed from full+1. */\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
\
/* mc32: as mc12 but halfH is blended with full+1. */\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[16 * 9];\
    uint8_t halfH[72];\
\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full + 1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
/* mc22: horizontal lowpass followed by vertical lowpass into dst. */\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[72];\
\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
/* --------------------------- 16x16 blocks --------------------------- */\
\
/* mc00: plain 16x16 block through pixels16_c. */\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
/* mc10: blend of src and its horizontal lowpass. */\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t half[256];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
/* mc20: horizontal lowpass written straight to dst. */\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
/* mc30: blend of src+1 and the horizontal lowpass of src. */\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t half[256];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src + 1, half, stride, stride, 16, 16);\
}\
\
/* mc01: blend of the copied source and its vertical lowpass. */\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t half[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
/* mc02: vertical lowpass of the copied source written straight to dst. */\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
/* mc03: blend of the copied source one row down and the vertical lowpass. */\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t half[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full + 24, half, stride, 24, 16, 16);\
}\
\
/* mc11 (old): four-way blend of full, halfH, halfV and halfHV. */\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
\
/* mc11: halfH is blended with full in place, then with its vertical lowpass. */\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
\
/* mc31 (old): as mc11 (old) but full and halfV are taken one column right. */\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full + 1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
\
/* mc31: as mc11 but halfH is blended with full+1. */\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
\
/* mc13 (old): as mc11 (old) but full and halfH are taken one row down. */\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full + 24, halfH + 16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
\
/* mc13: as mc11 but the final blend uses halfH one row down. */\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH + 16, halfHV, stride, 16, 16, 16);\
}\
\
/* mc33 (old): full one row down and one column right, halfH one row down. */\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full + 25, halfH + 16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
\
/* mc33: halfH blended with full+1, final blend uses halfH one row down. */\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full + 1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH + 16, halfHV, stride, 16, 16, 16);\
}\
\
/* mc21: blend of halfH and halfHV; no source copy is made. */\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
\
/* mc23: as mc21 but halfH is taken one row down. */\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH + 16, halfHV, stride, 16, 16, 16);\
}\
\
/* mc12 (old): blend of halfV and halfHV. */\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
\
/* mc12: halfH blended with full in place, then vertical lowpass into dst. */\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
\
/* mc32 (old): as mc12 (old) but halfV is computed from full+1. */\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
\
/* mc32: as mc12 but halfH is blended with full+1. */\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t full[24 * 17];\
    uint8_t halfH[272];\
\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full + 1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
\
/* mc22: horizontal lowpass followed by vertical lowpass into dst. */\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t halfH[272];\
\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
|
---|
2046 |
|
---|
2047 | #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
|
---|
2048 | #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
|
---|
2049 | #define op_put(a, b) a = cm[((b) + 16)>>5]
|
---|
2050 | #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
|
---|
2051 |
|
---|
2052 | QPEL_MC(0, put_ , _ , op_put)
|
---|
2053 | QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
|
---|
2054 | QPEL_MC(0, avg_ , _ , op_avg)
|
---|
2055 | //QPEL_MC(1, avg_no_rnd , _ , op_avg)
|
---|
2056 | #undef op_avg
|
---|
2057 | #undef op_avg_no_rnd
|
---|
2058 | #undef op_put
|
---|
2059 | #undef op_put_no_rnd
|
---|
2060 |
|
---|
2061 | #if 1
|
---|
/**
 * Template for the C versions of the H.264 quarter-pel "lowpass" filters.
 *
 * For the block sizes 2, 4 and 8 it emits three static functions each
 * (OPNAME is pasted in front of every name):
 *   - h264_qpelN_h_lowpass : filters along a row,
 *   - h264_qpelN_v_lowpass : filters along a column,
 *   - h264_qpelN_hv_lowpass: filters rows into the int16_t scratch buffer
 *                            tmp (N+5 rows), then filters its columns.
 * The 16-wide variants are assembled from four 8x8 calls.
 *
 * Every output is the 6-tap sum
 *     (c+d)*20 - (b+e)*5 + (a+f)
 * over six consecutive samples a..f.  The sum is handed over unnormalised:
 * OP(dest, sum) stores a single-pass result, OP2(dest, sum) stores the result
 * of the two-pass (hv) filter.  Both are expected to use the local clip-table
 * pointer `cm` that each generated function sets up.
 *
 * The h filters read src[-2] .. src[N+2] of each row; the v filters read
 * rows -2 .. N+2 of each column; the hv filters need both margins.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(y=0; y<2; y++){\
        for(x=0; x<2; x++)\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(x=0; x<2; x++){\
        int col[2+5]; /* col[k] holds source row k-2 of this column */\
        for(y=0; y<2+5; y++)\
            col[y]= src[(y-2)*srcStride + x];\
        for(y=0; y<2; y++)\
            OP(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    const uint8_t *in = src - 2*srcStride; /* two rows above the block */\
    int16_t *mid = tmp;\
    int x, y;\
    for(y=0; y<2+5; y++){\
        for(x=0; x<2; x++)\
            mid[x]= (in[x]+in[x+1])*20 - (in[x-1]+in[x+2])*5 + (in[x-2]+in[x+3]);\
        mid+=tmpStride;\
        in +=srcStride;\
    }\
    for(x=0; x<2; x++){\
        int col[2+5]; /* col[k] holds intermediate row k of this column */\
        for(y=0; y<2+5; y++)\
            col[y]= tmp[y*tmpStride + x];\
        for(y=0; y<2; y++)\
            OP2(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(y=0; y<4; y++){\
        for(x=0; x<4; x++)\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(x=0; x<4; x++){\
        int col[4+5]; /* col[k] holds source row k-2 of this column */\
        for(y=0; y<4+5; y++)\
            col[y]= src[(y-2)*srcStride + x];\
        for(y=0; y<4; y++)\
            OP(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    const uint8_t *in = src - 2*srcStride; /* two rows above the block */\
    int16_t *mid = tmp;\
    int x, y;\
    for(y=0; y<4+5; y++){\
        for(x=0; x<4; x++)\
            mid[x]= (in[x]+in[x+1])*20 - (in[x-1]+in[x+2])*5 + (in[x-2]+in[x+3]);\
        mid+=tmpStride;\
        in +=srcStride;\
    }\
    for(x=0; x<4; x++){\
        int col[4+5]; /* col[k] holds intermediate row k of this column */\
        for(y=0; y<4+5; y++)\
            col[y]= tmp[y*tmpStride + x];\
        for(y=0; y<4; y++)\
            OP2(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(y=0; y<8; y++){\
        for(x=0; x<8; x++)\
            OP(dst[x], (src[x]+src[x+1])*20 - (src[x-1]+src[x+2])*5 + (src[x-2]+src[x+3]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int x, y;\
    for(x=0; x<8; x++){\
        int col[8+5]; /* col[k] holds source row k-2 of this column */\
        for(y=0; y<8+5; y++)\
            col[y]= src[(y-2)*srcStride + x];\
        for(y=0; y<8; y++)\
            OP(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    const uint8_t *in = src - 2*srcStride; /* two rows above the block */\
    int16_t *mid = tmp;\
    int x, y;\
    for(y=0; y<8+5; y++){\
        for(x=0; x<8; x++)\
            mid[x]= (in[x]+in[x+1])*20 - (in[x-1]+in[x+2])*5 + (in[x-2]+in[x+3]);\
        mid+=tmpStride;\
        in +=srcStride;\
    }\
    for(x=0; x<8; x++){\
        int col[8+5]; /* col[k] holds intermediate row k of this column */\
        for(y=0; y<8+5; y++)\
            col[y]= tmp[y*tmpStride + x];\
        for(y=0; y<8; y++)\
            OP2(dst[y*dstStride + x], (col[y+2]+col[y+3])*20 - (col[y+1]+col[y+4])*5 + (col[y]+col[y+5]));\
    }\
}\
\
/* 16x16 variants: top-left, top-right, bottom-left, bottom-right 8x8 quadrant */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *dstLow = dst + 8*dstStride;\
    uint8_t *srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *dstLow = dst + 8*dstStride;\
    uint8_t *srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
/* the scratch buffer is reused by every quadrant: left ones at tmp, right ones at tmp+8 */\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *dstLow = dst + 8*dstStride;\
    uint8_t *srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst     , tmp  , src     , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst   +8, tmp+8, src   +8, dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow  , tmp  , srcLow  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow+8, tmp+8, srcLow+8, dstStride, tmpStride, srcStride);\
}
|
---|
2325 |
|
---|
/**
 * Template for the sixteen H.264 quarter-pel motion compensation entry points
 * OPNAME h264_qpel SIZE _mcXY_c(dst, src, stride) of one block size.
 *
 * Judging by which filters each case uses, X is the horizontal and Y the
 * vertical quarter-sample offset (0..3):
 *   - offset 2 in one direction uses the matching lowpass filter directly,
 *   - offsets 1 and 3 average (via OPNAME pixels SIZE _l2) two planes:
 *     a filtered plane and either the unfiltered samples or a second
 *     filtered plane.
 * Intermediate planes are always produced with the put_ filters; only the
 * final store goes through OPNAME.
 *
 * Vertical filtering works on a private copy of SIZE+5 source rows (starting
 * two rows above the block) stored with a stride of SIZE; `centre` points at
 * the row that corresponds to src.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    /* average with the sample left of the half-pel position */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hpel, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    /* average with the sample right of the half-pel position */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hpel, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, centre, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    /* average with the row below the half-pel position */\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre+SIZE, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
/* diagonal quarter positions: average one h-filtered and one v-filtered plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    /* vertical filter runs one column to the right */\
    copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    /* horizontal filter runs one row further down */\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
/* positions next to the centre: average the hv plane with an h or v plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t hpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vpel[SIZE*SIZE];\
    uint8_t hvpel[SIZE*SIZE];\
    uint8_t * const centre= rows + 2*SIZE;\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, scratch, src, SIZE, SIZE, stride);\
    copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vpel, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
}
|
---|
2462 |
|
---|
2463 | #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
|
---|
2464 | //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
|
---|
2465 | #define op_put(a, b) a = cm[((b) + 16)>>5]
|
---|
2466 | #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
|
---|
2467 | #define op2_put(a, b) a = cm[((b) + 512)>>10]
|
---|
2468 |
|
---|
2469 | H264_LOWPASS(put_ , op_put, op2_put)
|
---|
2470 | H264_LOWPASS(avg_ , op_avg, op2_avg)
|
---|
2471 | H264_MC(put_, 2)
|
---|
2472 | H264_MC(put_, 4)
|
---|
2473 | H264_MC(put_, 8)
|
---|
2474 | H264_MC(put_, 16)
|
---|
2475 | H264_MC(avg_, 4)
|
---|
2476 | H264_MC(avg_, 8)
|
---|
2477 | H264_MC(avg_, 16)
|
---|
2478 |
|
---|
2479 | #undef op_avg
|
---|
2480 | #undef op_put
|
---|
2481 | #undef op2_avg
|
---|
2482 | #undef op2_put
|
---|
2483 | #endif
|
---|
2484 |
|
---|
/*
 * H.264 weighted prediction, C versions.
 *
 * op_scale1(x): explicit weighting of one sample of `block` in place.
 * op_scale2(x): bi-directional weighting, combining src[x] and dst[x] into
 *               dst[x].
 * Both clamp the result with clip_uint8() and rely on the locals/parameters
 * of the function they are expanded in.
 */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Emits weight_h264_pixels<W>x<H>_c() and biweight_h264_pixels<W>x<H>_c().
 *
 * weight_*  (block, stride, log2_denom, weight, offset):
 *     block[x] = clip((block[x]*weight + (offset << log2_denom) + round) >> log2_denom)
 *     where round is 1 << (log2_denom-1) when log2_denom is non-zero.
 * biweight_*(dst, src, stride, log2_denom, weightd, weights, offset):
 *     dst[x] = clip((src[x]*weights + dst[x]*weightd
 *                    + (((offset+1)|1) << log2_denom)) >> (log2_denom+1))
 *
 * `offset` is a signed int.  Left-shifting a negative int is undefined
 * behaviour in C, so the pre-scaling of the offset is carried out on the
 * unsigned representation and converted back.
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int x, y; \
    offset = (unsigned)offset << log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++) \
            op_scale1(x); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int x, y; \
    offset = (unsigned)((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++) \
            op_scale2(x); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
|
---|
2554 |
|
---|
2555 | static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
|
---|
2556 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
2557 | int i;
|
---|
2558 |
|
---|
2559 | for(i=0; i<h; i++){
|
---|
2560 | dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
|
---|
2561 | dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
|
---|
2562 | dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
|
---|
2563 | dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
|
---|
2564 | dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
|
---|
2565 | dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
|
---|
2566 | dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
|
---|
2567 | dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
|
---|
2568 | dst+=dstStride;
|
---|
2569 | src+=srcStride;
|
---|
2570 | }
|
---|
2571 | }
|
---|
2572 |
|
---|
#ifdef CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/*
 * Integer-position (mc00) entry points for CAVS: each one simply forwards to
 * the matching <put_|avg_>pixels<SIZE>_c() routine with SIZE as last argument.
 * Emits ff_put_cavs_qpel8_mc00_c, ff_avg_cavs_qpel8_mc00_c,
 * ff_put_cavs_qpel16_mc00_c and ff_avg_cavs_qpel16_mc00_c.
 */
#define CAVS_MC00(OPNAME, SIZE) \
void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc00_c(uint8_t *dst, uint8_t *src, int stride) { \
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE); \
}

CAVS_MC00(put_, 8)
CAVS_MC00(avg_, 8)
CAVS_MC00(put_, 16)
CAVS_MC00(avg_, 16)

#undef CAVS_MC00
#endif /* CONFIG_CAVS_DECODER */
|
---|
2590 |
|
---|
2591 | static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
|
---|
2592 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
2593 | int i;
|
---|
2594 |
|
---|
2595 | for(i=0; i<w; i++){
|
---|
2596 | const int src_1= src[ -srcStride];
|
---|
2597 | const int src0 = src[0 ];
|
---|
2598 | const int src1 = src[ srcStride];
|
---|
2599 | const int src2 = src[2*srcStride];
|
---|
2600 | const int src3 = src[3*srcStride];
|
---|
2601 | const int src4 = src[4*srcStride];
|
---|
2602 | const int src5 = src[5*srcStride];
|
---|
2603 | const int src6 = src[6*srcStride];
|
---|
2604 | const int src7 = src[7*srcStride];
|
---|
2605 | const int src8 = src[8*srcStride];
|
---|
2606 | const int src9 = src[9*srcStride];
|
---|
2607 | dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
|
---|
2608 | dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
|
---|
2609 | dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
|
---|
2610 | dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
|
---|
2611 | dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
|
---|
2612 | dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
|
---|
2613 | dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
|
---|
2614 | dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
|
---|
2615 | src++;
|
---|
2616 | dst++;
|
---|
2617 | }
|
---|
2618 | }
|
---|
2619 |
|
---|
/**
 * mspel position (0,0): no filtering, forwards to put_pixels8_c() with 8 as
 * its last argument (presumably the row count).
 */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
|
---|
2623 |
|
---|
/**
 * mspel position (1,0): combines the unfiltered block with its horizontally
 * filtered version through put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
|
---|
2629 |
|
---|
/**
 * mspel position (2,0): the horizontally filtered block is written straight
 * to dst (8 rows, same stride for input and output).
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
|
---|
2633 |
|
---|
/**
 * mspel position (3,0): combines the block one pixel to the right (src+1)
 * with the horizontally filtered block through put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
|
---|
2639 |
|
---|
/**
 * mspel position (0,2): the vertically filtered block is written straight to
 * dst (8 columns, same stride for input and output).
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
|
---|
2643 |
|
---|
/**
 * mspel position (1,2): combines the vertically filtered block with the
 * horizontally-then-vertically filtered block through put_pixels8_l2().
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * vertical filter applied on top of it reads rows -1 .. 9.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[11*8];
    uint8_t vert[8*8];
    uint8_t both[8*8];

    wmv2_mspel8_v_lowpass(vert, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(horiz, src-stride, 8, stride, 11);
    /* horiz+8 is the row that lines up with src */
    wmv2_mspel8_v_lowpass(both, horiz+8, 8, 8, 8);
    put_pixels8_l2(dst, vert, both, stride, 8, 8, 8);
}
|
---|
/**
 * mspel position (3,2): like the (1,2) case, but the purely vertical filter
 * is taken one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[11*8];
    uint8_t vert[8*8];
    uint8_t both[8*8];

    wmv2_mspel8_v_lowpass(vert, src+1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(horiz, src-stride, 8, stride, 11);
    /* horiz+8 is the row that lines up with src */
    wmv2_mspel8_v_lowpass(both, horiz+8, 8, 8, 8);
    put_pixels8_l2(dst, vert, both, stride, 8, 8, 8);
}
|
---|
/**
 * mspel position (2,2): horizontal filter into an 11-row scratch buffer
 * (starting one row above src), then vertical filter from it into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t horiz[11*8];

    wmv2_mspel8_h_lowpass(horiz, src-stride, 8, stride, 11);
    /* horiz+8 is the row that lines up with src */
    wmv2_mspel8_v_lowpass(dst, horiz+8, stride, 8, 8);
}
|
---|
2667 |
|
---|
2668 | static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
|
---|
2669 | int x;
|
---|
2670 | const int strength= ff_h263_loop_filter_strength[qscale];
|
---|
2671 |
|
---|
2672 | for(x=0; x<8; x++){
|
---|
2673 | int d1, d2, ad1;
|
---|
2674 | int p0= src[x-2*stride];
|
---|
2675 | int p1= src[x-1*stride];
|
---|
2676 | int p2= src[x+0*stride];
|
---|
2677 | int p3= src[x+1*stride];
|
---|
2678 | int d = (p0 - p3 + 4*(p2 - p1)) / 8;
|
---|
2679 |
|
---|
2680 | if (d<-2*strength) d1= 0;
|
---|
2681 | else if(d<- strength) d1=-2*strength - d;
|
---|
2682 | else if(d< strength) d1= d;
|
---|
2683 | else if(d< 2*strength) d1= 2*strength - d;
|
---|
2684 | else d1= 0;
|
---|
2685 |
|
---|
2686 | p1 += d1;
|
---|
2687 | p2 -= d1;
|
---|
2688 | if(p1&256) p1= ~(p1>>31);
|
---|
2689 | if(p2&256) p2= ~(p2>>31);
|
---|
2690 |
|
---|
2691 | src[x-1*stride] = p1;
|
---|
2692 | src[x+0*stride] = p2;
|
---|
2693 |
|
---|
2694 | ad1= ABS(d1)>>1;
|
---|
2695 |
|
---|
2696 | d2= clip((p0-p3)/4, -ad1, ad1);
|
---|
2697 |
|
---|
2698 | src[x-2*stride] = p0 - d2;
|
---|
2699 | src[x+ stride] = p3 + d2;
|
---|
2700 | }
|
---|
2701 | }
|
---|
2702 |
|
---|
2703 | static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
|
---|
2704 | int y;
|
---|
2705 | const int strength= ff_h263_loop_filter_strength[qscale];
|
---|
2706 |
|
---|
2707 | for(y=0; y<8; y++){
|
---|
2708 | int d1, d2, ad1;
|
---|
2709 | int p0= src[y*stride-2];
|
---|
2710 | int p1= src[y*stride-1];
|
---|
2711 | int p2= src[y*stride+0];
|
---|
2712 | int p3= src[y*stride+1];
|
---|
2713 | int d = (p0 - p3 + 4*(p2 - p1)) / 8;
|
---|
2714 |
|
---|
2715 | if (d<-2*strength) d1= 0;
|
---|
2716 | else if(d<- strength) d1=-2*strength - d;
|
---|
2717 | else if(d< strength) d1= d;
|
---|
2718 | else if(d< 2*strength) d1= 2*strength - d;
|
---|
2719 | else d1= 0;
|
---|
2720 |
|
---|
2721 | p1 += d1;
|
---|
2722 | p2 -= d1;
|
---|
2723 | if(p1&256) p1= ~(p1>>31);
|
---|
2724 | if(p2&256) p2= ~(p2>>31);
|
---|
2725 |
|
---|
2726 | src[y*stride-1] = p1;
|
---|
2727 | src[y*stride+0] = p2;
|
---|
2728 |
|
---|
2729 | ad1= ABS(d1)>>1;
|
---|
2730 |
|
---|
2731 | d2= clip((p0-p3)/4, -ad1, ad1);
|
---|
2732 |
|
---|
2733 | src[y*stride-2] = p0 - d2;
|
---|
2734 | src[y*stride+1] = p3 + d2;
|
---|
2735 | }
|
---|
2736 | }
|
---|
2737 |
|
---|
/**
 * H.261 loop filter on one 8x8 block, in place.
 *
 * Separable (1,2,1) smoothing:
 *  - vertical pass into a scratch array: rows 1..6 get above + 2*cur + below,
 *    rows 0 and 7 are left unfiltered and just scaled by 4 so that every
 *    entry carries the same gain;
 *  - horizontal pass back into src: columns 1..6 get left + 2*cur + right
 *    (total gain 16, rounded with +8), columns 0 and 7 only undo the
 *    vertical gain of 4 (rounded with +2).
 *
 * @param src    top-left sample of the block
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8][8];
    int row, col;

    /* vertical pass: only reads src */
    for(row=0; row<8; row++){
        const uint8_t *cur = src + row*stride;
        const int on_border = (row==0 || row==7);

        for(col=0; col<8; col++){
            if(on_border)
                vert[row][col] = 4*cur[col];
            else
                vert[row][col] = cur[col - stride] + 2*cur[col] + cur[col + stride];
        }
    }

    /* horizontal pass: only reads the scratch array */
    for(row=0; row<8; row++){
        uint8_t *out = src + row*stride;
        const int *v = vert[row];

        out[0] = (v[0] + 2)>>2;
        out[7] = (v[7] + 2)>>2;
        for(col=1; col<7; col++)
            out[col] = (v[col-1] + 2*v[col] + v[col+1] + 8)>>4;
    }
}
|
---|
2764 |
|
---|
/**
 * C version of the H.264 luma loop filter for one edge.
 * The edge is processed as 4 segments of 4 positions. Segment i is skipped
 * entirely when tc0[i] is negative.
 * @param pix     first sample on the q side of the edge (q0)
 * @param xstride offset between neighbouring filter taps (across the edge)
 * @param ystride offset between successive filtered positions (along the edge)
 * @param alpha   threshold on |p0 - q0|
 * @param beta    threshold on |p1 - p0|, |q1 - q0|, |p2 - p0| and |q2 - q0|
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        if (tc0[i] < 0) {
            pix += 4 * ystride;
            continue;
        }
        for (d = 0; d < 4; d++) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int p2 = pix[-3 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];
            const int q2 = pix[2 * xstride];

            if (ABS(p0 - q0) < alpha &&
                ABS(p1 - p0) < beta &&
                ABS(q1 - q0) < beta) {

                int tc = tc0[i];
                int i_delta;

                /* each side that is smooth enough also gets its second
                 * sample filtered and widens the clip range for p0/q0 by one */
                if (ABS(p2 - p0) < beta) {
                    pix[-2 * xstride] = p1 + clip(((p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i]);
                    tc++;
                }
                if (ABS(q2 - q0) < beta) {
                    pix[xstride] = q1 + clip(((q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i]);
                    tc++;
                }

                /* multiply instead of "<< 2": q0 - p0 may be negative and
                 * left-shifting a negative value is undefined behaviour in C */
                i_delta = clip((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3, -tc, tc);
                pix[-xstride] = clip_uint8(p0 + i_delta);    /* p0' */
                pix[0]        = clip_uint8(q0 - i_delta);    /* q0' */
            }
            pix += ystride;
        }
    }
}
/**
 * Luma loop filter entry point: runs h264_loop_filter_luma_c() with a tap
 * spacing of stride and a step of 1 between successive filtered positions.
 */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int tap_step  = stride;
    const int edge_step = 1;

    h264_loop_filter_luma_c(pix, tap_step, edge_step, alpha, beta, tc0);
}
/**
 * Luma loop filter entry point: runs h264_loop_filter_luma_c() with a tap
 * spacing of 1 and a step of stride between successive filtered positions.
 */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int tap_step  = 1;
    const int edge_step = stride;

    h264_loop_filter_luma_c(pix, tap_step, edge_step, alpha, beta, tc0);
}
2813 |
|
---|
/**
 * C version of the H.264 chroma loop filter for one edge.
 * The edge is processed as 4 segments of 2 positions. Segment i is skipped
 * entirely when tc0[i] is zero or negative. Only p0 and q0 are modified.
 * @param pix     first sample on the q side of the edge (q0)
 * @param xstride offset between neighbouring filter taps (across the edge)
 * @param ystride offset between successive filtered positions (along the edge)
 * @param alpha   threshold on |p0 - q0|
 * @param beta    threshold on |p1 - p0| and |q1 - q0|
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        const int tc = tc0[i];

        if (tc <= 0) {
            pix += 2 * ystride;
            continue;
        }
        for (d = 0; d < 2; d++) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];

            if (ABS(p0 - q0) < alpha &&
                ABS(p1 - p0) < beta &&
                ABS(q1 - q0) < beta) {

                /* multiply instead of "<< 2": q0 - p0 may be negative and
                 * left-shifting a negative value is undefined behaviour in C */
                int delta = clip((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3, -tc, tc);

                pix[-xstride] = clip_uint8(p0 + delta);    /* p0' */
                pix[0]        = clip_uint8(q0 - delta);    /* q0' */
            }
            pix += ystride;
        }
    }
}
/**
 * Chroma loop filter entry point: runs h264_loop_filter_chroma_c() with a tap
 * spacing of stride and a step of 1 between successive filtered positions.
 */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int tap_step  = stride;
    const int edge_step = 1;

    h264_loop_filter_chroma_c(pix, tap_step, edge_step, alpha, beta, tc0);
}
/**
 * Chroma loop filter entry point: runs h264_loop_filter_chroma_c() with a tap
 * spacing of 1 and a step of stride between successive filtered positions.
 */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int tap_step  = 1;
    const int edge_step = stride;

    h264_loop_filter_chroma_c(pix, tap_step, edge_step, alpha, beta, tc0);
}
2850 |
|
---|
/**
 * C version of the H.264 chroma loop filter variant that takes no tc0 table.
 * 8 positions along the edge are visited; where the alpha/beta tests pass,
 * p0 and q0 are replaced by fixed weighted averages of their neighbours.
 * @param pix     first sample on the q side of the edge (q0)
 * @param xstride offset between neighbouring filter taps (across the edge)
 * @param ystride offset between successive filtered positions (along the edge)
 * @param alpha   threshold on |p0 - q0|
 * @param beta    threshold on |p1 - p0| and |q1 - q0|
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int n;

    for (n = 0; n < 8; n++, pix += ystride) {
        const int p1 = pix[-2 * xstride];
        const int p0 = pix[-1 * xstride];
        const int q0 = pix[0];
        const int q1 = pix[1 * xstride];

        /* leave the position untouched unless all three tests pass */
        if (ABS(p0 - q0) >= alpha)
            continue;
        if (ABS(p1 - p0) >= beta)
            continue;
        if (ABS(q1 - q0) >= beta)
            continue;

        pix[-xstride] = (2 * p1 + p0 + q1 + 2) >> 2;    /* p0' */
        pix[0]        = (2 * q1 + q0 + p1 + 2) >> 2;    /* q0' */
    }
}
/**
 * Entry point that runs h264_loop_filter_chroma_intra_c() with a tap spacing
 * of stride and a step of 1 between successive filtered positions.
 */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int tap_step  = stride;
    const int edge_step = 1;

    h264_loop_filter_chroma_intra_c(pix, tap_step, edge_step, alpha, beta);
}
/**
 * Entry point that runs h264_loop_filter_chroma_intra_c() with a tap spacing
 * of 1 and a step of stride between successive filtered positions.
 */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int tap_step  = 1;
    const int edge_step = stride;

    h264_loop_filter_chroma_intra_c(pix, tap_step, edge_step, alpha, beta);
}
2878 |
|
---|
/**
 * Sum of absolute differences between two blocks 16 pixels wide and h rows high.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2906 |
|
---|
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pixel of the second block with its right-hand neighbour.
 * Reads 17 pixels per row of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
2934 |
|
---|
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pixel of the second block with the pixel one row below it.
 * Reads h+1 rows of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
2964 |
|
---|
/**
 * Sum of absolute differences between a 16-wide block and the avg4() of each
 * 2x2 neighbourhood (pixel, right, below, below-right) of the second block.
 * Reads 17 pixels per row and h+1 rows of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
2994 |
|
---|
/**
 * Sum of absolute differences between two blocks 8 pixels wide and h rows high.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3014 |
|
---|
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pixel of the second block with its right-hand neighbour.
 * Reads 9 pixels per row of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
3034 |
|
---|
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pixel of the second block with the pixel one row below it.
 * Reads h+1 rows of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
3056 |
|
---|
/**
 * Sum of absolute differences between an 8-wide block and the avg4() of each
 * 2x2 neighbourhood (pixel, right, below, below-right) of the second block.
 * Reads 9 pixels per row and h+1 rows of pix2.
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size offset between consecutive rows, used for both blocks
 * @param h         number of rows
 * @return the accumulated absolute differences
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int total = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return total;
}
3078 |
|
---|
3079 | static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
|
---|
3080 | MpegEncContext *c = v;
|
---|
3081 | int score1=0;
|
---|
3082 | int score2=0;
|
---|
3083 | int x,y;
|
---|
3084 |
|
---|
3085 | for(y=0; y<h; y++){
|
---|
3086 | for(x=0; x<16; x++){
|
---|
3087 | score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
|
---|
3088 | }
|
---|
3089 | if(y+1<h){
|
---|
3090 | for(x=0; x<15; x++){
|
---|
3091 | score2+= ABS( s1[x ] - s1[x +stride]
|
---|
3092 | - s1[x+1] + s1[x+1+stride])
|
---|
3093 | -ABS( s2[x ] - s2[x +stride]
|
---|
3094 | - s2[x+1] + s2[x+1+stride]);
|
---|
3095 | }
|
---|
3096 | }
|
---|
3097 | s1+= stride;
|
---|
3098 | s2+= stride;
|
---|
3099 | }
|
---|
3100 |
|
---|
3101 | if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
|
---|
3102 | else return score1 + ABS(score2)*8;
|
---|
3103 | }
|
---|
3104 |
|
---|
3105 | static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
|
---|
3106 | MpegEncContext *c = v;
|
---|
3107 | int score1=0;
|
---|
3108 | int score2=0;
|
---|
3109 | int x,y;
|
---|
3110 |
|
---|
3111 | for(y=0; y<h; y++){
|
---|
3112 | for(x=0; x<8; x++){
|
---|
3113 | score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
|
---|
3114 | }
|
---|
3115 | if(y+1<h){
|
---|
3116 | for(x=0; x<7; x++){
|
---|
3117 | score2+= ABS( s1[x ] - s1[x +stride]
|
---|
3118 | - s1[x+1] + s1[x+1+stride])
|
---|
3119 | -ABS( s2[x ] - s2[x +stride]
|
---|
3120 | - s2[x+1] + s2[x+1+stride]);
|
---|
3121 | }
|
---|
3122 | }
|
---|
3123 | s1+= stride;
|
---|
3124 | s2+= stride;
|
---|
3125 | }
|
---|
3126 |
|
---|
3127 | if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
|
---|
3128 | else return score1 + ABS(score2)*8;
|
---|
3129 | }
|
---|
3130 |
|
---|
3131 | static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
|
---|
3132 | int i;
|
---|
3133 | unsigned int sum=0;
|
---|
3134 |
|
---|
3135 | for(i=0; i<8*8; i++){
|
---|
3136 | int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
|
---|
3137 | int w= weight[i];
|
---|
3138 | b>>= RECON_SHIFT;
|
---|
3139 | assert(-512<b && b<512);
|
---|
3140 |
|
---|
3141 | sum += (w*b)*(w*b)>>4;
|
---|
3142 | }
|
---|
3143 | return sum>>2;
|
---|
3144 | }
|
---|
3145 |
|
---|
3146 | static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
|
---|
3147 | int i;
|
---|
3148 |
|
---|
3149 | for(i=0; i<8*8; i++){
|
---|
3150 | rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
|
---|
3151 | }
|
---|
3152 | }
|
---|
3153 |
|
---|
3154 | /**
|
---|
3155 | * permutes an 8x8 block.
|
---|
3156 | * @param block the block which will be permuted according to the given permutation vector
|
---|
3157 | * @param permutation the permutation vector
|
---|
3158 | * @param last the last non zero coefficient in scantable order, used to speed the permutation up
|
---|
3159 | * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
|
---|
3160 | * (inverse) permutated to scantable order!
|
---|
3161 | */
|
---|
3162 | void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
|
---|
3163 | {
|
---|
3164 | int i;
|
---|
3165 | DCTELEM temp[64];
|
---|
3166 |
|
---|
3167 | if(last<=0) return;
|
---|
3168 | //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
|
---|
3169 |
|
---|
3170 | for(i=0; i<=last; i++){
|
---|
3171 | const int j= scantable[i];
|
---|
3172 | temp[j]= block[j];
|
---|
3173 | block[j]=0;
|
---|
3174 | }
|
---|
3175 |
|
---|
3176 | for(i=0; i<=last; i++){
|
---|
3177 | const int j= scantable[i];
|
---|
3178 | const int perm_j= permutation[j];
|
---|
3179 | block[perm_j]= temp[j];
|
---|
3180 | }
|
---|
3181 | }
|
---|
3182 |
|
---|
/**
 * Comparison function that ignores all of its arguments and always
 * returns a score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
3186 |
|
---|
3187 | void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
|
---|
3188 | int i;
|
---|
3189 |
|
---|
3190 | memset(cmp, 0, sizeof(void*)*5);
|
---|
3191 |
|
---|
3192 | for(i=0; i<5; i++){
|
---|
3193 | switch(type&0xFF){
|
---|
3194 | case FF_CMP_SAD:
|
---|
3195 | cmp[i]= c->sad[i];
|
---|
3196 | break;
|
---|
3197 | case FF_CMP_SATD:
|
---|
3198 | cmp[i]= c->hadamard8_diff[i];
|
---|
3199 | break;
|
---|
3200 | case FF_CMP_SSE:
|
---|
3201 | cmp[i]= c->sse[i];
|
---|
3202 | break;
|
---|
3203 | case FF_CMP_DCT:
|
---|
3204 | cmp[i]= c->dct_sad[i];
|
---|
3205 | break;
|
---|
3206 | case FF_CMP_DCT264:
|
---|
3207 | cmp[i]= c->dct264_sad[i];
|
---|
3208 | break;
|
---|
3209 | case FF_CMP_DCTMAX:
|
---|
3210 | cmp[i]= c->dct_max[i];
|
---|
3211 | break;
|
---|
3212 | case FF_CMP_PSNR:
|
---|
3213 | cmp[i]= c->quant_psnr[i];
|
---|
3214 | break;
|
---|
3215 | case FF_CMP_BIT:
|
---|
3216 | cmp[i]= c->bit[i];
|
---|
3217 | break;
|
---|
3218 | case FF_CMP_RD:
|
---|
3219 | cmp[i]= c->rd[i];
|
---|
3220 | break;
|
---|
3221 | case FF_CMP_VSAD:
|
---|
3222 | cmp[i]= c->vsad[i];
|
---|
3223 | break;
|
---|
3224 | case FF_CMP_VSSE:
|
---|
3225 | cmp[i]= c->vsse[i];
|
---|
3226 | break;
|
---|
3227 | case FF_CMP_ZERO:
|
---|
3228 | cmp[i]= zero_cmp;
|
---|
3229 | break;
|
---|
3230 | case FF_CMP_NSSE:
|
---|
3231 | cmp[i]= c->nsse[i];
|
---|
3232 | break;
|
---|
3233 | #ifdef CONFIG_SNOW_ENCODER
|
---|
3234 | case FF_CMP_W53:
|
---|
3235 | cmp[i]= c->w53[i];
|
---|
3236 | break;
|
---|
3237 | case FF_CMP_W97:
|
---|
3238 | cmp[i]= c->w97[i];
|
---|
3239 | break;
|
---|
3240 | #endif
|
---|
3241 | default:
|
---|
3242 | av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
|
---|
3243 | }
|
---|
3244 | }
|
---|
3245 | }
|
---|
3246 |
|
---|
3247 | /**
|
---|
3248 | * memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
---|
3249 | */
|
---|
3250 | static void clear_blocks_c(DCTELEM *blocks)
|
---|
3251 | {
|
---|
3252 | memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
---|
3253 | }
|
---|
3254 |
|
---|
/**
 * Adds src to dst byte by byte; each sum wraps modulo 256.
 * @param dst destination, updated in place
 * @param src values to add
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] += src[n];
}
3270 |
|
---|
/**
 * Stores src1 - src2 byte by byte into dst; each difference wraps modulo 256.
 * @param dst  destination
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes to process
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = src1[n] - src2[n];
}
3286 |
|
---|
/**
 * Writes the residual of a median prediction for w samples.
 * For each sample the prediction is mid_pred(left, src1[i],
 * (left + src1[i] - left_top) & 0xFF); the output is src2[i] minus that
 * prediction. The running left and left_top values are kept as 8-bit
 * quantities and are handed back through the pointers for the next call.
 * NOTE(review): src1 is presumably the line above and src2 the current line;
 * confirm against the callers.
 * @param dst      receives w residual bytes
 * @param src1     supplies the second predictor input and the next left_top
 * @param src2     supplies the samples being predicted and the next left
 * @param w        number of samples
 * @param left     in/out running left value
 * @param left_top in/out running top-left value
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top)
{
    uint8_t cur_left     = *left;
    uint8_t cur_left_top = *left_top;
    int n;

    for (n = 0; n < w; n++) {
        const int pred = mid_pred(cur_left, src1[n], (cur_left + src1[n] - cur_left_top) & 0xFF);

        cur_left_top = src1[n];
        cur_left     = src2[n];
        dst[n]       = cur_left - pred;
    }

    *left     = cur_left;
    *left_top = cur_left_top;
}
3304 |
|
---|
/* Stores the sum of i1 and i2 in o1 and their difference in o2.
 * Expands to two separate statements, so it must be enclosed in braces when
 * used as the body of an if/else or a loop. */
#define BUTTERFLY2(o1, o2, i1, i2) \
    o1 = (i1) + (i2);\
    o2 = (i1) - (i2);

/* In-place butterfly: x becomes x+y and y becomes x-y.
 * x and y must be lvalues; the expansion declares locals named a and b, so
 * the arguments must not refer to variables with those names. */
#define BUTTERFLY1(x, y) \
{\
    int a, b;\
    a = x;\
    b = y;\
    x = a + b;\
    y = a - b;\
}

/* Value of |x+y| + |x-y|; each argument is evaluated more than once. */
#define BUTTERFLYA(x, y) (ABS((x)+(y)) + ABS((x)-(y)))
3319 |
|
---|
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the pixel difference src - dst.
 * @param s      unused
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride offset between consecutive rows, used for both blocks
 * @param h      must be 8 (asserted)
 * @return the accumulated absolute transform coefficients
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row pass: three butterfly stages over the 8 differences of each row */
    for (i = 0; i < 8; i++) {
        int *const t = temp + 8 * i;
        const uint8_t *const sp = src + stride * i;
        const uint8_t *const dp = dst + stride * i;

        BUTTERFLY2(t[0], t[1], sp[0] - dp[0], sp[1] - dp[1]);
        BUTTERFLY2(t[2], t[3], sp[2] - dp[2], sp[3] - dp[3]);
        BUTTERFLY2(t[4], t[5], sp[4] - dp[4], sp[5] - dp[5]);
        BUTTERFLY2(t[6], t[7], sp[6] - dp[6], sp[7] - dp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* column pass: two in-place stages, the third is folded into the
     * absolute-value accumulation */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
3371 |
|
---|
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the pixels themselves, with the mean (DC) term removed from the result.
 * @param s      unused
 * @param src    8x8 block
 * @param dummy  unused
 * @param stride offset between consecutive rows
 * @param h      must be 8 (asserted)
 * @return the accumulated absolute transform coefficients minus the DC term
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
{
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row pass: three butterfly stages over the 8 pixels of each row */
    for (i = 0; i < 8; i++) {
        int *const t = temp + 8 * i;
        const uint8_t *const sp = src + stride * i;

        BUTTERFLY2(t[0], t[1], sp[0], sp[1]);
        BUTTERFLY2(t[2], t[3], sp[2], sp[3]);
        BUTTERFLY2(t[4], t[5], sp[4], sp[5]);
        BUTTERFLY2(t[6], t[7], sp[6], sp[7]);

        BUTTERFLY1(t[0], t[2]);
        BUTTERFLY1(t[1], t[3]);
        BUTTERFLY1(t[4], t[6]);
        BUTTERFLY1(t[5], t[7]);

        BUTTERFLY1(t[0], t[4]);
        BUTTERFLY1(t[1], t[5]);
        BUTTERFLY1(t[2], t[6]);
        BUTTERFLY1(t[3], t[7]);
    }

    /* column pass: two in-place stages, the third is folded into the
     * absolute-value accumulation */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    /* take the mean (DC) contribution of column 0 back out */
    sum -= ABS(temp[8*0] + temp[8*4]);

    return sum;
}
3419 |
|
---|
3420 | static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
---|
3421 | MpegEncContext * const s= (MpegEncContext *)c;
|
---|
3422 | DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
|
---|
3423 | DCTELEM * const temp= (DCTELEM*)aligned_temp;
|
---|
3424 | int sum=0, i;
|
---|
3425 |
|
---|
3426 | assert(h==8);
|
---|
3427 |
|
---|
3428 | s->dsp.diff_pixels(temp, src1, src2, stride);
|
---|
3429 | s->dsp.fdct(temp);
|
---|
3430 |
|
---|
3431 | for(i=0; i<64; i++)
|
---|
3432 | sum+= ABS(temp[i]);
|
---|
3433 |
|
---|
3434 | return sum;
|
---|
3435 | }
|
---|
3436 |
|
---|
3437 | static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
---|
3438 | MpegEncContext * const s= (MpegEncContext *)c;
|
---|
3439 | DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
|
---|
3440 | DCTELEM * const temp= (DCTELEM*)aligned_temp;
|
---|
3441 | int sum=0, i;
|
---|
3442 |
|
---|
3443 | assert(h==8);
|
---|
3444 |
|
---|
3445 | s->dsp.diff_pixels(temp, src1, src2, stride);
|
---|
3446 | s->dsp.fdct(temp);
|
---|
3447 |
|
---|
3448 | for(i=0; i<64; i++)
|
---|
3449 | sum= FFMAX(sum, ABS(temp[i]));
|
---|
3450 |
|
---|
3451 | return sum;
|
---|
3452 | }
|
---|
3453 |
|
---|
3454 | void simple_idct(DCTELEM *block); //FIXME
|
---|
3455 |
|
---|
3456 | static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
---|
3457 | MpegEncContext * const s= (MpegEncContext *)c;
|
---|
3458 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
|
---|
3459 | DCTELEM * const temp= (DCTELEM*)aligned_temp;
|
---|
3460 | DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
|
---|
3461 | int sum=0, i;
|
---|
3462 |
|
---|
3463 | assert(h==8);
|
---|
3464 | s->mb_intra=0;
|
---|
3465 |
|
---|
3466 | s->dsp.diff_pixels(temp, src1, src2, stride);
|
---|
3467 |
|
---|
3468 | memcpy(bak, temp, 64*sizeof(DCTELEM));
|
---|
3469 |
|
---|
3470 | s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
---|
3471 | s->dct_unquantize_inter(s, temp, 0, s->qscale);
|
---|
3472 | simple_idct(temp); //FIXME
|
---|
3473 |
|
---|
3474 | for(i=0; i<64; i++)
|
---|
3475 | sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
|
---|
3476 |
|
---|
3477 | return sum;
|
---|
3478 | }
|
---|
3479 |
|
---|
3480 | static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
---|
3481 | MpegEncContext * const s= (MpegEncContext *)c;
|
---|
3482 | const uint8_t *scantable= s->intra_scantable.permutated;
|
---|
3483 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
|
---|
3484 | DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
|
---|
3485 | DCTELEM * const temp= (DCTELEM*)aligned_temp;
|
---|
3486 | uint8_t * const bak= (uint8_t*)aligned_bak;
|
---|
3487 | int i, last, run, bits, level, distoration, start_i;
|
---|
3488 | const int esc_length= s->ac_esc_length;
|
---|
3489 | uint8_t * length;
|
---|
3490 | uint8_t * last_length;
|
---|
3491 |
|
---|
3492 | assert(h==8);
|
---|
3493 |
|
---|
3494 | for(i=0; i<8; i++){
|
---|
3495 | ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
|
---|
3496 | ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
|
---|
3497 | }
|
---|
3498 |
|
---|
3499 | s->dsp.diff_pixels(temp, src1, src2, stride);
|
---|
3500 |
|
---|
3501 | s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
---|
3502 |
|
---|
3503 | bits=0;
|
---|
3504 |
|
---|
3505 | if (s->mb_intra) {
|
---|
3506 | start_i = 1;
|
---|
3507 | length = s->intra_ac_vlc_length;
|
---|
3508 | last_length= s->intra_ac_vlc_last_length;
|
---|
3509 | bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
|
---|
3510 | } else {
|
---|
3511 | start_i = 0;
|
---|
3512 | length = s->inter_ac_vlc_length;
|
---|
3513 | last_length= s->inter_ac_vlc_last_length;
|
---|
3514 | }
|
---|
3515 |
|
---|
3516 | if(last>=start_i){
|
---|
3517 | run=0;
|
---|
3518 | for(i=start_i; i<last; i++){
|
---|
3519 | int j= scantable[i];
|
---|
3520 | level= temp[j];
|
---|
3521 |
|
---|
3522 | if(level){
|
---|
3523 | level+=64;
|
---|
3524 | if((level&(~127)) == 0){
|
---|
3525 | bits+= length[UNI_AC_ENC_INDEX(run, level)];
|
---|
3526 | }else
|
---|
3527 | bits+= esc_length;
|
---|
3528 | run=0;
|
---|
3529 | }else
|
---|
3530 | run++;
|
---|
3531 | }
|
---|
3532 | i= scantable[last];
|
---|
3533 |
|
---|
3534 | level= temp[i] + 64;
|
---|
3535 |
|
---|
3536 | assert(level - 64);
|
---|
3537 |
|
---|
3538 | if((level&(~127)) == 0){
|
---|
3539 | bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
|
---|
3540 | }else
|
---|
3541 | bits+= esc_length;
|
---|
3542 |
|
---|
3543 | }
|
---|
3544 |
|
---|
3545 | if(last>=0){
|
---|
3546 | if(s->mb_intra)
|
---|
3547 | s->dct_unquantize_intra(s, temp, 0, s->qscale);
|
---|
3548 | else
|
---|
3549 | s->dct_unquantize_inter(s, temp, 0, s->qscale);
|
---|
3550 | }
|
---|
3551 |
|
---|
3552 | s->dsp.idct_add(bak, stride, temp);
|
---|
3553 |
|
---|
3554 | distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
|
---|
3555 |
|
---|
3556 | return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
|
---|
3557 | }
|
---|
3558 |
|
---|
3559 | static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
---|
3560 | MpegEncContext * const s= (MpegEncContext *)c;
|
---|
3561 | const uint8_t *scantable= s->intra_scantable.permutated;
|
---|
3562 | DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
|
---|
3563 | DCTELEM * const temp= (DCTELEM*)aligned_temp;
|
---|
3564 | int i, last, run, bits, level, start_i;
|
---|
3565 | const int esc_length= s->ac_esc_length;
|
---|
3566 | uint8_t * length;
|
---|
3567 | uint8_t * last_length;
|
---|
3568 |
|
---|
3569 | assert(h==8);
|
---|
3570 |
|
---|
3571 | s->dsp.diff_pixels(temp, src1, src2, stride);
|
---|
3572 |
|
---|
3573 | s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
---|
3574 |
|
---|
3575 | bits=0;
|
---|
3576 |
|
---|
3577 | if (s->mb_intra) {
|
---|
3578 | start_i = 1;
|
---|
3579 | length = s->intra_ac_vlc_length;
|
---|
3580 | last_length= s->intra_ac_vlc_last_length;
|
---|
3581 | bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
|
---|
3582 | } else {
|
---|
3583 | start_i = 0;
|
---|
3584 | length = s->inter_ac_vlc_length;
|
---|
3585 | last_length= s->inter_ac_vlc_last_length;
|
---|
3586 | }
|
---|
3587 |
|
---|
3588 | if(last>=start_i){
|
---|
3589 | run=0;
|
---|
3590 | for(i=start_i; i<last; i++){
|
---|
3591 | int j= scantable[i];
|
---|
3592 | level= temp[j];
|
---|
3593 |
|
---|
3594 | if(level){
|
---|
3595 | level+=64;
|
---|
3596 | if((level&(~127)) == 0){
|
---|
3597 | bits+= length[UNI_AC_ENC_INDEX(run, level)];
|
---|
3598 | }else
|
---|
3599 | bits+= esc_length;
|
---|
3600 | run=0;
|
---|
3601 | }else
|
---|
3602 | run++;
|
---|
3603 | }
|
---|
3604 | i= scantable[last];
|
---|
3605 |
|
---|
3606 | level= temp[i] + 64;
|
---|
3607 |
|
---|
3608 | assert(level - 64);
|
---|
3609 |
|
---|
3610 | if((level&(~127)) == 0){
|
---|
3611 | bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
|
---|
3612 | }else
|
---|
3613 | bits+= esc_length;
|
---|
3614 | }
|
---|
3615 |
|
---|
3616 | return bits;
|
---|
3617 | }
|
---|
3618 |
|
---|
/**
 * Sum of absolute differences between vertically adjacent pixels.
 *
 * For rows 0 .. h-2 and columns 0 .. 15, accumulates
 * ABS(s[x] - s[x + stride]).
 *
 * @param c      unused
 * @param s      top-left pixel of the 16-pixel-wide area
 * @param dummy  unused
 * @param stride distance in bytes between two rows
 * @param h      number of rows; h-1 row pairs are compared
 * @return accumulated score
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int sum= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *below= s + stride;

        for(col=0; col<16; col++)
            sum+= ABS(s[col] - below[col]);

        s= below;
    }

    return sum;
}
|
---|
3633 |
|
---|
/**
 * Sum of absolute vertical differences of the residual s1 - s2.
 *
 * For rows 0 .. h-2 and columns 0 .. 15, accumulates
 * ABS((s1[x] - s2[x]) - (s1[x + stride] - s2[x + stride])).
 *
 * @param c      unused
 * @param s1     top-left pixel of the first 16-pixel-wide area
 * @param s2     top-left pixel of the second 16-pixel-wide area
 * @param stride distance in bytes between two rows (same for both areas)
 * @param h      number of rows; h-1 row pairs are compared
 * @return accumulated score
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum= 0;
    int row, col;

    for(row=1; row<h; row++){
        for(col=0; col<16; col++){
            const int d= s1[col] - s2[col] - s1[col+stride] + s2[col+stride];
            sum+= ABS(d);
        }
        s1+= stride;
        s2+= stride;
    }

    return sum;
}
|
---|
3648 |
|
---|
/* Square of a; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))

/**
 * Sum of squared differences between vertically adjacent pixels.
 *
 * For rows 0 .. h-2 and columns 0 .. 15, accumulates
 * (s[x] - s[x + stride])^2.
 *
 * @param c      unused
 * @param s      top-left pixel of the 16-pixel-wide area
 * @param dummy  unused
 * @param stride distance in bytes between two rows
 * @param h      number of rows; h-1 row pairs are compared
 * @return accumulated score
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int sum= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *below= s + stride;

        for(col=0; col<16; col++){
            const int d= s[col] - below[col];
            sum+= SQ(d);
        }

        s= below;
    }

    return sum;
}
|
---|
3664 |
|
---|
/**
 * Sum of squared vertical differences of the residual s1 - s2.
 *
 * For rows 0 .. h-2 and columns 0 .. 15, accumulates
 * ((s1[x] - s2[x]) - (s1[x + stride] - s2[x + stride]))^2.
 *
 * @param c      unused
 * @param s1     top-left pixel of the first 16-pixel-wide area
 * @param s2     top-left pixel of the second 16-pixel-wide area
 * @param stride distance in bytes between two rows (same for both areas)
 * @param h      number of rows; h-1 row pairs are compared
 * @return accumulated score
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum= 0;
    int row, col;

    for(row=1; row<h; row++){
        for(col=0; col<16; col++){
            const int d= s1[col] - s2[col] - s1[col+stride] + s2[col+stride];
            sum+= d*d;
        }
        s1+= stride;
        s2+= stride;
    }

    return sum;
}
|
---|
3679 |
|
---|
/* Each line pairs an 8x8 comparison function (first argument) with a second
 * name; those second names (hadamard8_diff16_c, dct_sad16_c, ...) are the
 * ones stored into the DSPContext compare tables by dsputil_init().
 * NOTE(review): WARPER8_16_SQ is defined outside this section -- presumably it
 * defines the second function in terms of the first; confirm against the
 * macro definition. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
|
---|
3687 |
|
---|
/* XXX: these wrapper functions should be removed as soon as all IDCTs
   have been converted */
|
---|
3690 | static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3691 | {
|
---|
3692 | j_rev_dct (block);
|
---|
3693 | put_pixels_clamped_c(block, dest, line_size);
|
---|
3694 | }
|
---|
3695 | static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3696 | {
|
---|
3697 | j_rev_dct (block);
|
---|
3698 | add_pixels_clamped_c(block, dest, line_size);
|
---|
3699 | }
|
---|
3700 |
|
---|
3701 | static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3702 | {
|
---|
3703 | j_rev_dct4 (block);
|
---|
3704 | put_pixels_clamped4_c(block, dest, line_size);
|
---|
3705 | }
|
---|
3706 | static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3707 | {
|
---|
3708 | j_rev_dct4 (block);
|
---|
3709 | add_pixels_clamped4_c(block, dest, line_size);
|
---|
3710 | }
|
---|
3711 |
|
---|
3712 | static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3713 | {
|
---|
3714 | j_rev_dct2 (block);
|
---|
3715 | put_pixels_clamped2_c(block, dest, line_size);
|
---|
3716 | }
|
---|
3717 | static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3718 | {
|
---|
3719 | j_rev_dct2 (block);
|
---|
3720 | add_pixels_clamped2_c(block, dest, line_size);
|
---|
3721 | }
|
---|
3722 |
|
---|
3723 | static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3724 | {
|
---|
3725 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
3726 |
|
---|
3727 | dest[0] = cm[(block[0] + 4)>>3];
|
---|
3728 | }
|
---|
3729 | static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
|
---|
3730 | {
|
---|
3731 | uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
---|
3732 |
|
---|
3733 | dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
|
---|
3734 | }
|
---|
3735 |
|
---|
/* Does nothing; dsputil_init() installs it as the default c->prefetch hook.
 * NOTE(review): the parameter list is left empty (no prototype) rather than
 * (void), presumably so the function stays assignable to a hook pointer that
 * takes arguments -- confirm before changing it. */
static void just_return()
{
}
|
---|
3737 |
|
---|
3738 | /* init static data */
|
---|
3739 | void dsputil_static_init(void)
|
---|
3740 | {
|
---|
3741 | int i;
|
---|
3742 |
|
---|
3743 | for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
|
---|
3744 | for(i=0;i<MAX_NEG_CROP;i++) {
|
---|
3745 | cropTbl[i] = 0;
|
---|
3746 | cropTbl[i + MAX_NEG_CROP + 256] = 255;
|
---|
3747 | }
|
---|
3748 |
|
---|
3749 | for(i=0;i<512;i++) {
|
---|
3750 | squareTbl[i] = (i - 256) * (i - 256);
|
---|
3751 | }
|
---|
3752 |
|
---|
3753 | for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
---|
3754 | }
|
---|
3755 |
|
---|
3756 |
|
---|
3757 | void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
---|
3758 | {
|
---|
3759 | int i;
|
---|
3760 |
|
---|
3761 | #ifdef CONFIG_ENCODERS
|
---|
3762 | if(avctx->dct_algo==FF_DCT_FASTINT) {
|
---|
3763 | c->fdct = fdct_ifast;
|
---|
3764 | c->fdct248 = fdct_ifast248;
|
---|
3765 | }
|
---|
3766 | else if(avctx->dct_algo==FF_DCT_FAAN) {
|
---|
3767 | c->fdct = ff_faandct;
|
---|
3768 | c->fdct248 = ff_faandct248;
|
---|
3769 | }
|
---|
3770 | else {
|
---|
3771 | c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
---|
3772 | c->fdct248 = ff_fdct248_islow;
|
---|
3773 | }
|
---|
3774 | #endif //CONFIG_ENCODERS
|
---|
3775 |
|
---|
3776 | if(avctx->lowres==1){
|
---|
3777 | if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
|
---|
3778 | c->idct_put= ff_jref_idct4_put;
|
---|
3779 | c->idct_add= ff_jref_idct4_add;
|
---|
3780 | }else{
|
---|
3781 | c->idct_put= ff_h264_lowres_idct_put_c;
|
---|
3782 | c->idct_add= ff_h264_lowres_idct_add_c;
|
---|
3783 | }
|
---|
3784 | c->idct = j_rev_dct4;
|
---|
3785 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
3786 | }else if(avctx->lowres==2){
|
---|
3787 | c->idct_put= ff_jref_idct2_put;
|
---|
3788 | c->idct_add= ff_jref_idct2_add;
|
---|
3789 | c->idct = j_rev_dct2;
|
---|
3790 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
3791 | }else if(avctx->lowres==3){
|
---|
3792 | c->idct_put= ff_jref_idct1_put;
|
---|
3793 | c->idct_add= ff_jref_idct1_add;
|
---|
3794 | c->idct = j_rev_dct1;
|
---|
3795 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
3796 | }else{
|
---|
3797 | if(avctx->idct_algo==FF_IDCT_INT){
|
---|
3798 | c->idct_put= ff_jref_idct_put;
|
---|
3799 | c->idct_add= ff_jref_idct_add;
|
---|
3800 | c->idct = j_rev_dct;
|
---|
3801 | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
|
---|
3802 | }else if(avctx->idct_algo==FF_IDCT_VP3){
|
---|
3803 | c->idct_put= ff_vp3_idct_put_c;
|
---|
3804 | c->idct_add= ff_vp3_idct_add_c;
|
---|
3805 | c->idct = ff_vp3_idct_c;
|
---|
3806 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
3807 | }else{ //accurate/default
|
---|
3808 | c->idct_put= simple_idct_put;
|
---|
3809 | c->idct_add= simple_idct_add;
|
---|
3810 | c->idct = simple_idct;
|
---|
3811 | c->idct_permutation_type= FF_NO_IDCT_PERM;
|
---|
3812 | }
|
---|
3813 | }
|
---|
3814 |
|
---|
3815 | c->h264_idct_add= ff_h264_idct_add_c;
|
---|
3816 | c->h264_idct8_add= ff_h264_idct8_add_c;
|
---|
3817 | c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
|
---|
3818 | c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
|
---|
3819 |
|
---|
3820 | c->get_pixels = get_pixels_c;
|
---|
3821 | c->diff_pixels = diff_pixels_c;
|
---|
3822 | c->put_pixels_clamped = put_pixels_clamped_c;
|
---|
3823 | c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
|
---|
3824 | c->add_pixels_clamped = add_pixels_clamped_c;
|
---|
3825 | c->add_pixels8 = add_pixels8_c;
|
---|
3826 | c->add_pixels4 = add_pixels4_c;
|
---|
3827 | c->gmc1 = gmc1_c;
|
---|
3828 | c->gmc = ff_gmc_c;
|
---|
3829 | c->clear_blocks = clear_blocks_c;
|
---|
3830 | c->pix_sum = pix_sum_c;
|
---|
3831 | c->pix_norm1 = pix_norm1_c;
|
---|
3832 |
|
---|
3833 | /* TODO [0] 16 [1] 8 */
|
---|
3834 | c->pix_abs[0][0] = pix_abs16_c;
|
---|
3835 | c->pix_abs[0][1] = pix_abs16_x2_c;
|
---|
3836 | c->pix_abs[0][2] = pix_abs16_y2_c;
|
---|
3837 | c->pix_abs[0][3] = pix_abs16_xy2_c;
|
---|
3838 | c->pix_abs[1][0] = pix_abs8_c;
|
---|
3839 | c->pix_abs[1][1] = pix_abs8_x2_c;
|
---|
3840 | c->pix_abs[1][2] = pix_abs8_y2_c;
|
---|
3841 | c->pix_abs[1][3] = pix_abs8_xy2_c;
|
---|
3842 |
|
---|
3843 | #define dspfunc(PFX, IDX, NUM) \
|
---|
3844 | c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
|
---|
3845 | c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
|
---|
3846 | c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
|
---|
3847 | c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
|
---|
3848 |
|
---|
3849 | dspfunc(put, 0, 16);
|
---|
3850 | dspfunc(put_no_rnd, 0, 16);
|
---|
3851 | dspfunc(put, 1, 8);
|
---|
3852 | dspfunc(put_no_rnd, 1, 8);
|
---|
3853 | dspfunc(put, 2, 4);
|
---|
3854 | dspfunc(put, 3, 2);
|
---|
3855 |
|
---|
3856 | dspfunc(avg, 0, 16);
|
---|
3857 | dspfunc(avg_no_rnd, 0, 16);
|
---|
3858 | dspfunc(avg, 1, 8);
|
---|
3859 | dspfunc(avg_no_rnd, 1, 8);
|
---|
3860 | dspfunc(avg, 2, 4);
|
---|
3861 | dspfunc(avg, 3, 2);
|
---|
3862 | #undef dspfunc
|
---|
3863 |
|
---|
3864 | c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
|
---|
3865 | c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
|
---|
3866 |
|
---|
3867 | c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
|
---|
3868 | c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
|
---|
3869 | c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
|
---|
3870 | c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
|
---|
3871 | c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
|
---|
3872 | c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
|
---|
3873 | c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
|
---|
3874 | c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
|
---|
3875 | c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
|
---|
3876 |
|
---|
3877 | c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
|
---|
3878 | c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
|
---|
3879 | c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
|
---|
3880 | c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
|
---|
3881 | c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
|
---|
3882 | c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
|
---|
3883 | c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
|
---|
3884 | c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
|
---|
3885 | c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
|
---|
3886 |
|
---|
3887 | #define dspfunc(PFX, IDX, NUM) \
|
---|
3888 | c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
|
---|
3889 | c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
|
---|
3890 | c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
|
---|
3891 | c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
|
---|
3892 | c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
|
---|
3893 | c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
|
---|
3894 | c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
|
---|
3895 | c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
|
---|
3896 | c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
|
---|
3897 | c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
|
---|
3898 | c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
|
---|
3899 | c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
|
---|
3900 | c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
|
---|
3901 | c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
|
---|
3902 | c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
|
---|
3903 | c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
|
---|
3904 |
|
---|
3905 | dspfunc(put_qpel, 0, 16);
|
---|
3906 | dspfunc(put_no_rnd_qpel, 0, 16);
|
---|
3907 |
|
---|
3908 | dspfunc(avg_qpel, 0, 16);
|
---|
3909 | /* dspfunc(avg_no_rnd_qpel, 0, 16); */
|
---|
3910 |
|
---|
3911 | dspfunc(put_qpel, 1, 8);
|
---|
3912 | dspfunc(put_no_rnd_qpel, 1, 8);
|
---|
3913 |
|
---|
3914 | dspfunc(avg_qpel, 1, 8);
|
---|
3915 | /* dspfunc(avg_no_rnd_qpel, 1, 8); */
|
---|
3916 |
|
---|
3917 | dspfunc(put_h264_qpel, 0, 16);
|
---|
3918 | dspfunc(put_h264_qpel, 1, 8);
|
---|
3919 | dspfunc(put_h264_qpel, 2, 4);
|
---|
3920 | dspfunc(put_h264_qpel, 3, 2);
|
---|
3921 | dspfunc(avg_h264_qpel, 0, 16);
|
---|
3922 | dspfunc(avg_h264_qpel, 1, 8);
|
---|
3923 | dspfunc(avg_h264_qpel, 2, 4);
|
---|
3924 |
|
---|
3925 | #undef dspfunc
|
---|
3926 | c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
|
---|
3927 | c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
|
---|
3928 | c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
|
---|
3929 | c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
|
---|
3930 | c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
|
---|
3931 | c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
|
---|
3932 |
|
---|
3933 | c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
|
---|
3934 | c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
|
---|
3935 | c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
|
---|
3936 | c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
|
---|
3937 | c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
|
---|
3938 | c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
|
---|
3939 | c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
|
---|
3940 | c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
|
---|
3941 | c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
|
---|
3942 | c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
|
---|
3943 | c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
|
---|
3944 | c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
|
---|
3945 | c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
|
---|
3946 | c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
|
---|
3947 | c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
|
---|
3948 | c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
|
---|
3949 | c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
|
---|
3950 | c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
|
---|
3951 | c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
|
---|
3952 | c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
|
---|
3953 |
|
---|
3954 | #ifdef CONFIG_CAVS_DECODER
|
---|
3955 | ff_cavsdsp_init(c,avctx);
|
---|
3956 | #endif
|
---|
3957 |
|
---|
3958 | c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
|
---|
3959 | c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
|
---|
3960 | c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
|
---|
3961 | c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
|
---|
3962 | c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
|
---|
3963 | c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
|
---|
3964 | c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
|
---|
3965 | c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
|
---|
3966 |
|
---|
3967 | #define SET_CMP_FUNC(name) \
|
---|
3968 | c->name[0]= name ## 16_c;\
|
---|
3969 | c->name[1]= name ## 8x8_c;
|
---|
3970 |
|
---|
3971 | SET_CMP_FUNC(hadamard8_diff)
|
---|
3972 | c->hadamard8_diff[4]= hadamard8_intra16_c;
|
---|
3973 | SET_CMP_FUNC(dct_sad)
|
---|
3974 | SET_CMP_FUNC(dct_max)
|
---|
3975 | c->sad[0]= pix_abs16_c;
|
---|
3976 | c->sad[1]= pix_abs8_c;
|
---|
3977 | c->sse[0]= sse16_c;
|
---|
3978 | c->sse[1]= sse8_c;
|
---|
3979 | c->sse[2]= sse4_c;
|
---|
3980 | SET_CMP_FUNC(quant_psnr)
|
---|
3981 | SET_CMP_FUNC(rd)
|
---|
3982 | SET_CMP_FUNC(bit)
|
---|
3983 | c->vsad[0]= vsad16_c;
|
---|
3984 | c->vsad[4]= vsad_intra16_c;
|
---|
3985 | c->vsse[0]= vsse16_c;
|
---|
3986 | c->vsse[4]= vsse_intra16_c;
|
---|
3987 | c->nsse[0]= nsse16_c;
|
---|
3988 | c->nsse[1]= nsse8_c;
|
---|
3989 | #ifdef CONFIG_SNOW_ENCODER
|
---|
3990 | c->w53[0]= w53_16_c;
|
---|
3991 | c->w53[1]= w53_8_c;
|
---|
3992 | c->w97[0]= w97_16_c;
|
---|
3993 | c->w97[1]= w97_8_c;
|
---|
3994 | #endif
|
---|
3995 |
|
---|
3996 | c->add_bytes= add_bytes_c;
|
---|
3997 | c->diff_bytes= diff_bytes_c;
|
---|
3998 | c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
|
---|
3999 | c->bswap_buf= bswap_buf;
|
---|
4000 |
|
---|
4001 | c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
|
---|
4002 | c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
|
---|
4003 | c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
|
---|
4004 | c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
|
---|
4005 | c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
|
---|
4006 | c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
|
---|
4007 |
|
---|
4008 | c->h263_h_loop_filter= h263_h_loop_filter_c;
|
---|
4009 | c->h263_v_loop_filter= h263_v_loop_filter_c;
|
---|
4010 |
|
---|
4011 | c->h261_loop_filter= h261_loop_filter_c;
|
---|
4012 |
|
---|
4013 | c->try_8x8basis= try_8x8basis_c;
|
---|
4014 | c->add_8x8basis= add_8x8basis_c;
|
---|
4015 |
|
---|
4016 | #ifdef CONFIG_SNOW_ENCODER
|
---|
4017 | c->vertical_compose97i = ff_snow_vertical_compose97i;
|
---|
4018 | c->horizontal_compose97i = ff_snow_horizontal_compose97i;
|
---|
4019 | c->inner_add_yblock = ff_snow_inner_add_yblock;
|
---|
4020 | #endif
|
---|
4021 |
|
---|
4022 | c->shrink[0]= ff_img_copy_plane;
|
---|
4023 | c->shrink[1]= ff_shrink22;
|
---|
4024 | c->shrink[2]= ff_shrink44;
|
---|
4025 | c->shrink[3]= ff_shrink88;
|
---|
4026 |
|
---|
4027 | c->prefetch= just_return;
|
---|
4028 |
|
---|
4029 | #ifdef HAVE_MMX
|
---|
4030 | dsputil_init_mmx(c, avctx);
|
---|
4031 | #endif
|
---|
4032 | #ifdef ARCH_ARMV4L
|
---|
4033 | dsputil_init_armv4l(c, avctx);
|
---|
4034 | #endif
|
---|
4035 | #ifdef HAVE_MLIB
|
---|
4036 | dsputil_init_mlib(c, avctx);
|
---|
4037 | #endif
|
---|
4038 | #ifdef ARCH_SPARC
|
---|
4039 | dsputil_init_vis(c,avctx);
|
---|
4040 | #endif
|
---|
4041 | #ifdef ARCH_ALPHA
|
---|
4042 | dsputil_init_alpha(c, avctx);
|
---|
4043 | #endif
|
---|
4044 | #ifdef ARCH_POWERPC
|
---|
4045 | dsputil_init_ppc(c, avctx);
|
---|
4046 | #endif
|
---|
4047 | #ifdef HAVE_MMI
|
---|
4048 | dsputil_init_mmi(c, avctx);
|
---|
4049 | #endif
|
---|
4050 | #ifdef ARCH_SH4
|
---|
4051 | dsputil_init_sh4(c,avctx);
|
---|
4052 | #endif
|
---|
4053 |
|
---|
4054 | switch(c->idct_permutation_type){
|
---|
4055 | case FF_NO_IDCT_PERM:
|
---|
4056 | for(i=0; i<64; i++)
|
---|
4057 | c->idct_permutation[i]= i;
|
---|
4058 | break;
|
---|
4059 | case FF_LIBMPEG2_IDCT_PERM:
|
---|
4060 | for(i=0; i<64; i++)
|
---|
4061 | c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
---|
4062 | break;
|
---|
4063 | case FF_SIMPLE_IDCT_PERM:
|
---|
4064 | for(i=0; i<64; i++)
|
---|
4065 | c->idct_permutation[i]= simple_mmx_permutation[i];
|
---|
4066 | break;
|
---|
4067 | case FF_TRANSPOSE_IDCT_PERM:
|
---|
4068 | for(i=0; i<64; i++)
|
---|
4069 | c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
|
---|
4070 | break;
|
---|
4071 | case FF_PARTTRANS_IDCT_PERM:
|
---|
4072 | for(i=0; i<64; i++)
|
---|
4073 | c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
|
---|
4074 | break;
|
---|
4075 | default:
|
---|
4076 | av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
|
---|
4077 | }
|
---|
4078 | }
|
---|
4079 |
|
---|