VirtualBox

source: vbox/trunk/src/libs/ffmpeg-20060710/libavcodec/snow.c@ 9441

Last change on this file since 9441 was 5776, checked in by vboxsync, 17 years ago

ffmpeg: exported to OSE

File size: 169.0 KB
Line 
1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "avcodec.h"
20#include "common.h"
21#include "dsputil.h"
22#include "snow.h"
23
24#include "rangecoder.h"
25
26#include "mpegvideo.h"
27
28#undef NDEBUG
29#include <assert.h>
30
31static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
48};
49static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66};
67static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84};
85static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
102};
103static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
120};
121static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
138};
139static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
156};
157static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
174};
175
176#if 0 //64*cubic
177static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210//error:0.000022
211};
212static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
229//error:0.000033
230};
231#elif 1 // 64*linear
232static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
265 //error:0.000020
266};
267static const uint8_t obmc16[256]={
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
284//error:0.000015
285};
286#else //64*cos
287static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
320//error:0.000022
321};
322static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
339//error:0.000022
340};
341#endif
342
343//linear *64
344static const uint8_t obmc8[64]={
345 4, 12, 20, 28, 28, 20, 12, 4,
346 12, 36, 60, 84, 84, 60, 36, 12,
347 20, 60,100,140,140,100, 60, 20,
348 28, 84,140,196,196,140, 84, 28,
349 28, 84,140,196,196,140, 84, 28,
350 20, 60,100,140,140,100, 60, 20,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 4, 12, 20, 28, 28, 20, 12, 4,
353//error:0.000000
354};
355
356//linear *64
357static const uint8_t obmc4[16]={
358 16, 48, 48, 16,
359 48,144,144, 48,
360 48,144,144, 48,
361 16, 48, 48, 16,
362//error:0.000000
363};
364
365static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
367};
368
369static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
370
371typedef struct BlockNode{
372 int16_t mx;
373 int16_t my;
374 uint8_t ref;
375 uint8_t color[3];
376 uint8_t type;
377//#define TYPE_SPLIT 1
378#define BLOCK_INTRA 1
379#define BLOCK_OPT 2
380//#define TYPE_NOCOLOR 4
381 uint8_t level; //FIXME merge into type?
382}BlockNode;
383
384static const BlockNode null_block= { //FIXME add border maybe
385 .color= {128,128,128},
386 .mx= 0,
387 .my= 0,
388 .ref= 0,
389 .type= 0,
390 .level= 0,
391};
392
393#define LOG2_MB_SIZE 4
394#define MB_SIZE (1<<LOG2_MB_SIZE)
395
396typedef struct x_and_coeff{
397 int16_t x;
398 uint16_t coeff;
399} x_and_coeff;
400
401typedef struct SubBand{
402 int level;
403 int stride;
404 int width;
405 int height;
406 int qlog; ///< log(qscale)/log[2^(1/6)]
407 DWTELEM *buf;
408 int buf_x_offset;
409 int buf_y_offset;
410 int stride_line; ///< Stride measured in lines, not pixels.
411 x_and_coeff * x_coeff;
412 struct SubBand *parent;
413 uint8_t state[/*7*2*/ 7 + 512][32];
414}SubBand;
415
416typedef struct Plane{
417 int width;
418 int height;
419 SubBand band[MAX_DECOMPOSITIONS][4];
420}Plane;
421
422typedef struct SnowContext{
423// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
424
425 AVCodecContext *avctx;
426 RangeCoder c;
427 DSPContext dsp;
428 AVFrame new_picture;
429 AVFrame input_picture; ///< new_picture with the internal linesizes
430 AVFrame current_picture;
431 AVFrame last_picture[MAX_REF_FRAMES];
432 AVFrame mconly_picture;
433// uint8_t q_context[16];
434 uint8_t header_state[32];
435 uint8_t block_state[128 + 32*128];
436 int keyframe;
437 int always_reset;
438 int version;
439 int spatial_decomposition_type;
440 int temporal_decomposition_type;
441 int spatial_decomposition_count;
442 int temporal_decomposition_count;
443 int max_ref_frames;
444 int ref_frames;
445 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
446 uint32_t *ref_scores[MAX_REF_FRAMES];
447 DWTELEM *spatial_dwt_buffer;
448 int colorspace_type;
449 int chroma_h_shift;
450 int chroma_v_shift;
451 int spatial_scalability;
452 int qlog;
453 int lambda;
454 int lambda2;
455 int pass1_rc;
456 int mv_scale;
457 int qbias;
458#define QBIAS_SHIFT 3
459 int b_width;
460 int b_height;
461 int block_max_depth;
462 Plane plane[MAX_PLANES];
463 BlockNode *block;
464#define ME_CACHE_SIZE 1024
465 int me_cache[ME_CACHE_SIZE];
466 int me_cache_generation;
467 slice_buffer sb;
468
469 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
470}SnowContext;
471
472typedef struct {
473 DWTELEM *b0;
474 DWTELEM *b1;
475 DWTELEM *b2;
476 DWTELEM *b3;
477 int y;
478} dwt_compose_t;
479
480#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
481//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
482
483static void iterative_me(SnowContext *s);
484
485static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
486{
487 int i;
488
489 buf->base_buffer = base_buffer;
490 buf->line_count = line_count;
491 buf->line_width = line_width;
492 buf->data_count = max_allocated_lines;
493 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
494 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
495
496 for (i = 0; i < max_allocated_lines; i++)
497 {
498 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
499 }
500
501 buf->data_stack_top = max_allocated_lines - 1;
502}
503
504static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
505{
506 int offset;
507 DWTELEM * buffer;
508
509// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
510
511 assert(buf->data_stack_top >= 0);
512// assert(!buf->line[line]);
513 if (buf->line[line])
514 return buf->line[line];
515
516 offset = buf->line_width * line;
517 buffer = buf->data_stack[buf->data_stack_top];
518 buf->data_stack_top--;
519 buf->line[line] = buffer;
520
521// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
522
523 return buffer;
524}
525
526static void slice_buffer_release(slice_buffer * buf, int line)
527{
528 int offset;
529 DWTELEM * buffer;
530
531 assert(line >= 0 && line < buf->line_count);
532 assert(buf->line[line]);
533
534 offset = buf->line_width * line;
535 buffer = buf->line[line];
536 buf->data_stack_top++;
537 buf->data_stack[buf->data_stack_top] = buffer;
538 buf->line[line] = NULL;
539
540// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
541}
542
543static void slice_buffer_flush(slice_buffer * buf)
544{
545 int i;
546 for (i = 0; i < buf->line_count; i++)
547 {
548 if (buf->line[i])
549 {
550// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
551 slice_buffer_release(buf, i);
552 }
553 }
554}
555
556static void slice_buffer_destroy(slice_buffer * buf)
557{
558 int i;
559 slice_buffer_flush(buf);
560
561 for (i = buf->data_count - 1; i >= 0; i--)
562 {
563 assert(buf->data_stack[i]);
564 av_freep(&buf->data_stack[i]);
565 }
566 assert(buf->data_stack);
567 av_freep(&buf->data_stack);
568 assert(buf->line);
569 av_freep(&buf->line);
570}
571
572#ifdef __sgi
573// Avoid a name clash on SGI IRIX
574#undef qexp
575#endif
576#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
577static uint8_t qexp[QROOT];
578
579static inline int mirror(int v, int m){
580 while((unsigned)v > (unsigned)m){
581 v=-v;
582 if(v<0) v+= 2*m;
583 }
584 return v;
585}
586
587static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
588 int i;
589
590 if(v){
591 const int a= ABS(v);
592 const int e= av_log2(a);
593#if 1
594 const int el= FFMIN(e, 10);
595 put_rac(c, state+0, 0);
596
597 for(i=0; i<el; i++){
598 put_rac(c, state+1+i, 1); //1..10
599 }
600 for(; i<e; i++){
601 put_rac(c, state+1+9, 1); //1..10
602 }
603 put_rac(c, state+1+FFMIN(i,9), 0);
604
605 for(i=e-1; i>=el; i--){
606 put_rac(c, state+22+9, (a>>i)&1); //22..31
607 }
608 for(; i>=0; i--){
609 put_rac(c, state+22+i, (a>>i)&1); //22..31
610 }
611
612 if(is_signed)
613 put_rac(c, state+11 + el, v < 0); //11..21
614#else
615
616 put_rac(c, state+0, 0);
617 if(e<=9){
618 for(i=0; i<e; i++){
619 put_rac(c, state+1+i, 1); //1..10
620 }
621 put_rac(c, state+1+i, 0);
622
623 for(i=e-1; i>=0; i--){
624 put_rac(c, state+22+i, (a>>i)&1); //22..31
625 }
626
627 if(is_signed)
628 put_rac(c, state+11 + e, v < 0); //11..21
629 }else{
630 for(i=0; i<e; i++){
631 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
632 }
633 put_rac(c, state+1+FFMIN(i,9), 0);
634
635 for(i=e-1; i>=0; i--){
636 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
637 }
638
639 if(is_signed)
640 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
641 }
642#endif
643 }else{
644 put_rac(c, state+0, 1);
645 }
646}
647
648static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
649 if(get_rac(c, state+0))
650 return 0;
651 else{
652 int i, e, a;
653 e= 0;
654 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
655 e++;
656 }
657
658 a= 1;
659 for(i=e-1; i>=0; i--){
660 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
661 }
662
663 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
664 return -a;
665 else
666 return a;
667 }
668}
669
670static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
671 int i;
672 int r= log2>=0 ? 1<<log2 : 1;
673
674 assert(v>=0);
675 assert(log2>=-4);
676
677 while(v >= r){
678 put_rac(c, state+4+log2, 1);
679 v -= r;
680 log2++;
681 if(log2>0) r+=r;
682 }
683 put_rac(c, state+4+log2, 0);
684
685 for(i=log2-1; i>=0; i--){
686 put_rac(c, state+31-i, (v>>i)&1);
687 }
688}
689
690static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
691 int i;
692 int r= log2>=0 ? 1<<log2 : 1;
693 int v=0;
694
695 assert(log2>=-4);
696
697 while(get_rac(c, state+4+log2)){
698 v+= r;
699 log2++;
700 if(log2>0) r+=r;
701 }
702
703 for(i=log2-1; i>=0; i--){
704 v+= get_rac(c, state+31-i)<<i;
705 }
706
707 return v;
708}
709
710static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
711 const int mirror_left= !highpass;
712 const int mirror_right= (width&1) ^ highpass;
713 const int w= (width>>1) - 1 + (highpass & width);
714 int i;
715
716#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
717 if(mirror_left){
718 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
719 dst += dst_step;
720 src += src_step;
721 }
722
723 for(i=0; i<w; i++){
724 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
725 }
726
727 if(mirror_right){
728 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
729 }
730}
731
732#ifndef lift5
733static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
734 const int mirror_left= !highpass;
735 const int mirror_right= (width&1) ^ highpass;
736 const int w= (width>>1) - 1 + (highpass & width);
737 int i;
738
739 if(mirror_left){
740 int r= 3*2*ref[0];
741 r += r>>4;
742 r += r>>8;
743 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
744 dst += dst_step;
745 src += src_step;
746 }
747
748 for(i=0; i<w; i++){
749 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
750 r += r>>4;
751 r += r>>8;
752 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
753 }
754
755 if(mirror_right){
756 int r= 3*2*ref[w*ref_step];
757 r += r>>4;
758 r += r>>8;
759 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
760 }
761}
762#endif
763
764#ifndef liftS
765static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
766 const int mirror_left= !highpass;
767 const int mirror_right= (width&1) ^ highpass;
768 const int w= (width>>1) - 1 + (highpass & width);
769 int i;
770
771 assert(shift == 4);
772#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
773 if(mirror_left){
774 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
775 dst += dst_step;
776 src += src_step;
777 }
778
779 for(i=0; i<w; i++){
780 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
781 }
782
783 if(mirror_right){
784 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
785 }
786}
787#endif
788
789
790static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791 int x, i;
792
793 for(x=start; x<width; x+=2){
794 int64_t sum=0;
795
796 for(i=0; i<n; i++){
797 int x2= x + 2*i - n + 1;
798 if (x2< 0) x2= -x2;
799 else if(x2>=width) x2= 2*width-x2-2;
800 sum += coeffs[i]*(int64_t)dst[x2];
801 }
802 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
803 else dst[x] += (sum + (1<<shift)/2)>>shift;
804 }
805}
806
807static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
808 int x, y, i;
809 for(y=start; y<height; y+=2){
810 for(x=0; x<width; x++){
811 int64_t sum=0;
812
813 for(i=0; i<n; i++){
814 int y2= y + 2*i - n + 1;
815 if (y2< 0) y2= -y2;
816 else if(y2>=height) y2= 2*height-y2-2;
817 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
818 }
819 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
820 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
821 }
822 }
823}
824
825#define SCALEX 1
826#define LX0 0
827#define LX1 1
828
829#if 0 // more accurate 9/7
830#define N1 2
831#define SHIFT1 14
832#define COEFFS1 (int[]){-25987,-25987}
833#define N2 2
834#define SHIFT2 19
835#define COEFFS2 (int[]){-27777,-27777}
836#define N3 2
837#define SHIFT3 15
838#define COEFFS3 (int[]){28931,28931}
839#define N4 2
840#define SHIFT4 15
841#define COEFFS4 (int[]){14533,14533}
842#elif 1 // 13/7 CRF
843#define N1 4
844#define SHIFT1 4
845#define COEFFS1 (int[]){1,-9,-9,1}
846#define N2 4
847#define SHIFT2 4
848#define COEFFS2 (int[]){-1,5,5,-1}
849#define N3 0
850#define SHIFT3 1
851#define COEFFS3 NULL
852#define N4 0
853#define SHIFT4 1
854#define COEFFS4 NULL
855#elif 1 // 3/5
856#define LX0 1
857#define LX1 0
858#define SCALEX 0.5
859#define N1 2
860#define SHIFT1 1
861#define COEFFS1 (int[]){1,1}
862#define N2 2
863#define SHIFT2 2
864#define COEFFS2 (int[]){-1,-1}
865#define N3 0
866#define SHIFT3 0
867#define COEFFS3 NULL
868#define N4 0
869#define SHIFT4 0
870#define COEFFS4 NULL
871#elif 1 // 11/5
872#define N1 0
873#define SHIFT1 1
874#define COEFFS1 NULL
875#define N2 2
876#define SHIFT2 2
877#define COEFFS2 (int[]){-1,-1}
878#define N3 2
879#define SHIFT3 0
880#define COEFFS3 (int[]){-1,-1}
881#define N4 4
882#define SHIFT4 7
883#define COEFFS4 (int[]){-5,29,29,-5}
884#define SCALEX 4
885#elif 1 // 9/7 CDF
886#define N1 2
887#define SHIFT1 7
888#define COEFFS1 (int[]){-203,-203}
889#define N2 2
890#define SHIFT2 12
891#define COEFFS2 (int[]){-217,-217}
892#define N3 2
893#define SHIFT3 7
894#define COEFFS3 (int[]){113,113}
895#define N4 2
896#define SHIFT4 9
897#define COEFFS4 (int[]){227,227}
898#define SCALEX 1
899#elif 1 // 7/5 CDF
900#define N1 0
901#define SHIFT1 1
902#define COEFFS1 NULL
903#define N2 2
904#define SHIFT2 2
905#define COEFFS2 (int[]){-1,-1}
906#define N3 2
907#define SHIFT3 0
908#define COEFFS3 (int[]){-1,-1}
909#define N4 2
910#define SHIFT4 4
911#define COEFFS4 (int[]){3,3}
912#elif 1 // 9/7 MN
913#define N1 4
914#define SHIFT1 4
915#define COEFFS1 (int[]){1,-9,-9,1}
916#define N2 2
917#define SHIFT2 2
918#define COEFFS2 (int[]){1,1}
919#define N3 0
920#define SHIFT3 1
921#define COEFFS3 NULL
922#define N4 0
923#define SHIFT4 1
924#define COEFFS4 NULL
925#else // 13/7 CRF
926#define N1 4
927#define SHIFT1 4
928#define COEFFS1 (int[]){1,-9,-9,1}
929#define N2 4
930#define SHIFT2 4
931#define COEFFS2 (int[]){-1,5,5,-1}
932#define N3 0
933#define SHIFT3 1
934#define COEFFS3 NULL
935#define N4 0
936#define SHIFT4 1
937#define COEFFS4 NULL
938#endif
939static void horizontal_decomposeX(DWTELEM *b, int width){
940 DWTELEM temp[width];
941 const int width2= width>>1;
942 const int w2= (width+1)>>1;
943 int x;
944
945 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
946 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
947 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
948 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
949
950 for(x=0; x<width2; x++){
951 temp[x ]= b[2*x ];
952 temp[x+w2]= b[2*x + 1];
953 }
954 if(width&1)
955 temp[x ]= b[2*x ];
956 memcpy(b, temp, width*sizeof(int));
957}
958
959static void horizontal_composeX(DWTELEM *b, int width){
960 DWTELEM temp[width];
961 const int width2= width>>1;
962 int x;
963 const int w2= (width+1)>>1;
964
965 memcpy(temp, b, width*sizeof(int));
966 for(x=0; x<width2; x++){
967 b[2*x ]= temp[x ];
968 b[2*x + 1]= temp[x+w2];
969 }
970 if(width&1)
971 b[2*x ]= temp[x ];
972
973 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
974 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
975 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
976 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
977}
978
979static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
980 int x, y;
981
982 for(y=0; y<height; y++){
983 for(x=0; x<width; x++){
984 buffer[y*stride + x] *= SCALEX;
985 }
986 }
987
988 for(y=0; y<height; y++){
989 horizontal_decomposeX(buffer + y*stride, width);
990 }
991
992 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
993 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
994 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
995 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
996}
997
998static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
999 int x, y;
1000
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1003 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1004 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1005
1006 for(y=0; y<height; y++){
1007 horizontal_composeX(buffer + y*stride, width);
1008 }
1009
1010 for(y=0; y<height; y++){
1011 for(x=0; x<width; x++){
1012 buffer[y*stride + x] /= SCALEX;
1013 }
1014 }
1015}
1016
1017static void horizontal_decompose53i(DWTELEM *b, int width){
1018 DWTELEM temp[width];
1019 const int width2= width>>1;
1020 int x;
1021 const int w2= (width+1)>>1;
1022
1023 for(x=0; x<width2; x++){
1024 temp[x ]= b[2*x ];
1025 temp[x+w2]= b[2*x + 1];
1026 }
1027 if(width&1)
1028 temp[x ]= b[2*x ];
1029#if 0
1030 {
1031 int A1,A2,A3,A4;
1032 A2= temp[1 ];
1033 A4= temp[0 ];
1034 A1= temp[0+width2];
1035 A1 -= (A2 + A4)>>1;
1036 A4 += (A1 + 1)>>1;
1037 b[0+width2] = A1;
1038 b[0 ] = A4;
1039 for(x=1; x+1<width2; x+=2){
1040 A3= temp[x+width2];
1041 A4= temp[x+1 ];
1042 A3 -= (A2 + A4)>>1;
1043 A2 += (A1 + A3 + 2)>>2;
1044 b[x+width2] = A3;
1045 b[x ] = A2;
1046
1047 A1= temp[x+1+width2];
1048 A2= temp[x+2 ];
1049 A1 -= (A2 + A4)>>1;
1050 A4 += (A1 + A3 + 2)>>2;
1051 b[x+1+width2] = A1;
1052 b[x+1 ] = A4;
1053 }
1054 A3= temp[width-1];
1055 A3 -= A2;
1056 A2 += (A1 + A3 + 2)>>2;
1057 b[width -1] = A3;
1058 b[width2-1] = A2;
1059 }
1060#else
1061 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1062 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1063#endif
1064}
1065
1066static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1067 int i;
1068
1069 for(i=0; i<width; i++){
1070 b1[i] -= (b0[i] + b2[i])>>1;
1071 }
1072}
1073
1074static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 int i;
1076
1077 for(i=0; i<width; i++){
1078 b1[i] += (b0[i] + b2[i] + 2)>>2;
1079 }
1080}
1081
1082static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1083 int y;
1084 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1085 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1086
1087 for(y=-2; y<height; y+=2){
1088 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1089 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1090
1091{START_TIMER
1092 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1093 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1094STOP_TIMER("horizontal_decompose53i")}
1095
1096{START_TIMER
1097 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1098 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1099STOP_TIMER("vertical_decompose53i*")}
1100
1101 b0=b2;
1102 b1=b3;
1103 }
1104}
1105
1106static void horizontal_decompose97i(DWTELEM *b, int width){
1107 DWTELEM temp[width];
1108 const int w2= (width+1)>>1;
1109
1110 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1111 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1112 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1113 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1114}
1115
1116
1117static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1118 int i;
1119
1120 for(i=0; i<width; i++){
1121 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1122 }
1123}
1124
1125static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126 int i;
1127
1128 for(i=0; i<width; i++){
1129#ifdef lift5
1130 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1131#else
1132 int r= 3*(b0[i] + b2[i]);
1133 r+= r>>4;
1134 r+= r>>8;
1135 b1[i] += (r+W_CO)>>W_CS;
1136#endif
1137 }
1138}
1139
1140static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141 int i;
1142
1143 for(i=0; i<width; i++){
1144#ifdef liftS
1145 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1146#else
1147 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1148#endif
1149 }
1150}
1151
1152static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1153 int i;
1154
1155 for(i=0; i<width; i++){
1156 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1157 }
1158}
1159
1160static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1161 int y;
1162 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1163 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1164 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1165 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1166
1167 for(y=-4; y<height; y+=2){
1168 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1169 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1170
1171{START_TIMER
1172 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1173 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1174if(width>400){
1175STOP_TIMER("horizontal_decompose97i")
1176}}
1177
1178{START_TIMER
1179 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1180 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1181 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1182 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1183
1184if(width>400){
1185STOP_TIMER("vertical_decompose97i")
1186}}
1187
1188 b0=b2;
1189 b1=b3;
1190 b2=b4;
1191 b3=b5;
1192 }
1193}
1194
1195void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1196 int level;
1197
1198 for(level=0; level<decomposition_count; level++){
1199 switch(type){
1200 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1201 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1202 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1203 }
1204 }
1205}
1206
1207static void horizontal_compose53i(DWTELEM *b, int width){
1208 DWTELEM temp[width];
1209 const int width2= width>>1;
1210 const int w2= (width+1)>>1;
1211 int x;
1212
1213#if 0
1214 int A1,A2,A3,A4;
1215 A2= temp[1 ];
1216 A4= temp[0 ];
1217 A1= temp[0+width2];
1218 A1 -= (A2 + A4)>>1;
1219 A4 += (A1 + 1)>>1;
1220 b[0+width2] = A1;
1221 b[0 ] = A4;
1222 for(x=1; x+1<width2; x+=2){
1223 A3= temp[x+width2];
1224 A4= temp[x+1 ];
1225 A3 -= (A2 + A4)>>1;
1226 A2 += (A1 + A3 + 2)>>2;
1227 b[x+width2] = A3;
1228 b[x ] = A2;
1229
1230 A1= temp[x+1+width2];
1231 A2= temp[x+2 ];
1232 A1 -= (A2 + A4)>>1;
1233 A4 += (A1 + A3 + 2)>>2;
1234 b[x+1+width2] = A1;
1235 b[x+1 ] = A4;
1236 }
1237 A3= temp[width-1];
1238 A3 -= A2;
1239 A2 += (A1 + A3 + 2)>>2;
1240 b[width -1] = A3;
1241 b[width2-1] = A2;
1242#else
1243 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1244 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1245#endif
1246 for(x=0; x<width2; x++){
1247 b[2*x ]= temp[x ];
1248 b[2*x + 1]= temp[x+w2];
1249 }
1250 if(width&1)
1251 b[2*x ]= temp[x ];
1252}
1253
1254static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1255 int i;
1256
1257 for(i=0; i<width; i++){
1258 b1[i] += (b0[i] + b2[i])>>1;
1259 }
1260}
1261
1262static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263 int i;
1264
1265 for(i=0; i<width; i++){
1266 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1267 }
1268}
1269
1270static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1271 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1272 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1273 cs->y = -1;
1274}
1275
1276static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1277 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1278 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1279 cs->y = -1;
1280}
1281
1282static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1283 int y= cs->y;
1284
1285 DWTELEM *b0= cs->b0;
1286 DWTELEM *b1= cs->b1;
1287 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1288 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1289
1290{START_TIMER
1291 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1292 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1293STOP_TIMER("vertical_compose53i*")}
1294
1295{START_TIMER
1296 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1297 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1298STOP_TIMER("horizontal_compose53i")}
1299
1300 cs->b0 = b2;
1301 cs->b1 = b3;
1302 cs->y += 2;
1303}
1304
1305static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1306 int y= cs->y;
1307 DWTELEM *b0= cs->b0;
1308 DWTELEM *b1= cs->b1;
1309 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1310 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1311
1312{START_TIMER
1313 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1314 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1315STOP_TIMER("vertical_compose53i*")}
1316
1317{START_TIMER
1318 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1319 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1320STOP_TIMER("horizontal_compose53i")}
1321
1322 cs->b0 = b2;
1323 cs->b1 = b3;
1324 cs->y += 2;
1325}
1326
1327static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1328 dwt_compose_t cs;
1329 spatial_compose53i_init(&cs, buffer, height, stride);
1330 while(cs.y <= height)
1331 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1332}
1333
1334
1335void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1336 DWTELEM temp[width];
1337 const int w2= (width+1)>>1;
1338
1339 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1340 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1341 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1342 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1343}
1344
1345static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1346 int i;
1347
1348 for(i=0; i<width; i++){
1349 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1350 }
1351}
1352
1353static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354 int i;
1355
1356 for(i=0; i<width; i++){
1357#ifdef lift5
1358 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1359#else
1360 int r= 3*(b0[i] + b2[i]);
1361 r+= r>>4;
1362 r+= r>>8;
1363 b1[i] -= (r+W_CO)>>W_CS;
1364#endif
1365 }
1366}
1367
1368static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1369 int i;
1370
1371 for(i=0; i<width; i++){
1372#ifdef liftS
1373 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1374#else
1375 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1376#endif
1377 }
1378}
1379
1380static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1381 int i;
1382
1383 for(i=0; i<width; i++){
1384 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1385 }
1386}
1387
1388void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1389 int i;
1390
1391 for(i=0; i<width; i++){
1392#ifndef lift5
1393 int r;
1394#endif
1395 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1396#ifdef lift5
1397 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1398#else
1399 r= 3*(b2[i] + b4[i]);
1400 r+= r>>4;
1401 r+= r>>8;
1402 b3[i] -= (r+W_CO)>>W_CS;
1403#endif
1404#ifdef liftS
1405 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1406#else
1407 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1408#endif
1409 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1410 }
1411}
1412
1413static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1414 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1415 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1416 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1417 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1418 cs->y = -3;
1419}
1420
1421static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1422 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1423 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1424 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1425 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1426 cs->y = -3;
1427}
1428
1429static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1430 int y = cs->y;
1431
1432 DWTELEM *b0= cs->b0;
1433 DWTELEM *b1= cs->b1;
1434 DWTELEM *b2= cs->b2;
1435 DWTELEM *b3= cs->b3;
1436 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1437 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1438
1439{START_TIMER
1440 if(y>0 && y+4<height){
1441 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1442 }else{
1443 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1444 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1445 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1446 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1447 }
1448if(width>400){
1449STOP_TIMER("vertical_compose97i")}}
1450
1451{START_TIMER
1452 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1453 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1454if(width>400 && y+0<(unsigned)height){
1455STOP_TIMER("horizontal_compose97i")}}
1456
1457 cs->b0=b2;
1458 cs->b1=b3;
1459 cs->b2=b4;
1460 cs->b3=b5;
1461 cs->y += 2;
1462}
1463
1464static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1465 int y = cs->y;
1466 DWTELEM *b0= cs->b0;
1467 DWTELEM *b1= cs->b1;
1468 DWTELEM *b2= cs->b2;
1469 DWTELEM *b3= cs->b3;
1470 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1471 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1472
1473{START_TIMER
1474 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1475 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1476 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1477 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1478if(width>400){
1479STOP_TIMER("vertical_compose97i")}}
1480
1481{START_TIMER
1482 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1483 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1484if(width>400 && b0 <= b2){
1485STOP_TIMER("horizontal_compose97i")}}
1486
1487 cs->b0=b2;
1488 cs->b1=b3;
1489 cs->b2=b4;
1490 cs->b3=b5;
1491 cs->y += 2;
1492}
1493
1494static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1495 dwt_compose_t cs;
1496 spatial_compose97i_init(&cs, buffer, height, stride);
1497 while(cs.y <= height)
1498 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1499}
1500
1501static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1502 int level;
1503 for(level=decomposition_count-1; level>=0; level--){
1504 switch(type){
1505 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1506 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1507 /* not slicified yet */
1508 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1509 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1510 }
1511 }
1512}
1513
1514static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1515 int level;
1516 for(level=decomposition_count-1; level>=0; level--){
1517 switch(type){
1518 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1519 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1520 /* not slicified yet */
1521 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1522 }
1523 }
1524}
1525
1526static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1527 const int support = type==1 ? 3 : 5;
1528 int level;
1529 if(type==2) return;
1530
1531 for(level=decomposition_count-1; level>=0; level--){
1532 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1533 switch(type){
1534 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1535 break;
1536 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1537 break;
1538 case DWT_X: break;
1539 }
1540 }
1541 }
1542}
1543
1544static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1545 const int support = type==1 ? 3 : 5;
1546 int level;
1547 if(type==2) return;
1548
1549 for(level=decomposition_count-1; level>=0; level--){
1550 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1551 switch(type){
1552 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1553 break;
1554 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1555 break;
1556 case DWT_X: break;
1557 }
1558 }
1559 }
1560}
1561
1562static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1563 if(type==2){
1564 int level;
1565 for(level=decomposition_count-1; level>=0; level--)
1566 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1567 }else{
1568 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1569 int y;
1570 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1571 for(y=0; y<height; y+=4)
1572 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1573 }
1574}
1575
1576static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1577 const int w= b->width;
1578 const int h= b->height;
1579 int x, y;
1580
1581 if(1){
1582 int run=0;
1583 int runs[w*h];
1584 int run_index=0;
1585 int max_index;
1586
1587 for(y=0; y<h; y++){
1588 for(x=0; x<w; x++){
1589 int v, p=0;
1590 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1591 v= src[x + y*stride];
1592
1593 if(y){
1594 t= src[x + (y-1)*stride];
1595 if(x){
1596 lt= src[x - 1 + (y-1)*stride];
1597 }
1598 if(x + 1 < w){
1599 rt= src[x + 1 + (y-1)*stride];
1600 }
1601 }
1602 if(x){
1603 l= src[x - 1 + y*stride];
1604 /*if(x > 1){
1605 if(orientation==1) ll= src[y + (x-2)*stride];
1606 else ll= src[x - 2 + y*stride];
1607 }*/
1608 }
1609 if(parent){
1610 int px= x>>1;
1611 int py= y>>1;
1612 if(px<b->parent->width && py<b->parent->height)
1613 p= parent[px + py*2*stride];
1614 }
1615 if(!(/*ll|*/l|lt|t|rt|p)){
1616 if(v){
1617 runs[run_index++]= run;
1618 run=0;
1619 }else{
1620 run++;
1621 }
1622 }
1623 }
1624 }
1625 max_index= run_index;
1626 runs[run_index++]= run;
1627 run_index=0;
1628 run= runs[run_index++];
1629
1630 put_symbol2(&s->c, b->state[30], max_index, 0);
1631 if(run_index <= max_index)
1632 put_symbol2(&s->c, b->state[1], run, 3);
1633
1634 for(y=0; y<h; y++){
1635 if(s->c.bytestream_end - s->c.bytestream < w*40){
1636 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1637 return -1;
1638 }
1639 for(x=0; x<w; x++){
1640 int v, p=0;
1641 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1642 v= src[x + y*stride];
1643
1644 if(y){
1645 t= src[x + (y-1)*stride];
1646 if(x){
1647 lt= src[x - 1 + (y-1)*stride];
1648 }
1649 if(x + 1 < w){
1650 rt= src[x + 1 + (y-1)*stride];
1651 }
1652 }
1653 if(x){
1654 l= src[x - 1 + y*stride];
1655 /*if(x > 1){
1656 if(orientation==1) ll= src[y + (x-2)*stride];
1657 else ll= src[x - 2 + y*stride];
1658 }*/
1659 }
1660 if(parent){
1661 int px= x>>1;
1662 int py= y>>1;
1663 if(px<b->parent->width && py<b->parent->height)
1664 p= parent[px + py*2*stride];
1665 }
1666 if(/*ll|*/l|lt|t|rt|p){
1667 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1668
1669 put_rac(&s->c, &b->state[0][context], !!v);
1670 }else{
1671 if(!run){
1672 run= runs[run_index++];
1673
1674 if(run_index <= max_index)
1675 put_symbol2(&s->c, b->state[1], run, 3);
1676 assert(v);
1677 }else{
1678 run--;
1679 assert(!v);
1680 }
1681 }
1682 if(v){
1683 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1684 int l2= 2*ABS(l) + (l<0);
1685 int t2= 2*ABS(t) + (t<0);
1686
1687 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1688 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1689 }
1690 }
1691 }
1692 }
1693 return 0;
1694}
1695
1696static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1697// encode_subband_qtree(s, b, src, parent, stride, orientation);
1698// encode_subband_z0run(s, b, src, parent, stride, orientation);
1699 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1700// encode_subband_dzr(s, b, src, parent, stride, orientation);
1701}
1702
1703static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1704 const int w= b->width;
1705 const int h= b->height;
1706 int x,y;
1707
1708 if(1){
1709 int run, runs;
1710 x_and_coeff *xc= b->x_coeff;
1711 x_and_coeff *prev_xc= NULL;
1712 x_and_coeff *prev2_xc= xc;
1713 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1714 x_and_coeff *prev_parent_xc= parent_xc;
1715
1716 runs= get_symbol2(&s->c, b->state[30], 0);
1717 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1718 else run= INT_MAX;
1719
1720 for(y=0; y<h; y++){
1721 int v=0;
1722 int lt=0, t=0, rt=0;
1723
1724 if(y && prev_xc->x == 0){
1725 rt= prev_xc->coeff;
1726 }
1727 for(x=0; x<w; x++){
1728 int p=0;
1729 const int l= v;
1730
1731 lt= t; t= rt;
1732
1733 if(y){
1734 if(prev_xc->x <= x)
1735 prev_xc++;
1736 if(prev_xc->x == x + 1)
1737 rt= prev_xc->coeff;
1738 else
1739 rt=0;
1740 }
1741 if(parent_xc){
1742 if(x>>1 > parent_xc->x){
1743 parent_xc++;
1744 }
1745 if(x>>1 == parent_xc->x){
1746 p= parent_xc->coeff;
1747 }
1748 }
1749 if(/*ll|*/l|lt|t|rt|p){
1750 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1751
1752 v=get_rac(&s->c, &b->state[0][context]);
1753 if(v){
1754 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1755 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1756
1757 xc->x=x;
1758 (xc++)->coeff= v;
1759 }
1760 }else{
1761 if(!run){
1762 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1763 else run= INT_MAX;
1764 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1765 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1766
1767 xc->x=x;
1768 (xc++)->coeff= v;
1769 }else{
1770 int max_run;
1771 run--;
1772 v=0;
1773
1774 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1775 else max_run= FFMIN(run, w-x-1);
1776 if(parent_xc)
1777 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1778 x+= max_run;
1779 run-= max_run;
1780 }
1781 }
1782 }
1783 (xc++)->x= w+1; //end marker
1784 prev_xc= prev2_xc;
1785 prev2_xc= xc;
1786
1787 if(parent_xc){
1788 if(y&1){
1789 while(parent_xc->x != parent->width+1)
1790 parent_xc++;
1791 parent_xc++;
1792 prev_parent_xc= parent_xc;
1793 }else{
1794 parent_xc= prev_parent_xc;
1795 }
1796 }
1797 }
1798
1799 (xc++)->x= w+1; //end marker
1800 }
1801}
1802
1803static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1804 const int w= b->width;
1805 int y;
1806 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1807 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1808 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1809 int new_index = 0;
1810
1811 START_TIMER
1812
1813 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1814 qadd= 0;
1815 qmul= 1<<QEXPSHIFT;
1816 }
1817
1818 /* If we are on the second or later slice, restore our index. */
1819 if (start_y != 0)
1820 new_index = save_state[0];
1821
1822
1823 for(y=start_y; y<h; y++){
1824 int x = 0;
1825 int v;
1826 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1827 memset(line, 0, b->width*sizeof(DWTELEM));
1828 v = b->x_coeff[new_index].coeff;
1829 x = b->x_coeff[new_index++].x;
1830 while(x < w)
1831 {
1832 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1833 register int u= -(v&1);
1834 line[x] = (t^u) - u;
1835
1836 v = b->x_coeff[new_index].coeff;
1837 x = b->x_coeff[new_index++].x;
1838 }
1839 }
1840 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1841 STOP_TIMER("decode_subband")
1842 }
1843
1844 /* Save our variables for the next slice. */
1845 save_state[0] = new_index;
1846
1847 return;
1848}
1849
1850static void reset_contexts(SnowContext *s){
1851 int plane_index, level, orientation;
1852
1853 for(plane_index=0; plane_index<3; plane_index++){
1854 for(level=0; level<s->spatial_decomposition_count; level++){
1855 for(orientation=level ? 1:0; orientation<4; orientation++){
1856 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1857 }
1858 }
1859 }
1860 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1861 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1862}
1863
1864static int alloc_blocks(SnowContext *s){
1865 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1866 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1867
1868 s->b_width = w;
1869 s->b_height= h;
1870
1871 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1872 return 0;
1873}
1874
1875static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1876 uint8_t *bytestream= d->bytestream;
1877 uint8_t *bytestream_start= d->bytestream_start;
1878 *d= *s;
1879 d->bytestream= bytestream;
1880 d->bytestream_start= bytestream_start;
1881}
1882
1883//near copy & paste from dsputil, FIXME
1884static int pix_sum(uint8_t * pix, int line_size, int w)
1885{
1886 int s, i, j;
1887
1888 s = 0;
1889 for (i = 0; i < w; i++) {
1890 for (j = 0; j < w; j++) {
1891 s += pix[0];
1892 pix ++;
1893 }
1894 pix += line_size - w;
1895 }
1896 return s;
1897}
1898
1899//near copy & paste from dsputil, FIXME
1900static int pix_norm1(uint8_t * pix, int line_size, int w)
1901{
1902 int s, i, j;
1903 uint32_t *sq = squareTbl + 256;
1904
1905 s = 0;
1906 for (i = 0; i < w; i++) {
1907 for (j = 0; j < w; j ++) {
1908 s += sq[pix[0]];
1909 pix ++;
1910 }
1911 pix += line_size - w;
1912 }
1913 return s;
1914}
1915
1916static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1917 const int w= s->b_width << s->block_max_depth;
1918 const int rem_depth= s->block_max_depth - level;
1919 const int index= (x + y*w) << rem_depth;
1920 const int block_w= 1<<rem_depth;
1921 BlockNode block;
1922 int i,j;
1923
1924 block.color[0]= l;
1925 block.color[1]= cb;
1926 block.color[2]= cr;
1927 block.mx= mx;
1928 block.my= my;
1929 block.ref= ref;
1930 block.type= type;
1931 block.level= level;
1932
1933 for(j=0; j<block_w; j++){
1934 for(i=0; i<block_w; i++){
1935 s->block[index + i + j*w]= block;
1936 }
1937 }
1938}
1939
1940static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1941 const int offset[3]= {
1942 y*c-> stride + x,
1943 ((y*c->uvstride + x)>>1),
1944 ((y*c->uvstride + x)>>1),
1945 };
1946 int i;
1947 for(i=0; i<3; i++){
1948 c->src[0][i]= src [i];
1949 c->ref[0][i]= ref [i] + offset[i];
1950 }
1951 assert(!ref_index);
1952}
1953
1954static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1955 BlockNode *left, BlockNode *top, BlockNode *tr){
1956 if(s->ref_frames == 1){
1957 *mx = mid_pred(left->mx, top->mx, tr->mx);
1958 *my = mid_pred(left->my, top->my, tr->my);
1959 }else{
1960 const int *scale = scale_mv_ref[ref];
1961 *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1962 top ->mx * scale[top ->ref] + 128 >>8,
1963 tr ->mx * scale[tr ->ref] + 128 >>8);
1964 *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1965 top ->my * scale[top ->ref] + 128 >>8,
1966 tr ->my * scale[tr ->ref] + 128 >>8);
1967 }
1968}
1969
1970//FIXME copy&paste
1971#define P_LEFT P[1]
1972#define P_TOP P[2]
1973#define P_TOPRIGHT P[3]
1974#define P_MEDIAN P[4]
1975#define P_MV1 P[9]
1976#define FLAG_QPEL 1 //must be 1
1977
1978static int encode_q_branch(SnowContext *s, int level, int x, int y){
1979 uint8_t p_buffer[1024];
1980 uint8_t i_buffer[1024];
1981 uint8_t p_state[sizeof(s->block_state)];
1982 uint8_t i_state[sizeof(s->block_state)];
1983 RangeCoder pc, ic;
1984 uint8_t *pbbak= s->c.bytestream;
1985 uint8_t *pbbak_start= s->c.bytestream_start;
1986 int score, score2, iscore, i_len, p_len, block_s, sum;
1987 const int w= s->b_width << s->block_max_depth;
1988 const int h= s->b_height << s->block_max_depth;
1989 const int rem_depth= s->block_max_depth - level;
1990 const int index= (x + y*w) << rem_depth;
1991 const int block_w= 1<<(LOG2_MB_SIZE - level);
1992 int trx= (x+1)<<rem_depth;
1993 int try= (y+1)<<rem_depth;
1994 BlockNode *left = x ? &s->block[index-1] : &null_block;
1995 BlockNode *top = y ? &s->block[index-w] : &null_block;
1996 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1997 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1998 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1999 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2000 int pl = left->color[0];
2001 int pcb= left->color[1];
2002 int pcr= left->color[2];
2003 int pmx, pmy;
2004 int mx=0, my=0;
2005 int l,cr,cb;
2006 const int stride= s->current_picture.linesize[0];
2007 const int uvstride= s->current_picture.linesize[1];
2008 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2009 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2010 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2011 int P[10][2];
2012 int16_t last_mv[3][2];
2013 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2014 const int shift= 1+qpel;
2015 MotionEstContext *c= &s->m.me;
2016 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2017 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2018 int my_context= av_log2(2*ABS(left->my - top->my));
2019 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2020 int ref, best_ref, ref_score, ref_mx, ref_my;
2021
2022 assert(sizeof(s->block_state) >= 256);
2023 if(s->keyframe){
2024 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2025 return 0;
2026 }
2027
2028// clip predictors / edge ?
2029
2030 P_LEFT[0]= left->mx;
2031 P_LEFT[1]= left->my;
2032 P_TOP [0]= top->mx;
2033 P_TOP [1]= top->my;
2034 P_TOPRIGHT[0]= tr->mx;
2035 P_TOPRIGHT[1]= tr->my;
2036
2037 last_mv[0][0]= s->block[index].mx;
2038 last_mv[0][1]= s->block[index].my;
2039 last_mv[1][0]= right->mx;
2040 last_mv[1][1]= right->my;
2041 last_mv[2][0]= bottom->mx;
2042 last_mv[2][1]= bottom->my;
2043
2044 s->m.mb_stride=2;
2045 s->m.mb_x=
2046 s->m.mb_y= 0;
2047 s->m.me.skip= 0;
2048
2049 assert(s->m.me. stride == stride);
2050 assert(s->m.me.uvstride == uvstride);
2051
2052 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2053 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2054 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2055 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2056
2057 c->xmin = - x*block_w - 16+2;
2058 c->ymin = - y*block_w - 16+2;
2059 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2060 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2061
2062 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2063 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2064 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2065 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2066 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2067 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2068 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2069
2070 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2071 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2072
2073 if (!y) {
2074 c->pred_x= P_LEFT[0];
2075 c->pred_y= P_LEFT[1];
2076 } else {
2077 c->pred_x = P_MEDIAN[0];
2078 c->pred_y = P_MEDIAN[1];
2079 }
2080
2081 score= INT_MAX;
2082 best_ref= 0;
2083 for(ref=0; ref<s->ref_frames; ref++){
2084 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2085
2086 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2087 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2088
2089 assert(ref_mx >= c->xmin);
2090 assert(ref_mx <= c->xmax);
2091 assert(ref_my >= c->ymin);
2092 assert(ref_my <= c->ymax);
2093
2094 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2095 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2096 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2097 if(s->ref_mvs[ref]){
2098 s->ref_mvs[ref][index][0]= ref_mx;
2099 s->ref_mvs[ref][index][1]= ref_my;
2100 s->ref_scores[ref][index]= ref_score;
2101 }
2102 if(score > ref_score){
2103 score= ref_score;
2104 best_ref= ref;
2105 mx= ref_mx;
2106 my= ref_my;
2107 }
2108 }
2109 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2110
2111 // subpel search
2112 pc= s->c;
2113 pc.bytestream_start=
2114 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2115 memcpy(p_state, s->block_state, sizeof(s->block_state));
2116
2117 if(level!=s->block_max_depth)
2118 put_rac(&pc, &p_state[4 + s_context], 1);
2119 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2120 if(s->ref_frames > 1)
2121 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2122 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2123 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2124 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2125 p_len= pc.bytestream - pc.bytestream_start;
2126 score += (s->lambda2*(p_len*8
2127 + (pc.outstanding_count - s->c.outstanding_count)*8
2128 + (-av_log2(pc.range) + av_log2(s->c.range))
2129 ))>>FF_LAMBDA_SHIFT;
2130
2131 block_s= block_w*block_w;
2132 sum = pix_sum(current_data[0], stride, block_w);
2133 l= (sum + block_s/2)/block_s;
2134 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2135
2136 block_s= block_w*block_w>>2;
2137 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2138 cb= (sum + block_s/2)/block_s;
2139// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2140 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2141 cr= (sum + block_s/2)/block_s;
2142// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2143
2144 ic= s->c;
2145 ic.bytestream_start=
2146 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2147 memcpy(i_state, s->block_state, sizeof(s->block_state));
2148 if(level!=s->block_max_depth)
2149 put_rac(&ic, &i_state[4 + s_context], 1);
2150 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2151 put_symbol(&ic, &i_state[32], l-pl , 1);
2152 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2153 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2154 i_len= ic.bytestream - ic.bytestream_start;
2155 iscore += (s->lambda2*(i_len*8
2156 + (ic.outstanding_count - s->c.outstanding_count)*8
2157 + (-av_log2(ic.range) + av_log2(s->c.range))
2158 ))>>FF_LAMBDA_SHIFT;
2159
2160// assert(score==256*256*256*64-1);
2161 assert(iscore < 255*255*256 + s->lambda2*10);
2162 assert(iscore >= 0);
2163 assert(l>=0 && l<=255);
2164 assert(pl>=0 && pl<=255);
2165
2166 if(level==0){
2167 int varc= iscore >> 8;
2168 int vard= score >> 8;
2169 if (vard <= 64 || vard < varc)
2170 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2171 else
2172 c->scene_change_score+= s->m.qscale;
2173 }
2174
2175 if(level!=s->block_max_depth){
2176 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2177 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2178 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2179 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2180 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2181 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2182
2183 if(score2 < score && score2 < iscore)
2184 return score2;
2185 }
2186
2187 if(iscore < score){
2188 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2189 memcpy(pbbak, i_buffer, i_len);
2190 s->c= ic;
2191 s->c.bytestream_start= pbbak_start;
2192 s->c.bytestream= pbbak + i_len;
2193 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2194 memcpy(s->block_state, i_state, sizeof(s->block_state));
2195 return iscore;
2196 }else{
2197 memcpy(pbbak, p_buffer, p_len);
2198 s->c= pc;
2199 s->c.bytestream_start= pbbak_start;
2200 s->c.bytestream= pbbak + p_len;
2201 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2202 memcpy(s->block_state, p_state, sizeof(s->block_state));
2203 return score;
2204 }
2205}
2206
2207static always_inline int same_block(BlockNode *a, BlockNode *b){
2208 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2209 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2210 }else{
2211 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2212 }
2213}
2214
2215static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2216 const int w= s->b_width << s->block_max_depth;
2217 const int rem_depth= s->block_max_depth - level;
2218 const int index= (x + y*w) << rem_depth;
2219 int trx= (x+1)<<rem_depth;
2220 BlockNode *b= &s->block[index];
2221 BlockNode *left = x ? &s->block[index-1] : &null_block;
2222 BlockNode *top = y ? &s->block[index-w] : &null_block;
2223 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2224 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2225 int pl = left->color[0];
2226 int pcb= left->color[1];
2227 int pcr= left->color[2];
2228 int pmx, pmy;
2229 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2230 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2231 int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2232 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2233
2234 if(s->keyframe){
2235 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2236 return;
2237 }
2238
2239 if(level!=s->block_max_depth){
2240 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2241 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2242 }else{
2243 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2244 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2245 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2246 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2247 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2248 return;
2249 }
2250 }
2251 if(b->type & BLOCK_INTRA){
2252 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2253 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2254 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2255 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2256 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2257 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2258 }else{
2259 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2260 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2261 if(s->ref_frames > 1)
2262 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2263 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2264 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2265 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2266 }
2267}
2268
2269static void decode_q_branch(SnowContext *s, int level, int x, int y){
2270 const int w= s->b_width << s->block_max_depth;
2271 const int rem_depth= s->block_max_depth - level;
2272 const int index= (x + y*w) << rem_depth;
2273 int trx= (x+1)<<rem_depth;
2274 BlockNode *left = x ? &s->block[index-1] : &null_block;
2275 BlockNode *top = y ? &s->block[index-w] : &null_block;
2276 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2277 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2278 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2279
2280 if(s->keyframe){
2281 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2282 return;
2283 }
2284
2285 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2286 int type;
2287 int l = left->color[0];
2288 int cb= left->color[1];
2289 int cr= left->color[2];
2290 int mx= mid_pred(left->mx, top->mx, tr->mx);
2291 int my= mid_pred(left->my, top->my, tr->my);
2292 int ref = 0;
2293 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2294 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2295 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2296
2297 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2298
2299 if(type){
2300 pred_mv(s, &mx, &my, 0, left, top, tr);
2301 l += get_symbol(&s->c, &s->block_state[32], 1);
2302 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2303 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2304 }else{
2305 if(s->ref_frames > 1)
2306 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2307 pred_mv(s, &mx, &my, ref, left, top, tr);
2308 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2309 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2310 }
2311 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2312 }else{
2313 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2315 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2316 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2317 }
2318}
2319
2320static void encode_blocks(SnowContext *s){
2321 int x, y;
2322 int w= s->b_width;
2323 int h= s->b_height;
2324
2325 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2326 iterative_me(s);
2327
2328 for(y=0; y<h; y++){
2329 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2330 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2331 return;
2332 }
2333 for(x=0; x<w; x++){
2334 if(s->avctx->me_method == ME_ITER)
2335 encode_q_branch2(s, 0, x, y);
2336 else
2337 encode_q_branch (s, 0, x, y);
2338 }
2339 }
2340}
2341
2342static void decode_blocks(SnowContext *s){
2343 int x, y;
2344 int w= s->b_width;
2345 int h= s->b_height;
2346
2347 for(y=0; y<h; y++){
2348 for(x=0; x<w; x++){
2349 decode_q_branch(s, 0, x, y);
2350 }
2351 }
2352}
2353
2354static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2355 int x, y;
2356START_TIMER
2357 for(y=0; y < b_h+5; y++){
2358 for(x=0; x < b_w; x++){
2359 int a0= src[x ];
2360 int a1= src[x + 1];
2361 int a2= src[x + 2];
2362 int a3= src[x + 3];
2363 int a4= src[x + 4];
2364 int a5= src[x + 5];
2365// int am= 9*(a1+a2) - (a0+a3);
2366 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2367// int am= 18*(a2+a3) - 2*(a1+a4);
2368// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2369// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2370
2371// if(b_w==16) am= 8*(a1+a2);
2372
2373 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2374 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2375
2376 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2377 if(am&(~255)) am= ~(am>>31);
2378
2379 tmp[x] = am;
2380
2381/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2382 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2383 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2384 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2385 }
2386 tmp += stride;
2387 src += stride;
2388 }
2389 tmp -= (b_h+5)*stride;
2390
2391 for(y=0; y < b_h; y++){
2392 for(x=0; x < b_w; x++){
2393 int a0= tmp[x + 0*stride];
2394 int a1= tmp[x + 1*stride];
2395 int a2= tmp[x + 2*stride];
2396 int a3= tmp[x + 3*stride];
2397 int a4= tmp[x + 4*stride];
2398 int a5= tmp[x + 5*stride];
2399 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2400// int am= 18*(a2+a3) - 2*(a1+a4);
2401/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2402 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2403
2404// if(b_w==16) am= 8*(a1+a2);
2405
2406 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2407 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2408
2409 if(am&(~255)) am= ~(am>>31);
2410
2411 dst[x] = am;
2412/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2413 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2414 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2415 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2416 }
2417 dst += stride;
2418 tmp += stride;
2419 }
2420STOP_TIMER("mc_block")
2421}
2422
2423#define mca(dx,dy,b_w)\
2424static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2425 uint8_t tmp[stride*(b_w+5)];\
2426 assert(h==b_w);\
2427 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2428}
2429
2430mca( 0, 0,16)
2431mca( 8, 0,16)
2432mca( 0, 8,16)
2433mca( 8, 8,16)
2434mca( 0, 0,8)
2435mca( 8, 0,8)
2436mca( 0, 8,8)
2437mca( 8, 8,8)
2438
2439static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2440 if(block->type & BLOCK_INTRA){
2441 int x, y;
2442 const int color = block->color[plane_index];
2443 const int color4= color*0x01010101;
2444 if(b_w==32){
2445 for(y=0; y < b_h; y++){
2446 *(uint32_t*)&dst[0 + y*stride]= color4;
2447 *(uint32_t*)&dst[4 + y*stride]= color4;
2448 *(uint32_t*)&dst[8 + y*stride]= color4;
2449 *(uint32_t*)&dst[12+ y*stride]= color4;
2450 *(uint32_t*)&dst[16+ y*stride]= color4;
2451 *(uint32_t*)&dst[20+ y*stride]= color4;
2452 *(uint32_t*)&dst[24+ y*stride]= color4;
2453 *(uint32_t*)&dst[28+ y*stride]= color4;
2454 }
2455 }else if(b_w==16){
2456 for(y=0; y < b_h; y++){
2457 *(uint32_t*)&dst[0 + y*stride]= color4;
2458 *(uint32_t*)&dst[4 + y*stride]= color4;
2459 *(uint32_t*)&dst[8 + y*stride]= color4;
2460 *(uint32_t*)&dst[12+ y*stride]= color4;
2461 }
2462 }else if(b_w==8){
2463 for(y=0; y < b_h; y++){
2464 *(uint32_t*)&dst[0 + y*stride]= color4;
2465 *(uint32_t*)&dst[4 + y*stride]= color4;
2466 }
2467 }else if(b_w==4){
2468 for(y=0; y < b_h; y++){
2469 *(uint32_t*)&dst[0 + y*stride]= color4;
2470 }
2471 }else{
2472 for(y=0; y < b_h; y++){
2473 for(x=0; x < b_w; x++){
2474 dst[x + y*stride]= color;
2475 }
2476 }
2477 }
2478 }else{
2479 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2480 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2481 int mx= block->mx*scale;
2482 int my= block->my*scale;
2483 const int dx= mx&15;
2484 const int dy= my&15;
2485 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2486 sx += (mx>>4) - 2;
2487 sy += (my>>4) - 2;
2488 src += sx + sy*stride;
2489 if( (unsigned)sx >= w - b_w - 4
2490 || (unsigned)sy >= h - b_h - 4){
2491 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2492 src= tmp + MB_SIZE;
2493 }
2494// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2495// assert(!(b_w&(b_w-1)));
2496 assert(b_w>1 && b_h>1);
2497 assert(tab_index>=0 && tab_index<4 || b_w==32);
2498 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2499 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2500 else if(b_w==32){
2501 int y;
2502 for(y=0; y<b_h; y+=16){
2503 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2504 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2505 }
2506 }else if(b_w==b_h)
2507 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2508 else if(b_w==2*b_h){
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2510 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2511 }else{
2512 assert(2*b_w==b_h);
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2515 }
2516 }
2517}
2518
2519void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2520 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2521 int y, x;
2522 DWTELEM * dst;
2523 for(y=0; y<b_h; y++){
2524 //FIXME ugly missue of obmc_stride
2525 uint8_t *obmc1= obmc + y*obmc_stride;
2526 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2527 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2528 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2529 dst = slice_buffer_get_line(sb, src_y + y);
2530 for(x=0; x<b_w; x++){
2531 int v= obmc1[x] * block[3][x + y*src_stride]
2532 +obmc2[x] * block[2][x + y*src_stride]
2533 +obmc3[x] * block[1][x + y*src_stride]
2534 +obmc4[x] * block[0][x + y*src_stride];
2535
2536 v <<= 8 - LOG2_OBMC_MAX;
2537 if(FRAC_BITS != 8){
2538 v += 1<<(7 - FRAC_BITS);
2539 v >>= 8 - FRAC_BITS;
2540 }
2541 if(add){
2542 v += dst[x + src_x];
2543 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2544 if(v&(~255)) v= ~(v>>31);
2545 dst8[x + y*src_stride] = v;
2546 }else{
2547 dst[x + src_x] -= v;
2548 }
2549 }
2550 }
2551}
2552
2553//FIXME name clenup (b_w, block_w, b_width stuff)
2554static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2555 DWTELEM * dst = NULL;
2556 const int b_width = s->b_width << s->block_max_depth;
2557 const int b_height= s->b_height << s->block_max_depth;
2558 const int b_stride= b_width;
2559 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2560 BlockNode *rt= lt+1;
2561 BlockNode *lb= lt+b_stride;
2562 BlockNode *rb= lb+1;
2563 uint8_t *block[4];
2564 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2565 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2566 uint8_t *ptmp;
2567 int x,y;
2568
2569 if(b_x<0){
2570 lt= rt;
2571 lb= rb;
2572 }else if(b_x + 1 >= b_width){
2573 rt= lt;
2574 rb= lb;
2575 }
2576 if(b_y<0){
2577 lt= lb;
2578 rt= rb;
2579 }else if(b_y + 1 >= b_height){
2580 lb= lt;
2581 rb= rt;
2582 }
2583
2584 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2585 obmc -= src_x;
2586 b_w += src_x;
2587 src_x=0;
2588 }else if(src_x + b_w > w){
2589 b_w = w - src_x;
2590 }
2591 if(src_y<0){
2592 obmc -= src_y*obmc_stride;
2593 b_h += src_y;
2594 src_y=0;
2595 }else if(src_y + b_h> h){
2596 b_h = h - src_y;
2597 }
2598
2599 if(b_w<=0 || b_h<=0) return;
2600
2601assert(src_stride > 2*MB_SIZE + 5);
2602// old_dst += src_x + src_y*dst_stride;
2603 dst8+= src_x + src_y*src_stride;
2604// src += src_x + src_y*src_stride;
2605
2606 ptmp= tmp + 3*tmp_step;
2607 block[0]= ptmp;
2608 ptmp+=tmp_step;
2609 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2610
2611 if(same_block(lt, rt)){
2612 block[1]= block[0];
2613 }else{
2614 block[1]= ptmp;
2615 ptmp+=tmp_step;
2616 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2617 }
2618
2619 if(same_block(lt, lb)){
2620 block[2]= block[0];
2621 }else if(same_block(rt, lb)){
2622 block[2]= block[1];
2623 }else{
2624 block[2]= ptmp;
2625 ptmp+=tmp_step;
2626 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2627 }
2628
2629 if(same_block(lt, rb) ){
2630 block[3]= block[0];
2631 }else if(same_block(rt, rb)){
2632 block[3]= block[1];
2633 }else if(same_block(lb, rb)){
2634 block[3]= block[2];
2635 }else{
2636 block[3]= ptmp;
2637 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2638 }
2639#if 0
2640 for(y=0; y<b_h; y++){
2641 for(x=0; x<b_w; x++){
2642 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2643 if(add) dst[x + y*dst_stride] += v;
2644 else dst[x + y*dst_stride] -= v;
2645 }
2646 }
2647 for(y=0; y<b_h; y++){
2648 uint8_t *obmc2= obmc + (obmc_stride>>1);
2649 for(x=0; x<b_w; x++){
2650 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2651 if(add) dst[x + y*dst_stride] += v;
2652 else dst[x + y*dst_stride] -= v;
2653 }
2654 }
2655 for(y=0; y<b_h; y++){
2656 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2657 for(x=0; x<b_w; x++){
2658 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2659 if(add) dst[x + y*dst_stride] += v;
2660 else dst[x + y*dst_stride] -= v;
2661 }
2662 }
2663 for(y=0; y<b_h; y++){
2664 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2665 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2666 for(x=0; x<b_w; x++){
2667 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2668 if(add) dst[x + y*dst_stride] += v;
2669 else dst[x + y*dst_stride] -= v;
2670 }
2671 }
2672#else
2673{
2674
2675 START_TIMER
2676
2677 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2678 STOP_TIMER("Inner add y block")
2679}
2680#endif
2681}
2682
2683//FIXME name clenup (b_w, block_w, b_width stuff)
2684static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2685 const int b_width = s->b_width << s->block_max_depth;
2686 const int b_height= s->b_height << s->block_max_depth;
2687 const int b_stride= b_width;
2688 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2689 BlockNode *rt= lt+1;
2690 BlockNode *lb= lt+b_stride;
2691 BlockNode *rb= lb+1;
2692 uint8_t *block[4];
2693 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2694 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2695 uint8_t *ptmp;
2696 int x,y;
2697
2698 if(b_x<0){
2699 lt= rt;
2700 lb= rb;
2701 }else if(b_x + 1 >= b_width){
2702 rt= lt;
2703 rb= lb;
2704 }
2705 if(b_y<0){
2706 lt= lb;
2707 rt= rb;
2708 }else if(b_y + 1 >= b_height){
2709 lb= lt;
2710 rb= rt;
2711 }
2712
2713 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2714 obmc -= src_x;
2715 b_w += src_x;
2716 if(!offset_dst)
2717 dst -= src_x;
2718 src_x=0;
2719 }else if(src_x + b_w > w){
2720 b_w = w - src_x;
2721 }
2722 if(src_y<0){
2723 obmc -= src_y*obmc_stride;
2724 b_h += src_y;
2725 if(!offset_dst)
2726 dst -= src_y*dst_stride;
2727 src_y=0;
2728 }else if(src_y + b_h> h){
2729 b_h = h - src_y;
2730 }
2731
2732 if(b_w<=0 || b_h<=0) return;
2733
2734assert(src_stride > 2*MB_SIZE + 5);
2735 if(offset_dst)
2736 dst += src_x + src_y*dst_stride;
2737 dst8+= src_x + src_y*src_stride;
2738// src += src_x + src_y*src_stride;
2739
2740 ptmp= tmp + 3*tmp_step;
2741 block[0]= ptmp;
2742 ptmp+=tmp_step;
2743 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2744
2745 if(same_block(lt, rt)){
2746 block[1]= block[0];
2747 }else{
2748 block[1]= ptmp;
2749 ptmp+=tmp_step;
2750 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2751 }
2752
2753 if(same_block(lt, lb)){
2754 block[2]= block[0];
2755 }else if(same_block(rt, lb)){
2756 block[2]= block[1];
2757 }else{
2758 block[2]= ptmp;
2759 ptmp+=tmp_step;
2760 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2761 }
2762
2763 if(same_block(lt, rb) ){
2764 block[3]= block[0];
2765 }else if(same_block(rt, rb)){
2766 block[3]= block[1];
2767 }else if(same_block(lb, rb)){
2768 block[3]= block[2];
2769 }else{
2770 block[3]= ptmp;
2771 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2772 }
2773#if 0
2774 for(y=0; y<b_h; y++){
2775 for(x=0; x<b_w; x++){
2776 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2777 if(add) dst[x + y*dst_stride] += v;
2778 else dst[x + y*dst_stride] -= v;
2779 }
2780 }
2781 for(y=0; y<b_h; y++){
2782 uint8_t *obmc2= obmc + (obmc_stride>>1);
2783 for(x=0; x<b_w; x++){
2784 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2785 if(add) dst[x + y*dst_stride] += v;
2786 else dst[x + y*dst_stride] -= v;
2787 }
2788 }
2789 for(y=0; y<b_h; y++){
2790 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2791 for(x=0; x<b_w; x++){
2792 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2793 if(add) dst[x + y*dst_stride] += v;
2794 else dst[x + y*dst_stride] -= v;
2795 }
2796 }
2797 for(y=0; y<b_h; y++){
2798 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2799 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2800 for(x=0; x<b_w; x++){
2801 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2802 if(add) dst[x + y*dst_stride] += v;
2803 else dst[x + y*dst_stride] -= v;
2804 }
2805 }
2806#else
2807 for(y=0; y<b_h; y++){
2808 //FIXME ugly missue of obmc_stride
2809 uint8_t *obmc1= obmc + y*obmc_stride;
2810 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2811 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2812 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2813 for(x=0; x<b_w; x++){
2814 int v= obmc1[x] * block[3][x + y*src_stride]
2815 +obmc2[x] * block[2][x + y*src_stride]
2816 +obmc3[x] * block[1][x + y*src_stride]
2817 +obmc4[x] * block[0][x + y*src_stride];
2818
2819 v <<= 8 - LOG2_OBMC_MAX;
2820 if(FRAC_BITS != 8){
2821 v += 1<<(7 - FRAC_BITS);
2822 v >>= 8 - FRAC_BITS;
2823 }
2824 if(add){
2825 v += dst[x + y*dst_stride];
2826 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2827 if(v&(~255)) v= ~(v>>31);
2828 dst8[x + y*src_stride] = v;
2829 }else{
2830 dst[x + y*dst_stride] -= v;
2831 }
2832 }
2833 }
2834#endif
2835}
2836
2837static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2838 Plane *p= &s->plane[plane_index];
2839 const int mb_w= s->b_width << s->block_max_depth;
2840 const int mb_h= s->b_height << s->block_max_depth;
2841 int x, y, mb_x;
2842 int block_size = MB_SIZE >> s->block_max_depth;
2843 int block_w = plane_index ? block_size/2 : block_size;
2844 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2845 int obmc_stride= plane_index ? block_size : 2*block_size;
2846 int ref_stride= s->current_picture.linesize[plane_index];
2847 uint8_t *dst8= s->current_picture.data[plane_index];
2848 int w= p->width;
2849 int h= p->height;
2850 START_TIMER
2851
2852 if(s->keyframe || (s->avctx->debug&512)){
2853 if(mb_y==mb_h)
2854 return;
2855
2856 if(add){
2857 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2858 {
2859// DWTELEM * line = slice_buffer_get_line(sb, y);
2860 DWTELEM * line = sb->line[y];
2861 for(x=0; x<w; x++)
2862 {
2863// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2864 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2865 v >>= FRAC_BITS;
2866 if(v&(~255)) v= ~(v>>31);
2867 dst8[x + y*ref_stride]= v;
2868 }
2869 }
2870 }else{
2871 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2872 {
2873// DWTELEM * line = slice_buffer_get_line(sb, y);
2874 DWTELEM * line = sb->line[y];
2875 for(x=0; x<w; x++)
2876 {
2877 line[x] -= 128 << FRAC_BITS;
2878// buf[x + y*w]-= 128<<FRAC_BITS;
2879 }
2880 }
2881 }
2882
2883 return;
2884 }
2885
2886 for(mb_x=0; mb_x<=mb_w; mb_x++){
2887 START_TIMER
2888
2889 add_yblock_buffered(s, sb, old_buffer, dst8, obmc,
2890 block_w*mb_x - block_w/2,
2891 block_w*mb_y - block_w/2,
2892 block_w, block_w,
2893 w, h,
2894 w, ref_stride, obmc_stride,
2895 mb_x - 1, mb_y - 1,
2896 add, plane_index);
2897
2898 STOP_TIMER("add_yblock")
2899 }
2900
2901 STOP_TIMER("predict_slice")
2902}
2903
2904static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2905 Plane *p= &s->plane[plane_index];
2906 const int mb_w= s->b_width << s->block_max_depth;
2907 const int mb_h= s->b_height << s->block_max_depth;
2908 int x, y, mb_x;
2909 int block_size = MB_SIZE >> s->block_max_depth;
2910 int block_w = plane_index ? block_size/2 : block_size;
2911 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2912 const int obmc_stride= plane_index ? block_size : 2*block_size;
2913 int ref_stride= s->current_picture.linesize[plane_index];
2914 uint8_t *dst8= s->current_picture.data[plane_index];
2915 int w= p->width;
2916 int h= p->height;
2917 START_TIMER
2918
2919 if(s->keyframe || (s->avctx->debug&512)){
2920 if(mb_y==mb_h)
2921 return;
2922
2923 if(add){
2924 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2925 for(x=0; x<w; x++){
2926 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2927 v >>= FRAC_BITS;
2928 if(v&(~255)) v= ~(v>>31);
2929 dst8[x + y*ref_stride]= v;
2930 }
2931 }
2932 }else{
2933 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2934 for(x=0; x<w; x++){
2935 buf[x + y*w]-= 128<<FRAC_BITS;
2936 }
2937 }
2938 }
2939
2940 return;
2941 }
2942
2943 for(mb_x=0; mb_x<=mb_w; mb_x++){
2944 START_TIMER
2945
2946 add_yblock(s, buf, dst8, obmc,
2947 block_w*mb_x - block_w/2,
2948 block_w*mb_y - block_w/2,
2949 block_w, block_w,
2950 w, h,
2951 w, ref_stride, obmc_stride,
2952 mb_x - 1, mb_y - 1,
2953 add, 1, plane_index);
2954
2955 STOP_TIMER("add_yblock")
2956 }
2957
2958 STOP_TIMER("predict_slice")
2959}
2960
2961static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2962 const int mb_h= s->b_height << s->block_max_depth;
2963 int mb_y;
2964 for(mb_y=0; mb_y<=mb_h; mb_y++)
2965 predict_slice(s, buf, plane_index, add, mb_y);
2966}
2967
2968static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2969 int i, x2, y2;
2970 Plane *p= &s->plane[plane_index];
2971 const int block_size = MB_SIZE >> s->block_max_depth;
2972 const int block_w = plane_index ? block_size/2 : block_size;
2973 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2974 const int obmc_stride= plane_index ? block_size : 2*block_size;
2975 const int ref_stride= s->current_picture.linesize[plane_index];
2976 uint8_t *src= s-> input_picture.data[plane_index];
2977 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2978 const int b_stride = s->b_width << s->block_max_depth;
2979 const int w= p->width;
2980 const int h= p->height;
2981 int index= mb_x + mb_y*b_stride;
2982 BlockNode *b= &s->block[index];
2983 BlockNode backup= *b;
2984 int ab=0;
2985 int aa=0;
2986
2987 b->type|= BLOCK_INTRA;
2988 b->color[plane_index]= 0;
2989 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2990
2991 for(i=0; i<4; i++){
2992 int mb_x2= mb_x + (i &1) - 1;
2993 int mb_y2= mb_y + (i>>1) - 1;
2994 int x= block_w*mb_x2 + block_w/2;
2995 int y= block_w*mb_y2 + block_w/2;
2996
2997 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2998 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2999
3000 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3001 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3002 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3003 int obmc_v= obmc[index];
3004 int d;
3005 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3006 if(x<0) obmc_v += obmc[index + block_w];
3007 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3008 if(x+block_w>w) obmc_v += obmc[index - block_w];
3009 //FIXME precalc this or simplify it somehow else
3010
3011 d = -dst[index] + (1<<(FRAC_BITS-1));
3012 dst[index] = d;
3013 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3014 aa += obmc_v * obmc_v; //FIXME precalclate this
3015 }
3016 }
3017 }
3018 *b= backup;
3019
3020 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3021}
3022
3023static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3024 const int b_stride = s->b_width << s->block_max_depth;
3025 const int b_height = s->b_height<< s->block_max_depth;
3026 int index= x + y*b_stride;
3027 BlockNode *b = &s->block[index];
3028 BlockNode *left = x ? &s->block[index-1] : &null_block;
3029 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3030 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3031 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3032 int dmx, dmy;
3033// int mx_context= av_log2(2*ABS(left->mx - top->mx));
3034// int my_context= av_log2(2*ABS(left->my - top->my));
3035
3036 if(x<0 || x>=b_stride || y>=b_height)
3037 return 0;
3038/*
30391 0 0
304001X 1-2 1
3041001XX 3-6 2-3
30420001XXX 7-14 4-7
304300001XXXX 15-30 8-15
3044*/
3045//FIXME try accurate rate
3046//FIXME intra and inter predictors if surrounding blocks arent the same type
3047 if(b->type & BLOCK_INTRA){
3048 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3049 + av_log2(2*ABS(left->color[1] - b->color[1]))
3050 + av_log2(2*ABS(left->color[2] - b->color[2])));
3051 }else{
3052 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3053 dmx-= b->mx;
3054 dmy-= b->my;
3055 return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
3056 + av_log2(2*ABS(dmy))
3057 + av_log2(2*b->ref));
3058 }
3059}
3060
3061static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3062 Plane *p= &s->plane[plane_index];
3063 const int block_size = MB_SIZE >> s->block_max_depth;
3064 const int block_w = plane_index ? block_size/2 : block_size;
3065 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3066 const int obmc_stride= plane_index ? block_size : 2*block_size;
3067 const int ref_stride= s->current_picture.linesize[plane_index];
3068 uint8_t *dst= s->current_picture.data[plane_index];
3069 uint8_t *src= s-> input_picture.data[plane_index];
3070 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3071 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3072 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3073 const int b_stride = s->b_width << s->block_max_depth;
3074 const int b_height = s->b_height<< s->block_max_depth;
3075 const int w= p->width;
3076 const int h= p->height;
3077 int distortion;
3078 int rate= 0;
3079 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3080 int sx= block_w*mb_x - block_w/2;
3081 int sy= block_w*mb_y - block_w/2;
3082 int x0= FFMAX(0,-sx);
3083 int y0= FFMAX(0,-sy);
3084 int x1= FFMIN(block_w*2, w-sx);
3085 int y1= FFMIN(block_w*2, h-sy);
3086 int i,x,y;
3087
3088 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3089
3090 for(y=y0; y<y1; y++){
3091 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3092 const DWTELEM *pred1 = pred + y*obmc_stride;
3093 uint8_t *cur1 = cur + y*ref_stride;
3094 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3095 for(x=x0; x<x1; x++){
3096 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3097 v = (v + pred1[x]) >> FRAC_BITS;
3098 if(v&(~255)) v= ~(v>>31);
3099 dst1[x] = v;
3100 }
3101 }
3102
3103 /* copy the regions where obmc[] = (uint8_t)256 */
3104 if(LOG2_OBMC_MAX == 8
3105 && (mb_x == 0 || mb_x == b_stride-1)
3106 && (mb_y == 0 || mb_y == b_height-1)){
3107 if(mb_x == 0)
3108 x1 = block_w;
3109 else
3110 x0 = block_w;
3111 if(mb_y == 0)
3112 y1 = block_w;
3113 else
3114 y0 = block_w;
3115 for(y=y0; y<y1; y++)
3116 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3117 }
3118
3119 if(block_w==16){
3120 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3121 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3122 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3123 * so improving the score of one block is not strictly guaranteed to
3124 * improve the score of the whole frame, so iterative motion est
3125 * doesn't always converge. */
3126 if(s->avctx->me_cmp == FF_CMP_W97)
3127 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3128 else if(s->avctx->me_cmp == FF_CMP_W53)
3129 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3130 else{
3131 distortion = 0;
3132 for(i=0; i<4; i++){
3133 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3134 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3135 }
3136 }
3137 }else{
3138 assert(block_w==8);
3139 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3140 }
3141
3142 if(plane_index==0){
3143 for(i=0; i<4; i++){
3144/* ..RRr
3145 * .RXx.
3146 * rxx..
3147 */
3148 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3149 }
3150 if(mb_x == b_stride-2)
3151 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3152 }
3153 return distortion + rate*penalty_factor;
3154}
3155
3156static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3157 int i, y2;
3158 Plane *p= &s->plane[plane_index];
3159 const int block_size = MB_SIZE >> s->block_max_depth;
3160 const int block_w = plane_index ? block_size/2 : block_size;
3161 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3162 const int obmc_stride= plane_index ? block_size : 2*block_size;
3163 const int ref_stride= s->current_picture.linesize[plane_index];
3164 uint8_t *dst= s->current_picture.data[plane_index];
3165 uint8_t *src= s-> input_picture.data[plane_index];
3166 const static DWTELEM zero_dst[4096]; //FIXME
3167 const int b_stride = s->b_width << s->block_max_depth;
3168 const int b_height = s->b_height<< s->block_max_depth;
3169 const int w= p->width;
3170 const int h= p->height;
3171 int distortion= 0;
3172 int rate= 0;
3173 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3174
3175 for(i=0; i<9; i++){
3176 int mb_x2= mb_x + (i%3) - 1;
3177 int mb_y2= mb_y + (i/3) - 1;
3178 int x= block_w*mb_x2 + block_w/2;
3179 int y= block_w*mb_y2 + block_w/2;
3180
3181 add_yblock(s, zero_dst, dst, obmc,
3182 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3183
3184 //FIXME find a cleaner/simpler way to skip the outside stuff
3185 for(y2= y; y2<0; y2++)
3186 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3187 for(y2= h; y2<y+block_w; y2++)
3188 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3189 if(x<0){
3190 for(y2= y; y2<y+block_w; y2++)
3191 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3192 }
3193 if(x+block_w > w){
3194 for(y2= y; y2<y+block_w; y2++)
3195 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3196 }
3197
3198 assert(block_w== 8 || block_w==16);
3199 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3200 }
3201
3202 if(plane_index==0){
3203 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3204 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3205
3206/* ..RRRr
3207 * .RXXx.
3208 * .RXXx.
3209 * rxxx.
3210 */
3211 if(merged)
3212 rate = get_block_bits(s, mb_x, mb_y, 2);
3213 for(i=merged?4:0; i<9; i++){
3214 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3215 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3216 }
3217 }
3218 return distortion + rate*penalty_factor;
3219}
3220
3221static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3222 const int b_stride= s->b_width << s->block_max_depth;
3223 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3224 BlockNode backup= *block;
3225 int rd, index, value;
3226
3227 assert(mb_x>=0 && mb_y>=0);
3228 assert(mb_x<b_stride);
3229
3230 if(intra){
3231 block->color[0] = p[0];
3232 block->color[1] = p[1];
3233 block->color[2] = p[2];
3234 block->type |= BLOCK_INTRA;
3235 }else{
3236 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3237 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3238 if(s->me_cache[index] == value)
3239 return 0;
3240 s->me_cache[index]= value;
3241
3242 block->mx= p[0];
3243 block->my= p[1];
3244 block->type &= ~BLOCK_INTRA;
3245 }
3246
3247 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3248
3249//FIXME chroma
3250 if(rd < *best_rd){
3251 *best_rd= rd;
3252 return 1;
3253 }else{
3254 *block= backup;
3255 return 0;
3256 }
3257}
3258
3259/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3260static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3261 int p[2] = {p0, p1};
3262 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3263}
3264
3265static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3266 const int b_stride= s->b_width << s->block_max_depth;
3267 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3268 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3269 int rd, index, value;
3270
3271 assert(mb_x>=0 && mb_y>=0);
3272 assert(mb_x<b_stride);
3273 assert(((mb_x|mb_y)&1) == 0);
3274
3275 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3276 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3277 if(s->me_cache[index] == value)
3278 return 0;
3279 s->me_cache[index]= value;
3280
3281 block->mx= p0;
3282 block->my= p1;
3283 block->ref= ref;
3284 block->type &= ~BLOCK_INTRA;
3285 block[1]= block[b_stride]= block[b_stride+1]= *block;
3286
3287 rd= get_4block_rd(s, mb_x, mb_y, 0);
3288
3289//FIXME chroma
3290 if(rd < *best_rd){
3291 *best_rd= rd;
3292 return 1;
3293 }else{
3294 block[0]= backup[0];
3295 block[1]= backup[1];
3296 block[b_stride]= backup[2];
3297 block[b_stride+1]= backup[3];
3298 return 0;
3299 }
3300}
3301
3302static void iterative_me(SnowContext *s){
3303 int pass, mb_x, mb_y;
3304 const int b_width = s->b_width << s->block_max_depth;
3305 const int b_height= s->b_height << s->block_max_depth;
3306 const int b_stride= b_width;
3307 int color[3];
3308
3309 {
3310 RangeCoder r = s->c;
3311 uint8_t state[sizeof(s->block_state)];
3312 memcpy(state, s->block_state, sizeof(s->block_state));
3313 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3314 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3315 encode_q_branch(s, 0, mb_x, mb_y);
3316 s->c = r;
3317 memcpy(s->block_state, state, sizeof(s->block_state));
3318 }
3319
3320 for(pass=0; pass<25; pass++){
3321 int change= 0;
3322
3323 for(mb_y= 0; mb_y<b_height; mb_y++){
3324 for(mb_x= 0; mb_x<b_width; mb_x++){
3325 int dia_change, i, j, ref;
3326 int best_rd= INT_MAX, ref_rd;
3327 BlockNode backup, ref_b;
3328 const int index= mb_x + mb_y * b_stride;
3329 BlockNode *block= &s->block[index];
3330 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3331 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3332 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3333 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3334 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3335 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3336 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3337 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3338 const int b_w= (MB_SIZE >> s->block_max_depth);
3339 uint8_t obmc_edged[b_w*2][b_w*2];
3340
3341 if(pass && (block->type & BLOCK_OPT))
3342 continue;
3343 block->type |= BLOCK_OPT;
3344
3345 backup= *block;
3346
3347 if(!s->me_cache_generation)
3348 memset(s->me_cache, 0, sizeof(s->me_cache));
3349 s->me_cache_generation += 1<<22;
3350
3351 //FIXME precalc
3352 {
3353 int x, y;
3354 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3355 if(mb_x==0)
3356 for(y=0; y<b_w*2; y++)
3357 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3358 if(mb_x==b_stride-1)
3359 for(y=0; y<b_w*2; y++)
3360 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3361 if(mb_y==0){
3362 for(x=0; x<b_w*2; x++)
3363 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3364 for(y=1; y<b_w; y++)
3365 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3366 }
3367 if(mb_y==b_height-1){
3368 for(x=0; x<b_w*2; x++)
3369 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3370 for(y=b_w; y<b_w*2-1; y++)
3371 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3372 }
3373 }
3374
3375 //skip stuff outside the picture
3376 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3377 {
3378 uint8_t *src= s-> input_picture.data[0];
3379 uint8_t *dst= s->current_picture.data[0];
3380 const int stride= s->current_picture.linesize[0];
3381 const int block_w= MB_SIZE >> s->block_max_depth;
3382 const int sx= block_w*mb_x - block_w/2;
3383 const int sy= block_w*mb_y - block_w/2;
3384 const int w= s->plane[0].width;
3385 const int h= s->plane[0].height;
3386 int y;
3387
3388 for(y=sy; y<0; y++)
3389 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3390 for(y=h; y<sy+block_w*2; y++)
3391 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3392 if(sx<0){
3393 for(y=sy; y<sy+block_w*2; y++)
3394 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3395 }
3396 if(sx+block_w*2 > w){
3397 for(y=sy; y<sy+block_w*2; y++)
3398 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3399 }
3400 }
3401
3402 // intra(black) = neighbors' contribution to the current block
3403 for(i=0; i<3; i++)
3404 color[i]= get_dc(s, mb_x, mb_y, i);
3405
3406 // get previous score (cant be cached due to OBMC)
3407 if(pass > 0 && (block->type&BLOCK_INTRA)){
3408 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3409 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3410 }else
3411 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3412
3413 ref_b= *block;
3414 ref_rd= best_rd;
3415 for(ref=0; ref < s->ref_frames; ref++){
3416 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3417 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3418 continue;
3419 block->ref= ref;
3420 best_rd= INT_MAX;
3421
3422 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3423 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3424 if(tb)
3425 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3426 if(lb)
3427 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3428 if(rb)
3429 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3430 if(bb)
3431 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3432
3433 /* fullpel ME */
3434 //FIXME avoid subpel interpol / round to nearest integer
3435 do{
3436 dia_change=0;
3437 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3438 for(j=0; j<i; j++){
3439 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3440 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3441 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3442 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3443 }
3444 }
3445 }while(dia_change);
3446 /* subpel ME */
3447 do{
3448 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3449 dia_change=0;
3450 for(i=0; i<8; i++)
3451 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3452 }while(dia_change);
3453 //FIXME or try the standard 2 pass qpel or similar
3454
3455 mvr[0][0]= block->mx;
3456 mvr[0][1]= block->my;
3457 if(ref_rd > best_rd){
3458 ref_rd= best_rd;
3459 ref_b= *block;
3460 }
3461 }
3462 best_rd= ref_rd;
3463 *block= ref_b;
3464#if 1
3465 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3466 //FIXME RD style color selection
3467#endif
3468 if(!same_block(block, &backup)){
3469 if(tb ) tb ->type &= ~BLOCK_OPT;
3470 if(lb ) lb ->type &= ~BLOCK_OPT;
3471 if(rb ) rb ->type &= ~BLOCK_OPT;
3472 if(bb ) bb ->type &= ~BLOCK_OPT;
3473 if(tlb) tlb->type &= ~BLOCK_OPT;
3474 if(trb) trb->type &= ~BLOCK_OPT;
3475 if(blb) blb->type &= ~BLOCK_OPT;
3476 if(brb) brb->type &= ~BLOCK_OPT;
3477 change ++;
3478 }
3479 }
3480 }
3481 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3482 if(!change)
3483 break;
3484 }
3485
3486 if(s->block_max_depth == 1){
3487 int change= 0;
3488 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3489 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3490 int i;
3491 int best_rd, init_rd;
3492 const int index= mb_x + mb_y * b_stride;
3493 BlockNode *b[4];
3494
3495 b[0]= &s->block[index];
3496 b[1]= b[0]+1;
3497 b[2]= b[0]+b_stride;
3498 b[3]= b[2]+1;
3499 if(same_block(b[0], b[1]) &&
3500 same_block(b[0], b[2]) &&
3501 same_block(b[0], b[3]))
3502 continue;
3503
3504 if(!s->me_cache_generation)
3505 memset(s->me_cache, 0, sizeof(s->me_cache));
3506 s->me_cache_generation += 1<<22;
3507
3508 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3509
3510 //FIXME more multiref search?
3511 check_4block_inter(s, mb_x, mb_y,
3512 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3513 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3514
3515 for(i=0; i<4; i++)
3516 if(!(b[i]->type&BLOCK_INTRA))
3517 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3518
3519 if(init_rd != best_rd)
3520 change++;
3521 }
3522 }
3523 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3524 }
3525}
3526
3527static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3528 const int level= b->level;
3529 const int w= b->width;
3530 const int h= b->height;
3531 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3532 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3533 int x,y, thres1, thres2;
3534// START_TIMER
3535
3536 if(s->qlog == LOSSLESS_QLOG) return;
3537
3538 bias= bias ? 0 : (3*qmul)>>3;
3539 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3540 thres2= 2*thres1;
3541
3542 if(!bias){
3543 for(y=0; y<h; y++){
3544 for(x=0; x<w; x++){
3545 int i= src[x + y*stride];
3546
3547 if((unsigned)(i+thres1) > thres2){
3548 if(i>=0){
3549 i<<= QEXPSHIFT;
3550 i/= qmul; //FIXME optimize
3551 src[x + y*stride]= i;
3552 }else{
3553 i= -i;
3554 i<<= QEXPSHIFT;
3555 i/= qmul; //FIXME optimize
3556 src[x + y*stride]= -i;
3557 }
3558 }else
3559 src[x + y*stride]= 0;
3560 }
3561 }
3562 }else{
3563 for(y=0; y<h; y++){
3564 for(x=0; x<w; x++){
3565 int i= src[x + y*stride];
3566
3567 if((unsigned)(i+thres1) > thres2){
3568 if(i>=0){
3569 i<<= QEXPSHIFT;
3570 i= (i + bias) / qmul; //FIXME optimize
3571 src[x + y*stride]= i;
3572 }else{
3573 i= -i;
3574 i<<= QEXPSHIFT;
3575 i= (i + bias) / qmul; //FIXME optimize
3576 src[x + y*stride]= -i;
3577 }
3578 }else
3579 src[x + y*stride]= 0;
3580 }
3581 }
3582 }
3583 if(level+1 == s->spatial_decomposition_count){
3584// STOP_TIMER("quantize")
3585 }
3586}
3587
3588static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3589 const int w= b->width;
3590 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3591 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3592 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3593 int x,y;
3594 START_TIMER
3595
3596 if(s->qlog == LOSSLESS_QLOG) return;
3597
3598 for(y=start_y; y<end_y; y++){
3599// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3600 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3601 for(x=0; x<w; x++){
3602 int i= line[x];
3603 if(i<0){
3604 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3605 }else if(i>0){
3606 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3607 }
3608 }
3609 }
3610 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3611 STOP_TIMER("dquant")
3612 }
3613}
3614
3615static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3616 const int w= b->width;
3617 const int h= b->height;
3618 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3619 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3620 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3621 int x,y;
3622 START_TIMER
3623
3624 if(s->qlog == LOSSLESS_QLOG) return;
3625
3626 for(y=0; y<h; y++){
3627 for(x=0; x<w; x++){
3628 int i= src[x + y*stride];
3629 if(i<0){
3630 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3631 }else if(i>0){
3632 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3633 }
3634 }
3635 }
3636 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3637 STOP_TIMER("dquant")
3638 }
3639}
3640
3641static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3642 const int w= b->width;
3643 const int h= b->height;
3644 int x,y;
3645
3646 for(y=h-1; y>=0; y--){
3647 for(x=w-1; x>=0; x--){
3648 int i= x + y*stride;
3649
3650 if(x){
3651 if(use_median){
3652 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3653 else src[i] -= src[i - 1];
3654 }else{
3655 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3656 else src[i] -= src[i - 1];
3657 }
3658 }else{
3659 if(y) src[i] -= src[i - stride];
3660 }
3661 }
3662 }
3663}
3664
3665static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3666 const int w= b->width;
3667 int x,y;
3668
3669// START_TIMER
3670
3671 DWTELEM * line;
3672 DWTELEM * prev;
3673
3674 if (start_y != 0)
3675 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3676
3677 for(y=start_y; y<end_y; y++){
3678 prev = line;
3679// line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3680 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3681 for(x=0; x<w; x++){
3682 if(x){
3683 if(use_median){
3684 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3685 else line[x] += line[x - 1];
3686 }else{
3687 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3688 else line[x] += line[x - 1];
3689 }
3690 }else{
3691 if(y) line[x] += prev[x];
3692 }
3693 }
3694 }
3695
3696// STOP_TIMER("correlate")
3697}
3698
3699static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3700 const int w= b->width;
3701 const int h= b->height;
3702 int x,y;
3703
3704 for(y=0; y<h; y++){
3705 for(x=0; x<w; x++){
3706 int i= x + y*stride;
3707
3708 if(x){
3709 if(use_median){
3710 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3711 else src[i] += src[i - 1];
3712 }else{
3713 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3714 else src[i] += src[i - 1];
3715 }
3716 }else{
3717 if(y) src[i] += src[i - stride];
3718 }
3719 }
3720 }
3721}
3722
3723static void encode_header(SnowContext *s){
3724 int plane_index, level, orientation;
3725 uint8_t kstate[32];
3726
3727 memset(kstate, MID_STATE, sizeof(kstate));
3728
3729 put_rac(&s->c, kstate, s->keyframe);
3730 if(s->keyframe || s->always_reset)
3731 reset_contexts(s);
3732 if(s->keyframe){
3733 put_symbol(&s->c, s->header_state, s->version, 0);
3734 put_rac(&s->c, s->header_state, s->always_reset);
3735 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3736 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3737 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3738 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3739 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3740 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3741 put_rac(&s->c, s->header_state, s->spatial_scalability);
3742// put_rac(&s->c, s->header_state, s->rate_scalability);
3743 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3744
3745 for(plane_index=0; plane_index<2; plane_index++){
3746 for(level=0; level<s->spatial_decomposition_count; level++){
3747 for(orientation=level ? 1:0; orientation<4; orientation++){
3748 if(orientation==2) continue;
3749 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3750 }
3751 }
3752 }
3753 }
3754 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3755 put_symbol(&s->c, s->header_state, s->qlog, 1);
3756 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3757 put_symbol(&s->c, s->header_state, s->qbias, 1);
3758 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3759}
3760
3761static int decode_header(SnowContext *s){
3762 int plane_index, level, orientation;
3763 uint8_t kstate[32];
3764
3765 memset(kstate, MID_STATE, sizeof(kstate));
3766
3767 s->keyframe= get_rac(&s->c, kstate);
3768 if(s->keyframe || s->always_reset)
3769 reset_contexts(s);
3770 if(s->keyframe){
3771 s->version= get_symbol(&s->c, s->header_state, 0);
3772 if(s->version>0){
3773 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3774 return -1;
3775 }
3776 s->always_reset= get_rac(&s->c, s->header_state);
3777 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3778 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3779 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3780 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3781 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3782 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3783 s->spatial_scalability= get_rac(&s->c, s->header_state);
3784// s->rate_scalability= get_rac(&s->c, s->header_state);
3785 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3786
3787 for(plane_index=0; plane_index<3; plane_index++){
3788 for(level=0; level<s->spatial_decomposition_count; level++){
3789 for(orientation=level ? 1:0; orientation<4; orientation++){
3790 int q;
3791 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3792 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3793 else q= get_symbol(&s->c, s->header_state, 1);
3794 s->plane[plane_index].band[level][orientation].qlog= q;
3795 }
3796 }
3797 }
3798 }
3799
3800 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3801 if(s->spatial_decomposition_type > 2){
3802 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3803 return -1;
3804 }
3805
3806 s->qlog= get_symbol(&s->c, s->header_state, 1);
3807 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3808 s->qbias= get_symbol(&s->c, s->header_state, 1);
3809 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3810 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3811 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3812 s->block_max_depth= 0;
3813 return -1;
3814 }
3815
3816 return 0;
3817}
3818
3819static void init_qexp(void){
3820 int i;
3821 double v=128;
3822
3823 for(i=0; i<QROOT; i++){
3824 qexp[i]= lrintf(v);
3825 v *= pow(2, 1.0 / QROOT);
3826 }
3827}
3828
3829static int common_init(AVCodecContext *avctx){
3830 SnowContext *s = avctx->priv_data;
3831 int width, height;
3832 int level, orientation, plane_index, dec;
3833 int i, j;
3834
3835 s->avctx= avctx;
3836
3837 dsputil_init(&s->dsp, avctx);
3838
3839#define mcf(dx,dy)\
3840 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3841 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3842 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3843 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3844 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3845 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3846
3847 mcf( 0, 0)
3848 mcf( 4, 0)
3849 mcf( 8, 0)
3850 mcf(12, 0)
3851 mcf( 0, 4)
3852 mcf( 4, 4)
3853 mcf( 8, 4)
3854 mcf(12, 4)
3855 mcf( 0, 8)
3856 mcf( 4, 8)
3857 mcf( 8, 8)
3858 mcf(12, 8)
3859 mcf( 0,12)
3860 mcf( 4,12)
3861 mcf( 8,12)
3862 mcf(12,12)
3863
3864#define mcfh(dx,dy)\
3865 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3866 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3867 mc_block_hpel ## dx ## dy ## 16;\
3868 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3869 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3870 mc_block_hpel ## dx ## dy ## 8;
3871
3872 mcfh(0, 0)
3873 mcfh(8, 0)
3874 mcfh(0, 8)
3875 mcfh(8, 8)
3876
3877 if(!qexp[0])
3878 init_qexp();
3879
3880 dec= s->spatial_decomposition_count= 5;
3881 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3882
3883 s->chroma_h_shift= 1; //FIXME XXX
3884 s->chroma_v_shift= 1;
3885
3886// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3887
3888 width= s->avctx->width;
3889 height= s->avctx->height;
3890
3891 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3892
3893 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3894 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3895
3896 for(plane_index=0; plane_index<3; plane_index++){
3897 int w= s->avctx->width;
3898 int h= s->avctx->height;
3899
3900 if(plane_index){
3901 w>>= s->chroma_h_shift;
3902 h>>= s->chroma_v_shift;
3903 }
3904 s->plane[plane_index].width = w;
3905 s->plane[plane_index].height= h;
3906//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3907 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3908 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3909 SubBand *b= &s->plane[plane_index].band[level][orientation];
3910
3911 b->buf= s->spatial_dwt_buffer;
3912 b->level= level;
3913 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3914 b->width = (w + !(orientation&1))>>1;
3915 b->height= (h + !(orientation>1))>>1;
3916
3917 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3918 b->buf_x_offset = 0;
3919 b->buf_y_offset = 0;
3920
3921 if(orientation&1){
3922 b->buf += (w+1)>>1;
3923 b->buf_x_offset = (w+1)>>1;
3924 }
3925 if(orientation>1){
3926 b->buf += b->stride>>1;
3927 b->buf_y_offset = b->stride_line >> 1;
3928 }
3929
3930 if(level)
3931 b->parent= &s->plane[plane_index].band[level-1][orientation];
3932 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3933 }
3934 w= (w+1)>>1;
3935 h= (h+1)>>1;
3936 }
3937 }
3938
3939 for(i=0; i<MAX_REF_FRAMES; i++)
3940 for(j=0; j<MAX_REF_FRAMES; j++)
3941 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3942
3943 reset_contexts(s);
3944/*
3945 width= s->width= avctx->width;
3946 height= s->height= avctx->height;
3947
3948 assert(width && height);
3949*/
3950 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3951
3952 return 0;
3953}
3954
3955static int qscale2qlog(int qscale){
3956 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3957 + 61*QROOT/8; //<64 >60
3958}
3959
3960static void ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3961{
3962 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3963 * FIXME we know exact mv bits at this point,
3964 * but ratecontrol isn't set up to include them. */
3965 uint32_t coef_sum= 0;
3966 int level, orientation;
3967
3968 for(level=0; level<s->spatial_decomposition_count; level++){
3969 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3970 SubBand *b= &s->plane[0].band[level][orientation];
3971 DWTELEM *buf= b->buf;
3972 const int w= b->width;
3973 const int h= b->height;
3974 const int stride= b->stride;
3975 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3976 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3977 const int qdiv= (1<<16)/qmul;
3978 int x, y;
3979 if(orientation==0)
3980 decorrelate(s, b, buf, stride, 1, 0);
3981 for(y=0; y<h; y++)
3982 for(x=0; x<w; x++)
3983 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3984 if(orientation==0)
3985 correlate(s, b, buf, stride, 1, 0);
3986 }
3987 }
3988
3989 /* ugly, ratecontrol just takes a sqrt again */
3990 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3991 assert(coef_sum < INT_MAX);
3992
3993 if(pict->pict_type == I_TYPE){
3994 s->m.current_picture.mb_var_sum= coef_sum;
3995 s->m.current_picture.mc_mb_var_sum= 0;
3996 }else{
3997 s->m.current_picture.mc_mb_var_sum= coef_sum;
3998 s->m.current_picture.mb_var_sum= 0;
3999 }
4000
4001 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
4002 s->lambda= pict->quality * 3/2;
4003 s->qlog= qscale2qlog(pict->quality);
4004}
4005
4006static void calculate_vissual_weight(SnowContext *s, Plane *p){
4007 int width = p->width;
4008 int height= p->height;
4009 int level, orientation, x, y;
4010
4011 for(level=0; level<s->spatial_decomposition_count; level++){
4012 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4013 SubBand *b= &p->band[level][orientation];
4014 DWTELEM *buf= b->buf;
4015 int64_t error=0;
4016
4017 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
4018 buf[b->width/2 + b->height/2*b->stride]= 256*256;
4019 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
4020 for(y=0; y<height; y++){
4021 for(x=0; x<width; x++){
4022 int64_t d= s->spatial_dwt_buffer[x + y*width];
4023 error += d*d;
4024 }
4025 }
4026
4027 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
4028// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
4029 }
4030 }
4031}
4032
4033static int encode_init(AVCodecContext *avctx)
4034{
4035 SnowContext *s = avctx->priv_data;
4036 int plane_index;
4037
4038 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
4039 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
4040 "use vstrict=-2 / -strict -2 to use it anyway\n");
4041 return -1;
4042 }
4043
4044 if(avctx->prediction_method == DWT_97
4045 && (avctx->flags & CODEC_FLAG_QSCALE)
4046 && avctx->global_quality == 0){
4047 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
4048 return -1;
4049 }
4050
4051 common_init(avctx);
4052 alloc_blocks(s);
4053
4054 s->version=0;
4055
4056 s->m.avctx = avctx;
4057 s->m.flags = avctx->flags;
4058 s->m.bit_rate= avctx->bit_rate;
4059
4060 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4061 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4062 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4063 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4064 h263_encode_init(&s->m); //mv_penalty
4065
4066 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4067
4068 if(avctx->flags&CODEC_FLAG_PASS1){
4069 if(!avctx->stats_out)
4070 avctx->stats_out = av_mallocz(256);
4071 }
4072 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4073 if(ff_rate_control_init(&s->m) < 0)
4074 return -1;
4075 }
4076 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4077
4078 for(plane_index=0; plane_index<3; plane_index++){
4079 calculate_vissual_weight(s, &s->plane[plane_index]);
4080 }
4081
4082
4083 avctx->coded_frame= &s->current_picture;
4084 switch(avctx->pix_fmt){
4085// case PIX_FMT_YUV444P:
4086// case PIX_FMT_YUV422P:
4087 case PIX_FMT_YUV420P:
4088 case PIX_FMT_GRAY8:
4089// case PIX_FMT_YUV411P:
4090// case PIX_FMT_YUV410P:
4091 s->colorspace_type= 0;
4092 break;
4093/* case PIX_FMT_RGBA32:
4094 s->colorspace= 1;
4095 break;*/
4096 default:
4097 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4098 return -1;
4099 }
4100// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4101 s->chroma_h_shift= 1;
4102 s->chroma_v_shift= 1;
4103
4104 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4105 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4106
4107 s->avctx->get_buffer(s->avctx, &s->input_picture);
4108
4109 if(s->avctx->me_method == ME_ITER){
4110 int i;
4111 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4112 for(i=0; i<s->max_ref_frames; i++){
4113 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4114 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4115 }
4116 }
4117
4118 return 0;
4119}
4120
4121static int frame_start(SnowContext *s){
4122 AVFrame tmp;
4123 int w= s->avctx->width; //FIXME round up to x16 ?
4124 int h= s->avctx->height;
4125
4126 if(s->current_picture.data[0]){
4127 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4128 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4129 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4130 }
4131
4132 tmp= s->last_picture[s->max_ref_frames-1];
4133 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4134 s->last_picture[0]= s->current_picture;
4135 s->current_picture= tmp;
4136
4137 if(s->keyframe){
4138 s->ref_frames= 0;
4139 }else{
4140 int i;
4141 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4142 if(i && s->last_picture[i-1].key_frame)
4143 break;
4144 s->ref_frames= i;
4145 }
4146
4147 s->current_picture.reference= 1;
4148 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4149 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4150 return -1;
4151 }
4152
4153 s->current_picture.key_frame= s->keyframe;
4154
4155 return 0;
4156}
4157
4158static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4159 SnowContext *s = avctx->priv_data;
4160 RangeCoder * const c= &s->c;
4161 AVFrame *pict = data;
4162 const int width= s->avctx->width;
4163 const int height= s->avctx->height;
4164 int level, orientation, plane_index, i, y;
4165
4166 ff_init_range_encoder(c, buf, buf_size);
4167 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4168
4169 for(i=0; i<3; i++){
4170 int shift= !!i;
4171 for(y=0; y<(height>>shift); y++)
4172 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4173 &pict->data[i][y * pict->linesize[i]],
4174 width>>shift);
4175 }
4176 s->new_picture = *pict;
4177
4178 s->m.picture_number= avctx->frame_number;
4179 if(avctx->flags&CODEC_FLAG_PASS2){
4180 s->m.pict_type =
4181 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4182 s->keyframe= pict->pict_type==FF_I_TYPE;
4183 if(!(avctx->flags&CODEC_FLAG_QSCALE))
4184 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4185 }else{
4186 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4187 s->m.pict_type=
4188 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4189 }
4190
4191 if(s->pass1_rc && avctx->frame_number == 0)
4192 pict->quality= 2*FF_QP2LAMBDA;
4193 if(pict->quality){
4194 s->qlog= qscale2qlog(pict->quality);
4195 s->lambda = pict->quality * 3/2;
4196 }
4197 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4198 s->qlog= LOSSLESS_QLOG;
4199 s->lambda = 0;
4200 }//else keep previous frame's qlog until after motion est
4201
4202 frame_start(s);
4203
4204 s->m.current_picture_ptr= &s->m.current_picture;
4205 if(pict->pict_type == P_TYPE){
4206 int block_width = (width +15)>>4;
4207 int block_height= (height+15)>>4;
4208 int stride= s->current_picture.linesize[0];
4209
4210 assert(s->current_picture.data[0]);
4211 assert(s->last_picture[0].data[0]);
4212
4213 s->m.avctx= s->avctx;
4214 s->m.current_picture.data[0]= s->current_picture.data[0];
4215 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4216 s->m. new_picture.data[0]= s-> input_picture.data[0];
4217 s->m. last_picture_ptr= &s->m. last_picture;
4218 s->m.linesize=
4219 s->m. last_picture.linesize[0]=
4220 s->m. new_picture.linesize[0]=
4221 s->m.current_picture.linesize[0]= stride;
4222 s->m.uvlinesize= s->current_picture.linesize[1];
4223 s->m.width = width;
4224 s->m.height= height;
4225 s->m.mb_width = block_width;
4226 s->m.mb_height= block_height;
4227 s->m.mb_stride= s->m.mb_width+1;
4228 s->m.b8_stride= 2*s->m.mb_width+1;
4229 s->m.f_code=1;
4230 s->m.pict_type= pict->pict_type;
4231 s->m.me_method= s->avctx->me_method;
4232 s->m.me.scene_change_score=0;
4233 s->m.flags= s->avctx->flags;
4234 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4235 s->m.out_format= FMT_H263;
4236 s->m.unrestricted_mv= 1;
4237
4238 s->m.lambda = s->lambda;
4239 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4240 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4241
4242 s->m.dsp= s->dsp; //move
4243 ff_init_me(&s->m);
4244 s->dsp= s->m.dsp;
4245 }
4246
4247redo_frame:
4248
4249 s->m.pict_type = pict->pict_type;
4250 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4251
4252 encode_header(s);
4253 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4254 encode_blocks(s);
4255 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4256
4257 for(plane_index=0; plane_index<3; plane_index++){
4258 Plane *p= &s->plane[plane_index];
4259 int w= p->width;
4260 int h= p->height;
4261 int x, y;
4262// int bits= put_bits_count(&s->c.pb);
4263
4264 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4265 //FIXME optimize
4266 if(pict->data[plane_index]) //FIXME gray hack
4267 for(y=0; y<h; y++){
4268 for(x=0; x<w; x++){
4269 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4270 }
4271 }
4272 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4273
4274 if( plane_index==0
4275 && pict->pict_type == P_TYPE
4276 && !(avctx->flags&CODEC_FLAG_PASS2)
4277 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4278 ff_init_range_encoder(c, buf, buf_size);
4279 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4280 pict->pict_type= FF_I_TYPE;
4281 s->keyframe=1;
4282 s->current_picture.key_frame=1;
4283 reset_contexts(s);
4284 goto redo_frame;
4285 }
4286
4287 if(s->qlog == LOSSLESS_QLOG){
4288 for(y=0; y<h; y++){
4289 for(x=0; x<w; x++){
4290 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4291 }
4292 }
4293 }
4294
4295 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4296
4297 if(s->pass1_rc && plane_index==0)
4298 ratecontrol_1pass(s, pict);
4299
4300 for(level=0; level<s->spatial_decomposition_count; level++){
4301 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4302 SubBand *b= &p->band[level][orientation];
4303
4304 quantize(s, b, b->buf, b->stride, s->qbias);
4305 if(orientation==0)
4306 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4307 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4308 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4309 if(orientation==0)
4310 correlate(s, b, b->buf, b->stride, 1, 0);
4311 }
4312 }
4313// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4314
4315 for(level=0; level<s->spatial_decomposition_count; level++){
4316 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4317 SubBand *b= &p->band[level][orientation];
4318
4319 dequantize(s, b, b->buf, b->stride);
4320 }
4321 }
4322
4323 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4324 if(s->qlog == LOSSLESS_QLOG){
4325 for(y=0; y<h; y++){
4326 for(x=0; x<w; x++){
4327 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4328 }
4329 }
4330 }
4331{START_TIMER
4332 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4333STOP_TIMER("pred-conv")}
4334 }else{
4335 //ME/MC only
4336 if(pict->pict_type == I_TYPE){
4337 for(y=0; y<h; y++){
4338 for(x=0; x<w; x++){
4339 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4340 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4341 }
4342 }
4343 }else{
4344 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4345 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4346 }
4347 }
4348 if(s->avctx->flags&CODEC_FLAG_PSNR){
4349 int64_t error= 0;
4350
4351 if(pict->data[plane_index]) //FIXME gray hack
4352 for(y=0; y<h; y++){
4353 for(x=0; x<w; x++){
4354 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4355 error += d*d;
4356 }
4357 }
4358 s->avctx->error[plane_index] += error;
4359 s->current_picture.error[plane_index] = error;
4360 }
4361 }
4362
4363 if(s->last_picture[s->max_ref_frames-1].data[0])
4364 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4365
4366 s->current_picture.coded_picture_number = avctx->frame_number;
4367 s->current_picture.pict_type = pict->pict_type;
4368 s->current_picture.quality = pict->quality;
4369 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4370 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4371 s->m.current_picture.display_picture_number =
4372 s->m.current_picture.coded_picture_number = avctx->frame_number;
4373 s->m.current_picture.quality = pict->quality;
4374 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4375 if(s->pass1_rc)
4376 ff_rate_estimate_qscale(&s->m, 0);
4377 if(avctx->flags&CODEC_FLAG_PASS1)
4378 ff_write_pass1_stats(&s->m);
4379 s->m.last_pict_type = s->m.pict_type;
4380
4381 emms_c();
4382
4383 return ff_rac_terminate(c);
4384}
4385
4386static void common_end(SnowContext *s){
4387 int plane_index, level, orientation, i;
4388
4389 av_freep(&s->spatial_dwt_buffer);
4390
4391 av_freep(&s->m.me.scratchpad);
4392 av_freep(&s->m.me.map);
4393 av_freep(&s->m.me.score_map);
4394 av_freep(&s->m.obmc_scratchpad);
4395
4396 av_freep(&s->block);
4397
4398 for(i=0; i<MAX_REF_FRAMES; i++){
4399 av_freep(&s->ref_mvs[i]);
4400 av_freep(&s->ref_scores[i]);
4401 if(s->last_picture[i].data[0])
4402 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4403 }
4404
4405 for(plane_index=0; plane_index<3; plane_index++){
4406 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4407 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4408 SubBand *b= &s->plane[plane_index].band[level][orientation];
4409
4410 av_freep(&b->x_coeff);
4411 }
4412 }
4413 }
4414}
4415
4416static int encode_end(AVCodecContext *avctx)
4417{
4418 SnowContext *s = avctx->priv_data;
4419
4420 common_end(s);
4421 av_free(avctx->stats_out);
4422
4423 return 0;
4424}
4425
4426static int decode_init(AVCodecContext *avctx)
4427{
4428 SnowContext *s = avctx->priv_data;
4429 int block_size;
4430
4431 avctx->pix_fmt= PIX_FMT_YUV420P;
4432
4433 common_init(avctx);
4434
4435 block_size = MB_SIZE >> s->block_max_depth;
4436 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4437
4438 return 0;
4439}
4440
4441static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4442 SnowContext *s = avctx->priv_data;
4443 RangeCoder * const c= &s->c;
4444 int bytes_read;
4445 AVFrame *picture = data;
4446 int level, orientation, plane_index;
4447
4448 ff_init_range_decoder(c, buf, buf_size);
4449 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4450
4451 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4452 decode_header(s);
4453 if(!s->block) alloc_blocks(s);
4454
4455 frame_start(s);
4456 //keyframe flag dupliaction mess FIXME
4457 if(avctx->debug&FF_DEBUG_PICT_INFO)
4458 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4459
4460 decode_blocks(s);
4461
4462 for(plane_index=0; plane_index<3; plane_index++){
4463 Plane *p= &s->plane[plane_index];
4464 int w= p->width;
4465 int h= p->height;
4466 int x, y;
4467 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4468
4469if(s->avctx->debug&2048){
4470 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4471 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4472
4473 for(y=0; y<h; y++){
4474 for(x=0; x<w; x++){
4475 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4476 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4477 }
4478 }
4479}
4480
4481{ START_TIMER
4482 for(level=0; level<s->spatial_decomposition_count; level++){
4483 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4484 SubBand *b= &p->band[level][orientation];
4485 unpack_coeffs(s, b, b->parent, orientation);
4486 }
4487 }
4488 STOP_TIMER("unpack coeffs");
4489}
4490
4491{START_TIMER
4492 const int mb_h= s->b_height << s->block_max_depth;
4493 const int block_size = MB_SIZE >> s->block_max_depth;
4494 const int block_w = plane_index ? block_size/2 : block_size;
4495 int mb_y;
4496 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4497 int yd=0, yq=0;
4498 int y;
4499 int end_y;
4500
4501 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4502 for(mb_y=0; mb_y<=mb_h; mb_y++){
4503
4504 int slice_starty = block_w*mb_y;
4505 int slice_h = block_w*(mb_y+1);
4506 if (!(s->keyframe || s->avctx->debug&512)){
4507 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4508 slice_h -= (block_w >> 1);
4509 }
4510
4511 {
4512 START_TIMER
4513 for(level=0; level<s->spatial_decomposition_count; level++){
4514 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4515 SubBand *b= &p->band[level][orientation];
4516 int start_y;
4517 int end_y;
4518 int our_mb_start = mb_y;
4519 int our_mb_end = (mb_y + 1);
4520 const int extra= 3;
4521 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4522 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4523 if (!(s->keyframe || s->avctx->debug&512)){
4524 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4525 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4526 }
4527 start_y = FFMIN(b->height, start_y);
4528 end_y = FFMIN(b->height, end_y);
4529
4530 if (start_y != end_y){
4531 if (orientation == 0){
4532 SubBand * correlate_band = &p->band[0][0];
4533 int correlate_end_y = FFMIN(b->height, end_y + 1);
4534 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4535 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4536 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4537 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4538 }
4539 else
4540 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4541 }
4542 }
4543 }
4544 STOP_TIMER("decode_subband_slice");
4545 }
4546
4547{ START_TIMER
4548 for(; yd<slice_h; yd+=4){
4549 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4550 }
4551 STOP_TIMER("idwt slice");}
4552
4553
4554 if(s->qlog == LOSSLESS_QLOG){
4555 for(; yq<slice_h && yq<h; yq++){
4556 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4557 for(x=0; x<w; x++){
4558 line[x] <<= FRAC_BITS;
4559 }
4560 }
4561 }
4562
4563 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4564
4565 y = FFMIN(p->height, slice_starty);
4566 end_y = FFMIN(p->height, slice_h);
4567 while(y < end_y)
4568 slice_buffer_release(&s->sb, y++);
4569 }
4570
4571 slice_buffer_flush(&s->sb);
4572
4573STOP_TIMER("idwt + predict_slices")}
4574 }
4575
4576 emms_c();
4577
4578 if(s->last_picture[s->max_ref_frames-1].data[0])
4579 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4580
4581if(!(s->avctx->debug&2048))
4582 *picture= s->current_picture;
4583else
4584 *picture= s->mconly_picture;
4585
4586 *data_size = sizeof(AVFrame);
4587
4588 bytes_read= c->bytestream - c->bytestream_start;
4589 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4590
4591 return bytes_read;
4592}
4593
4594static int decode_end(AVCodecContext *avctx)
4595{
4596 SnowContext *s = avctx->priv_data;
4597
4598 slice_buffer_destroy(&s->sb);
4599
4600 common_end(s);
4601
4602 return 0;
4603}
4604
4605AVCodec snow_decoder = {
4606 "snow",
4607 CODEC_TYPE_VIDEO,
4608 CODEC_ID_SNOW,
4609 sizeof(SnowContext),
4610 decode_init,
4611 NULL,
4612 decode_end,
4613 decode_frame,
4614 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4615 NULL
4616};
4617
4618#ifdef CONFIG_ENCODERS
4619AVCodec snow_encoder = {
4620 "snow",
4621 CODEC_TYPE_VIDEO,
4622 CODEC_ID_SNOW,
4623 sizeof(SnowContext),
4624 encode_init,
4625 encode_frame,
4626 encode_end,
4627};
4628#endif
4629
4630
4631#if 0
4632#undef malloc
4633#undef free
4634#undef printf
4635
4636int main(){
4637 int width=256;
4638 int height=256;
4639 int buffer[2][width*height];
4640 SnowContext s;
4641 int i;
4642 s.spatial_decomposition_count=6;
4643 s.spatial_decomposition_type=1;
4644
4645 printf("testing 5/3 DWT\n");
4646 for(i=0; i<width*height; i++)
4647 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4648
4649 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4650 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4651
4652 for(i=0; i<width*height; i++)
4653 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4654
4655 printf("testing 9/7 DWT\n");
4656 s.spatial_decomposition_type=0;
4657 for(i=0; i<width*height; i++)
4658 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4659
4660 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4661 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4662
4663 for(i=0; i<width*height; i++)
4664 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4665
4666#if 0
4667 printf("testing AC coder\n");
4668 memset(s.header_state, 0, sizeof(s.header_state));
4669 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4670 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4671
4672 for(i=-256; i<256; i++){
4673START_TIMER
4674 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4675STOP_TIMER("put_symbol")
4676 }
4677 ff_rac_terminate(&s.c);
4678
4679 memset(s.header_state, 0, sizeof(s.header_state));
4680 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4681 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4682
4683 for(i=-256; i<256; i++){
4684 int j;
4685START_TIMER
4686 j= get_symbol(&s.c, s.header_state, 1);
4687STOP_TIMER("get_symbol")
4688 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4689 }
4690#endif
4691{
4692int level, orientation, x, y;
4693int64_t errors[8][4];
4694int64_t g=0;
4695
4696 memset(errors, 0, sizeof(errors));
4697 s.spatial_decomposition_count=3;
4698 s.spatial_decomposition_type=0;
4699 for(level=0; level<s.spatial_decomposition_count; level++){
4700 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4701 int w= width >> (s.spatial_decomposition_count-level);
4702 int h= height >> (s.spatial_decomposition_count-level);
4703 int stride= width << (s.spatial_decomposition_count-level);
4704 DWTELEM *buf= buffer[0];
4705 int64_t error=0;
4706
4707 if(orientation&1) buf+=w;
4708 if(orientation>1) buf+=stride>>1;
4709
4710 memset(buffer[0], 0, sizeof(int)*width*height);
4711 buf[w/2 + h/2*stride]= 256*256;
4712 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4713 for(y=0; y<height; y++){
4714 for(x=0; x<width; x++){
4715 int64_t d= buffer[0][x + y*width];
4716 error += d*d;
4717 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4718 }
4719 if(ABS(height/2-y)<9 && level==2) printf("\n");
4720 }
4721 error= (int)(sqrt(error)+0.5);
4722 errors[level][orientation]= error;
4723 if(g) g=ff_gcd(g, error);
4724 else g= error;
4725 }
4726 }
4727 printf("static int const visual_weight[][4]={\n");
4728 for(level=0; level<s.spatial_decomposition_count; level++){
4729 printf(" {");
4730 for(orientation=0; orientation<4; orientation++){
4731 printf("%8lld,", errors[level][orientation]/g);
4732 }
4733 printf("},\n");
4734 }
4735 printf("};\n");
4736 {
4737 int level=2;
4738 int orientation=3;
4739 int w= width >> (s.spatial_decomposition_count-level);
4740 int h= height >> (s.spatial_decomposition_count-level);
4741 int stride= width << (s.spatial_decomposition_count-level);
4742 DWTELEM *buf= buffer[0];
4743 int64_t error=0;
4744
4745 buf+=w;
4746 buf+=stride>>1;
4747
4748 memset(buffer[0], 0, sizeof(int)*width*height);
4749#if 1
4750 for(y=0; y<height; y++){
4751 for(x=0; x<width; x++){
4752 int tab[4]={0,2,3,1};
4753 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4754 }
4755 }
4756 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4757#else
4758 for(y=0; y<h; y++){
4759 for(x=0; x<w; x++){
4760 buf[x + y*stride ]=169;
4761 buf[x + y*stride-w]=64;
4762 }
4763 }
4764 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4765#endif
4766 for(y=0; y<height; y++){
4767 for(x=0; x<width; x++){
4768 int64_t d= buffer[0][x + y*width];
4769 error += d*d;
4770 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4771 }
4772 if(ABS(height/2-y)<9) printf("\n");
4773 }
4774 }
4775
4776}
4777 return 0;
4778}
4779#endif
4780
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette