1 | /*
|
---|
2 | * Simple free lossless/lossy audio codec
|
---|
3 | * Copyright (c) 2004 Alex Beregszaszi
|
---|
4 | *
|
---|
5 | * This library is free software; you can redistribute it and/or
|
---|
6 | * modify it under the terms of the GNU Lesser General Public
|
---|
7 | * License as published by the Free Software Foundation; either
|
---|
8 | * version 2 of the License, or (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This library is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
13 | * Lesser General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU Lesser General Public
|
---|
16 | * License along with this library; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
18 | */
|
---|
19 | #include "avcodec.h"
|
---|
20 | #include "bitstream.h"
|
---|
21 | #include "golomb.h"
|
---|
22 |
|
---|
23 | /**
|
---|
24 | * @file sonic.c
|
---|
25 | * Simple free lossless/lossy audio codec
|
---|
26 | * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
|
---|
27 | * Written and designed by Alex Beregszaszi
|
---|
28 | *
|
---|
29 | * TODO:
|
---|
30 | * - CABAC put/get_symbol
|
---|
31 | * - independent quantizer for channels
|
---|
32 | * - >2 channels support
|
---|
33 | * - more decorrelation types
|
---|
34 | * - more tap_quant tests
|
---|
35 | * - selectable intlist writers/readers (bonk-style, golomb, cabac)
|
---|
36 | */
|
---|
37 |
|
---|
38 | #define MAX_CHANNELS 2
|
---|
39 |
|
---|
40 | #define MID_SIDE 0
|
---|
41 | #define LEFT_SIDE 1
|
---|
42 | #define RIGHT_SIDE 2
|
---|
43 |
|
---|
44 | typedef struct SonicContext {
|
---|
45 | int lossless, decorrelation;
|
---|
46 |
|
---|
47 | int num_taps, downsampling;
|
---|
48 | double quantization;
|
---|
49 |
|
---|
50 | int channels, samplerate, block_align, frame_size;
|
---|
51 |
|
---|
52 | int *tap_quant;
|
---|
53 | int *int_samples;
|
---|
54 | int *coded_samples[MAX_CHANNELS];
|
---|
55 |
|
---|
56 | // for encoding
|
---|
57 | int *tail;
|
---|
58 | int tail_size;
|
---|
59 | int *window;
|
---|
60 | int window_size;
|
---|
61 |
|
---|
62 | // for decoding
|
---|
63 | int *predictor_k;
|
---|
64 | int *predictor_state[MAX_CHANNELS];
|
---|
65 | } SonicContext;
|
---|
66 |
|
---|
67 | #define LATTICE_SHIFT 10
|
---|
68 | #define SAMPLE_SHIFT 4
|
---|
69 | #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
|
---|
70 | #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
|
---|
71 |
|
---|
72 | #define BASE_QUANT 0.6
|
---|
73 | #define RATE_VARIATION 3.0
|
---|
74 |
|
---|
/**
 * Divide a by b with rounding: half of b is added to the magnitude of a
 * before the integer division, and the sign of a is restored afterwards.
 * For positive b this rounds to nearest, with ties away from zero.
 */
static inline int divide(int a, int b)
{
    const int negative = a < 0;
    const int magnitude = negative ? -a : a;
    const int quotient = (magnitude + b/2) / b;

    return negative ? -quotient : quotient;
}
|
---|
82 |
|
---|
/**
 * Arithmetic right shift of a by b bits with rounding: half of the
 * divisor (1 << (b-1)) is added first. b must be at least 1.
 */
static inline int shift(int a,int b)
{
    const int rounding = 1 << (b - 1);

    return (a + rounding) >> b;
}
|
---|
87 |
|
---|
/**
 * Right shift a by b bits and add one to the result when a is negative.
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
|
---|
92 |
|
---|
93 | #if 1
|
---|
94 | static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
|
---|
95 | {
|
---|
96 | int i;
|
---|
97 |
|
---|
98 | for (i = 0; i < entries; i++)
|
---|
99 | set_se_golomb(pb, buf[i]);
|
---|
100 |
|
---|
101 | return 1;
|
---|
102 | }
|
---|
103 |
|
---|
104 | static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
|
---|
105 | {
|
---|
106 | int i;
|
---|
107 |
|
---|
108 | for (i = 0; i < entries; i++)
|
---|
109 | buf[i] = get_se_golomb(gb);
|
---|
110 |
|
---|
111 | return 1;
|
---|
112 | }
|
---|
113 |
|
---|
114 | #else
|
---|
115 |
|
---|
116 | #define ADAPT_LEVEL 8
|
---|
117 |
|
---|
/**
 * Number of bits needed to represent x, i.e. the position of its highest
 * set bit counted from 1. Returns 0 for x == 0.
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
|
---|
129 |
|
---|
130 | static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
|
---|
131 | {
|
---|
132 | int i, bits;
|
---|
133 |
|
---|
134 | if (!max)
|
---|
135 | return;
|
---|
136 |
|
---|
137 | bits = bits_to_store(max);
|
---|
138 |
|
---|
139 | for (i = 0; i < bits-1; i++)
|
---|
140 | put_bits(pb, 1, value & (1 << i));
|
---|
141 |
|
---|
142 | if ( (value | (1 << (bits-1))) <= max)
|
---|
143 | put_bits(pb, 1, value & (1 << (bits-1)));
|
---|
144 | }
|
---|
145 |
|
---|
146 | static unsigned int read_uint_max(GetBitContext *gb, int max)
|
---|
147 | {
|
---|
148 | int i, bits, value = 0;
|
---|
149 |
|
---|
150 | if (!max)
|
---|
151 | return 0;
|
---|
152 |
|
---|
153 | bits = bits_to_store(max);
|
---|
154 |
|
---|
155 | for (i = 0; i < bits-1; i++)
|
---|
156 | if (get_bits1(gb))
|
---|
157 | value += 1 << i;
|
---|
158 |
|
---|
159 | if ( (value | (1<<(bits-1))) <= max)
|
---|
160 | if (get_bits1(gb))
|
---|
161 | value += 1 << (bits-1);
|
---|
162 |
|
---|
163 | return value;
|
---|
164 | }
|
---|
165 |
|
---|
166 | static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
|
---|
167 | {
|
---|
168 | int i, j, x = 0, low_bits = 0, max = 0;
|
---|
169 | int step = 256, pos = 0, dominant = 0, any = 0;
|
---|
170 | int *copy, *bits;
|
---|
171 |
|
---|
172 | copy = av_mallocz(4* entries);
|
---|
173 | if (!copy)
|
---|
174 | return -1;
|
---|
175 |
|
---|
176 | if (base_2_part)
|
---|
177 | {
|
---|
178 | int energy = 0;
|
---|
179 |
|
---|
180 | for (i = 0; i < entries; i++)
|
---|
181 | energy += abs(buf[i]);
|
---|
182 |
|
---|
183 | low_bits = bits_to_store(energy / (entries * 2));
|
---|
184 | if (low_bits > 15)
|
---|
185 | low_bits = 15;
|
---|
186 |
|
---|
187 | put_bits(pb, 4, low_bits);
|
---|
188 | }
|
---|
189 |
|
---|
190 | for (i = 0; i < entries; i++)
|
---|
191 | {
|
---|
192 | put_bits(pb, low_bits, abs(buf[i]));
|
---|
193 | copy[i] = abs(buf[i]) >> low_bits;
|
---|
194 | if (copy[i] > max)
|
---|
195 | max = abs(copy[i]);
|
---|
196 | }
|
---|
197 |
|
---|
198 | bits = av_mallocz(4* entries*max);
|
---|
199 | if (!bits)
|
---|
200 | {
|
---|
201 | // av_free(copy);
|
---|
202 | return -1;
|
---|
203 | }
|
---|
204 |
|
---|
205 | for (i = 0; i <= max; i++)
|
---|
206 | {
|
---|
207 | for (j = 0; j < entries; j++)
|
---|
208 | if (copy[j] >= i)
|
---|
209 | bits[x++] = copy[j] > i;
|
---|
210 | }
|
---|
211 |
|
---|
212 | // store bitstream
|
---|
213 | while (pos < x)
|
---|
214 | {
|
---|
215 | int steplet = step >> 8;
|
---|
216 |
|
---|
217 | if (pos + steplet > x)
|
---|
218 | steplet = x - pos;
|
---|
219 |
|
---|
220 | for (i = 0; i < steplet; i++)
|
---|
221 | if (bits[i+pos] != dominant)
|
---|
222 | any = 1;
|
---|
223 |
|
---|
224 | put_bits(pb, 1, any);
|
---|
225 |
|
---|
226 | if (!any)
|
---|
227 | {
|
---|
228 | pos += steplet;
|
---|
229 | step += step / ADAPT_LEVEL;
|
---|
230 | }
|
---|
231 | else
|
---|
232 | {
|
---|
233 | int interloper = 0;
|
---|
234 |
|
---|
235 | while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
|
---|
236 | interloper++;
|
---|
237 |
|
---|
238 | // note change
|
---|
239 | write_uint_max(pb, interloper, (step >> 8) - 1);
|
---|
240 |
|
---|
241 | pos += interloper + 1;
|
---|
242 | step -= step / ADAPT_LEVEL;
|
---|
243 | }
|
---|
244 |
|
---|
245 | if (step < 256)
|
---|
246 | {
|
---|
247 | step = 65536 / step;
|
---|
248 | dominant = !dominant;
|
---|
249 | }
|
---|
250 | }
|
---|
251 |
|
---|
252 | // store signs
|
---|
253 | for (i = 0; i < entries; i++)
|
---|
254 | if (buf[i])
|
---|
255 | put_bits(pb, 1, buf[i] < 0);
|
---|
256 |
|
---|
257 | // av_free(bits);
|
---|
258 | // av_free(copy);
|
---|
259 |
|
---|
260 | return 0;
|
---|
261 | }
|
---|
262 |
|
---|
263 | static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
|
---|
264 | {
|
---|
265 | int i, low_bits = 0, x = 0;
|
---|
266 | int n_zeros = 0, step = 256, dominant = 0;
|
---|
267 | int pos = 0, level = 0;
|
---|
268 | int *bits = av_mallocz(4* entries);
|
---|
269 |
|
---|
270 | if (!bits)
|
---|
271 | return -1;
|
---|
272 |
|
---|
273 | if (base_2_part)
|
---|
274 | {
|
---|
275 | low_bits = get_bits(gb, 4);
|
---|
276 |
|
---|
277 | if (low_bits)
|
---|
278 | for (i = 0; i < entries; i++)
|
---|
279 | buf[i] = get_bits(gb, low_bits);
|
---|
280 | }
|
---|
281 |
|
---|
282 | // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
|
---|
283 |
|
---|
284 | while (n_zeros < entries)
|
---|
285 | {
|
---|
286 | int steplet = step >> 8;
|
---|
287 |
|
---|
288 | if (!get_bits1(gb))
|
---|
289 | {
|
---|
290 | for (i = 0; i < steplet; i++)
|
---|
291 | bits[x++] = dominant;
|
---|
292 |
|
---|
293 | if (!dominant)
|
---|
294 | n_zeros += steplet;
|
---|
295 |
|
---|
296 | step += step / ADAPT_LEVEL;
|
---|
297 | }
|
---|
298 | else
|
---|
299 | {
|
---|
300 | int actual_run = read_uint_max(gb, steplet-1);
|
---|
301 |
|
---|
302 | // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
|
---|
303 |
|
---|
304 | for (i = 0; i < actual_run; i++)
|
---|
305 | bits[x++] = dominant;
|
---|
306 |
|
---|
307 | bits[x++] = !dominant;
|
---|
308 |
|
---|
309 | if (!dominant)
|
---|
310 | n_zeros += actual_run;
|
---|
311 | else
|
---|
312 | n_zeros++;
|
---|
313 |
|
---|
314 | step -= step / ADAPT_LEVEL;
|
---|
315 | }
|
---|
316 |
|
---|
317 | if (step < 256)
|
---|
318 | {
|
---|
319 | step = 65536 / step;
|
---|
320 | dominant = !dominant;
|
---|
321 | }
|
---|
322 | }
|
---|
323 |
|
---|
324 | // reconstruct unsigned values
|
---|
325 | n_zeros = 0;
|
---|
326 | for (i = 0; n_zeros < entries; i++)
|
---|
327 | {
|
---|
328 | while(1)
|
---|
329 | {
|
---|
330 | if (pos >= entries)
|
---|
331 | {
|
---|
332 | pos = 0;
|
---|
333 | level += 1 << low_bits;
|
---|
334 | }
|
---|
335 |
|
---|
336 | if (buf[pos] >= level)
|
---|
337 | break;
|
---|
338 |
|
---|
339 | pos++;
|
---|
340 | }
|
---|
341 |
|
---|
342 | if (bits[i])
|
---|
343 | buf[pos] += 1 << low_bits;
|
---|
344 | else
|
---|
345 | n_zeros++;
|
---|
346 |
|
---|
347 | pos++;
|
---|
348 | }
|
---|
349 | // av_free(bits);
|
---|
350 |
|
---|
351 | // read signs
|
---|
352 | for (i = 0; i < entries; i++)
|
---|
353 | if (buf[i] && get_bits1(gb))
|
---|
354 | buf[i] = -buf[i];
|
---|
355 |
|
---|
356 | // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
|
---|
357 |
|
---|
358 | return 0;
|
---|
359 | }
|
---|
360 | #endif
|
---|
361 |
|
---|
362 | static void predictor_init_state(int *k, int *state, int order)
|
---|
363 | {
|
---|
364 | int i;
|
---|
365 |
|
---|
366 | for (i = order-2; i >= 0; i--)
|
---|
367 | {
|
---|
368 | int j, p, x = state[i];
|
---|
369 |
|
---|
370 | for (j = 0, p = i+1; p < order; j++,p++)
|
---|
371 | {
|
---|
372 | int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
|
---|
373 | state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
|
---|
374 | x = tmp;
|
---|
375 | }
|
---|
376 | }
|
---|
377 | }
|
---|
378 |
|
---|
379 | static int predictor_calc_error(int *k, int *state, int order, int error)
|
---|
380 | {
|
---|
381 | int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
|
---|
382 |
|
---|
383 | #if 1
|
---|
384 | int *k_ptr = &(k[order-2]),
|
---|
385 | *state_ptr = &(state[order-2]);
|
---|
386 | for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
|
---|
387 | {
|
---|
388 | int k_value = *k_ptr, state_value = *state_ptr;
|
---|
389 | x -= shift_down(k_value * state_value, LATTICE_SHIFT);
|
---|
390 | state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
|
---|
391 | }
|
---|
392 | #else
|
---|
393 | for (i = order-2; i >= 0; i--)
|
---|
394 | {
|
---|
395 | x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
|
---|
396 | state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
|
---|
397 | }
|
---|
398 | #endif
|
---|
399 |
|
---|
400 | // don't drift too far, to avoid overflows
|
---|
401 | if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
|
---|
402 | if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
|
---|
403 |
|
---|
404 | state[0] = x;
|
---|
405 |
|
---|
406 | return x;
|
---|
407 | }
|
---|
408 |
|
---|
409 | // Heavily modified Levinson-Durbin algorithm which
|
---|
410 | // copes better with quantization, and calculates the
|
---|
411 | // actual whitened result as it goes.
|
---|
412 |
|
---|
413 | static void modified_levinson_durbin(int *window, int window_entries,
|
---|
414 | int *out, int out_entries, int channels, int *tap_quant)
|
---|
415 | {
|
---|
416 | int i;
|
---|
417 | int *state = av_mallocz(4* window_entries);
|
---|
418 |
|
---|
419 | memcpy(state, window, 4* window_entries);
|
---|
420 |
|
---|
421 | for (i = 0; i < out_entries; i++)
|
---|
422 | {
|
---|
423 | int step = (i+1)*channels, k, j;
|
---|
424 | double xx = 0.0, xy = 0.0;
|
---|
425 | #if 1
|
---|
426 | int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
|
---|
427 | j = window_entries - step;
|
---|
428 | for (;j>=0;j--,x_ptr++,state_ptr++)
|
---|
429 | {
|
---|
430 | double x_value = *x_ptr, state_value = *state_ptr;
|
---|
431 | xx += state_value*state_value;
|
---|
432 | xy += x_value*state_value;
|
---|
433 | }
|
---|
434 | #else
|
---|
435 | for (j = 0; j <= (window_entries - step); j++);
|
---|
436 | {
|
---|
437 | double stepval = window[step+j], stateval = window[j];
|
---|
438 | // xx += (double)window[j]*(double)window[j];
|
---|
439 | // xy += (double)window[step+j]*(double)window[j];
|
---|
440 | xx += stateval*stateval;
|
---|
441 | xy += stepval*stateval;
|
---|
442 | }
|
---|
443 | #endif
|
---|
444 | if (xx == 0.0)
|
---|
445 | k = 0;
|
---|
446 | else
|
---|
447 | k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
|
---|
448 |
|
---|
449 | if (k > (LATTICE_FACTOR/tap_quant[i]))
|
---|
450 | k = LATTICE_FACTOR/tap_quant[i];
|
---|
451 | if (-k > (LATTICE_FACTOR/tap_quant[i]))
|
---|
452 | k = -(LATTICE_FACTOR/tap_quant[i]);
|
---|
453 |
|
---|
454 | out[i] = k;
|
---|
455 | k *= tap_quant[i];
|
---|
456 |
|
---|
457 | #if 1
|
---|
458 | x_ptr = &(window[step]);
|
---|
459 | state_ptr = &(state[0]);
|
---|
460 | j = window_entries - step;
|
---|
461 | for (;j>=0;j--,x_ptr++,state_ptr++)
|
---|
462 | {
|
---|
463 | int x_value = *x_ptr, state_value = *state_ptr;
|
---|
464 | *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
|
---|
465 | *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
|
---|
466 | }
|
---|
467 | #else
|
---|
468 | for (j=0; j <= (window_entries - step); j++)
|
---|
469 | {
|
---|
470 | int stepval = window[step+j], stateval=state[j];
|
---|
471 | window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
|
---|
472 | state[j] += shift_down(k * stepval, LATTICE_SHIFT);
|
---|
473 | }
|
---|
474 | #endif
|
---|
475 | }
|
---|
476 |
|
---|
477 | av_free(state);
|
---|
478 | }
|
---|
479 |
|
---|
// sample rates selectable by index; the position in the table is the code
static int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
|
---|
482 |
|
---|
483 | #ifdef CONFIG_ENCODERS
|
---|
484 |
|
---|
/**
 * Map a sample rate to its header code.
 *
 * @param samplerate sample rate in Hz
 * @return code in 0..8 for a supported rate, -1 otherwise
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int code;

    for (code = 0; code < (int)(sizeof(rates) / sizeof(rates[0])); code++)
    {
        if (rates[code] == samplerate)
            return code;
    }

    return -1;
}
|
---|
501 |
|
---|
502 | static int sonic_encode_init(AVCodecContext *avctx)
|
---|
503 | {
|
---|
504 | SonicContext *s = avctx->priv_data;
|
---|
505 | PutBitContext pb;
|
---|
506 | int i, version = 0;
|
---|
507 |
|
---|
508 | if (avctx->channels > MAX_CHANNELS)
|
---|
509 | {
|
---|
510 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
|
---|
511 | return -1; /* only stereo or mono for now */
|
---|
512 | }
|
---|
513 |
|
---|
514 | if (avctx->channels == 2)
|
---|
515 | s->decorrelation = MID_SIDE;
|
---|
516 |
|
---|
517 | if (avctx->codec->id == CODEC_ID_SONIC_LS)
|
---|
518 | {
|
---|
519 | s->lossless = 1;
|
---|
520 | s->num_taps = 32;
|
---|
521 | s->downsampling = 1;
|
---|
522 | s->quantization = 0.0;
|
---|
523 | }
|
---|
524 | else
|
---|
525 | {
|
---|
526 | s->num_taps = 128;
|
---|
527 | s->downsampling = 2;
|
---|
528 | s->quantization = 1.0;
|
---|
529 | }
|
---|
530 |
|
---|
531 | // max tap 2048
|
---|
532 | if ((s->num_taps < 32) || (s->num_taps > 1024) ||
|
---|
533 | ((s->num_taps>>5)<<5 != s->num_taps))
|
---|
534 | {
|
---|
535 | av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
|
---|
536 | return -1;
|
---|
537 | }
|
---|
538 |
|
---|
539 | // generate taps
|
---|
540 | s->tap_quant = av_mallocz(4* s->num_taps);
|
---|
541 | for (i = 0; i < s->num_taps; i++)
|
---|
542 | s->tap_quant[i] = (int)(sqrt(i+1));
|
---|
543 |
|
---|
544 | s->channels = avctx->channels;
|
---|
545 | s->samplerate = avctx->sample_rate;
|
---|
546 |
|
---|
547 | s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
|
---|
548 | s->frame_size = s->channels*s->block_align*s->downsampling;
|
---|
549 |
|
---|
550 | s->tail = av_mallocz(4* s->num_taps*s->channels);
|
---|
551 | if (!s->tail)
|
---|
552 | return -1;
|
---|
553 | s->tail_size = s->num_taps*s->channels;
|
---|
554 |
|
---|
555 | s->predictor_k = av_mallocz(4 * s->num_taps);
|
---|
556 | if (!s->predictor_k)
|
---|
557 | return -1;
|
---|
558 |
|
---|
559 | for (i = 0; i < s->channels; i++)
|
---|
560 | {
|
---|
561 | s->coded_samples[i] = av_mallocz(4* s->block_align);
|
---|
562 | if (!s->coded_samples[i])
|
---|
563 | return -1;
|
---|
564 | }
|
---|
565 |
|
---|
566 | s->int_samples = av_mallocz(4* s->frame_size);
|
---|
567 |
|
---|
568 | s->window_size = ((2*s->tail_size)+s->frame_size);
|
---|
569 | s->window = av_mallocz(4* s->window_size);
|
---|
570 | if (!s->window)
|
---|
571 | return -1;
|
---|
572 |
|
---|
573 | avctx->extradata = av_mallocz(16);
|
---|
574 | if (!avctx->extradata)
|
---|
575 | return -1;
|
---|
576 | init_put_bits(&pb, avctx->extradata, 16*8);
|
---|
577 |
|
---|
578 | put_bits(&pb, 2, version); // version
|
---|
579 | if (version == 1)
|
---|
580 | {
|
---|
581 | put_bits(&pb, 2, s->channels);
|
---|
582 | put_bits(&pb, 4, code_samplerate(s->samplerate));
|
---|
583 | }
|
---|
584 | put_bits(&pb, 1, s->lossless);
|
---|
585 | if (!s->lossless)
|
---|
586 | put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
|
---|
587 | put_bits(&pb, 2, s->decorrelation);
|
---|
588 | put_bits(&pb, 2, s->downsampling);
|
---|
589 | put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
|
---|
590 | put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
|
---|
591 |
|
---|
592 | flush_put_bits(&pb);
|
---|
593 | avctx->extradata_size = put_bits_count(&pb)/8;
|
---|
594 |
|
---|
595 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
|
---|
596 | version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
|
---|
597 |
|
---|
598 | avctx->coded_frame = avcodec_alloc_frame();
|
---|
599 | if (!avctx->coded_frame)
|
---|
600 | return -ENOMEM;
|
---|
601 | avctx->coded_frame->key_frame = 1;
|
---|
602 | avctx->frame_size = s->block_align*s->downsampling;
|
---|
603 |
|
---|
604 | return 0;
|
---|
605 | }
|
---|
606 |
|
---|
607 | static int sonic_encode_close(AVCodecContext *avctx)
|
---|
608 | {
|
---|
609 | SonicContext *s = avctx->priv_data;
|
---|
610 | int i;
|
---|
611 |
|
---|
612 | av_freep(&avctx->coded_frame);
|
---|
613 |
|
---|
614 | for (i = 0; i < s->channels; i++)
|
---|
615 | av_free(s->coded_samples[i]);
|
---|
616 |
|
---|
617 | av_free(s->predictor_k);
|
---|
618 | av_free(s->tail);
|
---|
619 | av_free(s->tap_quant);
|
---|
620 | av_free(s->window);
|
---|
621 | av_free(s->int_samples);
|
---|
622 |
|
---|
623 | return 0;
|
---|
624 | }
|
---|
625 |
|
---|
626 | static int sonic_encode_frame(AVCodecContext *avctx,
|
---|
627 | uint8_t *buf, int buf_size, void *data)
|
---|
628 | {
|
---|
629 | SonicContext *s = avctx->priv_data;
|
---|
630 | PutBitContext pb;
|
---|
631 | int i, j, ch, quant = 0, x = 0;
|
---|
632 | short *samples = data;
|
---|
633 |
|
---|
634 | init_put_bits(&pb, buf, buf_size*8);
|
---|
635 |
|
---|
636 | // short -> internal
|
---|
637 | for (i = 0; i < s->frame_size; i++)
|
---|
638 | s->int_samples[i] = samples[i];
|
---|
639 |
|
---|
640 | if (!s->lossless)
|
---|
641 | for (i = 0; i < s->frame_size; i++)
|
---|
642 | s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
|
---|
643 |
|
---|
644 | switch(s->decorrelation)
|
---|
645 | {
|
---|
646 | case MID_SIDE:
|
---|
647 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
648 | {
|
---|
649 | s->int_samples[i] += s->int_samples[i+1];
|
---|
650 | s->int_samples[i+1] -= shift(s->int_samples[i], 1);
|
---|
651 | }
|
---|
652 | break;
|
---|
653 | case LEFT_SIDE:
|
---|
654 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
655 | s->int_samples[i+1] -= s->int_samples[i];
|
---|
656 | break;
|
---|
657 | case RIGHT_SIDE:
|
---|
658 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
659 | s->int_samples[i] -= s->int_samples[i+1];
|
---|
660 | break;
|
---|
661 | }
|
---|
662 |
|
---|
663 | memset(s->window, 0, 4* s->window_size);
|
---|
664 |
|
---|
665 | for (i = 0; i < s->tail_size; i++)
|
---|
666 | s->window[x++] = s->tail[i];
|
---|
667 |
|
---|
668 | for (i = 0; i < s->frame_size; i++)
|
---|
669 | s->window[x++] = s->int_samples[i];
|
---|
670 |
|
---|
671 | for (i = 0; i < s->tail_size; i++)
|
---|
672 | s->window[x++] = 0;
|
---|
673 |
|
---|
674 | for (i = 0; i < s->tail_size; i++)
|
---|
675 | s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
|
---|
676 |
|
---|
677 | // generate taps
|
---|
678 | modified_levinson_durbin(s->window, s->window_size,
|
---|
679 | s->predictor_k, s->num_taps, s->channels, s->tap_quant);
|
---|
680 | if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
|
---|
681 | return -1;
|
---|
682 |
|
---|
683 | for (ch = 0; ch < s->channels; ch++)
|
---|
684 | {
|
---|
685 | x = s->tail_size+ch;
|
---|
686 | for (i = 0; i < s->block_align; i++)
|
---|
687 | {
|
---|
688 | int sum = 0;
|
---|
689 | for (j = 0; j < s->downsampling; j++, x += s->channels)
|
---|
690 | sum += s->window[x];
|
---|
691 | s->coded_samples[ch][i] = sum;
|
---|
692 | }
|
---|
693 | }
|
---|
694 |
|
---|
695 | // simple rate control code
|
---|
696 | if (!s->lossless)
|
---|
697 | {
|
---|
698 | double energy1 = 0.0, energy2 = 0.0;
|
---|
699 | for (ch = 0; ch < s->channels; ch++)
|
---|
700 | {
|
---|
701 | for (i = 0; i < s->block_align; i++)
|
---|
702 | {
|
---|
703 | double sample = s->coded_samples[ch][i];
|
---|
704 | energy2 += sample*sample;
|
---|
705 | energy1 += fabs(sample);
|
---|
706 | }
|
---|
707 | }
|
---|
708 |
|
---|
709 | energy2 = sqrt(energy2/(s->channels*s->block_align));
|
---|
710 | energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
|
---|
711 |
|
---|
712 | // increase bitrate when samples are like a gaussian distribution
|
---|
713 | // reduce bitrate when samples are like a two-tailed exponential distribution
|
---|
714 |
|
---|
715 | if (energy2 > energy1)
|
---|
716 | energy2 += (energy2-energy1)*RATE_VARIATION;
|
---|
717 |
|
---|
718 | quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
|
---|
719 | // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
|
---|
720 |
|
---|
721 | if (quant < 1)
|
---|
722 | quant = 1;
|
---|
723 | if (quant > 65535)
|
---|
724 | quant = 65535;
|
---|
725 |
|
---|
726 | set_ue_golomb(&pb, quant);
|
---|
727 |
|
---|
728 | quant *= SAMPLE_FACTOR;
|
---|
729 | }
|
---|
730 |
|
---|
731 | // write out coded samples
|
---|
732 | for (ch = 0; ch < s->channels; ch++)
|
---|
733 | {
|
---|
734 | if (!s->lossless)
|
---|
735 | for (i = 0; i < s->block_align; i++)
|
---|
736 | s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
|
---|
737 |
|
---|
738 | if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
|
---|
739 | return -1;
|
---|
740 | }
|
---|
741 |
|
---|
742 | // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
|
---|
743 |
|
---|
744 | flush_put_bits(&pb);
|
---|
745 | return (put_bits_count(&pb)+7)/8;
|
---|
746 | }
|
---|
747 | #endif //CONFIG_ENCODERS
|
---|
748 |
|
---|
749 | static int sonic_decode_init(AVCodecContext *avctx)
|
---|
750 | {
|
---|
751 | SonicContext *s = avctx->priv_data;
|
---|
752 | GetBitContext gb;
|
---|
753 | int i, version;
|
---|
754 |
|
---|
755 | s->channels = avctx->channels;
|
---|
756 | s->samplerate = avctx->sample_rate;
|
---|
757 |
|
---|
758 | if (!avctx->extradata)
|
---|
759 | {
|
---|
760 | av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
|
---|
761 | return -1;
|
---|
762 | }
|
---|
763 |
|
---|
764 | init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
|
---|
765 |
|
---|
766 | version = get_bits(&gb, 2);
|
---|
767 | if (version > 1)
|
---|
768 | {
|
---|
769 | av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
|
---|
770 | return -1;
|
---|
771 | }
|
---|
772 |
|
---|
773 | if (version == 1)
|
---|
774 | {
|
---|
775 | s->channels = get_bits(&gb, 2);
|
---|
776 | s->samplerate = samplerate_table[get_bits(&gb, 4)];
|
---|
777 | av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
|
---|
778 | s->channels, s->samplerate);
|
---|
779 | }
|
---|
780 |
|
---|
781 | if (s->channels > MAX_CHANNELS)
|
---|
782 | {
|
---|
783 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
|
---|
784 | return -1;
|
---|
785 | }
|
---|
786 |
|
---|
787 | s->lossless = get_bits1(&gb);
|
---|
788 | if (!s->lossless)
|
---|
789 | skip_bits(&gb, 3); // XXX FIXME
|
---|
790 | s->decorrelation = get_bits(&gb, 2);
|
---|
791 |
|
---|
792 | s->downsampling = get_bits(&gb, 2);
|
---|
793 | s->num_taps = (get_bits(&gb, 5)+1)<<5;
|
---|
794 | if (get_bits1(&gb)) // XXX FIXME
|
---|
795 | av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
|
---|
796 |
|
---|
797 | s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
|
---|
798 | s->frame_size = s->channels*s->block_align*s->downsampling;
|
---|
799 | // avctx->frame_size = s->block_align;
|
---|
800 |
|
---|
801 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
|
---|
802 | version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
|
---|
803 |
|
---|
804 | // generate taps
|
---|
805 | s->tap_quant = av_mallocz(4* s->num_taps);
|
---|
806 | for (i = 0; i < s->num_taps; i++)
|
---|
807 | s->tap_quant[i] = (int)(sqrt(i+1));
|
---|
808 |
|
---|
809 | s->predictor_k = av_mallocz(4* s->num_taps);
|
---|
810 |
|
---|
811 | for (i = 0; i < s->channels; i++)
|
---|
812 | {
|
---|
813 | s->predictor_state[i] = av_mallocz(4* s->num_taps);
|
---|
814 | if (!s->predictor_state[i])
|
---|
815 | return -1;
|
---|
816 | }
|
---|
817 |
|
---|
818 | for (i = 0; i < s->channels; i++)
|
---|
819 | {
|
---|
820 | s->coded_samples[i] = av_mallocz(4* s->block_align);
|
---|
821 | if (!s->coded_samples[i])
|
---|
822 | return -1;
|
---|
823 | }
|
---|
824 | s->int_samples = av_mallocz(4* s->frame_size);
|
---|
825 |
|
---|
826 | return 0;
|
---|
827 | }
|
---|
828 |
|
---|
829 | static int sonic_decode_close(AVCodecContext *avctx)
|
---|
830 | {
|
---|
831 | SonicContext *s = avctx->priv_data;
|
---|
832 | int i;
|
---|
833 |
|
---|
834 | av_free(s->int_samples);
|
---|
835 | av_free(s->tap_quant);
|
---|
836 | av_free(s->predictor_k);
|
---|
837 |
|
---|
838 | for (i = 0; i < s->channels; i++)
|
---|
839 | {
|
---|
840 | av_free(s->predictor_state[i]);
|
---|
841 | av_free(s->coded_samples[i]);
|
---|
842 | }
|
---|
843 |
|
---|
844 | return 0;
|
---|
845 | }
|
---|
846 |
|
---|
847 | static int sonic_decode_frame(AVCodecContext *avctx,
|
---|
848 | void *data, int *data_size,
|
---|
849 | uint8_t *buf, int buf_size)
|
---|
850 | {
|
---|
851 | SonicContext *s = avctx->priv_data;
|
---|
852 | GetBitContext gb;
|
---|
853 | int i, quant, ch, j;
|
---|
854 | short *samples = data;
|
---|
855 |
|
---|
856 | if (buf_size == 0) return 0;
|
---|
857 |
|
---|
858 | // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
|
---|
859 |
|
---|
860 | init_get_bits(&gb, buf, buf_size*8);
|
---|
861 |
|
---|
862 | intlist_read(&gb, s->predictor_k, s->num_taps, 0);
|
---|
863 |
|
---|
864 | // dequantize
|
---|
865 | for (i = 0; i < s->num_taps; i++)
|
---|
866 | s->predictor_k[i] *= s->tap_quant[i];
|
---|
867 |
|
---|
868 | if (s->lossless)
|
---|
869 | quant = 1;
|
---|
870 | else
|
---|
871 | quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
|
---|
872 |
|
---|
873 | // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
|
---|
874 |
|
---|
875 | for (ch = 0; ch < s->channels; ch++)
|
---|
876 | {
|
---|
877 | int x = ch;
|
---|
878 |
|
---|
879 | predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
|
---|
880 |
|
---|
881 | intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
|
---|
882 |
|
---|
883 | for (i = 0; i < s->block_align; i++)
|
---|
884 | {
|
---|
885 | for (j = 0; j < s->downsampling - 1; j++)
|
---|
886 | {
|
---|
887 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
|
---|
888 | x += s->channels;
|
---|
889 | }
|
---|
890 |
|
---|
891 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
|
---|
892 | x += s->channels;
|
---|
893 | }
|
---|
894 |
|
---|
895 | for (i = 0; i < s->num_taps; i++)
|
---|
896 | s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
|
---|
897 | }
|
---|
898 |
|
---|
899 | switch(s->decorrelation)
|
---|
900 | {
|
---|
901 | case MID_SIDE:
|
---|
902 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
903 | {
|
---|
904 | s->int_samples[i+1] += shift(s->int_samples[i], 1);
|
---|
905 | s->int_samples[i] -= s->int_samples[i+1];
|
---|
906 | }
|
---|
907 | break;
|
---|
908 | case LEFT_SIDE:
|
---|
909 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
910 | s->int_samples[i+1] += s->int_samples[i];
|
---|
911 | break;
|
---|
912 | case RIGHT_SIDE:
|
---|
913 | for (i = 0; i < s->frame_size; i += s->channels)
|
---|
914 | s->int_samples[i] += s->int_samples[i+1];
|
---|
915 | break;
|
---|
916 | }
|
---|
917 |
|
---|
918 | if (!s->lossless)
|
---|
919 | for (i = 0; i < s->frame_size; i++)
|
---|
920 | s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
|
---|
921 |
|
---|
922 | // internal -> short
|
---|
923 | for (i = 0; i < s->frame_size; i++)
|
---|
924 | {
|
---|
925 | if (s->int_samples[i] > 32767)
|
---|
926 | samples[i] = 32767;
|
---|
927 | else if (s->int_samples[i] < -32768)
|
---|
928 | samples[i] = -32768;
|
---|
929 | else
|
---|
930 | samples[i] = s->int_samples[i];
|
---|
931 | }
|
---|
932 |
|
---|
933 | align_get_bits(&gb);
|
---|
934 |
|
---|
935 | *data_size = s->frame_size * 2;
|
---|
936 |
|
---|
937 | return (get_bits_count(&gb)+7)/8;
|
---|
938 | }
|
---|
939 |
|
---|
940 | #ifdef CONFIG_ENCODERS
|
---|
// Registration entry for the lossy Sonic encoder.
// Positional initializer: the meaning of each slot is defined by the
// AVCodec declaration in avcodec.h, which is not visible here.
AVCodec sonic_encoder = {
    "sonic",              // codec name
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext), // size of the private context
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,                 // presumably the decode callback, unused here - confirm against avcodec.h
};
|
---|
951 |
|
---|
// Registration entry for the lossless Sonic encoder; shares all callbacks
// with the lossy encoder, sonic_encode_init() selects the mode by codec id.
// Positional initializer: the meaning of each slot is defined by the
// AVCodec declaration in avcodec.h, which is not visible here.
AVCodec sonic_ls_encoder = {
    "sonicls",            // codec name
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC_LS,
    sizeof(SonicContext), // size of the private context
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,                 // presumably the decode callback, unused here - confirm against avcodec.h
};
|
---|
962 | #endif
|
---|
963 |
|
---|
964 | #ifdef CONFIG_DECODERS
|
---|
// Registration entry for the Sonic decoder.
// Positional initializer: the meaning of each slot is defined by the
// AVCodec declaration in avcodec.h, which is not visible here.
AVCodec sonic_decoder = {
    "sonic",              // codec name
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext), // size of the private context
    sonic_decode_init,
    NULL,                 // presumably the encode callback, unused here - confirm against avcodec.h
    sonic_decode_close,
    sonic_decode_frame,
};
|
---|
975 | #endif
|
---|