1 /*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 /**
26 * @file
27 * G.722 ADPCM audio encoder
28 */
29
35
36 #define FREEZE_INTERVAL 128
37
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39 problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43 undefined behavior. Using larger values is insanely slow anyway. */
45 #define MAX_TRELLIS 16
46
48 {
50 int i;
51 for (i = 0; i < 2; i++) {
55 }
56 return 0;
57 }
58
60 {
62 int ret;
63
67
69 int frontier = 1 << avctx->
trellis;
71 int i;
72 for (i = 0; i < 2; i++) {
79 }
80 }
81 }
82
84 /* validate frame size */
86 int new_frame_size;
87
89 new_frame_size = 2;
92 else
94
96 "allowed. Using %d instead of %d\n", new_frame_size,
99 }
100 } else {
101 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
102 a common packet size for VoIP applications */
104 }
106
108 /* validate trellis */
112 "allowed. Using %d instead of %d\n", new_trellis,
115 }
116 }
117
119
120 return 0;
123 return ret;
124 }
125
127 35, 72, 110, 150, 190, 233, 276, 323,
128 370, 422, 473, 530, 587, 650, 714, 786,
129 858, 940, 1023, 1121, 1219, 1339, 1458, 1612,
130 1765, 1980, 2195, 2557, 2919
131 };
132
134 int *xlow, int *xhigh)
135 {
136 int xout[2];
140 *xlow = xout[0] + xout[1] >> 14;
141 *xhigh = xout[0] - xout[1] >> 14;
147 }
148 }
149
151 {
/*
 * Quantize one high-band sample into a 2-bit code (return value is 0..3).
 *
 * The code is built from two facts about the prediction error:
 *   - bit 1 (value 2): the error is non-negative;
 *   - bit 0 (value 1): the error magnitude is below the threshold `pred`.
 */
/* Prediction error against the band's current predictor, passed through
 * av_clip_int16(). */
152 int diff = av_clip_int16(xhigh - state->s_predictor);
/* Decision threshold: (141 * scale_factor) >> 8. The multiplication binds
 * tighter than the shift, so no parentheses are needed. */
153 int pred = 141 * state->scale_factor >> 8;
154 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
/* diff >> (bits-1) is 0 for diff >= 0 and all-ones for diff < 0 when the
 * shift is arithmetic, so the XOR yields diff or -(diff + 1) without a
 * branch. NOTE(review): right-shifting a negative int is
 * implementation-defined in C; this relies on an arithmetic shift. */
155 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
156 }
157
159 {
/*
 * Quantize one low-band sample into a code by searching the low_quant
 * threshold table, scaled by the band's current scale_factor.
 *
 * The returned code is derived from the table index i (0..29) reached by
 * the search and from the sign of the prediction error.
 */
/* Prediction error against the band's current predictor, passed through
 * av_clip_int16(). */
160 int diff = av_clip_int16(xlow - state->s_predictor);
161 /* = diff >= 0 ? diff : -(diff + 1) */
/* Branch-free magnitude: XOR with the sign mask. NOTE(review): relies on
 * right shift of a negative int being arithmetic
 * (implementation-defined in C). */
162 int limit = diff ^ (diff >> (
sizeof(
diff)*8-1));
163 int i = 0;
/* Parsed as (limit + 1) << 10, since '+' binds tighter than '<<'. This
 * scales the magnitude so it can be compared with
 * low_quant[] * scale_factor. */
164 limit = limit + 1 << 10;
/* Shortcut: if the value already exceeds entry 8, start the linear search
 * at index 9 instead of 0. Presumably valid because the table is
 * ascending; confirm against the low_quant definition. */
165 if (limit >
low_quant[8] * state->scale_factor)
166 i = 9;
/* Linear search: advance while the scaled threshold is still exceeded.
 * i stops at 29 at most, so low_quant[i] is read only for i in 0..28. */
167 while (i < 29 && limit >
low_quant[i] * state->scale_factor)
168 i++;
/* Map index and sign to the output code:
 *   diff <  0: 63 - i when i < 2, otherwise 33 - i
 *   diff >= 0: 61 - i */
169 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
170 }
171
174 const int16_t *samples)
175 {
176 int i, j, k;
177 int frontier = 1 << trellis;
180 int pathn[2] = {0, 0}, froze = -1;
182
183 for (i = 0; i < 2; i++) {
185 nodes_next[i] = c->
nodep_buf[i] + frontier;
187 nodes[i][0] = c->
node_buf[i] + frontier;
188 nodes[i][0]->
ssd = 0;
189 nodes[i][0]->
path = 0;
190 nodes[i][0]->state = c->
band[i];
191 }
192
193 for (i = 0; i < nb_samples >> 1; i++) {
194 int xlow, xhigh;
196 int heap_pos[2] = {0, 0};
197
198 for (j = 0; j < 2; j++) {
199 next[j] = c->
node_buf[j] + frontier*(i & 1);
200 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
201 }
202
204
205 for (j = 0; j < frontier && nodes[0][j]; j++) {
206 /* Only k >> 2 affects the future adaptive state, therefore testing
207 * small steps that don't change k >> 2 is useless, the original
208 * value from encode_low is better than them. Since we step k
209 * in steps of 4, make sure range is a multiple of 4, so that
210 * we don't miss the original value from encode_low. */
211 int range = j < frontier/2 ? 4 : 0;
213
214 int ilow =
encode_low(&cur_node->state, xlow);
215
216 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
217 int decoded, dec_diff, pos;
220
221 if (k < 0)
222 continue;
223
224 decoded = av_clip_intp2((cur_node->state.scale_factor *
226 + cur_node->state.s_predictor, 14);
227 dec_diff = xlow - decoded;
228
229 #define STORE_NODE(index, UPDATE, VALUE)\
230 ssd = cur_node->ssd + dec_diff*dec_diff;\
231 /* Check for wraparound. Using 64 bit ssd counters would \
232 * be simpler, but is slower on x86 32 bit. */\
233 if (ssd < cur_node->ssd)\
234 continue;\
235 if (heap_pos[index] < frontier) {\
236 pos = heap_pos[index]++;\
237 av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
238 node = nodes_next[index][pos] = next[index]++;\
239 node->path = pathn[index]++;\
240 } else {\
241 /* Try to replace one of the leaf nodes with the new \
242 * one, but not always testing the same leaf position */\
243 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
244 if (ssd >= nodes_next[index][pos]->ssd)\
245 continue;\
246 heap_pos[index]++;\
247 node = nodes_next[index][pos];\
248 }\
249 node->ssd = ssd;\
250 node->state = cur_node->state;\
251 UPDATE;\
252 c->paths[index][node->path].value = VALUE;\
253 c->paths[index][node->path].prev = cur_node->path;\
254 /* Sift the newly inserted node up in the heap to restore \
255 * the heap property */\
256 while (pos > 0) {\
257 int parent = (pos - 1) >> 1;\
258 if (nodes_next[index][parent]->ssd <= ssd)\
259 break;\
260 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
261 nodes_next[index][pos]);\
262 pos = parent;\
263 }
265 }
266 }
267
268 for (j = 0; j < frontier && nodes[1][j]; j++) {
269 int ihigh;
271
272 /* We don't try to get any initial guess for ihigh via
273 * encode_high - since there's only 4 possible values, test
274 * them all. Testing all of these gives a much, much larger
275 * gain than testing a larger range around ilow. */
276 for (ihigh = 0; ihigh < 4; ihigh++) {
277 int dhigh, decoded, dec_diff, pos;
280
281 dhigh = cur_node->state.scale_factor *
283 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
284 dec_diff = xhigh - decoded;
285
287 }
288 }
289
290 for (j = 0; j < 2; j++) {
292
293 if (nodes[j][0]->
ssd > (1 << 16)) {
294 for (k = 1; k < frontier && nodes[j][k]; k++)
295 nodes[j][k]->
ssd -= nodes[j][0]->
ssd;
296 nodes[j][0]->ssd = 0;
297 }
298 }
299
301 p[0] = &c->
paths[0][nodes[0][0]->path];
302 p[1] = &c->
paths[1][nodes[1][0]->path];
303 for (j = i; j > froze; j--) {
304 dst[j] = p[1]->value << 6 | p[0]->value;
307 }
308 froze = i;
309 pathn[0] = pathn[1] = 0;
310 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
311 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
312 }
313 }
314
317 for (j = i; j > froze; j--) {
318 dst[j] = p[1]->value << 6 | p[0]->value;
321 }
322 c->
band[0] = nodes[0][0]->state;
323 c->
band[1] = nodes[1][0]->state;
324 }
325
327 const int16_t *samples)
328 {
329 int xlow, xhigh, ilow, ihigh;
336 *dst = ihigh << 6 | ilow;
337 }
338
341 const int16_t *samples)
342 {
343 int i;
344 for (i = 0; i < nb_samples; i += 2)
346 }
347
350 {
352 const int16_t *samples = (
const int16_t *)frame->
data[0];
354
357 return ret;
358
360
363 else
365
366 /* handle last frame with odd frame_size */
367 if (nb_samples < frame->nb_samples) {
368 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
370 }
371
374 *got_packet_ptr = 1;
375 return 0;
376 }
377
390 };
struct G722Context::TrellisNode ** nodep_buf[2]
This structure describes decoded (raw) audio or video data.
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
#define AV_LOG_WARNING
Something somehow does not look correct.
static av_cold int init(AVCodecContext *avctx)
struct G722Context::TrellisPath * paths[2]
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
static int encode_high(const struct G722Band *state, int xhigh)
#define PREV_SAMPLES_BUF_SIZE
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
static av_cold int g722_encode_init(AVCodecContext *avctx)
struct G722Context::TrellisNode * node_buf[2]
const int16_t ff_g722_low_inv_quant6[64]
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
int initial_padding
Audio only.
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
simple assert() macros that are a bit more flexible than ISO C assert().
AVCodec ff_adpcm_g722_encoder
const char * name
Name of the codec implementation.
struct G722Context::G722Band band[2]
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
static av_cold int g722_encode_close(AVCodecContext *avctx)
static int encode_low(const struct G722Band *state, int xlow)
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
static void error(const char *err)
static const float pred[4]
int frame_size
Number of samples per channel in an audio frame.
static const int16_t low_quant[33]
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
main external API structure.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
common internal api header.
common internal and external API header
int prev_samples_pos
the number of values in prev_samples
int trellis
trellis RD quantization
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
const int16_t ff_g722_high_inv_quant[4]
av_cold void ff_g722dsp_init(G722DSPContext *c)
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
static enum AVSampleFormat sample_fmts[]
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
#define FFSWAP(type, a, b)
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
int16_t scale_factor
delayed quantizer scale factor
int nb_samples
number of audio samples (per channel) described by this frame
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
#define AV_NOPTS_VALUE
Undefined timestamp value.
void * av_mallocz_array(size_t nmemb, size_t size)