1 /*
2 * Real Audio 1.0 (14.4K) encoder
3 * Copyright (c) 2010 Francesco Lavra <francescolavra@interfree.it>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * Real Audio 1.0 (14.4K) encoder
25 * @author Francesco Lavra <francescolavra@interfree.it>
26 */
27
29
37
39 {
43 return 0;
44 }
45
46
48 {
50 int ret;
51
55 return -1;
56 }
67 if (ret < 0)
68 goto error;
69
71
72 return 0;
73 error:
75 return ret;
76 }
77
78
79 /**
80 * Quantize a value by searching a sorted table for the element with the
81 * nearest value
82 *
83 * @param value value to quantize
84 * @param table array containing the quantization table
85 * @param size size of the quantization table
86 * @return index of the quantization table corresponding to the element with the
87 * nearest value
88 */
90 {
91 unsigned int low = 0, high = size - 1;
92
93 while (1) {
94 int index = (low + high) >> 1;
96
97 if (index == low)
98 return table[high] + error > value ? low : high;
99 if (error > 0) {
101 } else {
103 }
104 }
105 }
106
107
108 /**
109 * Orthogonalize a vector to another vector
110 *
111 * @param v vector to orthogonalize
112 * @param u vector against which orthogonalization is performed
113 */
115 {
116 int i;
117 float num = 0, den = 0;
118
120 num += v[i] * u[i];
121 den += u[i] * u[i];
122 }
123 num /= den;
125 v[i] -= num * u[i];
126 }
127
128
129 /**
130 * Calculate match score and gain of an LPC-filtered vector with respect to
131 * input data, possibly orthogonalizing it to up to 2 other vectors
132 *
133 * @param work array used to calculate the filtered vector
134 * @param coefs coefficients of the LPC filter
135 * @param vect original vector
136 * @param ortho1 first vector against which orthogonalization is performed
137 * @param ortho2 second vector against which orthogonalization is performed
138 * @param data input data
139 * @param score pointer to variable where match score is returned
140 * @param gain pointer to variable where gain is returned
141 */
143 const float *ortho1, const float *ortho2,
144 const float *
data,
float *score,
float *gain)
145 {
147 int i;
148
150 if (ortho1)
152 if (ortho2)
154 c = g = 0;
156 g += work[i] * work[i];
157 c += data[i] * work[i];
158 }
159 if (c <= 0) {
160 *score = 0;
161 return;
162 }
165 }
166
167
168 /**
169 * Create a vector from the adaptive codebook at a given lag value
170 *
171 * @param vect array where vector is stored
172 * @param cb adaptive codebook
173 * @param lag lag value
174 */
176 {
177 int i;
178
181 vect[i] = cb[i];
184 vect[lag + i] = cb[i];
185 }
186
187
188 /**
189 * Search the adaptive codebook for the best entry and gain and remove its
190 * contribution from input data
191 *
192 * @param adapt_cb array from which the adaptive codebook is extracted
193 * @param work array used to calculate LPC-filtered vectors
194 * @param coefs coefficients of the LPC filter
195 * @param data input data
196 * @return index of the best entry of the adaptive codebook
197 */
199 const float *coefs,
float *
data)
200 {
202 float score, gain, best_score,
av_uninit(best_gain);
204
205 gain = best_score = 0;
209 if (score > best_score) {
210 best_score = score;
211 best_vect = i;
212 best_gain = gain;
213 }
214 }
215 if (!best_score)
216 return 0;
217
218 /**
219 * Re-calculate the filtered vector from the vector with maximum match score
220 * and remove its contribution from input data.
221 */
225 data[i] -= best_gain * work[i];
226 return best_vect - BLOCKSIZE / 2 + 1;
227 }
228
229
230 /**
231 * Find the best vector of a fixed codebook by applying an LPC filter to
232 * codebook entries, possibly orthogonalizing them to up to 2 other vectors and
233 * matching the results with input data
234 *
235 * @param work array used to calculate the filtered vectors
236 * @param coefs coefficients of the LPC filter
237 * @param cb fixed codebook
238 * @param ortho1 first vector against which orthogonalization is performed
239 * @param ortho2 second vector against which orthogonalization is performed
240 * @param data input data
241 * @param idx pointer to variable where the index of the best codebook entry is
242 * returned
243 * @param gain pointer to variable where the gain of the best codebook entry is
244 * returned
245 */
248 const float *ortho2,
float *
data,
int *idx,
249 float *gain)
250 {
251 int i, j;
252 float g, score, best_score;
254
255 *idx = *gain = best_score = 0;
260 if (score > best_score) {
261 best_score = score;
262 *idx = i;
264 }
265 }
266 }
267
268
269 /**
270 * Search the two fixed codebooks for the best entry and gain
271 *
272 * @param work array used to calculate LPC-filtered vectors
273 * @param coefs coefficients of the LPC filter
274 * @param data input data
275 * @param cba_idx index of the best entry of the adaptive codebook
276 * @param cb1_idx pointer to variable where the index of the best entry of the
277 * first fixed codebook is returned
278 * @param cb2_idx pointer to variable where the index of the best entry of the
279 * second fixed codebook is returned
280 */
282 int cba_idx, int *cb1_idx, int *cb2_idx)
283 {
284 int i, ortho_cb1;
285 float gain;
288
289 /**
290 * The filtered vector from the adaptive codebook can be retrieved from
291 * work, because this function is called just after adaptive_cb_search().
292 */
293 if (cba_idx)
294 memcpy(cba_vect, work, sizeof(cba_vect));
295
297 data, cb1_idx, &gain);
298
299 /**
300 * Re-calculate the filtered vector from the vector with maximum match score
301 * and remove its contribution from input data.
302 */
303 if (gain) {
307 if (cba_idx)
310 data[i] -= gain * work[i];
311 memcpy(cb1_vect, work, sizeof(cb1_vect));
312 ortho_cb1 = 1;
313 } else
314 ortho_cb1 = 0;
315
317 ortho_cb1 ? cb1_vect : NULL, data, cb2_idx, &gain);
318 }
319
320
321 /**
322 * Encode a subblock of the current frame
323 *
324 * @param ractx encoder context
325 * @param sblock_data input data of the subblock
326 * @param lpc_coefs coefficients of the LPC filter
327 * @param rms RMS of the reflection coefficients
328 * @param pb pointer to PutBitContext of the current frame
329 */
331 const int16_t *sblock_data,
332 const int16_t *lpc_coefs, unsigned int rms,
334 {
338 int cba_idx, cb1_idx, cb2_idx, gain;
342 float error, best_error;
343
346 coefs[i] = lpc_coefs[i] * (1/4096.0);
347 }
348
349 /**
350 * Calculate the zero-input response of the LPC filter and subtract it from
351 * input data.
352 */
354 LPC_ORDER);
356 zero[i] = work[LPC_ORDER + i];
357 data[i] = sblock_data[i] - zero[i];
358 }
359
360 /**
361 * Codebook search is performed without taking into account the contribution
362 * of the previous subblock, since it has been just subtracted from input
363 * data.
364 */
365 memset(work, 0, LPC_ORDER * sizeof(*work));
366
368 data);
369 if (cba_idx) {
370 /**
371 * The filtered vector from the adaptive codebook can be retrieved from
372 * work, see implementation of adaptive_cb_search().
373 */
374 memcpy(cba, work + LPC_ORDER, sizeof(cba));
375
378 }
379 fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
383 }
385 LPC_ORDER);
386 memcpy(cb1, work + LPC_ORDER, sizeof(cb1));
389 LPC_ORDER);
390 memcpy(cb2, work + LPC_ORDER, sizeof(cb2));
392 best_error = FLT_MAX;
393 gain = 0;
394 for (n = 0; n < 256; n++) {
396 (1/4096.0);
398 (1/4096.0);
399 error = 0;
400 if (cba_idx) {
402 (1/4096.0);
404 data[i] = zero[i] + g[0] * cba[i] + g[1] * cb1[i] +
405 g[2] * cb2[i];
406 error += (data[i] - sblock_data[i]) *
407 (data[i] - sblock_data[i]);
408 }
409 } else {
411 data[i] = zero[i] + g[1] * cb1[i] + g[2] * cb2[i];
412 error += (data[i] - sblock_data[i]) *
413 (data[i] - sblock_data[i]);
414 }
415 }
416 if (error < best_error) {
417 best_error = error;
419 }
420 }
426 gain);
427 }
428
429
432 {
441 int lpc_refl[
LPC_ORDER];
/**< reflection coefficients of the frame */
442 unsigned int refl_rms[
NBLOCKS];
/**< RMS of the reflection coefficients */
443 const int16_t *samples = frame ? (
const int16_t *)frame->
data[0] :
NULL;
444 int energy = 0;
445 int i, idx, ret;
446
448 return 0;
449
451 return ret;
452
453 /**
454 * Since the LPC coefficients are calculated on a frame centered over the
455 * fourth subframe, to encode a given frame, data from the next frame is
456 * needed. In each call to this function, the previous frame (whose data are
457 * saved in the encoder context) is encoded, and data from the current frame
458 * are saved in the encoder context to be used in the next function call.
459 */
462 energy += (lpc_data[i] * lpc_data[i]) >> 4;
463 }
464 if (frame) {
465 int j;
467 lpc_data[i] = samples[j] >> 2;
468 energy += (lpc_data[i] * lpc_data[i]) >> 4;
469 }
470 }
472 memset(&lpc_data[i], 0, (
NBLOCKS * BLOCKSIZE - i) *
sizeof(*lpc_data));
474 32)];
475
480 block_coefs[
NBLOCKS - 1][i] = -(lpc_coefs[LPC_ORDER - 1][i] <<
481 (12 - shift[LPC_ORDER - 1]));
482
483 /**
484 * TODO: apply perceptual weighting of the input speech through bandwidth
485 * expansion of the LPC filter.
486 */
487
489 /**
490 * The filter is unstable: use the coefficients of the previous frame.
491 */
494 /* the filter is still unstable. set reflection coeffs to zero. */
495 memset(lpc_refl, 0, sizeof(lpc_refl));
496 }
497 }
503 }
507 refl_rms[1] =
ff_interp(ractx, block_coefs[1], 2,
508 energy <= ractx->old_energy,
510 refl_rms[2] =
ff_interp(ractx, block_coefs[2], 3, 0, energy);
516 block_coefs[i], refl_rms[i], &pb);
521
522 /* copy input samples to current block for processing in next call */
523 i = 0;
524 if (frame) {
527
529 return ret;
530 } else
533 (NBLOCKS * BLOCKSIZE - i) *
sizeof(*ractx->
curr_block));
534
535 /* Get the next frame pts/duration */
538
540 *got_packet_ptr = 1;
541 return 0;
542 }
543
544
557 .supported_samplerates = (const int[]){ 8000, 0 },
559 };
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
unsigned int lpc_tables[2][10]
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
static int shift(int a, int b)
int ff_t_sqrt(unsigned int x)
Evaluate sqrt(x << 24).
This structure describes decoded (raw) audio or video data.
const int16_t *const ff_lpc_refl_cb[10]
ptrdiff_t const GLvoid * data
static int adaptive_cb_search(const int16_t *adapt_cb, float *work, const float *coefs, float *data)
Search the adaptive codebook for the best entry and gain and remove its contribution from input data...
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
int64_t bit_rate
the average bitrate
int ff_lpc_calc_coefs(LPCContext *s, const int32_t *samples, int blocksize, int min_order, int max_order, int precision, int32_t coefs[][MAX_LPC_ORDER], int *shift, enum FFLPCType lpc_type, int lpc_passes, int omethod, int max_shift, int zero_shift)
Calculate LPC coefficients for multiple orders.
static av_cold int init(AVCodecContext *avctx)
static int quantize(int value, const int16_t *table, unsigned int size)
Quantize a value by searching a sorted table for the element with the nearest value.
static av_cold int ra144_encode_close(AVCodecContext *avctx)
int16_t adapt_cb[146+2]
Adaptive codebook, its size is two units bigger to avoid a buffer overflow.
static void orthogonalize(float *v, const float *u)
Orthogonalize a vector to another vector.
av_cold void ff_audiodsp_init(AudioDSPContext *c)
static void get_match_score(float *work, const float *coefs, float *vect, const float *ortho1, const float *ortho2, const float *data, float *score, float *gain)
Calculate match score and gain of an LPC-filtered vector with respect to input data, possibly orthogonalizing it to up to 2 other vectors.
AVCodec ff_ra_144_encoder
static int ra144_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
#define NBLOCKS
number of subblocks within a block
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
static double cb(void *priv, double x, double y)
unsigned int lpc_refl_rms[2]
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
unsigned int ff_rms(const int *data)
#define FIXED_CB_SIZE
size of fixed codebooks
const uint16_t ff_cb2_base[128]
void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, int cba_idx, int cb1_idx, int cb2_idx, int gval, int gain)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
unsigned int * lpc_coef[2]
LPC coefficients: lpc_coef[0] is the coefficients of the current frame and lpc_coef[1] of the previou...
static const int sizes[][2]
const int8_t ff_cb1_vects[128][40]
static const struct endianess table[]
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
int initial_padding
Audio only.
const char * name
Name of the codec implementation.
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
static void fixed_cb_search(float *work, const float *coefs, float *data, int cba_idx, int *cb1_idx, int *cb2_idx)
Search the two fixed codebooks for the best entry and gain.
av_cold void ff_lpc_end(LPCContext *s)
Uninitialize LPCContext.
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, int energy)
GLsizei GLboolean const GLfloat * value
void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset)
Copy the last offset values of *source to *target.
void ff_int_to_int16(int16_t *out, const int *inp)
const int16_t ff_gain_val_tab[256][3]
int frame_size
Number of samples per channel in an audio frame.
#define BLOCKSIZE
subblock size in 16-bit words
void ff_eval_coefs(int *coefs, const int *refl)
Evaluate the LPC filter coefficients from the reflection coefficients.
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
main external API structure.
static av_cold int ra144_encode_init(AVCodecContext *avctx)
Levinson-Durbin recursion.
int16_t buffer_a[FFALIGN(BLOCKSIZE, 16)]
int ff_eval_refl(int *refl, const int16_t *coefs, AVCodecContext *avctx)
Evaluate the reflection coefficients from the filter coefficients.
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
#define BUFFERSIZE
the size of the adaptive codebook
unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type)
Initialize LPCContext.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
static void find_best_vect(float *work, const float *coefs, const int8_t cb[][BLOCKSIZE], const float *ortho1, const float *ortho2, float *data, int *idx, float *gain)
Find the best vector of a fixed codebook by applying an LPC filter to codebook entries, possibly orthogonalizing them to up to 2 other vectors and matching the results with input data.
const int8_t ff_cb2_vects[128][40]
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
unsigned int old_energy
previous frame energy
const int16_t ff_energy_tab[32]
const uint8_t ff_gain_exp_tab[256]
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
int16_t curr_sblock[50]
The current subblock padded by the last 10 values of the previous one.
int channels
number of audio channels
void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
static enum AVSampleFormat sample_fmts[]
const uint16_t ff_cb1_base[128]
int16_t curr_block[NBLOCKS *BLOCKSIZE]
#define FFSWAP(type, a, b)
int ff_irms(AudioDSPContext *adsp, const int16_t *data)
inverse root mean square
#define AV_CH_LAYOUT_MONO
static void create_adapt_vect(float *vect, const int16_t *cb, int lag)
Create a vector from the adaptive codebook at a given lag value.
This structure stores compressed data.
int nb_samples
number of audio samples (per channel) described by this frame
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
static void ra144_encode_subblock(RA144Context *ractx, const int16_t *sblock_data, const int16_t *lpc_coefs, unsigned int rms, PutBitContext *pb)
Encode a subblock of the current frame.