1 /*
2 * AAC encoder utilities
3 * Copyright (C) 2015 Rostislav Pehlivanov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * AAC encoder utilities
25 * @author Rostislav Pehlivanov ( atomnuker gmail com )
26 */
27
28 #ifndef AVCODEC_AACENC_UTILS_H
29 #define AVCODEC_AACENC_UTILS_H
30
34
/* Candidate rounding offsets.
 * NOTE(review): presumably passed as the `rounding` argument of quant() and
 * quantize_bands(); no use of either constant is visible in this file. */
35 #define ROUND_STANDARD 0.4054f
36 #define ROUND_TO_ZERO 0.1054f
/* Offset added to maxval * Q34 when the largest quantized value of a band is
 * estimated for codebook selection. */
37 #define C_QUANT 0.4054f
38
40 {
41 int i;
42 for (i = 0; i <
size; i++) {
43 float a = fabsf(in[i]);
44 out[i] = sqrtf(a * sqrtf(a));
45 }
46 }
47
49 {
50 return sqrtf(a * sqrtf(a));
51 }
52
53 /**
54 * Quantize one coefficient.
55 * @return absolute value of the quantized coefficient
56 * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
57 */
58 static inline int quant(
float coef,
const float Q,
const float rounding)
59 {
61 return sqrtf(a * sqrtf(a)) + rounding;
62 }
63
/**
 * Quantize a run of pre-scaled coefficients.
 *
 * For each index, scaled[i] * Q34 + rounding is clamped to at most maxval and
 * truncated to int. When is_signed is non-zero, the result is negated for
 * every index whose in[i] is negative.
 *
 * @param out       destination, receives size quantized values
 * @param in        original coefficients; only consulted for their sign
 * @param scaled    values to quantize (multiplied by Q34 as they are)
 * @param size      number of coefficients
 * @param Q34       scale applied to scaled[i]
 * @param is_signed non-zero to carry the sign of in[i] over to out[i]
 * @param maxval    upper bound for the magnitude written to out
 * @param rounding  offset added before truncation
 */
static inline void quantize_bands(int *out, const float *in, const float *scaled,
                                  int size, float Q34, int is_signed, int maxval,
                                  const float rounding)
{
    int i;
    for (i = 0; i < size; i++) {
        double qc = scaled[i] * Q34;
        double q  = qc + rounding;

        /* Clamp once instead of evaluating the sum twice inside a macro. */
        if (q > (double)maxval)
            q = (double)maxval;
        out[i] = (int)q;

        if (is_signed && in[i] < 0.0f) {
            out[i] = -out[i];
        }
    }
}
78
/**
 * Find the largest value in the leading part of each window of a group.
 *
 * Windows are laid out 128 floats apart in scaled; the first swb_size entries
 * of each of the group_len windows are examined. Values are compared as they
 * are (no absolute value is taken) and the running maximum starts at 0.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of entries examined per window
 * @param scaled    input values
 * @return the largest examined value, or 0.0f if none exceeds it
 */
static inline float find_max_val(int group_len, int swb_size, const float *scaled)
{
    float peak = 0.0f;
    int win = 0;

    while (win < group_len) {
        const int base = win * 128;
        int k;

        for (k = 0; k < swb_size; k++) {
            const float v = scaled[base + k];
            peak = peak > v ? peak : v;
        }
        win++;
    }
    return peak;
}
90
92 {
94 float Q34 = sqrtf(Q * sqrtf(Q));
96 qmaxval = maxval * Q34 +
C_QUANT;
98 cb = 11;
99 else
102 }
103
/*
 * Tail of find_form_factor(): walks group_len windows spaced 128 floats apart
 * in `scaled` and inspects the first swb_size magnitudes of each window.
 * NOTE(review): the first line of the signature and listing lines 117-118,
 * 143 and 146-147 are absent here. No visible statement adds to e, e2 or
 * weight, and the `if` paired with the final `else` is not shown, so the
 * accumulation and the non-fallback return value cannot be confirmed from
 * this listing.
 */
105 const float *scaled, float nzslope) {
/* Reciprocals used below to normalise by swb_size and by swb_size - 1. */
106 const float iswb_size = 1.0f / swb_size;
107 const float iswb_sizem1 = 1.0f / (swb_size - 1);
108 const float ethresh = thresh;
109 float form = 0.0f,
weight = 0.0f;
110 int w2, i;
111 for (w2 = 0; w2 < group_len; w2++) {
112 float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f;
113 float nzl = 0;
114 for (i = 0; i < swb_size; i++) {
/* s is the magnitude of line i in window w2; maxval tracks the largest one. */
115 float s = fabsf(scaled[w2*128+i]);
116 maxval =
FFMAX(maxval, s);
119 /* We really don't want a hard non-zero-line count, since
120 * even below-threshold lines do add up towards band spectral power.
121 * So, fall steeply towards zero, but smoothly
122 */
/* Lines at or above ethresh count as 1, weaker ones as (s / ethresh)^nzslope. */
123 if (s >= ethresh) {
124 nzl += 1.0f;
125 } else {
126 nzl +=
powf(s / ethresh, nzslope);
127 }
128 }
/* Only windows whose e2 exceeds thresh contribute to form. */
129 if (e2 > thresh) {
130 float frm;
131 e *= iswb_size;
132
133 /** compute variance */
134 for (i = 0; i < swb_size; i++) {
135 float d = fabsf(scaled[w2*128+i]) - e;
136 var += d*d;
137 }
/* After the square root, var holds a standard deviation rather than a variance. */
138 var = sqrtf(var * iswb_sizem1);
139
140 e2 *= iswb_size;
/* frm relates e to the smaller of e + 4*var and the window peak. */
141 frm = e /
FFMIN(e+4*var,maxval);
/* The soft non-zero count is floored at 0.5 before dividing. */
142 form += e2 * sqrtf(frm) /
FFMAX(0.5f,nzl);
144 }
145 }
148 } else {
/* Fallback result of the (not shown) condition. */
149 return 1.0f;
150 }
151 }
152
153 /** Return the minimum scalefactor where the quantized coef does not clip. */
155 {
157 }
158
159 /** Return the maximum scalefactor where the quantized coef is not zero. */
161 {
163 }
164
/**
 * Find the entry of a float array that is closest to a given value.
 *
 * Closeness is measured by squared difference. The comparison is strict, so
 * the first of several equally close entries is kept.
 *
 * @param val value to match
 * @param arr candidate values
 * @param num number of entries in arr
 * @return index of the closest entry; 0 when num <= 0
 */
static inline int quant_array_idx(const float val, const float *arr, const int num)
{
    int i, index = 0;
    float quant_min_err = INFINITY;
    for (i = 0; i < num; i++) {
        float error = (val - arr[i])*(val - arr[i]);
        if (error < quant_min_err) {
            quant_min_err = error;
            index = i;
        }
    }
    return index;
}
181
182 /**
183 * approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
 *
 * The approximation is a cubic in b: 0.001f + 0.0035f * (b / 15.5f)^3, which
 * gives 0.001f at b == 0 and 0.0045f at b == 15.5f. Unlike the expression
 * above, the code does not clamp b to 15.5f.
 * NOTE(review): the line carrying the function signature is absent from this
 * listing; only the body is shown.
184 */
186 {
187 return 0.001f + 0.0035f * (b*b*
b) / (15.5f*15.5f*15.5f);
188 }
189
190 /*
191 * Compute a nextband map to be used with SF delta constraint utilities.
192 * The nextband array should contain 128 elements, and positions that don't
193 * map to valid, nonzero bands of the form w*16+g (with w being the initial
194 * window of the window group, only) are left indetermined.
195 */
197 {
198 unsigned char prevband = 0;
200 /** Just a safe default */
201 for (g = 0; g < 128; g++)
202 nextband[g] = g;
203
204 /** Now really navigate the nonzero band chain */
208 prevband = nextband[prevband] = w*16+
g;
209 }
210 }
211 nextband[prevband] = prevband; /* terminate */
212 }
213
/*
 * Updates nextband to reflect a removed band (equivalent to
 * calling ff_init_nextband_map after marking a band as zero).
 *
 * The entry of prevband is redirected to whatever band used to follow the
 * removed one; the entry of band itself is left unchanged.
 *
 * nextband: map to update in place
 * prevband: index of the band preceding the removed one in the chain
 * band:     index of the band being removed
 */
static inline void ff_nextband_remove(uint8_t *nextband, int prevband, int band)
{
    nextband[prevband] = nextband[band];
}
222
223 /*
224 * Checks whether the specified band could be removed without inducing
225 * scalefactor delta that violates SF delta encoding constraints.
226 * prev_sf has to be the scalefactor of the previous nonzero, nonspecial
227 * band, in encoding order, or negative if there was no such band.
228 */
231 {
232 return prev_sf >= 0
235 }
236
237 /*
238 * Checks whether the specified band's scalefactor could be replaced
239 * with another one without violating SF delta encoding constraints.
240 * prev_sf has to be the scalefactor of the previous nonzero, nonspecial
241 * band, in encoding order, or negative if there was no such band.
242 */
244 const uint8_t *nextband,
int prev_sf,
int new_sf,
int band)
245 {
250 }
251
/*
 * Log an error through av_log() and return AVERROR(EINVAL) from the enclosing
 * function when cond is true. A variable named avctx must be in scope at the
 * point of use, and the invocation must be terminated with a semicolon.
 *
 * Wrapped in do { } while (0) so that the macro expands to a single statement
 * and can be used as the body of an if/else without capturing the else.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)
257
/*
 * Log a warning through av_log() when cond is true; execution continues
 * afterwards. A variable named avctx must be in scope at the point of use,
 * and the invocation must be terminated with a semicolon.
 *
 * Wrapped in do { } while (0) so that the macro expands to a single statement
 * and can be used as the body of an if/else without capturing the else.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)
262
263 #endif /* AVCODEC_AACENC_UTILS_H */
const char const char void * val
Band types following are encoded differently from others.
static void ff_nextband_remove(uint8_t *nextband, int prevband, int band)
static void abs_pow34_v(float *out, const float *in, const int size)
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)
static uint8_t coef2maxsf(float coef)
Return the maximum scalefactor where the quantized coef is not zero.
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
static double cb(void *priv, double x, double y)
static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope)
static int quant(float coef, const float Q, const float rounding)
Quantize one coefficient.
int num_swb
number of scalefactor window bands
float ff_aac_pow2sf_tab[428]
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
static uint8_t coef2minsf(float coef)
Return the minimum scalefactor where the quantized coef does not clip.
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
AAC definitions and structures.
#define FF_ARRAY_ELEMS(a)
static int quant_array_idx(const float val, const float *arr, const int num)
static int find_min_book(float maxval, int sf)
static void quantize_bands(int *out, const float *in, const float *scaled, int size, float Q34, int is_signed, int maxval, const float rounding)
static const unsigned char aac_maxval_cb[]
IndividualChannelStream ics
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
static float pos_pow34(float a)
static int weight(int i, int blen, int offset)
uint8_t zeroes[128]
band is not coded (used by encoder)
int sf_idx[128]
scalefactor indices (used by encoder)
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Single Channel Element - used for both SCE and LFE elements.
enum BandType band_type[128]
band types
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
static float find_max_val(int group_len, int swb_size, const float *scaled)