1 /*
2 * AAC coefficients encoder
3 * Copyright (C) 2008-2009 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * AAC coefficients encoder
25 */
26
27 /***********************************
28 * TODOs:
29 * speed up quantizer selection
30 * add sane pulse detection
31 ***********************************/
32
33 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34
36
47
50
52
53 /* Parameter of f(x) = a*(lambda/100): defines the maximum Fourier spread
54 * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
55 #define NOISE_SPREAD_THRESHOLD 0.9f
56
57 /* Parameter of f(x) = a*(100/lambda): defines how much PNS is allowed to
58 * replace low-energy non-zero bands */
59 #define NOISE_LAMBDA_REPLACE 1.948f
60
62
64 const float *in,
float *
quant,
const float *scaled,
65 int size,
int scale_idx,
int cb,
66 const float lambda,
const float uplim,
67 int *
bits,
float *energy);
68
69 /**
70 * Calculate rate distortion cost for quantizing with given codebook
71 *
72 * @return quantization distortion
73 */
77 const float *scaled,
int size,
int scale_idx,
78 int cb,
const float lambda,
const float uplim,
79 int *
bits,
float *energy,
int BT_ZERO,
int BT_UNSIGNED,
80 int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
81 const float ROUNDING)
82 {
87 const float CLIPPED_ESCAPE = 165140.0f*IQ;
88 float cost = 0;
89 float qenergy = 0;
90 const int dim = BT_PAIR ? 2 : 4;
91 int resbits = 0;
92 int off;
93
94 if (BT_ZERO || BT_NOISE || BT_STEREO) {
99 if (energy)
100 *energy = qenergy;
103 for (
int j = 0; j <
dim; j++)
105 }
107 }
108 if (!scaled) {
109 s->aacdsp.abs_pow34(
s->scoefs, in,
size);
111 }
113 if (BT_UNSIGNED) {
114 off = 0;
115 } else {
117 }
119 const float *vec;
120 int *quants =
s->qcoefs +
i;
121 int curidx = 0;
122 int curbits;
123 float quantized, rd = 0.0f;
124 for (
int j = 0; j <
dim; j++) {
126 curidx += quants[j] + off;
127 }
130 if (BT_UNSIGNED) {
131 for (
int j = 0; j <
dim; j++) {
133 float di;
134 if (BT_ESC && vec[j] == 64.0
f) {
//FIXME: slow
135 if (t >= CLIPPED_ESCAPE) {
136 quantized = CLIPPED_ESCAPE;
137 curbits += 21;
138 } else {
142 }
143 } else {
144 quantized = vec[j]*IQ;
145 }
146 di = t - quantized;
148 out[
i+j] = in[
i+j] >= 0 ? quantized : -quantized;
150 curbits++;
151 qenergy += quantized*quantized;
152 rd += di*di;
153 }
154 } else {
155 for (
int j = 0; j <
dim; j++) {
156 quantized = vec[j]*IQ;
157 qenergy += quantized*quantized;
159 out[
i+j] = quantized;
160 rd += (in[
i+j] - quantized)*(in[
i+j] - quantized);
161 }
162 }
163 cost += rd *
lambda + curbits;
164 resbits += curbits;
165 if (cost >= uplim)
166 return uplim;
169 if (BT_UNSIGNED)
170 for (
int j = 0; j <
dim; j++)
173 if (BT_ESC) {
174 for (int j = 0; j < 2; j++) {
178
181 }
182 }
183 }
184 }
185 }
186
189 if (energy)
190 *energy = qenergy;
191 return cost;
192 }
193
195 const float *in,
float *
quant,
const float *scaled,
196 int size,
int scale_idx,
int cb,
197 const float lambda,
const float uplim,
198 int *
bits,
float *energy) {
200 return 0.0f;
201 }
202
/**
 * Define a static wrapper named quantize_and_encode_band_cost_<NAME>.
 *
 * The generated function takes (s, pb, in, quant, scaled, size, scale_idx,
 * cb, lambda, uplim, bits, energy) and returns the float result of
 * quantize_and_encode_band_cost_template() called with those same arguments,
 * followed by the BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO
 * and ROUNDING macro arguments.
 *
 * The codebook forwarded to the template is ESC_BT when BT_ESC is non-zero;
 * otherwise the caller's cb is passed through unchanged.
 *
 * NOTE(review): the BT_* and ROUNDING arguments are presumably meant to be
 * compile-time constants so that each wrapper is a specialised copy of the
 * template - confirm against the template's definition and the macro's
 * instantiations.
 */
203 #define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
204 static float quantize_and_encode_band_cost_ ## NAME( \
205 struct AACEncContext *s, \
206 PutBitContext *pb, const float *in, float *quant, \
207 const float *scaled, int size, int scale_idx, \
208 int cb, const float lambda, const float uplim, \
209 int *bits, float *energy) { \
210 return quantize_and_encode_band_cost_template( \
211 s, pb, in, quant, scaled, size, scale_idx, \
212 BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy, \
213 BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
214 ROUNDING); \
215 }
216
226
228 {
229 quantize_and_encode_band_cost_ZERO,
230 quantize_and_encode_band_cost_SQUAD,
231 quantize_and_encode_band_cost_SQUAD,
232 quantize_and_encode_band_cost_UQUAD,
233 quantize_and_encode_band_cost_UQUAD,
234 quantize_and_encode_band_cost_SPAIR,
235 quantize_and_encode_band_cost_SPAIR,
236 quantize_and_encode_band_cost_UPAIR,
237 quantize_and_encode_band_cost_UPAIR,
238 quantize_and_encode_band_cost_UPAIR,
239 quantize_and_encode_band_cost_UPAIR,
240 quantize_and_encode_band_cost_ESC,
242 quantize_and_encode_band_cost_NOISE,
243 quantize_and_encode_band_cost_STEREO,
244 quantize_and_encode_band_cost_STEREO,
245 };
246
248 {
249 quantize_and_encode_band_cost_ZERO,
250 quantize_and_encode_band_cost_SQUAD,
251 quantize_and_encode_band_cost_SQUAD,
252 quantize_and_encode_band_cost_UQUAD,
253 quantize_and_encode_band_cost_UQUAD,
254 quantize_and_encode_band_cost_SPAIR,
255 quantize_and_encode_band_cost_SPAIR,
256 quantize_and_encode_band_cost_UPAIR,
257 quantize_and_encode_band_cost_UPAIR,
258 quantize_and_encode_band_cost_UPAIR,
259 quantize_and_encode_band_cost_UPAIR,
260 quantize_and_encode_band_cost_ESC_RTZ,
262 quantize_and_encode_band_cost_NOISE,
263 quantize_and_encode_band_cost_STEREO,
264 quantize_and_encode_band_cost_STEREO,
265 };
266
268 const float *in,
float *
quant,
const float *scaled,
269 int size,
int scale_idx,
int cb,
270 const float lambda,
const float uplim,
271 int *
bits,
float *energy)
272 {
276 }
277
279 const float *in,
float *
out,
int size,
int scale_idx,
280 int cb,
const float lambda,
int rtz)
281 {
284 }
285
286 /**
287 * structure used in optimal codebook search
288 */
290 int prev_idx;
///< pointer to the previous path point
294
299
300 #define TRELLIS_STAGES 121
301 #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
302
304 {
306 int prevscaler_n = -255, prevscaler_i = 0;
308
312 continue;
318 if (prevscaler_n == -255)
321 }
322 }
323 }
324
326 return;
327
328 /* Clip the scalefactor indices */
332 continue;
337 }
338 }
339 }
340 }
341
344 const float lambda)
345 {
346 int start = 0,
i,
w, w2,
g;
348 float dists[128] = { 0 }, uplims[128] = { 0 };
349 float maxvals[128];
350 int fflag, minscaler;
351 int its = 0;
352 int allz = 0;
354
355 // for values above this the decoder might end up in an endless loop
356 // due to always having more bits than what can be encoded.
357 destbits =
FFMIN(destbits, 5800);
358 //some heuristic to determine initial quantizers will reduce search time
359 //determine zero bands and upper limits
361 start = 0;
363 int nz = 0;
364 float uplim = 0.0f;
366 FFPsyBand *band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
370 continue;
371 }
372 nz = 1;
373 }
374 uplims[
w*16+
g] = uplim *512;
377 if (nz)
378 minthr =
FFMIN(minthr, uplim);
379 allz |= nz;
381 }
382 }
387 continue;
388 }
390 }
391 }
392
393 if (!allz)
394 return;
395 s->aacdsp.abs_pow34(
s->scoefs, sce->
coeffs, 1024);
397
401 const float *scaled =
s->scoefs + start;
404 }
405 }
406
407 //perform two-loop search
408 //outer loop - improve quality
409 do {
410 int tbits, qstep;
411 minscaler = sce->
sf_idx[0];
412 //inner loop - quantize spectrum to fit into given number of bits
413 qstep = its ? 1 : 32;
414 do {
415 int prev = -1;
416 tbits = 0;
420 const float *coefs = sce->
coeffs + start;
421 const float *scaled =
s->scoefs + start;
424 float dist = 0.0f;
425
428 continue;
429 }
435 coefs + w2*128,
436 scaled + w2*128,
442 }
443 dists[
w*16+
g] = dist -
bits;
444 if (prev != -1) {
446 }
450 }
451 }
452 if (tbits > destbits) {
453 for (
i = 0;
i < 128;
i++)
454 if (sce->
sf_idx[
i] < 218 - qstep)
456 } else {
457 for (
i = 0;
i < 128;
i++)
458 if (sce->
sf_idx[
i] > 60 - qstep)
460 }
461 qstep >>= 1;
462 if (!qstep && tbits > destbits*1.02 && sce->
sf_idx[0] < 217)
463 qstep = 1;
464 } while (qstep);
465
466 fflag = 0;
468
472 if (dists[
w*16+
g] > uplims[
w*16+
g] && sce->
sf_idx[
w*16+
g] > 60) {
475 else //Try to make sure there is some energy in every band
477 }
481 fflag = 1;
483 }
484 }
485 its++;
486 } while (fflag && its < 10);
487 }
488
490 {
494 int bandwidth, cutoff;
495 float *PNS = &
s->scoefs[0*128], *PNS34 = &
s->scoefs[1*128];
496 float *NOR34 = &
s->scoefs[3*128];
497 uint8_t nextband[128];
498 const float lambda =
s->lambda;
499 const float freq_mult = avctx->
sample_rate*0.5f/wlen;
502 const float dist_bias =
av_clipf(4.
f * 120 / lambda, 0.25
f, 4.0
f);
503 const float pns_transient_energy_r =
FFMIN(0.7
f, lambda / 140.
f);
504
507 * (lambda / 120.f);
508
509 /** Keep this in sync with twoloop's cutoff selection */
510 float rate_bandwidth_multiplier = 1.5f;
511 int prev = -1000, prev_sf = -1;
513 ? (refbits * rate_bandwidth_multiplier * avctx->
sample_rate / 1024)
515
516 frame_bit_rate *= 1.15f;
517
519 bandwidth = avctx->
cutoff;
520 } else {
522 }
523
524 cutoff = bandwidth * 2 * wlen / avctx->
sample_rate;
525
531 int noise_sfi;
532 float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
533 float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
534 float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
535 float min_energy = -1.0f, max_energy = 0.0f;
537 const float freq = (start-wstart)*freq_mult;
542 continue;
543 }
545 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
546 sfb_energy += band->
energy;
549 if (!w2) {
550 min_energy = max_energy = band->
energy;
551 } else {
554 }
555 }
556
557 /* Ramps down at ~8000Hz and loosens the dist threshold */
559
560 /* PNS is acceptable when all of these are true:
561 * 1. high spread energy (noise-like band)
562 * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
563 * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
564 *
565 * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
566 */
568 ((sce->
zeroes[
w*16+
g] || !sce->
band_alt[
w*16+
g]) && sfb_energy < threshold*
sqrtf(1.0
f/freq_boost)) || spread < spread_threshold ||
569 (!sce->
zeroes[
w*16+
g] && sce->
band_alt[
w*16+
g] && sfb_energy > threshold*thr_mult*freq_boost) ||
570 min_energy < pns_transient_energy_r * max_energy ) {
574 continue;
575 }
576
577 pns_tgt_energy = sfb_energy*
FFMIN(1.0
f, spread*spread);
580 if (prev != -1000) {
585 continue;
586 }
587 }
589 float band_energy,
scale, pns_senergy;
591 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
594 PNS[
i] =
s->random_state;
595 }
596 band_energy =
s->fdsp->scalarproduct_float(PNS, PNS, sce->
ics.
swb_sizes[
g]);
599 pns_senergy =
s->fdsp->scalarproduct_float(PNS, PNS, sce->
ics.
swb_sizes[
g]);
600 pns_energy += pns_senergy;
604 NOR34,
609 /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
611 }
613 dist2 += 5;
614 } else {
615 dist2 += 9;
616 }
617 energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
618 sce->
pns_ener[
w*16+
g] = energy_ratio*pns_tgt_energy;
619 if (sce->
zeroes[
w*16+
g] || !sce->
band_alt[
w*16+
g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
622 prev = noise_sfi;
623 } else {
626 }
627 }
628 }
629 }
630
632 {
636 int bandwidth, cutoff;
637 const float lambda =
s->lambda;
638 const float freq_mult = avctx->
sample_rate*0.5f/wlen;
640 const float pns_transient_energy_r =
FFMIN(0.7
f, lambda / 140.
f);
641
644 * (lambda / 120.f);
645
646 /** Keep this in sync with twoloop's cutoff selection */
647 float rate_bandwidth_multiplier = 1.5f;
649 ? (refbits * rate_bandwidth_multiplier * avctx->
sample_rate / 1024)
651
652 frame_bit_rate *= 1.15f;
653
655 bandwidth = avctx->
cutoff;
656 } else {
658 }
659
660 cutoff = bandwidth * 2 * wlen / avctx->
sample_rate;
661
665 float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
666 float min_energy = -1.0f, max_energy = 0.0f;
668 const float freq = start*freq_mult;
670 if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
672 continue;
673 }
675 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
676 sfb_energy += band->
energy;
679 if (!w2) {
680 min_energy = max_energy = band->
energy;
681 } else {
684 }
685 }
686
687 /* PNS is acceptable when all of these are true:
688 * 1. high spread energy (noise-like band)
689 * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
690 * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
691 */
693 if (sfb_energy < threshold*
sqrtf(1.5
f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
695 } else {
697 }
698 }
699 }
700 }
701
703 {
704 int start = 0,
i,
w, w2,
g, sid_sf_boost, prev_mid, prev_side;
705 uint8_t nextband0[128], nextband1[128];
706 float *
M =
s->scoefs + 128*0, *
S =
s->scoefs + 128*1;
707 float *L34 =
s->scoefs + 128*2, *R34 =
s->scoefs + 128*3;
708 float *M34 =
s->scoefs + 128*4, *S34 =
s->scoefs + 128*5;
709 const float lambda =
s->lambda;
710 const float mslambda =
FFMIN(1.0
f, lambda / 120.
f);
714 return;
715
716 /** Scout out next nonzero bands */
719
720 prev_mid = sce0->
sf_idx[0];
721 prev_side = sce1->
sf_idx[0];
723 start = 0;
729 float Mmax = 0.0f, Smax = 0.0f;
730
731 /* Must compute mid/side SF and book for the whole window group */
735 + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
738 }
742 Mmax =
FFMAX(Mmax, M34[
i]);
743 Smax =
FFMAX(Smax, S34[
i]);
744 }
745 }
746
747 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
748 float dist1 = 0.0f, dist2 = 0.0f;
750 int minidx;
751 int mididx, sididx;
752 int midcb, sidcb;
753
760 /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
761 continue;
762 }
763
766
767 /* No CB can be zero */
768 midcb =
FFMAX(1,midcb);
769 sidcb =
FFMAX(1,sidcb);
770
772 FFPsyBand *band0 = &
s->psy.ch[
s->cur_channel+0].psy_bands[(
w+w2)*16+
g];
773 FFPsyBand *band1 = &
s->psy.ch[
s->cur_channel+1].psy_bands[(
w+w2)*16+
g];
778 + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
781 }
782
788 L34,
794 R34,
800 M34,
802 mididx,
803 midcb,
806 S34,
808 sididx,
809 sidcb,
810 mslambda / (minthr * bmax + FLT_MIN),
INFINITY, &b4,
NULL);
815 }
824 /* ms_mask unneeded, and it confuses some decoders */
826 }
827 break;
828 }
else if (
B1 >
B0) {
829 /* More boost won't fix this */
830 break;
831 }
832 }
833 }
839 }
840 }
841 }
842
856 },
869 },
870 };