1 /*
2 * AAC encoder twoloop coder
3 * Copyright (C) 2008-2009 Konstantin Shishkov
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * AAC encoder twoloop coder
25 * @author Konstantin Shishkov, Claudio Freire
26 */
27
28 /**
29 * This file contains a template for the twoloop coder function.
30 * It needs to be provided, externally, as an already included declaration,
31 * the following functions from aacenc_quantization/util.h. They're not included
32 * explicitly here to make it possible to provide alternative implementations:
33 * - quantize_band_cost
34 * - abs_pow34_v
35 * - find_max_val
36 * - find_min_book
37 * - find_form_factor
38 */
39
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42
52
53 /** Frequency in Hz for lower limit of noise substitution **/
54 #define NOISE_LOW_LIMIT 4000
55
56 /* Reflects the cost to change codebooks */
58 {
60 }
61
62 /**
63 * two-loop quantizers search taken from ISO 13818-7 Appendix C
64 */
68 const float lambda)
69 {
70 int start = 0,
i,
w, w2,
g, recomprd;
73 * (lambda / 120.f);
74 int refbits = destbits;
75 int toomanybits, toofewbits;
76 char nzs[128];
77 uint8_t nextband[128];
78 int maxsf[128], minsf[128];
79 float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
80 float maxvals[128], spread_thr_r[128];
81 float min_spread_thr_r, max_spread_thr_r;
82
83 /**
84 * rdlambda controls the maximum tolerated distortion. Twoloop
85 * will keep iterating until it fails to lower it or it reaches
86 * ulimit * rdlambda. Keeping it low increases quality on difficult
87 * signals, but lower it too much, and bits will be taken from weak
88 * signals, creating "holes". A balance is necessary.
89 * rdmax and rdmin specify the relative deviation from rdlambda
90 * allowed for tonality compensation
91 */
92 float rdlambda =
av_clipf(2.0
f * 120.
f / lambda, 0.0625
f, 16.0
f);
93 const float nzslope = 1.5f;
94 float rdmin = 0.03125f;
95 float rdmax = 1.0f;
96
97 /**
98 * sfoffs controls an offset of optimum allocation that will be
99 * applied based on lambda. Keep it real and modest, the loop
100 * will take care of the rest, this just accelerates convergence
101 */
103
104 int fflag, minscaler, nminscaler;
105 int its = 0;
106 int maxits = 30;
107 int allz = 0;
108 int tbits;
109 int cutoff = 1024;
110 int pns_start_pos;
111 int prev;
112
113 /**
114 * zeroscale controls a multiplier of the threshold, if band energy
115 * is below this, a zero is forced. Keep it lower than 1, unless
116 * low lambda is used, because energy < threshold doesn't mean there's
117 * no audible signal outright, it's just energy. Also make it rise
118 * slower than rdlambda, as rdscale has due compensation with
119 * noisy band deprioritization below, whereas zeroing logic is rather dumb
120 */
121 float zeroscale;
122 if (lambda > 120.
f) {
124 } else {
125 zeroscale = 1.f;
126 }
127
128 if (
s->psy.bitres.alloc >= 0) {
129 /**
130 * Psy granted us extra bits to use, from the reservoir
131 * adjust for lambda except what psy already did
132 */
133 destbits =
s->psy.bitres.alloc
135 }
136
138 /**
139 * Constant Q-scale doesn't compensate MS coding on its own
140 * No need to be overly precise, this only controls RD
141 * adjustment CB limits when going overboard
142 */
143 if (
s->options.mid_side &&
s->cur_type ==
TYPE_CPE)
144 destbits *= 2;
145
146 /**
147 * When using a constant Q-scale, don't adjust bits, just use RD
148 * Don't let it go overboard, though... 8x psy target is enough
149 */
150 toomanybits = 5800;
151 toofewbits = destbits / 16;
152
153 /** Don't offset scalers, just RD */
155 rdlambda =
sqrtf(rdlambda);
156
157 /** search further */
158 maxits *= 2;
159 } else {
160 /* When using ABR, be strict, but a reasonable leeway is
161 * critical to allow RC to smoothly track desired bitrate
162 * without sudden quality drops that cause audible artifacts.
163 * Symmetry is also desirable, to avoid systematic bias.
164 */
165 toomanybits = destbits + destbits/8;
166 toofewbits = destbits - destbits/8;
167
168 sfoffs = 0;
169 rdlambda =
sqrtf(rdlambda);
170 }
171
172 /** and zero out above cutoff frequency */
173 {
175 int bandwidth;
176
177 /**
178 * Scale, psy gives us constant quality, this LP only scales
179 * bitrate by lambda, so we save bits on subjectively unimportant HF
180 * rather than increase quantization noise. Adjust nominal bitrate
181 * to effective bitrate according to encoding parameters,
182 * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
183 */
184 float rate_bandwidth_multiplier = 1.5f;
186 ? (refbits * rate_bandwidth_multiplier * avctx->
sample_rate / 1024)
188
189 /** Compensate for extensions that increase efficiency */
190 if (
s->options.pns ||
s->options.intensity_stereo)
191 frame_bit_rate *= 1.15f;
192
194 bandwidth = avctx->
cutoff;
195 } else {
197 s->psy.cutoff = bandwidth;
198 }
199
200 cutoff = bandwidth * 2 * wlen / avctx->
sample_rate;
202 }
203
204 /**
205 * for values above this the decoder might end up in an endless loop
206 * due to always having more bits than what can be encoded.
207 */
208 destbits =
FFMIN(destbits, 5800);
209 toomanybits =
FFMIN(toomanybits, 5800);
210 toofewbits =
FFMIN(toofewbits, 5800);
211 /**
212 * XXX: some heuristic to determine initial quantizers will reduce search time
213 * determine zero bands and upper distortion limits
214 */
215 min_spread_thr_r = -1;
216 max_spread_thr_r = -1;
219 int nz = 0;
220 float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
222 FFPsyBand *band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
225 continue;
226 }
227 nz = 1;
228 }
229 if (!nz) {
230 uplim = 0.0f;
231 } else {
232 nz = 0;
234 FFPsyBand *band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
236 continue;
240 nz++;
241 }
242 }
243 uplims[
w*16+
g] = uplim;
244 energies[
w*16+
g] = energy;
247 allz |= nz;
249 spread_thr_r[
w*16+
g] = energy * nz / (uplim * spread);
250 if (min_spread_thr_r < 0) {
251 min_spread_thr_r = max_spread_thr_r = spread_thr_r[
w*16+
g];
252 } else {
253 min_spread_thr_r =
FFMIN(min_spread_thr_r, spread_thr_r[
w*16+
g]);
254 max_spread_thr_r =
FFMAX(max_spread_thr_r, spread_thr_r[
w*16+
g]);
255 }
256 }
257 }
258 }
259
260 /** Compute initial scalers */
261 minscaler = 65535;
266 continue;
267 }
268 /**
269 * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
270 * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
271 * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
272 * more robust.
273 */
277 + sfoffs,
280 }
281 }
282
283 /** Clip */
289
290 if (!allz)
291 return;
292 s->aacdsp.abs_pow34(
s->scoefs, sce->
coeffs, 1024);
294
295 for (
i = 0;
i <
sizeof(minsf) /
sizeof(minsf[0]); ++
i)
300 const float *scaled =
s->scoefs + start;
301 int minsfidx;
303 if (maxvals[
w*16+
g] > 0) {
306 minsf[(
w+w2)*16+
g] = minsfidx;
307 }
309 }
310 }
311
312 /**
313 * Scale uplims to match rate distortion to quality
314 * by applying noisy band deprioritization and tonal band prioritization.
315 * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
316 * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
317 * rate distortion requirements.
318 */
319 memcpy(euplims, uplims, sizeof(euplims));
321 /** psy already prioritizes transients to some extent */
331 nzslope * cleanup_factor);
332 energy2uplim *= de_psy_factor;
334 /** In ABR, we need to prioritize less and let rate control do its thing */
335 energy2uplim =
sqrtf(energy2uplim);
336 }
337 energy2uplim =
FFMAX(0.015625
f,
FFMIN(1.0
f, energy2uplim));
338 uplims[
w*16+
g] *=
av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
340
345 2.0f);
346 energy2uplim *= de_psy_factor;
348 /** In ABR, we need to prioritize less and let rate control do its thing */
349 energy2uplim =
sqrtf(energy2uplim);
350 }
351 energy2uplim =
FFMAX(0.015625
f,
FFMIN(1.0
f, energy2uplim));
353 0.5f, 1.0f);
354 }
356 }
357 }
358
359 for (
i = 0;
i <
sizeof(maxsf) /
sizeof(maxsf[0]); ++
i)
361
362 //perform two-loop search
363 //outer loop - improve quality
364 do {
365 //inner loop - quantize spectrum to fit into given number of bits
366 int overdist;
367 int qstep = its ? 1 : 32;
368 do {
369 int changed = 0;
370 prev = -1;
371 recomprd = 0;
372 tbits = 0;
376 const float *coefs = &sce->
coeffs[start];
377 const float *scaled = &
s->scoefs[start];
380 float dist = 0.0f;
381 float qenergy = 0.0f;
382
386 /** PNS isn't free */
388 }
389 continue;
390 }
394 float sqenergy;
396 scaled + w2*128,
400 1.0f,
403 0);
405 qenergy += sqenergy;
406 }
407 dists[
w*16+
g] = dist -
bits;
408 qenergies[
w*16+
g] = qenergy;
409 if (prev != -1) {
412 }
416 }
417 }
418 if (tbits > toomanybits) {
419 recomprd = 1;
420 for (
i = 0;
i < 128;
i++) {
424 if (new_sf != sce->
sf_idx[
i]) {
426 changed = 1;
427 }
428 }
429 }
430 } else if (tbits < toofewbits) {
431 recomprd = 1;
432 for (
i = 0;
i < 128;
i++) {
435 if (new_sf != sce->
sf_idx[
i]) {
437 changed = 1;
438 }
439 }
440 }
441 }
442 qstep >>= 1;
443 if (!qstep && tbits > toomanybits && sce->
sf_idx[0] < 217 && changed)
444 qstep = 1;
445 } while (qstep);
446
447 overdist = 1;
448 fflag = tbits < toofewbits;
449 for (
i = 0;
i < 2 && (overdist || recomprd); ++
i) {
450 if (recomprd) {
451 /** Must recompute distortion */
452 prev = -1;
453 tbits = 0;
457 const float *coefs = sce->
coeffs + start;
458 const float *scaled =
s->scoefs + start;
461 float dist = 0.0f;
462 float qenergy = 0.0f;
463
467 /** PNS isn't free */
469 }
470 continue;
471 }
475 float sqenergy;
477 scaled + w2*128,
481 1.0f,
484 0);
486 qenergy += sqenergy;
487 }
488 dists[
w*16+
g] = dist -
bits;
489 qenergies[
w*16+
g] = qenergy;
490 if (prev != -1) {
493 }
497 }
498 }
499 }
500 if (!
i &&
s->options.pns && its > maxits/2 && tbits > toofewbits) {
501 float maxoverdist = 0.0f;
502 float ovrfactor = 1.f+(maxits-its)*16.
f/maxits;
503 overdist = recomprd = 0;
507 float ovrdist = dists[
w*16+
g] /
FFMAX(uplims[
w*16+
g],euplims[
w*16+
g]);
508 maxoverdist =
FFMAX(maxoverdist, ovrdist);
509 overdist++;
510 }
511 }
512 }
513 if (overdist) {
514 /* We have overdistorted bands, trade for zeroes (that can be noise)
515 * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
516 */
517 float minspread = max_spread_thr_r;
518 float maxspread = min_spread_thr_r;
519 float zspread;
520 int zeroable = 0;
521 int zeroed = 0;
522 int maxzeroed, zloop;
526 minspread =
FFMIN(minspread, spread_thr_r[
w*16+
g]);
527 maxspread =
FFMAX(maxspread, spread_thr_r[
w*16+
g]);
528 zeroable++;
529 }
530 }
531 }
532 zspread = (maxspread-minspread) * 0.0125
f + minspread;
533 /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
534 * and forces the hand of the later search_for_pns step.
535 * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
536 * and leave further PNSing to search_for_pns if worthwhile.
537 */
538 zspread =
FFMIN3(min_spread_thr_r * 8.
f, zspread,
539 ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
540 maxzeroed =
FFMIN(zeroable,
FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
541 for (zloop = 0; zloop < 2; zloop++) {
542 /* Two passes: first distorted stuff - two birds in one shot and all that,
543 * then anything viable. Viable means not zero, but either CB=zero-able
544 * (too high SF), not SF <= 1 (that means we'd be operating at very high
545 * quality, we don't want PNS when doing VHQ), PNS allowed, and within
546 * the lowest ranking percentile.
547 */
548 float loopovrfactor = (zloop) ? 1.0
f : ovrfactor;
550 int mcb;
551 for (
g = sce->
ics.
num_swb-1;
g > 0 && zeroed < maxzeroed;
g--) {
553 continue;
558 || (mcb <= 1 && dists[w*16+g] >
FFMIN(uplims[
w*16+
g], euplims[
w*16+
g]))) ) {
561 zeroed++;
562 }
563 }
564 }
565 }
566 if (zeroed)
567 recomprd = fflag = 1;
568 } else {
569 overdist = 0;
570 }
571 }
572 }
573
579 }
580 }
581 }
582
584 prev = -1;
586 /** Start with big steps, end up fine-tuning */
587 int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
588 int edepth = depth+2;
589 float uplmax = its / (maxits*0.25f) + 1.0
f;
590 uplmax *= (tbits > destbits) ?
FFMIN(2.0
f, tbits / (
float)
FFMAX(1,destbits)) : 1.0
f;
594 if (prev < 0 && !sce->zeroes[
w*16+
g])
597 const float *coefs = sce->
coeffs + start;
598 const float *scaled =
s->scoefs + start;
602 if ((!cmb || dists[
w*16+
g] > uplims[
w*16+
g]) && sce->
sf_idx[
w*16+
g] >
FFMAX(mindeltasf, minsf[
w*16+
g])) {
603 /* Try to make sure there is some energy in every nonzero band
604 * NOTE: This algorithm must be forcibly imbalanced, pushing harder
605 * on holes or more distorted bands at first, otherwise there's
606 * no net gain (since the next iteration will offset all bands
607 * on the opposite direction to compensate for extra bits)
608 */
609 for (
i = 0;
i < edepth && sce->
sf_idx[
w*16+
g] > mindeltasf; ++
i) {
611 float dist, qenergy;
614 dist = qenergy = 0.f;
618 }
else if (
i >= depth && dists[
w*16+
g] < euplims[
w*16+
g]) {
619 break;
620 }
621 /* !g is the DC band, it's important, since quantization error here
622 * applies to less than a cycle, it creates horrible intermodulation
623 * distortion if it doesn't stick to what psy requests
624 */
629 float sqenergy;
631 scaled + w2*128,
635 1.0f,
638 0);
640 qenergy += sqenergy;
641 }
643 dists[
w*16+
g] = dist -
bits;
644 qenergies[
w*16+
g] = qenergy;
645 if (
mb && (sce->
sf_idx[
w*16+
g] < mindeltasf || (
646 (dists[
w*16+
g] <
FFMIN(uplmax*uplims[
w*16+
g], euplims[
w*16+
g]))
647 && (
fabsf(qenergies[
w*16+
g]-energies[
w*16+
g]) < euplims[
w*16+
g])
648 ) )) {
649 break;
650 }
651 }
652 }
else if (tbits > toofewbits && sce->
sf_idx[
w*16+
g] <
FFMIN(maxdeltasf, maxsf[
w*16+
g])
653 && (dists[
w*16+
g] <
FFMIN(euplims[
w*16+
g], uplims[
w*16+
g]))
654 && (
fabsf(qenergies[
w*16+
g]-energies[
w*16+
g]) < euplims[
w*16+
g])
655 ) {
656 /** Um... over target. Save bits for more important stuff. */
657 for (
i = 0;
i < depth && sce->
sf_idx[
w*16+
g] < maxdeltasf; ++
i) {
659 float dist, qenergy;
662 dist = qenergy = 0.f;
666 float sqenergy;
668 scaled + w2*128,
672 1.0f,
675 0);
677 qenergy += sqenergy;
678 }
680 if (dist <
FFMIN(euplims[
w*16+
g], uplims[
w*16+
g])) {
682 dists[
w*16+
g] = dist;
683 qenergies[
w*16+
g] = qenergy;
684 } else {
685 break;
686 }
687 } else {
689 break;
690 }
691 }
692 }
695 fflag = 1;
698 }
700 }
701 }
702
703 /** SF difference limit violation risk. Must re-clamp. */
704 prev = -1;
709 if (prev < 0)
710 prev = prevsf;
714 if (!fflag && prevsf != sce->
sf_idx[
w*16+
g])
715 fflag = 1;
716 }
717 }
718 }
719
720 its++;
721 } while (fflag && its < maxits);
722
723 /** Scout out next nonzero bands */
725
726 prev = -1;
728 /** Make sure proper codebooks are set */
734 /** Cannot zero out, make sure it's not attempted */
736 } else {
739 }
740 }
741 } else {
743 }
744 /** Check that there are no SF delta range violations */
746 if (prev != -1) {
749 }
else if (sce->
zeroes[0]) {
750 /** Set global gain to something useful */
752 }
754 }
755 }
756 }
757 }
758
759 #endif /* AVCODEC_AACCODER_TWOLOOP_H */