FFmpeg: libavcodec/aaccoder_twoloop.h Source File

FFmpeg

[フレーム]

libavcodec

aaccoder_twoloop.h

Go to the documentation of this file.

1 /*

2 * AAC encoder twoloop coder

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 /**

23 * @file

24 * AAC encoder twoloop coder

25 * @author Konstantin Shishkov, Claudio Freire

26 */

28 /**

29 * This file contains a template for the twoloop coder function.

30 * It needs to be provided, externally, as an already included declaration,

31 * the following functions from aacenc_quantization/util.h. They're not included

32 * explicitly here to make it possible to provide alternative implementations:

33 * - quantize_band_cost

34 * - abs_pow34_v

35 * - find_max_val

36 * - find_min_book

37 * - find_form_factor

38 */

40 #ifndef AVCODEC_AACCODER_TWOLOOP_H

41 #define AVCODEC_AACCODER_TWOLOOP_H

43 #include <float.h>

44 #include "libavutil/mathematics.h"

45 #include "mathops.h"

46 #include "avcodec.h"

47 #include "put_bits.h"

48 #include "aac.h"

49 #include "aacenc.h"

50 #include "aactab.h"

51 #include "aacenctab.h"

53 /** Frequency in Hz for lower limit of noise substitution **/

54 #define NOISE_LOW_LIMIT 4000

56 /* Reflects the cost to change codebooks */

57 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)

58 {

59 return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;

60 }

62 /**

63 * two-loop quantizers search taken from ISO 13818-7 Appendix C

64 */

65 static void search_for_quantizers_twoloop(AVCodecContext *avctx,

66 AACEncContext *s,

67 SingleChannelElement *sce,

68 const float lambda)

69 {

70 int start = 0, i, w, w2, g, recomprd;

71 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate

72 / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels)

73 * (lambda / 120.f);

74 int refbits = destbits;

75 int toomanybits, toofewbits;

76 char nzs[128];

77 uint8_t nextband[128];

78 int maxsf[128], minsf[128];

79 float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];

80 float maxvals[128], spread_thr_r[128];

81 float min_spread_thr_r, max_spread_thr_r;

83 /**

84 * rdlambda controls the maximum tolerated distortion. Twoloop

85 * will keep iterating until it fails to lower it or it reaches

86 * ulimit * rdlambda. Keeping it low increases quality on difficult

87 * signals, but lower it too much, and bits will be taken from weak

88 * signals, creating "holes". A balance is necessary.

89 * rdmax and rdmin specify the relative deviation from rdlambda

90 * allowed for tonality compensation

91 */

92 float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);

93 const float nzslope = 1.5f;

94 float rdmin = 0.03125f;

95 float rdmax = 1.0f;

97 /**

98 * sfoffs controls an offset of optmium allocation that will be

99 * applied based on lambda. Keep it real and modest, the loop

100 * will take care of the rest, this just accelerates convergence

101 */

102 float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);

103

104 int fflag, minscaler, nminscaler;

105 int its = 0;

106 int maxits = 30;

107 int allz = 0;

108 int tbits;

109 int cutoff = 1024;

110 int pns_start_pos;

111 int prev;

112

113 /**

114 * zeroscale controls a multiplier of the threshold, if band energy

115 * is below this, a zero is forced. Keep it lower than 1, unless

116 * low lambda is used, because energy < threshold doesn't mean there's

117 * no audible signal outright, it's just energy. Also make it rise

118 * slower than rdlambda, as rdscale has due compensation with

119 * noisy band depriorization below, whereas zeroing logic is rather dumb

120 */

121 float zeroscale;

122 if (lambda > 120.f) {

123 zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);

124 } else {

125 zeroscale = 1.f;

126 }

127

128 if (s->psy.bitres.alloc >= 0) {

129 /**

130 * Psy granted us extra bits to use, from the reservoire

131 * adjust for lambda except what psy already did

132 */

133 destbits = s->psy.bitres.alloc

134 * (lambda / (avctx->global_quality ? avctx->global_quality : 120));

135 }

136

137 if (avctx->flags & AV_CODEC_FLAG_QSCALE) {

138 /**

139 * Constant Q-scale doesn't compensate MS coding on its own

140 * No need to be overly precise, this only controls RD

141 * adjustment CB limits when going overboard

142 */

143 if (s->options.mid_side && s->cur_type == TYPE_CPE)

144 destbits *= 2;

145

146 /**

147 * When using a constant Q-scale, don't adjust bits, just use RD

148 * Don't let it go overboard, though... 8x psy target is enough

149 */

150 toomanybits = 5800;

151 toofewbits = destbits / 16;

152

153 /** Don't offset scalers, just RD */

154 sfoffs = sce->ics.num_windows - 1;

155 rdlambda = sqrtf(rdlambda);

156

157 /** search further */

158 maxits *= 2;

159 } else {

160 /* When using ABR, be strict, but a reasonable leeway is

161 * critical to allow RC to smoothly track desired bitrate

162 * without sudden quality drops that cause audible artifacts.

163 * Symmetry is also desirable, to avoid systematic bias.

164 */

165 toomanybits = destbits + destbits/8;

166 toofewbits = destbits - destbits/8;

167

168 sfoffs = 0;

169 rdlambda = sqrtf(rdlambda);

170 }

171

172 /** and zero out above cutoff frequency */

173 {

174 int wlen = 1024 / sce->ics.num_windows;

175 int bandwidth;

176

177 /**

178 * Scale, psy gives us constant quality, this LP only scales

179 * bitrate by lambda, so we save bits on subjectively unimportant HF

180 * rather than increase quantization noise. Adjust nominal bitrate

181 * to effective bitrate according to encoding parameters,

182 * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.

183 */

184 float rate_bandwidth_multiplier = 1.5f;

185 int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)

186 ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)

187 : (avctx->bit_rate / avctx->ch_layout.nb_channels);

188

189 /** Compensate for extensions that increase efficiency */

190 if (s->options.pns || s->options.intensity_stereo)

191 frame_bit_rate *= 1.15f;

192

193 if (avctx->cutoff > 0) {

194 bandwidth = avctx->cutoff;

195 } else {

196 bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));

197 s->psy.cutoff = bandwidth;

198 }

199

200 cutoff = bandwidth * 2 * wlen / avctx->sample_rate;

201 pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;

202 }

203

204 /**

205 * for values above this the decoder might end up in an endless loop

206 * due to always having more bits than what can be encoded.

207 */

208 destbits = FFMIN(destbits, 5800);

209 toomanybits = FFMIN(toomanybits, 5800);

210 toofewbits = FFMIN(toofewbits, 5800);

211 /**

212 * XXX: some heuristic to determine initial quantizers will reduce search time

213 * determine zero bands and upper distortion limits

214 */

215 min_spread_thr_r = -1;

216 max_spread_thr_r = -1;

217 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

218 for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {

219 int nz = 0;

220 float uplim = 0.0f, energy = 0.0f, spread = 0.0f;

221 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

222 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];

223 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {

224 sce->zeroes[(w+w2)*16+g] = 1;

225 continue;

226 }

227 nz = 1;

228 }

229 if (!nz) {

230 uplim = 0.0f;

231 } else {

232 nz = 0;

233 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

234 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];

235 if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)

236 continue;

237 uplim += band->threshold;

238 energy += band->energy;

239 spread += band->spread;

240 nz++;

241 }

242 }

243 uplims[w*16+g] = uplim;

244 energies[w*16+g] = energy;

245 nzs[w*16+g] = nz;

246 sce->zeroes[w*16+g] = !nz;

247 allz |= nz;

248 if (nz && sce->can_pns[w*16+g]) {

249 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);

250 if (min_spread_thr_r < 0) {

251 min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];

252 } else {

253 min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);

254 max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);

255 }

256 }

257 }

258 }

259

260 /** Compute initial scalers */

261 minscaler = 65535;

262 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

263 for (g = 0; g < sce->ics.num_swb; g++) {

264 if (sce->zeroes[w*16+g]) {

265 sce->sf_idx[w*16+g] = SCALE_ONE_POS;

266 continue;

267 }

268 /**

269 * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).

270 * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,

271 * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus

272 * more robust.

273 */

274 sce->sf_idx[w*16+g] = av_clip(

275 SCALE_ONE_POS

276 + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])

277 + sfoffs,

278 60, SCALE_MAX_POS);

279 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);

280 }

281 }

282

283 /** Clip */

284 minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);

285 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])

286 for (g = 0; g < sce->ics.num_swb; g++)

287 if (!sce->zeroes[w*16+g])

288 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);

289

290 if (!allz)

291 return;

292 s->aacdsp.abs_pow34(s->scoefs, sce->coeffs, 1024);

293 ff_quantize_band_cost_cache_init(s);

294

295 for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)

296 minsf[i] = 0;

297 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

298 start = w*128;

299 for (g = 0; g < sce->ics.num_swb; g++) {

300 const float *scaled = s->scoefs + start;

301 int minsfidx;

302 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);

303 if (maxvals[w*16+g] > 0) {

304 minsfidx = coef2minsf(maxvals[w*16+g]);

305 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)

306 minsf[(w+w2)*16+g] = minsfidx;

307 }

308 start += sce->ics.swb_sizes[g];

309 }

310 }

311

312 /**

313 * Scale uplims to match rate distortion to quality

314 * bu applying noisy band depriorization and tonal band priorization.

315 * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.

316 * If maxval^2 ~ energy, then that band is mostly noise, and we can relax

317 * rate distortion requirements.

318 */

319 memcpy(euplims, uplims, sizeof(euplims));

320 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

321 /** psy already priorizes transients to some extent */

322 float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;

323 start = w*128;

324 for (g = 0; g < sce->ics.num_swb; g++) {

325 if (nzs[g] > 0) {

326 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));

327 float energy2uplim = find_form_factor(

328 sce->ics.group_len[w], sce->ics.swb_sizes[g],

329 uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),

330 sce->coeffs + start,

331 nzslope * cleanup_factor);

332 energy2uplim *= de_psy_factor;

333 if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {

334 /** In ABR, we need to priorize less and let rate control do its thing */

335 energy2uplim = sqrtf(energy2uplim);

336 }

337 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));

338 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)

339 * sce->ics.group_len[w];

340

341 energy2uplim = find_form_factor(

342 sce->ics.group_len[w], sce->ics.swb_sizes[g],

343 uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),

344 sce->coeffs + start,

345 2.0f);

346 energy2uplim *= de_psy_factor;

347 if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {

348 /** In ABR, we need to priorize less and let rate control do its thing */

349 energy2uplim = sqrtf(energy2uplim);

350 }

351 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));

352 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],

353 0.5f, 1.0f);

354 }

355 start += sce->ics.swb_sizes[g];

356 }

357 }

358

359 for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)

360 maxsf[i] = SCALE_MAX_POS;

361

362 //perform two-loop search

363 //outer loop - improve quality

364 do {

365 //inner loop - quantize spectrum to fit into given number of bits

366 int overdist;

367 int qstep = its ? 1 : 32;

368 do {

369 int changed = 0;

370 prev = -1;

371 recomprd = 0;

372 tbits = 0;

373 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

374 start = w*128;

375 for (g = 0; g < sce->ics.num_swb; g++) {

376 const float *coefs = &sce->coeffs[start];

377 const float *scaled = &s->scoefs[start];

378 int bits = 0;

379 int cb;

380 float dist = 0.0f;

381 float qenergy = 0.0f;

382

383 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {

384 start += sce->ics.swb_sizes[g];

385 if (sce->can_pns[w*16+g]) {

386 /** PNS isn't free */

387 tbits += ff_pns_bits(sce, w, g);

388 }

389 continue;

390 }

391 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

392 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

393 int b;

394 float sqenergy;

395 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,

396 scaled + w2*128,

397 sce->ics.swb_sizes[g],

398 sce->sf_idx[w*16+g],

399 cb,

400 1.0f,

401 INFINITY,

402 &b, &sqenergy,

403 0);

404 bits += b;

405 qenergy += sqenergy;

406 }

407 dists[w*16+g] = dist - bits;

408 qenergies[w*16+g] = qenergy;

409 if (prev != -1) {

410 int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);

411 bits += ff_aac_scalefactor_bits[sfdiff];

412 }

413 tbits += bits;

414 start += sce->ics.swb_sizes[g];

415 prev = sce->sf_idx[w*16+g];

416 }

417 }

418 if (tbits > toomanybits) {

419 recomprd = 1;

420 for (i = 0; i < 128; i++) {

421 if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {

422 int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];

423 int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);

424 if (new_sf != sce->sf_idx[i]) {

425 sce->sf_idx[i] = new_sf;

426 changed = 1;

427 }

428 }

429 }

430 } else if (tbits < toofewbits) {

431 recomprd = 1;

432 for (i = 0; i < 128; i++) {

433 if (sce->sf_idx[i] > SCALE_ONE_POS) {

434 int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);

435 if (new_sf != sce->sf_idx[i]) {

436 sce->sf_idx[i] = new_sf;

437 changed = 1;

438 }

439 }

440 }

441 }

442 qstep >>= 1;

443 if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)

444 qstep = 1;

445 } while (qstep);

446

447 overdist = 1;

448 fflag = tbits < toofewbits;

449 for (i = 0; i < 2 && (overdist || recomprd); ++i) {

450 if (recomprd) {

451 /** Must recompute distortion */

452 prev = -1;

453 tbits = 0;

454 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

455 start = w*128;

456 for (g = 0; g < sce->ics.num_swb; g++) {

457 const float *coefs = sce->coeffs + start;

458 const float *scaled = s->scoefs + start;

459 int bits = 0;

460 int cb;

461 float dist = 0.0f;

462 float qenergy = 0.0f;

463

464 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {

465 start += sce->ics.swb_sizes[g];

466 if (sce->can_pns[w*16+g]) {

467 /** PNS isn't free */

468 tbits += ff_pns_bits(sce, w, g);

469 }

470 continue;

471 }

472 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

473 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

474 int b;

475 float sqenergy;

476 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,

477 scaled + w2*128,

478 sce->ics.swb_sizes[g],

479 sce->sf_idx[w*16+g],

480 cb,

481 1.0f,

482 INFINITY,

483 &b, &sqenergy,

484 0);

485 bits += b;

486 qenergy += sqenergy;

487 }

488 dists[w*16+g] = dist - bits;

489 qenergies[w*16+g] = qenergy;

490 if (prev != -1) {

491 int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);

492 bits += ff_aac_scalefactor_bits[sfdiff];

493 }

494 tbits += bits;

495 start += sce->ics.swb_sizes[g];

496 prev = sce->sf_idx[w*16+g];

497 }

498 }

499 }

500 if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {

501 float maxoverdist = 0.0f;

502 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;

503 overdist = recomprd = 0;

504 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

505 for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {

506 if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {

507 float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);

508 maxoverdist = FFMAX(maxoverdist, ovrdist);

509 overdist++;

510 }

511 }

512 }

513 if (overdist) {

514 /* We have overdistorted bands, trade for zeroes (that can be noise)

515 * Zero the bands in the lowest 1.25% spread-energy-threshold ranking

516 */

517 float minspread = max_spread_thr_r;

518 float maxspread = min_spread_thr_r;

519 float zspread;

520 int zeroable = 0;

521 int zeroed = 0;

522 int maxzeroed, zloop;

523 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

524 for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {

525 if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {

526 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);

527 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);

528 zeroable++;

529 }

530 }

531 }

532 zspread = (maxspread-minspread) * 0.0125f + minspread;

533 /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,

534 * and forced the hand of the later search_for_pns step.

535 * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,

536 * and leave further PNSing to search_for_pns if worthwhile.

537 */

538 zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,

539 ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));

540 maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));

541 for (zloop = 0; zloop < 2; zloop++) {

542 /* Two passes: first distorted stuff - two birds in one shot and all that,

543 * then anything viable. Viable means not zero, but either CB=zero-able

544 * (too high SF), not SF <= 1 (that means we'd be operating at very high

545 * quality, we don't want PNS when doing VHQ), PNS allowed, and within

546 * the lowest ranking percentile.

547 */

548 float loopovrfactor = (zloop) ? 1.0f : ovrfactor;

549 int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;

550 int mcb;

551 for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {

552 if (sce->ics.swb_offset[g] < pns_start_pos)

553 continue;

554 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

555 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread

556 && sce->sf_idx[w*16+g] > loopminsf

557 && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))

558 || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {

559 sce->zeroes[w*16+g] = 1;

560 sce->band_type[w*16+g] = 0;

561 zeroed++;

562 }

563 }

564 }

565 }

566 if (zeroed)

567 recomprd = fflag = 1;

568 } else {

569 overdist = 0;

570 }

571 }

572 }

573

574 minscaler = SCALE_MAX_POS;

575 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

576 for (g = 0; g < sce->ics.num_swb; g++) {

577 if (!sce->zeroes[w*16+g]) {

578 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);

579 }

580 }

581 }

582

583 minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);

584 prev = -1;

585 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

586 /** Start with big steps, end up fine-tunning */

587 int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;

588 int edepth = depth+2;

589 float uplmax = its / (maxits*0.25f) + 1.0f;

590 uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;

591 start = w * 128;

592 for (g = 0; g < sce->ics.num_swb; g++) {

593 int prevsc = sce->sf_idx[w*16+g];

594 if (prev < 0 && !sce->zeroes[w*16+g])

595 prev = sce->sf_idx[0];

596 if (!sce->zeroes[w*16+g]) {

597 const float *coefs = sce->coeffs + start;

598 const float *scaled = s->scoefs + start;

599 int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

600 int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);

601 int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);

602 if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {

603 /* Try to make sure there is some energy in every nonzero band

604 * NOTE: This algorithm must be forcibly imbalanced, pushing harder

605 * on holes or more distorted bands at first, otherwise there's

606 * no net gain (since the next iteration will offset all bands

607 * on the opposite direction to compensate for extra bits)

608 */

609 for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {

610 int cb, bits;

611 float dist, qenergy;

612 int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);

613 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

614 dist = qenergy = 0.f;

615 bits = 0;

616 if (!cb) {

617 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);

618 } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {

619 break;

620 }

621 /* !g is the DC band, it's important, since quantization error here

622 * applies to less than a cycle, it creates horrible intermodulation

623 * distortion if it doesn't stick to what psy requests

624 */

625 if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])

626 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);

627 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

628 int b;

629 float sqenergy;

630 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,

631 scaled + w2*128,

632 sce->ics.swb_sizes[g],

633 sce->sf_idx[w*16+g]-1,

634 cb,

635 1.0f,

636 INFINITY,

637 &b, &sqenergy,

638 0);

639 bits += b;

640 qenergy += sqenergy;

641 }

642 sce->sf_idx[w*16+g]--;

643 dists[w*16+g] = dist - bits;

644 qenergies[w*16+g] = qenergy;

645 if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (

646 (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))

647 && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])

648 ) )) {

649 break;

650 }

651 }

652 } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])

653 && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))

654 && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])

655 ) {

656 /** Um... over target. Save bits for more important stuff. */

657 for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {

658 int cb, bits;

659 float dist, qenergy;

660 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);

661 if (cb > 0) {

662 dist = qenergy = 0.f;

663 bits = 0;

664 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {

665 int b;

666 float sqenergy;

667 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,

668 scaled + w2*128,

669 sce->ics.swb_sizes[g],

670 sce->sf_idx[w*16+g]+1,

671 cb,

672 1.0f,

673 INFINITY,

674 &b, &sqenergy,

675 0);

676 bits += b;

677 qenergy += sqenergy;

678 }

679 dist -= bits;

680 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {

681 sce->sf_idx[w*16+g]++;

682 dists[w*16+g] = dist;

683 qenergies[w*16+g] = qenergy;

684 } else {

685 break;

686 }

687 } else {

688 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);

689 break;

690 }

691 }

692 }

693 prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);

694 if (sce->sf_idx[w*16+g] != prevsc)

695 fflag = 1;

696 nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);

697 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

698 }

699 start += sce->ics.swb_sizes[g];

700 }

701 }

702

703 /** SF difference limit violation risk. Must re-clamp. */

704 prev = -1;

705 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

706 for (g = 0; g < sce->ics.num_swb; g++) {

707 if (!sce->zeroes[w*16+g]) {

708 int prevsf = sce->sf_idx[w*16+g];

709 if (prev < 0)

710 prev = prevsf;

711 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);

712 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

713 prev = sce->sf_idx[w*16+g];

714 if (!fflag && prevsf != sce->sf_idx[w*16+g])

715 fflag = 1;

716 }

717 }

718 }

719

720 its++;

721 } while (fflag && its < maxits);

722

723 /** Scout out next nonzero bands */

724 ff_init_nextband_map(sce, nextband);

725

726 prev = -1;

727 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {

728 /** Make sure proper codebooks are set */

729 for (g = 0; g < sce->ics.num_swb; g++) {

730 if (!sce->zeroes[w*16+g]) {

731 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);

732 if (sce->band_type[w*16+g] <= 0) {

733 if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {

734 /** Cannot zero out, make sure it's not attempted */

735 sce->band_type[w*16+g] = 1;

736 } else {

737 sce->zeroes[w*16+g] = 1;

738 sce->band_type[w*16+g] = 0;

739 }

740 }

741 } else {

742 sce->band_type[w*16+g] = 0;

743 }

744 /** Check that there's no SF delta range violations */

745 if (!sce->zeroes[w*16+g]) {

746 if (prev != -1) {

747 av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;

748 av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);

749 } else if (sce->zeroes[0]) {

750 /** Set global gain to something useful */

751 sce->sf_idx[0] = sce->sf_idx[w*16+g];

752 }

753 prev = sce->sf_idx[w*16+g];

754 }

755 }

756 }

757 }

758

759 #endif /* AVCODEC_AACCODER_TWOLOOP_H */

INFINITY

#define INFINITY

Definition: mathematics.h:118

av_clip

#define av_clip

Definition: common.h:100

SingleChannelElement::can_pns

uint8_t can_pns[128]

band is allowed to PNS (informative)

Definition: aacenc.h:135

AVCodecContext::sample_rate

int sample_rate

samples per second

Definition: avcodec.h:1056

static double cb(void *priv, double x, double y)

Definition: vf_geq.c:247

aacenctab.h

log2f

#define log2f(x)

Definition: libm.h:409

AV_CODEC_FLAG_QSCALE

#define AV_CODEC_FLAG_QSCALE

Use fixed qscale.

Definition: avcodec.h:224

SingleChannelElement::zeroes

uint8_t zeroes[128]

band is not coded

Definition: aacenc.h:134

av_unused

#define av_unused

Definition: attributes.h:131

uint8_t w

Definition: llviddspenc.c:38

#define b

Definition: input.c:41

float.h

mathematics.h

ff_sfdelta_can_remove_band

static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)

Definition: aacenc_utils.h:208

FFMAX

#define FFMAX(a, b)

Definition: macros.h:47

AVChannelLayout::nb_channels

int nb_channels

Number of channels in this layout.

Definition: channel_layout.h:321

SCALE_MAX_POS

#define SCALE_MAX_POS

scalefactor index maximum value

Definition: aac.h:89

IndividualChannelStream::num_swb

int num_swb

number of scalefactor window bands

Definition: aacdec.h:171

SingleChannelElement::coeffs

float coeffs[1024]

coefficients for IMDCT, maybe processed

Definition: aacenc.h:139

AVCodecContext::ch_layout

AVChannelLayout ch_layout

Audio channel layout.

Definition: avcodec.h:1071

SCALE_DIV_512

#define SCALE_DIV_512

scalefactor difference that corresponds to scale difference in 512 times

Definition: aac.h:87

TYPE_CPE

@ TYPE_CPE

Definition: aac.h:41

find_form_factor

static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope)

Definition: aacenc_utils.h:80

AVCodecContext::flags

int flags

AV_CODEC_FLAG_*.

Definition: avcodec.h:508

fabsf

static __device__ float fabsf(float a)

Definition: cuda_runtime.h:181

SingleChannelElement::ics

IndividualChannelStream ics

Definition: aacdec.h:211

#define s(width, name)

Definition: cbs_vp9.c:198

AVCodecContext::global_quality

int global_quality

Global quality for codecs which cannot change it per frame.

Definition: avcodec.h:1249

IndividualChannelStream::swb_sizes

const uint8_t * swb_sizes

table of scalefactor band sizes for a particular window

Definition: aacenc.h:98

const char * g

Definition: vf_curves.c:128

bits

uint8_t bits

Definition: vp3data.h:128

SCALE_DIFF_ZERO

#define SCALE_DIFF_ZERO

codebook index corresponding to zero scalefactor indices difference

Definition: aac.h:91

quantize_band_cost_cached

static float quantize_band_cost_cached(struct AACEncContext *s, int w, int g, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)

Definition: aacenc_quantization_misc.h:31

AVCodecContext::bit_rate

int64_t bit_rate

the average bitrate

Definition: avcodec.h:501

mathops.h

FFPsyBand

single band psychoacoustic information

Definition: psymodel.h:50

aac.h

aactab.h

sqrtf

static __device__ float sqrtf(float a)

Definition: cuda_runtime.h:184

ff_init_nextband_map

static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)

Definition: aacenc_utils.h:175

av_clipf

Definition: af_crystalizer.c:122

SingleChannelElement::sf_idx

int sf_idx[128]

scalefactor indices

Definition: aacenc.h:133

ff_aac_scalefactor_bits

const uint8_t ff_aac_scalefactor_bits[121]

Definition: aactab.c:204

coef2minsf

static uint8_t coef2minsf(float coef)

Return the minimum scalefactor where the quantized coef does not clip.

Definition: aacenc_utils.h:133

Definition: af_crystalizer.c:122

powf

#define powf(x, y)

Definition: libm.h:50

for

for(k=2;k<=8;++k)

Definition: h264pred_template.c:425

search_for_quantizers_twoloop

static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)

two-loop quantizers search taken from ISO 13818-7 Appendix C

Definition: aaccoder_twoloop.h:65

SingleChannelElement::band_type

enum BandType band_type[128]

band types

Definition: aacdec.h:214

#define mb

Definition: vf_colormatrix.c:99

SCALE_MAX_DIFF

#define SCALE_MAX_DIFF

maximum scalefactor difference allowed by standard

Definition: aac.h:90

AAC_CUTOFF_FROM_BITRATE

#define AAC_CUTOFF_FROM_BITRATE(bit_rate, channels, sample_rate)

Definition: psymodel.h:35

SingleChannelElement

Single Channel Element - used for both SCE and LFE elements.

Definition: aacdec.h:210

#define i(width, name, range_min, range_max)

Definition: cbs_h2645.c:256

IndividualChannelStream::num_windows

int num_windows

Definition: aacdec.h:172

SCALE_ONE_POS

#define SCALE_ONE_POS

scalefactor index that corresponds to scale=1.0

Definition: aac.h:88

find_min_book

static int find_min_book(float maxval, int sf)

Definition: aacenc_utils.h:68

FFMIN3

#define FFMIN3(a, b, c)

Definition: macros.h:50

FFPsyBand::threshold

float threshold

Definition: psymodel.h:53

IndividualChannelStream::swb_offset

const uint16_t * swb_offset

table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...

Definition: aacdec.h:170

AVCodecContext::cutoff

int cutoff

Audio cutoff bandwidth (0 means "automatic")

Definition: avcodec.h:1096

av_assert1

#define av_assert1(cond)

assert() equivalent, that does not lie in speed critical code.

Definition: avassert.h:56

FFMIN

#define FFMIN(a, b)

Definition: macros.h:49

NOISE_LOW_LIMIT

#define NOISE_LOW_LIMIT

This file contains a template for the twoloop coder function.

Definition: aaccoder_twoloop.h:54

ff_sqrf

static av_const float ff_sqrf(float a)

Definition: mathops.h:238

avcodec.h

AACEncContext

AAC encoder context.

Definition: aacenc.h:212

FFPsyBand::energy

float energy

Definition: psymodel.h:52

AVCodecContext

main external API structure.

Definition: avcodec.h:451

find_max_val

static float find_max_val(int group_len, int swb_size, const float *scaled)

Definition: aacenc_utils.h:56

FFMAX3

#define FFMAX3(a, b, c)

Definition: macros.h:48

FFPsyBand::spread

float spread

Definition: psymodel.h:54

put_bits.h

IndividualChannelStream::group_len

uint8_t group_len[8]

Definition: aacdec.h:168

ff_pns_bits

static int ff_pns_bits(SingleChannelElement *sce, int w, int g)

Definition: aaccoder_twoloop.h:57

ff_quantize_band_cost_cache_init

void ff_quantize_band_cost_cache_init(struct AACEncContext *s)

Definition: aacenc.c:401

aacenc.h

Generated on Fri Aug 22 2025 13:58:06 for FFmpeg by doxygen 1.8.17