FFmpeg: libavcodec/wmavoice.c Source File

FFmpeg

[フレーム]

libavcodec

wmavoice.c

Go to the documentation of this file.

1 /*

2 * Windows Media Audio Voice decoder.

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 /**

23 * @file

24 * @brief Windows Media Audio Voice compatible decoder

25 * @author Ronald S. Bultje <rsbultje@gmail.com>

26 */

28 #include <math.h>

30 #include "libavutil/channel_layout.h"

31 #include "libavutil/float_dsp.h"

32 #include "libavutil/mem.h"

33 #include "libavutil/mem_internal.h"

34 #include "libavutil/thread.h"

35 #include "libavutil/tx.h"

36 #include "avcodec.h"

37 #include "codec_internal.h"

38 #include "decode.h"

39 #include "get_bits.h"

40 #include "put_bits.h"

41 #include "wmavoice_data.h"

42 #include "celp_filters.h"

43 #include "acelp_vectors.h"

44 #include "acelp_filters.h"

45 #include "lsp.h"

46 #include "sinewin.h"

48 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame

49 #define MAX_LSPS 16 ///< maximum filter order

50 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple

51 ///< of 16 for ASM input buffer alignment

52 #define MAX_FRAMES 3 ///< maximum number of frames per superframe

53 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame

54 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history

55 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)

56 ///< maximum number of samples per superframe

57 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that

58 ///< was split over two packets

59 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration

61 /**

62 * Frame type VLC coding.

63 */

64 static VLCElem frame_type_vlc[132];

66 /**

67 * Adaptive codebook types.

68 */

69 enum {

70 ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)

71 ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which

72 ///< we interpolate to get a per-sample pitch.

73 ///< Signal is generated using an asymmetric sinc

74 ///< window function

75 ///< @note see #wmavoice_ipol1_coeffs

76 ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using

77 ///< a Hamming sinc window function

78 ///< @note see #wmavoice_ipol2_coeffs

79 };

81 /**

82 * Fixed codebook types.

83 */

84 enum {

85 FCB_TYPE_SILENCE = 0, ///< comfort noise during silence

86 ///< generated from a hardcoded (fixed) codebook

87 ///< with per-frame (low) gain values

88 FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block

89 ///< gain values

90 FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,

91 ///< used in particular for low-bitrate streams

92 FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in

93 ///< combinations of either single pulses or

94 ///< pulse pairs

95 };

97 /**

98 * Description of frame types.

99 */

100 static const struct frame_type_desc {

101 uint8_t n_blocks; ///< amount of blocks per frame (each block

102 ///< (contains 160/#n_blocks samples)

103 uint8_t log_n_blocks; ///< log2(#n_blocks)

104 uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)

105 uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)

106 uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs

107 ///< (rather than just one single pulse)

108 ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES

109 } frame_descs[17] = {

110 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },

111 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },

112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0 },

113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2 },

114 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5 },

115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0 },

116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2 },

117 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5 },

118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0 },

119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2 },

120 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5 },

121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0 },

122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2 },

123 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5 },

124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0 },

125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2 },

126 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5 }

127 };

128

129 /**

130 * WMA Voice decoding context.

131 */

132 typedef struct WMAVoiceContext {

133 /**

134 * @name Global values specified in the stream header / extradata or used all over.

135 * @{

136 */

137 GetBitContext gb; ///< packet bitreader. During decoder init,

138 ///< it contains the extradata from the

139 ///< demuxer. During decoding, it contains

140 ///< packet data.

141 int8_t vbm_tree[25]; ///< converts VLC codes to frame type

142

143 int spillover_bitsize; ///< number of bits used to specify

144 ///< #spillover_nbits in the packet header

145 ///< = ceil(log2(ctx->block_align << 3))

146 int history_nsamples; ///< number of samples in history for signal

147 ///< prediction (through ACB)

148

149 /* postfilter specific values */

150 int do_apf; ///< whether to apply the averaged

151 ///< projection filter (APF)

152 int denoise_strength; ///< strength of denoising in Wiener filter

153 ///< [0-11]

154 int denoise_tilt_corr; ///< Whether to apply tilt correction to the

155 ///< Wiener filter coefficients (postfilter)

156 int dc_level; ///< Predicted amount of DC noise, based

157 ///< on which a DC removal filter is used

158

159 int lsps; ///< number of LSPs per frame [10 or 16]

160 int lsp_q_mode; ///< defines quantizer defaults [0, 1]

161 int lsp_def_mode; ///< defines different sets of LSP defaults

162 ///< [0, 1]

163

164 int min_pitch_val; ///< base value for pitch parsing code

165 int max_pitch_val; ///< max value + 1 for pitch parsing

166 int pitch_nbits; ///< number of bits used to specify the

167 ///< pitch value in the frame header

168 int block_pitch_nbits; ///< number of bits used to specify the

169 ///< first block's pitch value

170 int block_pitch_range; ///< range of the block pitch

171 int block_delta_pitch_nbits; ///< number of bits used to specify the

172 ///< delta pitch between this and the last

173 ///< block's pitch value, used in all but

174 ///< first block

175 int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is

176 ///< from -this to +this-1)

177 uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale

178 ///< conversion

179

180 /**

181 * @}

182 *

183 * @name Packet values specified in the packet header or related to a packet.

184 *

185 * A packet is considered to be a single unit of data provided to this

186 * decoder by the demuxer.

187 * @{

188 */

189 int spillover_nbits; ///< number of bits of the previous packet's

190 ///< last superframe preceding this

191 ///< packet's first full superframe (useful

192 ///< for re-synchronization also)

193 int has_residual_lsps; ///< if set, superframes contain one set of

194 ///< LSPs that cover all frames, encoded as

195 ///< independent and residual LSPs; if not

196 ///< set, each frame contains its own, fully

197 ///< independent, LSPs

198 int skip_bits_next; ///< number of bits to skip at the next call

199 ///< to #wmavoice_decode_packet() (since

200 ///< they're part of the previous superframe)

201

202 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + AV_INPUT_BUFFER_PADDING_SIZE]; ///<

203 ///< cache for superframe data split over

204 ///< multiple packets

205 int sframe_cache_size; ///< set to >0 if we have data from an

206 ///< (incomplete) superframe from a previous

207 ///< packet that spilled over in the current

208 ///< packet; specifies the amount of bits in

209 ///< #sframe_cache

210 PutBitContext pb; ///< bitstream writer for #sframe_cache

211

212 /**

213 * @}

214 *

215 * @name Frame and superframe values

216 * Superframe and frame data - these can change from frame to frame,

217 * although some of them do in that case serve as a cache / history for

218 * the next frame or superframe.

219 * @{

220 */

221 double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous

222 ///< superframe

223 int last_pitch_val; ///< pitch value of the previous frame

224 int last_acb_type; ///< frame type [0-2] of the previous frame

225 int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)

226 ///< << 16) / #MAX_FRAMESIZE

227 float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE

228

229 int aw_idx_is_ext; ///< whether the AW index was encoded in

230 ///< 8 bits (instead of 6)

231 int aw_pulse_range; ///< the range over which #aw_pulse_set1()

232 ///< can apply the pulse, relative to the

233 ///< value in aw_first_pulse_off. The exact

234 ///< position of the first AW-pulse is within

235 ///< [pulse_off, pulse_off + this], and

236 ///< depends on bitstream values; [16 or 24]

237 int aw_n_pulses[2]; ///< number of AW-pulses in each block; note

238 ///< that this number can be negative (in

239 ///< which case it basically means "zero")

240 int aw_first_pulse_off[2]; ///< index of first sample to which to

241 ///< apply AW-pulses, or -0xff if unset

242 int aw_next_pulse_off_cache; ///< the position (relative to start of the

243 ///< second block) at which pulses should

244 ///< start to be positioned, serves as a

245 ///< cache for pitch-adaptive window pulses

246 ///< between blocks

247

248 int frame_cntr; ///< current frame index [0 - 0xFFFE]; is

249 ///< only used for comfort noise in #pRNG()

250 int nb_superframes; ///< number of superframes in current packet

251 float gain_pred_err[6]; ///< cache for gain prediction

252 float excitation_history[MAX_SIGNAL_HISTORY]; ///< cache of the signal of

253 ///< previous superframes, used as a history

254 ///< for signal generation

255 float synth_history[MAX_LSPS]; ///< see #excitation_history

256 /**

257 * @}

258 *

259 * @name Postfilter values

260 *

261 * Variables used for postfilter implementation, mostly history for

262 * smoothing and so on, and context variables for FFT/iFFT.

263 * @{

264 */

265 AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the

266 av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)

267 AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert

268 av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)

269 float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]

270 ///< range

271 float postfilter_agc; ///< gain control memory, used in

272 ///< #adaptive_gain_control()

273 float dcf_mem[2]; ///< DC filter history

274 /// zero filter output (i.e. excitation) by postfilter

275 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];

276 float denoise_filter_cache[MAX_FRAMESIZE];

277 int denoise_filter_cache_size; ///< samples in #denoise_filter_cache

278 /// aligned buffer for LPC tilting

279 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];

280 /// aligned buffer for denoise coefficients

281 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x82];

282 /// aligned buffer for postfilter speech synthesis

283 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];

284 /**

285 * @}

286 */

287 } WMAVoiceContext;

288

289 /**

290 * Set up the variable bit mode (VBM) tree from container extradata.

291 * @param gb bit I/O context.

292 * The bit context (s->gb) should be loaded with byte 23-46 of the

293 * container extradata (i.e. the ones containing the VBM tree).

294 * @param vbm_tree pointer to array to which the decoded VBM tree will be

295 * written.

296 * @return 0 on success, <0 on error.

297 */

298 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])

299 {

300 int cntr[8] = { 0 }, n, res;

301

302 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);

303 for (n = 0; n < 17; n++) {

304 res = get_bits(gb, 3);

305 if (cntr[res] > 3) // should be >= 3 + (res == 7))

306 return -1;

307 vbm_tree[res * 3 + cntr[res]++] = n;

308 }

309 return 0;

310 }

311

312 static av_cold void wmavoice_init_static_data(void)

313 {

314 static const uint8_t bits[] = {

315 2, 2, 2, 4, 4, 4,

316 6, 6, 6, 8, 8, 8,

317 10, 10, 10, 12, 12, 12,

318 14, 14, 14, 14

319 };

320

321 VLC_INIT_STATIC_TABLE_FROM_LENGTHS(frame_type_vlc, VLC_NBITS,

322 FF_ARRAY_ELEMS(bits), bits,

323 1, NULL, 0, 0, 0, 0);

324 }

325

326 static av_cold void wmavoice_flush(AVCodecContext *ctx)

327 {

328 WMAVoiceContext *s = ctx->priv_data;

329 int n;

330

331 s->postfilter_agc = 0;

332 s->sframe_cache_size = 0;

333 s->skip_bits_next = 0;

334 for (n = 0; n < s->lsps; n++)

335 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);

336 memset(s->excitation_history, 0,

337 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);

338 memset(s->synth_history, 0,

339 sizeof(*s->synth_history) * MAX_LSPS);

340 memset(s->gain_pred_err, 0,

341 sizeof(s->gain_pred_err));

342

343 if (s->do_apf) {

344 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,

345 sizeof(*s->synth_filter_out_buf) * s->lsps);

346 memset(s->dcf_mem, 0,

347 sizeof(*s->dcf_mem) * 2);

348 memset(s->zero_exc_pf, 0,

349 sizeof(*s->zero_exc_pf) * s->history_nsamples);

350 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));

351 }

352 }

353

354 /**

355 * Set up decoder with parameters from demuxer (extradata etc.).

356 */

357 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)

358 {

359 static AVOnce init_static_once = AV_ONCE_INIT;

360 int n, flags, pitch_range, lsp16_flag, ret;

361 WMAVoiceContext *s = ctx->priv_data;

362

363 ff_thread_once(&init_static_once, wmavoice_init_static_data);

364

365 /**

366 * Extradata layout:

367 * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),

368 * - byte 19-22: flags field (annoyingly in LE; see below for known

369 * values),

370 * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,

371 * rest is 0).

372 */

373 if (ctx->extradata_size != 46) {

374 av_log(ctx, AV_LOG_ERROR,

375 "Invalid extradata size %d (should be 46)\n",

376 ctx->extradata_size);

377 return AVERROR_INVALIDDATA;

378 }

379 if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {

380 av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);

381 return AVERROR_INVALIDDATA;

382 }

383

384 flags = AV_RL32(ctx->extradata + 18);

385 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);

386 s->do_apf = flags & 0x1;

387 if (s->do_apf) {

388 float scale = 1.0f;

389

390 ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);

391 if (ret < 0)

392 return ret;

393

394 ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);

395 if (ret < 0)

396 return ret;

397

398 scale = 1.0 / (1 << 6);

399 ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);

400 if (ret < 0)

401 return ret;

402

403 scale = 1.0 / (1 << 6);

404 ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);

405 if (ret < 0)

406 return ret;

407

408 ff_sine_window_init(s->cos, 256);

409 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));

410 for (n = 0; n < 255; n++) {

411 s->sin[n] = -s->sin[510 - n];

412 s->cos[510 - n] = s->cos[n];

413 }

414 }

415 s->denoise_strength = (flags >> 2) & 0xF;

416 if (s->denoise_strength >= 12) {

417 av_log(ctx, AV_LOG_ERROR,

418 "Invalid denoise filter strength %d (max=11)\n",

419 s->denoise_strength);

420 return AVERROR_INVALIDDATA;

421 }

422 s->denoise_tilt_corr = !!(flags & 0x40);

423 s->dc_level = (flags >> 7) & 0xF;

424 s->lsp_q_mode = !!(flags & 0x2000);

425 s->lsp_def_mode = !!(flags & 0x4000);

426 lsp16_flag = flags & 0x1000;

427 if (lsp16_flag) {

428 s->lsps = 16;

429 } else {

430 s->lsps = 10;

431 }

432 for (n = 0; n < s->lsps; n++)

433 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);

434

435 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);

436 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {

437 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");

438 return AVERROR_INVALIDDATA;

439 }

440

441 if (ctx->sample_rate >= INT_MAX / (256 * 37))

442 return AVERROR_INVALIDDATA;

443

444 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;

445 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;

446 pitch_range = s->max_pitch_val - s->min_pitch_val;

447 if (pitch_range <= 0) {

448 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");

449 return AVERROR_INVALIDDATA;

450 }

451 s->pitch_nbits = av_ceil_log2(pitch_range);

452 s->last_pitch_val = 40;

453 s->last_acb_type = ACB_TYPE_NONE;

454 s->history_nsamples = s->max_pitch_val + 8;

455

456 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {

457 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,

458 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;

459

460 av_log(ctx, AV_LOG_ERROR,

461 "Unsupported samplerate %d (min=%d, max=%d)\n",

462 ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz

463

464 return AVERROR(ENOSYS);

465 }

466

467 s->block_conv_table[0] = s->min_pitch_val;

468 s->block_conv_table[1] = (pitch_range * 25) >> 6;

469 s->block_conv_table[2] = (pitch_range * 44) >> 6;

470 s->block_conv_table[3] = s->max_pitch_val - 1;

471 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;

472 if (s->block_delta_pitch_hrange <= 0) {

473 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");

474 return AVERROR_INVALIDDATA;

475 }

476 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);

477 s->block_pitch_range = s->block_conv_table[2] +

478 s->block_conv_table[3] + 1 +

479 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);

480 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);

481

482 av_channel_layout_uninit(&ctx->ch_layout);

483 ctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

484 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;

485

486 return 0;

487 }

488

489 /**

490 * @name Postfilter functions

491 * Postfilter functions (gain control, wiener denoise filter, DC filter,

492 * kalman smoothening, plus surrounding code to wrap it)

493 * @{

494 */

495 /**

496 * Adaptive gain control (as used in postfilter).

497 *

498 * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except

499 * that the energy here is calculated using sum(abs(...)), whereas the

500 * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).

501 *

502 * @param out output buffer for filtered samples

503 * @param in input buffer containing the samples as they are after the

504 * postfilter steps so far

505 * @param speech_synth input buffer containing speech synth before postfilter

506 * @param size input buffer size

507 * @param alpha exponential filter factor

508 * @param gain_mem pointer to filter memory (single float)

509 */

510 static void adaptive_gain_control(float *out, const float *in,

511 const float *speech_synth,

512 int size, float alpha, float *gain_mem)

513 {

514 int i;

515 float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;

516 float mem = *gain_mem;

517

518 for (i = 0; i < size; i++) {

519 speech_energy += fabsf(speech_synth[i]);

520 postfilter_energy += fabsf(in[i]);

521 }

522 gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :

523 (1.0 - alpha) * speech_energy / postfilter_energy;

524

525 for (i = 0; i < size; i++) {

526 mem = alpha * mem + gain_scale_factor;

527 out[i] = in[i] * mem;

528 }

529

530 *gain_mem = mem;

531 }

532

533 /**

534 * Kalman smoothing function.

535 *

536 * This function looks back pitch +/- 3 samples back into history to find

537 * the best fitting curve (that one giving the optimal gain of the two

538 * signals, i.e. the highest dot product between the two), and then

539 * uses that signal history to smoothen the output of the speech synthesis

540 * filter.

541 *

542 * @param s WMA Voice decoding context

543 * @param pitch pitch of the speech signal

544 * @param in input speech signal

545 * @param out output pointer for smoothened signal

546 * @param size input/output buffer size

547 *

548 * @returns -1 if no smoothening took place, e.g. because no optimal

549 * fit could be found, or 0 on success.

550 */

551 static int kalman_smoothen(WMAVoiceContext *s, int pitch,

552 const float *in, float *out, int size)

553 {

554 int n;

555 float optimal_gain = 0, dot;

556 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],

557 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],

558 *best_hist_ptr = NULL;

559

560 /* find best fitting point in history */

561 do {

562 dot = ff_scalarproduct_float_c(in, ptr, size);

563 if (dot > optimal_gain) {

564 optimal_gain = dot;

565 best_hist_ptr = ptr;

566 }

567 } while (--ptr >= end);

568

569 if (optimal_gain <= 0)

570 return -1;

571 dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);

572 if (dot <= 0) // would be 1.0

573 return -1;

574

575 if (optimal_gain <= dot) {

576 dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000

577 } else

578 dot = 0.625;

579

580 /* actual smoothing */

581 for (n = 0; n < size; n++)

582 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);

583

584 return 0;

585 }

586

587 /**

588 * Get the tilt factor of a formant filter from its transfer function

589 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,

590 * but somehow (??) it does a speech synthesis filter in the

591 * middle, which is missing here

592 *

593 * @param lpcs LPC coefficients

594 * @param n_lpcs Size of LPC buffer

595 * @returns the tilt factor

596 */

597 static float tilt_factor(const float *lpcs, int n_lpcs)

598 {

599 float rh0, rh1;

600

601 rh0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);

602 rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

603

604 return rh1 / rh0;

605 }

606

607 /**

608 * Derive denoise filter coefficients (in real domain) from the LPCs.

609 */

610 static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,

611 int fcb_type, float *coeffs_dst, int remainder)

612 {

613 float last_coeff, min = 15.0, max = -15.0;

614 float irange, angle_mul, gain_mul, range, sq;

615 LOCAL_ALIGNED_32(float, coeffs, [0x82]);

616 LOCAL_ALIGNED_32(float, lpcs, [0x82]);

617 LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);

618 int n, idx;

619

620 memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));

621

622 /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */

623 s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));

624 #define log_range(var, assign) do { \

625 float tmp = log10f(assign); var = tmp; \

626 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \

627 } while (0)

628 log_range(last_coeff, lpcs[64] * lpcs[64]);

629 for (n = 1; n < 64; n++)

630 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +

631 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);

632 log_range(lpcs[0], lpcs[0] * lpcs[0]);

633 #undef log_range

634 range = max - min;

635 lpcs[64] = last_coeff;

636

637 /* Now, use this spectrum to pick out these frequencies with higher

638 * (relative) power/energy (which we then take to be "not noise"),

639 * and set up a table (still in lpc[]) of (relative) gains per frequency.

640 * These frequencies will be maintained, while others ("noise") will be

641 * decreased in the filter output. */

642 irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]

643 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :

644 (5.0 / 14.7));

645 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);

646 for (n = 0; n <= 64; n++) {

647 float pwr;

648

649 idx = lrint((max - lpcs[n]) * irange - 1);

650 idx = FFMAX(0, idx);

651 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];

652 lpcs[n] = angle_mul * pwr;

653

654 /* 70.57 =~ 1/log10(1.0331663) */

655 idx = av_clipd((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);

656

657 if (idx > 127) { // fall back if index falls outside table range

658 coeffs[n] = wmavoice_energy_table[127] *

659 powf(1.0331663, idx - 127);

660 } else

661 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];

662 }

663

664 /* calculate the Hilbert transform of the gains, which we do (since this

665 * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).

666 * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the

667 * "moment" of the LPCs in this filter. */

668 s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));

669 s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));

670

671 /* Split out the coefficient indexes into phase/magnitude pairs */

672 idx = 255 + av_clip(lpcs[64], -255, 255);

673 coeffs[0] = coeffs[0] * s->cos[idx];

674 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);

675 last_coeff = coeffs[64] * s->cos[idx];

676 for (n = 63;; n--) {

677 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);

678 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];

679 coeffs[n * 2] = coeffs[n] * s->cos[idx];

680

681 if (!--n) break;

682

683 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);

684 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];

685 coeffs[n * 2] = coeffs[n] * s->cos[idx];

686 }

687 coeffs[64] = last_coeff;

688

689 /* move into real domain */

690 s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));

691

692 /* tilt correction and normalize scale */

693 memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));

694 if (s->denoise_tilt_corr) {

695 float tilt_mem = 0;

696

697 coeffs_dst[remainder - 1] = 0;

698 ff_tilt_compensation(&tilt_mem,

699 -1.8 * tilt_factor(coeffs_dst, remainder - 1),

700 coeffs_dst, remainder);

701 }

702 sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs_dst, coeffs_dst,

703 remainder));

704 for (n = 0; n < remainder; n++)

705 coeffs_dst[n] *= sq;

706 }

707

708 /**

709 * This function applies a Wiener filter on the (noisy) speech signal as

710 * a means to denoise it.

711 *

712 * - take RDFT of LPCs to get the power spectrum of the noise + speech;

713 * - using this power spectrum, calculate (for each frequency) the Wiener

714 * filter gain, which depends on the frequency power and desired level

715 * of noise subtraction (when set too high, this leads to artifacts)

716 * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse

717 * of 4-8kHz);

718 * - by doing a phase shift, calculate the Hilbert transform of this array

719 * of per-frequency filter-gains to get the filtering coefficients;

720 * - smoothen/normalize/de-tilt these filter coefficients as desired;

721 * - take RDFT of noisy sound, apply the coefficients and take its IRDFT

722 * to get the denoised speech signal;

723 * - the leftover (i.e. output of the IRDFT on denoised speech data beyond

724 * the frame boundary) are saved and applied to subsequent frames by an

725 * overlap-add method (otherwise you get clicking-artifacts).

726 *

727 * @param s WMA Voice decoding context

728 * @param fcb_type Frame (codebook) type

729 * @param synth_pf input: the noisy speech signal, output: denoised speech

730 * data; should be 16-byte aligned (for ASM purposes)

731 * @param size size of the speech data

732 * @param lpcs LPCs used to synthesize this frame's speech data

733 */

734 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,

735 float *synth_pf, int size,

736 const float *lpcs)

737 {

738 int remainder, lim, n;

739

740 if (fcb_type != FCB_TYPE_SILENCE) {

741 LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);

742 LOCAL_ALIGNED_32(float, synth_f, [0x82]);

743 float *tilted_lpcs = s->tilted_lpcs_pf,

744 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;

745

746 tilted_lpcs[0] = 1.0;

747 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);

748 memset(&tilted_lpcs[s->lsps + 1], 0,

749 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));

750 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),

751 tilted_lpcs, s->lsps + 2);

752

753 /* The IRDFT output (127 samples for 7-bit filter) beyond the frame

754 * size is applied to the next frame. All input beyond this is zero,

755 * and thus all output beyond this will go towards zero, hence we can

756 * limit to min(size-1, 127-size) as a performance consideration. */

757 remainder = FFMIN(127 - size, size - 1);

758 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);

759

760 /* apply coefficients (in frequency spectrum domain), i.e. complex

761 * number multiplication */

762 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));

763 s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));

764 s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));

765 synth_f[0] *= coeffs_f[0];

766 synth_f[1] *= coeffs_f[1];

767 for (n = 1; n <= 64; n++) {

768 float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];

769 synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];

770 synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];

771 }

772 s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));

773 }

774

775 /* merge filter output with the history of previous runs */

776 if (s->denoise_filter_cache_size) {

777 lim = FFMIN(s->denoise_filter_cache_size, size);

778 for (n = 0; n < lim; n++)

779 synth_pf[n] += s->denoise_filter_cache[n];

780 s->denoise_filter_cache_size -= lim;

781 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],

782 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);

783 }

784

785 /* move remainder of filter output into a cache for future runs */

786 if (fcb_type != FCB_TYPE_SILENCE) {

787 lim = FFMIN(remainder, s->denoise_filter_cache_size);

788 for (n = 0; n < lim; n++)

789 s->denoise_filter_cache[n] += synth_pf[size + n];

790 if (lim < remainder) {

791 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],

792 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));

793 s->denoise_filter_cache_size = remainder;

794 }

795 }

796 }

797

798 /**

799 * Averaging projection filter, the postfilter used in WMAVoice.

800 *

801 * This uses the following steps:

802 * - A zero-synthesis filter (generate excitation from synth signal)

803 * - Kalman smoothing on excitation, based on pitch

804 * - Re-synthesized smoothened output

805 * - Iterative Wiener denoise filter

806 * - Adaptive gain filter

807 * - DC filter

808 *

809 * @param s WMAVoice decoding context

810 * @param synth Speech synthesis output (before postfilter)

811 * @param samples Output buffer for filtered samples

812 * @param size Buffer size of synth & samples

813 * @param lpcs Generated LPCs used for speech synthesis

814 * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)

815 * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)

816 * @param pitch Pitch of the input signal

817 */

818 static void postfilter(WMAVoiceContext *s, const float *synth,

819 float *samples, int size,

820 const float *lpcs, float *zero_exc_pf,

821 int fcb_type, int pitch)

822 {

823 float synth_filter_in_buf[MAX_FRAMESIZE / 2],

824 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],

825 *synth_filter_in = zero_exc_pf;

826

827 av_assert0(size <= MAX_FRAMESIZE / 2);

828

829 /* generate excitation from input signal */

830 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);

831

832 if (fcb_type >= FCB_TYPE_AW_PULSES &&

833 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))

834 synth_filter_in = synth_filter_in_buf;

835

836 /* re-synthesize speech after smoothening, and keep history */

837 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,

838 synth_filter_in, size, s->lsps);

839 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],

840 sizeof(synth_pf[0]) * s->lsps);

841

842 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);

843

844 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,

845 &s->postfilter_agc);

846

847 if (s->dc_level > 8) {

848 /* remove ultra-low frequency DC noise / highpass filter;

849 * coefficients are identical to those used in SIPR decoding,

850 * and very closely resemble those used in AMR-NB decoding. */

851 ff_acelp_apply_order_2_transfer_function(samples, samples,

852 (const float[2]) { -1.99997, 1.0 },

853 (const float[2]) { -1.9330735188, 0.93589198496 },

854 0.93980580475, s->dcf_mem, size);

855 }

856 }

857 /**

858 * @}

859 */

860

861 /**

862 * Dequantize LSPs

863 * @param lsps output pointer to the array that will hold the LSPs

864 * @param num number of LSPs to be dequantized

865 * @param values quantized values, contains n_stages values

866 * @param sizes range (i.e. max value) of each quantized value

867 * @param n_stages number of dequantization runs

868 * @param table dequantization table to be used

869 * @param mul_q LSF multiplier

870 * @param base_q base (lowest) LSF values

871 */

872 static void dequant_lsps(double *lsps, int num,

873 const uint16_t *values,

874 const uint16_t *sizes,

875 int n_stages, const uint8_t *table,

876 const double *mul_q,

877 const double *base_q)

878 {

879 int n, m;

880

881 memset(lsps, 0, num * sizeof(*lsps));

882 for (n = 0; n < n_stages; n++) {

883 const uint8_t *t_off = &table[values[n] * num];

884 double base = base_q[n], mul = mul_q[n];

885

886 for (m = 0; m < num; m++)

887 lsps[m] += base + mul * t_off[m];

888

889 table += sizes[n] * num;

890 }

891 }

892

893 /**

894 * @name LSP dequantization routines

895 * LSP dequantization routines, for 10/16LSPs and independent/residual coding.

896 * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;

897 * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.

898 * @{

899 */

900 /**

901 * Parse 10 independently-coded LSPs.

902 */

903 static void dequant_lsp10i(GetBitContext *gb, double *lsps)

904 {

905 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };

906 static const double mul_lsf[4] = {

907 5.2187144800e-3, 1.4626986422e-3,

908 9.6179549166e-4, 1.1325736225e-3

909 };

910 static const double base_lsf[4] = {

911 M_PI * -2.15522e-1, M_PI * -6.1646e-2,

912 M_PI * -3.3486e-2, M_PI * -5.7408e-2

913 };

914 uint16_t v[4];

915

916 v[0] = get_bits(gb, 8);

917 v[1] = get_bits(gb, 6);

918 v[2] = get_bits(gb, 5);

919 v[3] = get_bits(gb, 5);

920

921 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,

922 mul_lsf, base_lsf);

923 }

924

925 /**

926 * Parse 10 independently-coded LSPs, and then derive the tables to

927 * generate LSPs for the other frames from them (residual coding).

928 */

929 static void dequant_lsp10r(GetBitContext *gb,

930 double *i_lsps, const double *old,

931 double *a1, double *a2, int q_mode)

932 {

933 static const uint16_t vec_sizes[3] = { 128, 64, 64 };

934 static const double mul_lsf[3] = {

935 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3

936 };

937 static const double base_lsf[3] = {

938 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2

939 };

940 const float (*ipol_tab)[2][10] = q_mode ?

941 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;

942 uint16_t interpol, v[3];

943 int n;

944

945 dequant_lsp10i(gb, i_lsps);

946

947 interpol = get_bits(gb, 5);

948 v[0] = get_bits(gb, 7);

949 v[1] = get_bits(gb, 6);

950 v[2] = get_bits(gb, 6);

951

952 for (n = 0; n < 10; n++) {

953 double delta = old[n] - i_lsps[n];

954 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

955 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];

956 }

957

958 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,

959 mul_lsf, base_lsf);

960 }

961

962 /**

963 * Parse 16 independently-coded LSPs.

964 */

965 static void dequant_lsp16i(GetBitContext *gb, double *lsps)

966 {

967 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };

968 static const double mul_lsf[5] = {

969 3.3439586280e-3, 6.9908173703e-4,

970 3.3216608306e-3, 1.0334960326e-3,

971 3.1899104283e-3

972 };

973 static const double base_lsf[5] = {

974 M_PI * -1.27576e-1, M_PI * -2.4292e-2,

975 M_PI * -1.28094e-1, M_PI * -3.2128e-2,

976 M_PI * -1.29816e-1

977 };

978 uint16_t v[5];

979

980 v[0] = get_bits(gb, 8);

981 v[1] = get_bits(gb, 6);

982 v[2] = get_bits(gb, 7);

983 v[3] = get_bits(gb, 6);

984 v[4] = get_bits(gb, 7);

985

986 dequant_lsps( lsps, 5, v, vec_sizes, 2,

987 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);

988 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,

989 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);

990 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,

991 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);

992 }

993

994 /**

995 * Parse 16 independently-coded LSPs, and then derive the tables to

996 * generate LSPs for the other frames from them (residual coding).

997 */

998 static void dequant_lsp16r(GetBitContext *gb,

999 double *i_lsps, const double *old,

1000 double *a1, double *a2, int q_mode)

1001 {

1002 static const uint16_t vec_sizes[3] = { 128, 128, 128 };

1003 static const double mul_lsf[3] = {

1004 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3

1005 };

1006 static const double base_lsf[3] = {

1007 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2

1008 };

1009 const float (*ipol_tab)[2][16] = q_mode ?

1010 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;

1011 uint16_t interpol, v[3];

1012 int n;

1013

1014 dequant_lsp16i(gb, i_lsps);

1015

1016 interpol = get_bits(gb, 5);

1017 v[0] = get_bits(gb, 7);

1018 v[1] = get_bits(gb, 7);

1019 v[2] = get_bits(gb, 7);

1020

1021 for (n = 0; n < 16; n++) {

1022 double delta = old[n] - i_lsps[n];

1023 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

1024 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];

1025 }

1026

1027 dequant_lsps( a2, 10, v, vec_sizes, 1,

1028 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);

1029 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,

1030 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);

1031 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,

1032 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);

1033 }

1034

1035 /**

1036 * @}

1037 * @name Pitch-adaptive window coding functions

1038 * The next few functions are for pitch-adaptive window coding.

1039 * @{

1040 */

1041 /**

1042 * Parse the offset of the first pitch-adaptive window pulses, and

1043 * the distribution of pulses between the two blocks in this frame.

1044 * @param s WMA Voice decoding context private data

1045 * @param gb bit I/O context

1046 * @param pitch pitch for each block in this frame

1047 */

1048 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,

1049 const int *pitch)

1050 {

1051 static const int16_t start_offset[94] = {

1052 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,

1053 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,

1054 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,

1055 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,

1056 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,

1057 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,

1058 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,

1059 141, 143, 145, 147, 149, 151, 153, 155, 157, 159

1060 };

1061 int bits, offset;

1062

1063 /* position of pulse */

1064 s->aw_idx_is_ext = 0;

1065 if ((bits = get_bits(gb, 6)) >= 54) {

1066 s->aw_idx_is_ext = 1;

1067 bits += (bits - 54) * 3 + get_bits(gb, 2);

1068 }

1069

1070 /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count

1071 * the distribution of the pulses in each block contained in this frame. */

1072 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;

1073 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;

1074 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];

1075 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;

1076 offset += s->aw_n_pulses[0] * pitch[0];

1077 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];

1078 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;

1079

1080 /* if continuing from a position before the block, reset position to

1081 * start of block (when corrected for the range over which it can be

1082 * spread in aw_pulse_set1()). */

1083 if (start_offset[bits] < MAX_FRAMESIZE / 2) {

1084 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)

1085 s->aw_first_pulse_off[1] -= pitch[1];

1086 if (start_offset[bits] < 0)

1087 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)

1088 s->aw_first_pulse_off[0] -= pitch[0];

1089 }

1090 }

1091

1092 /**

1093 * Apply second set of pitch-adaptive window pulses.

1094 * @param s WMA Voice decoding context private data

1095 * @param gb bit I/O context

1096 * @param block_idx block index in frame [0, 1]

1097 * @param fcb structure containing fixed codebook vector info

1098 * @return -1 on error, 0 otherwise

1099 */

1100 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,

1101 int block_idx, AMRFixed *fcb)

1102 {

1103 uint16_t use_mask_mem[9]; // only 5 are used, rest is padding

1104 uint16_t *use_mask = use_mask_mem + 2;

1105 /* in this function, idx is the index in the 80-bit (+ padding) use_mask

1106 * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits

1107 * of idx are the position of the bit within a particular item in the

1108 * array (0 being the most significant bit, and 15 being the least

1109 * significant bit), and the remainder (>> 4) is the index in the

1110 * use_mask[]-array. This is faster and uses less memory than using a

1111 * 80-byte/80-int array. */

1112 int pulse_off = s->aw_first_pulse_off[block_idx],

1113 pulse_start, n, idx, range, aidx, start_off = 0;

1114

1115 /* set offset of first pulse to within this block */

1116 if (s->aw_n_pulses[block_idx] > 0)

1117 while (pulse_off + s->aw_pulse_range < 1)

1118 pulse_off += fcb->pitch_lag;

1119

1120 /* find range per pulse */

1121 if (s->aw_n_pulses[0] > 0) {

1122 if (block_idx == 0) {

1123 range = 32;

1124 } else /* block_idx = 1 */ {

1125 range = 8;

1126 if (s->aw_n_pulses[block_idx] > 0)

1127 pulse_off = s->aw_next_pulse_off_cache;

1128 }

1129 } else

1130 range = 16;

1131 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;

1132

1133 /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,

1134 * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus

1135 * we exclude that range from being pulsed again in this function. */

1136 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));

1137 memset( use_mask, -1, 5 * sizeof(use_mask[0]));

1138 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));

1139 if (s->aw_n_pulses[block_idx] > 0)

1140 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {

1141 int excl_range = s->aw_pulse_range; // always 16 or 24

1142 uint16_t *use_mask_ptr = &use_mask[idx >> 4];

1143 int first_sh = 16 - (idx & 15);

1144 *use_mask_ptr++ &= 0xFFFFu << first_sh;

1145 excl_range -= first_sh;

1146 if (excl_range >= 16) {

1147 *use_mask_ptr++ = 0;

1148 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);

1149 } else

1150 *use_mask_ptr &= 0xFFFF >> excl_range;

1151 }

1152

1153 /* find the 'aidx'th offset that is not excluded */

1154 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);

1155 for (n = 0; n <= aidx; pulse_start++) {

1156 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;

1157 if (idx >= MAX_FRAMESIZE / 2) { // find from zero

1158 if (use_mask[0]) idx = 0x0F;

1159 else if (use_mask[1]) idx = 0x1F;

1160 else if (use_mask[2]) idx = 0x2F;

1161 else if (use_mask[3]) idx = 0x3F;

1162 else if (use_mask[4]) idx = 0x4F;

1163 else return -1;

1164 idx -= av_log2_16bit(use_mask[idx >> 4]);

1165 }

1166 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {

1167 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));

1168 n++;

1169 start_off = idx;

1170 }

1171 }

1172

1173 fcb->x[fcb->n] = start_off;

1174 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;

1175 fcb->n++;

1176

1177 /* set offset for next block, relative to start of that block */

1178 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;

1179 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;

1180 return 0;

1181 }

1182

1183 /**

1184 * Apply first set of pitch-adaptive window pulses.

1185 * @param s WMA Voice decoding context private data

1186 * @param gb bit I/O context

1187 * @param block_idx block index in frame [0, 1]

1188 * @param fcb storage location for fixed codebook pulse info

1189 */

1190 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,

1191 int block_idx, AMRFixed *fcb)

1192 {

1193 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));

1194 float v;

1195

1196 if (s->aw_n_pulses[block_idx] > 0) {

1197 int n, v_mask, i_mask, sh, n_pulses;

1198

1199 if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each

1200 n_pulses = 3;

1201 v_mask = 8;

1202 i_mask = 7;

1203 sh = 4;

1204 } else { // 4 pulses, 1:sign + 2:index each

1205 n_pulses = 4;

1206 v_mask = 4;

1207 i_mask = 3;

1208 sh = 3;

1209 }

1210

1211 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {

1212 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;

1213 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +

1214 s->aw_first_pulse_off[block_idx];

1215 while (fcb->x[fcb->n] < 0)

1216 fcb->x[fcb->n] += fcb->pitch_lag;

1217 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)

1218 fcb->n++;

1219 }

1220 } else {

1221 int num2 = (val & 0x1FF) >> 1, delta, idx;

1222

1223 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }

1224 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }

1225 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }

1226 else { delta = 7; idx = num2 + 1 - 3 * 75; }

1227 v = (val & 0x200) ? -1.0 : 1.0;

1228

1229 fcb->no_repeat_mask |= 3 << fcb->n;

1230 fcb->x[fcb->n] = idx - delta;

1231 fcb->y[fcb->n] = v;

1232 fcb->x[fcb->n + 1] = idx;

1233 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;

1234 fcb->n += 2;

1235 }

1236 }

1237

1238 /**

1239 * @}

1240 *

1241 * Generate a random number from frame_cntr and block_idx, which will live

1242 * in the range [0, 1000 - block_size] (so it can be used as an index in a

1243 * table of size 1000 of which you want to read block_size entries).

1244 *

1245 * @param frame_cntr current frame number

1246 * @param block_num current block index

1247 * @param block_size amount of entries we want to read from a table

1248 * that has 1000 entries

1249 * @return a (non-)random number in the [0, 1000 - block_size] range.

1250 */

1251 static int pRNG(int frame_cntr, int block_num, int block_size)

1252 {

1253 /* array to simplify the calculation of z:

1254 * y = (x % 9) * 5 + 6;

1255 * z = (49995 * x) / y;

1256 * Since y only has 9 values, we can remove the division by using a

1257 * LUT and using FASTDIV-style divisions. For each of the 9 values

1258 * of y, we can rewrite z as:

1259 * z = x * (49995 / y) + x * ((49995 % y) / y)

1260 * In this table, each col represents one possible value of y, the

1261 * first number is 49995 / y, and the second is the FASTDIV variant

1262 * of 49995 % y / y. */

1263 static const unsigned int div_tbl[9][2] = {

1264 { 8332, 3 * 715827883U }, // y = 6

1265 { 4545, 0 * 390451573U }, // y = 11

1266 { 3124, 11 * 268435456U }, // y = 16

1267 { 2380, 15 * 204522253U }, // y = 21

1268 { 1922, 23 * 165191050U }, // y = 26

1269 { 1612, 23 * 138547333U }, // y = 31

1270 { 1388, 27 * 119304648U }, // y = 36

1271 { 1219, 16 * 104755300U }, // y = 41

1272 { 1086, 39 * 93368855U } // y = 46

1273 };

1274 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;

1275 if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,

1276 // so this is effectively a modulo (%)

1277 y = x - 9 * MULH(477218589, x); // x % 9

1278 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));

1279 // z = x * 49995 / (y * 5 + 6)

1280 return z % (1000 - block_size);

1281 }

1282

1283 /**

1284 * Parse hardcoded signal for a single block.

1285 * @note see #synth_block().

1286 */

1287 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,

1288 int block_idx, int size,

1289 const struct frame_type_desc *frame_desc,

1290 float *excitation)

1291 {

1292 float gain;

1293 int n, r_idx;

1294

1295 av_assert0(size <= MAX_FRAMESIZE);

1296

1297 /* Set the offset from which we start reading wmavoice_std_codebook */

1298 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {

1299 r_idx = pRNG(s->frame_cntr, block_idx, size);

1300 gain = s->silence_gain;

1301 } else /* FCB_TYPE_HARDCODED */ {

1302 r_idx = get_bits(gb, 8);

1303 gain = wmavoice_gain_universal[get_bits(gb, 6)];

1304 }

1305

1306 /* Clear gain prediction parameters */

1307 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));

1308

1309 /* Apply gain to hardcoded codebook and use that as excitation signal */

1310 for (n = 0; n < size; n++)

1311 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;

1312 }

1313

1314 /**

1315 * Parse FCB/ACB signal for a single block.

1316 * @note see #synth_block().

1317 */

1318 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,

1319 int block_idx, int size,

1320 int block_pitch_sh2,

1321 const struct frame_type_desc *frame_desc,

1322 float *excitation)

1323 {

1324 static const float gain_coeff[6] = {

1325 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458

1326 };

1327 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;

1328 int n, idx, gain_weight;

1329 AMRFixed fcb;

1330

1331 av_assert0(size <= MAX_FRAMESIZE / 2);

1332 memset(pulses, 0, sizeof(*pulses) * size);

1333

1334 fcb.pitch_lag = block_pitch_sh2 >> 2;

1335 fcb.pitch_fac = 1.0;

1336 fcb.no_repeat_mask = 0;

1337 fcb.n = 0;

1338

1339 /* For the other frame types, this is where we apply the innovation

1340 * (fixed) codebook pulses of the speech signal. */

1341 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {

1342 aw_pulse_set1(s, gb, block_idx, &fcb);

1343 if (aw_pulse_set2(s, gb, block_idx, &fcb)) {

1344 /* Conceal the block with silence and return.

1345 * Skip the correct amount of bits to read the next

1346 * block from the correct offset. */

1347 int r_idx = pRNG(s->frame_cntr, block_idx, size);

1348

1349 for (n = 0; n < size; n++)

1350 excitation[n] =

1351 wmavoice_std_codebook[r_idx + n] * s->silence_gain;

1352 skip_bits(gb, 7 + 1);

1353 return;

1354 }

1355 } else /* FCB_TYPE_EXC_PULSES */ {

1356 int offset_nbits = 5 - frame_desc->log_n_blocks;

1357

1358 fcb.no_repeat_mask = -1;

1359 /* similar to ff_decode_10_pulses_35bits(), but with single pulses

1360 * (instead of double) for a subset of pulses */

1361 for (n = 0; n < 5; n++) {

1362 float sign;

1363 int pos1, pos2;

1364

1365 sign = get_bits1(gb) ? 1.0 : -1.0;

1366 pos1 = get_bits(gb, offset_nbits);

1367 fcb.x[fcb.n] = n + 5 * pos1;

1368 fcb.y[fcb.n++] = sign;

1369 if (n < frame_desc->dbl_pulses) {

1370 pos2 = get_bits(gb, offset_nbits);

1371 fcb.x[fcb.n] = n + 5 * pos2;

1372 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;

1373 }

1374 }

1375 }

1376 ff_set_fixed_vector(pulses, &fcb, 1.0, size);

1377

1378 /* Calculate gain for adaptive & fixed codebook signal.

1379 * see ff_amr_set_fixed_gain(). */

1380 idx = get_bits(gb, 7);

1381 fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err,

1382 gain_coeff, 6) -

1383 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);

1384 acb_gain = wmavoice_gain_codebook_acb[idx];

1385 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],

1386 -2.9957322736 /* log(0.05) */,

1387 1.6094379124 /* log(5.0) */);

1388

1389 gain_weight = 8 >> frame_desc->log_n_blocks;

1390 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,

1391 sizeof(*s->gain_pred_err) * (6 - gain_weight));

1392 for (n = 0; n < gain_weight; n++)

1393 s->gain_pred_err[n] = pred_err;

1394

1395 /* Calculation of adaptive codebook */

1396 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {

1397 int len;

1398 for (n = 0; n < size; n += len) {

1399 int next_idx_sh16;

1400 int abs_idx = block_idx * size + n;

1401 int pitch_sh16 = (s->last_pitch_val << 16) +

1402 s->pitch_diff_sh16 * abs_idx;

1403 int pitch = (pitch_sh16 + 0x6FFF) >> 16;

1404 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;

1405 idx = idx_sh16 >> 16;

1406 if (s->pitch_diff_sh16) {

1407 if (s->pitch_diff_sh16 > 0) {

1408 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;

1409 } else

1410 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;

1411 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,

1412 1, size - n);

1413 } else

1414 len = size;

1415

1416 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],

1417 wmavoice_ipol1_coeffs, 17,

1418 idx, 9, len);

1419 }

1420 } else /* ACB_TYPE_HAMMING */ {

1421 int block_pitch = block_pitch_sh2 >> 2;

1422 idx = block_pitch_sh2 & 3;

1423 if (idx) {

1424 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],

1425 wmavoice_ipol2_coeffs, 4,

1426 idx, 8, size);

1427 } else

1428 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,

1429 sizeof(float) * size);

1430 }

1431

1432 /* Interpolate ACB/FCB and use as excitation signal */

1433 ff_weighted_vector_sumf(excitation, excitation, pulses,

1434 acb_gain, fcb_gain, size);

1435 }

1436

1437 /**

1438 * Parse data in a single block.

1439 *

1440 * @param s WMA Voice decoding context private data

1441 * @param gb bit I/O context

1442 * @param block_idx index of the to-be-read block

1443 * @param size amount of samples to be read in this block

1444 * @param block_pitch_sh2 pitch for this block << 2

1445 * @param lsps LSPs for (the end of) this frame

1446 * @param prev_lsps LSPs for the last frame

1447 * @param frame_desc frame type descriptor

1448 * @param excitation target memory for the ACB+FCB interpolated signal

1449 * @param synth target memory for the speech synthesis filter output

1450 * @return 0 on success, <0 on error.

1451 */

1452 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,

1453 int block_idx, int size,

1454 int block_pitch_sh2,

1455 const double *lsps, const double *prev_lsps,

1456 const struct frame_type_desc *frame_desc,

1457 float *excitation, float *synth)

1458 {

1459 double i_lsps[MAX_LSPS];

1460 float lpcs[MAX_LSPS];

1461 float fac;

1462 int n;

1463

1464 if (frame_desc->acb_type == ACB_TYPE_NONE)

1465 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);

1466 else

1467 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,

1468 frame_desc, excitation);

1469

1470 /* convert interpolated LSPs to LPCs */

1471 fac = (block_idx + 0.5) / frame_desc->n_blocks;

1472 for (n = 0; n < s->lsps; n++) // LSF -> LSP

1473 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));

1474 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);

1475

1476 /* Speech synthesis */

1477 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);

1478 }

1479

1480 /**

1481 * Synthesize output samples for a single frame.

1482 *

1483 * @param ctx WMA Voice decoder context

1484 * @param gb bit I/O context (s->gb or one for cross-packet superframes)

1485 * @param frame_idx Frame number within superframe [0-2]

1486 * @param samples pointer to output sample buffer, has space for at least 160

1487 * samples

1488 * @param lsps LSP array

1489 * @param prev_lsps array of previous frame's LSPs

1490 * @param excitation target buffer for excitation signal

1491 * @param synth target buffer for synthesized speech data

1492 * @return 0 on success, <0 on error.

1493 */

1494 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,

1495 float *samples,

1496 const double *lsps, const double *prev_lsps,

1497 float *excitation, float *synth)

1498 {

1499 WMAVoiceContext *s = ctx->priv_data;

1500 int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);

1501 int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);

1502

1503 /* Parse frame type ("frame header"), see frame_descs */

1504 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc, 6, 3)], block_nsamples;

1505

1506 pitch[0] = INT_MAX;

1507

1508 if (bd_idx < 0) {

1509 av_log(ctx, AV_LOG_ERROR,

1510 "Invalid frame type VLC code, skipping\n");

1511 return AVERROR_INVALIDDATA;

1512 }

1513

1514 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;

1515

1516 /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */

1517 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {

1518 /* Pitch is provided per frame, which is interpreted as the pitch of

1519 * the last sample of the last block of this frame. We can interpolate

1520 * the pitch of other blocks (and even pitch-per-sample) by gradually

1521 * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */

1522 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;

1523 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;

1524 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);

1525 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);

1526 if (s->last_acb_type == ACB_TYPE_NONE ||

1527 20 * abs(cur_pitch_val - s->last_pitch_val) >

1528 (cur_pitch_val + s->last_pitch_val))

1529 s->last_pitch_val = cur_pitch_val;

1530

1531 /* pitch per block */

1532 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {

1533 int fac = n * 2 + 1;

1534

1535 pitch[n] = (MUL16(fac, cur_pitch_val) +

1536 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +

1537 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;

1538 }

1539

1540 /* "pitch-diff-per-sample" for calculation of pitch per sample */

1541 s->pitch_diff_sh16 =

1542 (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;

1543 }

1544

1545 /* Global gain (if silence) and pitch-adaptive window coordinates */

1546 switch (frame_descs[bd_idx].fcb_type) {

1547 case FCB_TYPE_SILENCE:

1548 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];

1549 break;

1550 case FCB_TYPE_AW_PULSES:

1551 aw_parse_coords(s, gb, pitch);

1552 break;

1553 }

1554

1555 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {

1556 int bl_pitch_sh2;

1557

1558 /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */

1559 switch (frame_descs[bd_idx].acb_type) {

1560 case ACB_TYPE_HAMMING: {

1561 /* Pitch is given per block. Per-block pitches are encoded as an

1562 * absolute value for the first block, and then delta values

1563 * relative to this value) for all subsequent blocks. The scale of

1564 * this pitch value is semi-logarithmic compared to its use in the

1565 * decoder, so we convert it to normal scale also. */

1566 int block_pitch,

1567 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,

1568 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,

1569 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;

1570

1571 if (n == 0) {

1572 block_pitch = get_bits(gb, s->block_pitch_nbits);

1573 } else

1574 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +

1575 get_bits(gb, s->block_delta_pitch_nbits);

1576 /* Convert last_ so that any next delta is within _range */

1577 last_block_pitch = av_clip(block_pitch,

1578 s->block_delta_pitch_hrange,

1579 s->block_pitch_range -

1580 s->block_delta_pitch_hrange);

1581

1582 /* Convert semi-log-style scale back to normal scale */

1583 if (block_pitch < t1) {

1584 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;

1585 } else {

1586 block_pitch -= t1;

1587 if (block_pitch < t2) {

1588 bl_pitch_sh2 =

1589 (s->block_conv_table[1] << 2) + (block_pitch << 1);

1590 } else {

1591 block_pitch -= t2;

1592 if (block_pitch < t3) {

1593 bl_pitch_sh2 =

1594 (s->block_conv_table[2] + block_pitch) << 2;

1595 } else

1596 bl_pitch_sh2 = s->block_conv_table[3] << 2;

1597 }

1598 }

1599 pitch[n] = bl_pitch_sh2 >> 2;

1600 break;

1601 }

1602

1603 case ACB_TYPE_ASYMMETRIC: {

1604 bl_pitch_sh2 = pitch[n] << 2;

1605 break;

1606 }

1607

1608 default: // ACB_TYPE_NONE has no pitch

1609 bl_pitch_sh2 = 0;

1610 break;

1611 }

1612

1613 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,

1614 lsps, prev_lsps, &frame_descs[bd_idx],

1615 &excitation[n * block_nsamples],

1616 &synth[n * block_nsamples]);

1617 }

1618

1619 /* Averaging projection filter, if applicable. Else, just copy samples

1620 * from synthesis buffer */

1621 if (s->do_apf) {

1622 double i_lsps[MAX_LSPS];

1623 float lpcs[MAX_LSPS];

1624

1625 if(frame_descs[bd_idx].fcb_type >= FCB_TYPE_AW_PULSES && pitch[0] == INT_MAX)

1626 return AVERROR_INVALIDDATA;

1627

1628 for (n = 0; n < s->lsps; n++) // LSF -> LSP

1629 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));

1630 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);

1631 postfilter(s, synth, samples, 80, lpcs,

1632 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],

1633 frame_descs[bd_idx].fcb_type, pitch[0]);

1634

1635 for (n = 0; n < s->lsps; n++) // LSF -> LSP

1636 i_lsps[n] = cos(lsps[n]);

1637 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);

1638 postfilter(s, &synth[80], &samples[80], 80, lpcs,

1639 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],

1640 frame_descs[bd_idx].fcb_type, pitch[0]);

1641 } else

1642 memcpy(samples, synth, 160 * sizeof(synth[0]));

1643

1644 /* Cache values for next frame */

1645 s->frame_cntr++;

1646 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)

1647 s->last_acb_type = frame_descs[bd_idx].acb_type;

1648 switch (frame_descs[bd_idx].acb_type) {

1649 case ACB_TYPE_NONE:

1650 s->last_pitch_val = 0;

1651 break;

1652 case ACB_TYPE_ASYMMETRIC:

1653 s->last_pitch_val = cur_pitch_val;

1654 break;

1655 case ACB_TYPE_HAMMING:

1656 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];

1657 break;

1658 }

1659

1660 return 0;

1661 }

1662

1663 /**

1664 * Ensure minimum value for first item, maximum value for last value,

1665 * proper spacing between each value and proper ordering.

1666 *

1667 * @param lsps array of LSPs

1668 * @param num size of LSP array

1669 *

1670 * @note basically a double version of #ff_acelp_reorder_lsf(), might be

1671 * useful to put in a generic location later on. Parts are also

1672 * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),

1673 * which is in float.

1674 */

1675 static void stabilize_lsps(double *lsps, int num)

1676 {

1677 int n, m, l;

1678

1679 /* set minimum value for first, maximum value for last and minimum

1680 * spacing between LSF values.

1681 * Very similar to ff_set_min_dist_lsf(), but in double. */

1682 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);

1683 for (n = 1; n < num; n++)

1684 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);

1685 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);

1686

1687 /* reorder (looks like one-time / non-recursed bubblesort).

1688 * Very similar to ff_sort_nearly_sorted_floats(), but in double. */

1689 for (n = 1; n < num; n++) {

1690 if (lsps[n] < lsps[n - 1]) {

1691 for (m = 1; m < num; m++) {

1692 double tmp = lsps[m];

1693 for (l = m - 1; l >= 0; l--) {

1694 if (lsps[l] <= tmp) break;

1695 lsps[l + 1] = lsps[l];

1696 }

1697 lsps[l + 1] = tmp;

1698 }

1699 break;

1700 }

1701 }

1702 }

1703

1704 /**

1705 * Synthesize output samples for a single superframe. If we have any data

1706 * cached in s->sframe_cache, that will be used instead of whatever is loaded

1707 * in s->gb.

1708 *

1709 * WMA Voice superframes contain 3 frames, each containing 160 audio samples,

1710 * to give a total of 480 samples per frame. See #synth_frame() for frame

1711 * parsing. In addition to 3 frames, superframes can also contain the LSPs

1712 * (if these are globally specified for all frames (residually); they can

1713 * also be specified individually per-frame. See the s->has_residual_lsps

1714 * option), and can specify the number of samples encoded in this superframe

1715 * (if less than 480), usually used to prevent blanks at track boundaries.

1716 *

1717 * @param ctx WMA Voice decoder context

1718 * @return 0 on success, <0 on error or 1 if there was not enough data to

1719 * fully parse the superframe

1720 */

1721 static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,

1722 int *got_frame_ptr)

1723 {

1724 WMAVoiceContext *s = ctx->priv_data;

1725 GetBitContext *gb = &s->gb, s_gb;

1726 int n, res, n_samples = MAX_SFRAMESIZE;

1727 double lsps[MAX_FRAMES][MAX_LSPS];

1728 const double *mean_lsf = s->lsps == 16 ?

1729 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];

1730 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];

1731 float synth[MAX_LSPS + MAX_SFRAMESIZE];

1732 float *samples;

1733

1734 memcpy(synth, s->synth_history,

1735 s->lsps * sizeof(*synth));

1736 memcpy(excitation, s->excitation_history,

1737 s->history_nsamples * sizeof(*excitation));

1738

1739 if (s->sframe_cache_size > 0) {

1740 gb = &s_gb;

1741 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);

1742 s->sframe_cache_size = 0;

1743 }

1744

1745 /* First bit is speech/music bit, it differentiates between WMAVoice

1746 * speech samples (the actual codec) and WMAVoice music samples, which

1747 * are really WMAPro-in-WMAVoice-superframes. I've never seen those in

1748 * the wild yet. */

1749 if (!get_bits1(gb)) {

1750 avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");

1751 return AVERROR_PATCHWELCOME;

1752 }

1753

1754 /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */

1755 if (get_bits1(gb)) {

1756 if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {

1757 av_log(ctx, AV_LOG_ERROR,

1758 "Superframe encodes > %d samples (%d), not allowed\n",

1759 MAX_SFRAMESIZE, n_samples);

1760 return AVERROR_INVALIDDATA;

1761 }

1762 }

1763

1764 /* Parse LSPs, if global for the superframe (can also be per-frame). */

1765 if (s->has_residual_lsps) {

1766 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];

1767

1768 for (n = 0; n < s->lsps; n++)

1769 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];

1770

1771 if (s->lsps == 10) {

1772 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);

1773 } else /* s->lsps == 16 */

1774 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);

1775

1776 for (n = 0; n < s->lsps; n++) {

1777 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);

1778 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);

1779 lsps[2][n] += mean_lsf[n];

1780 }

1781 for (n = 0; n < 3; n++)

1782 stabilize_lsps(lsps[n], s->lsps);

1783 }

1784

1785 /* synth_superframe can run multiple times per packet

1786 * free potential previous frame */

1787 av_frame_unref(frame);

1788

1789 /* get output buffer */

1790 frame->nb_samples = MAX_SFRAMESIZE;

1791 if ((res = ff_get_buffer(ctx, frame, 0)) < 0)

1792 return res;

1793 frame->nb_samples = n_samples;

1794 samples = (float *)frame->data[0];

1795

1796 /* Parse frames, optionally preceded by per-frame (independent) LSPs. */

1797 for (n = 0; n < 3; n++) {

1798 if (!s->has_residual_lsps) {

1799 int m;

1800

1801 if (s->lsps == 10) {

1802 dequant_lsp10i(gb, lsps[n]);

1803 } else /* s->lsps == 16 */

1804 dequant_lsp16i(gb, lsps[n]);

1805

1806 for (m = 0; m < s->lsps; m++)

1807 lsps[n][m] += mean_lsf[m];

1808 stabilize_lsps(lsps[n], s->lsps);

1809 }

1810

1811 if ((res = synth_frame(ctx, gb, n,

1812 &samples[n * MAX_FRAMESIZE],

1813 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],

1814 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],

1815 &synth[s->lsps + n * MAX_FRAMESIZE]))) {

1816 *got_frame_ptr = 0;

1817 return res;

1818 }

1819 }

1820

1821 /* Statistics? FIXME - we don't check for length, a slight overrun

1822 * will be caught by internal buffer padding, and anything else

1823 * will be skipped, not read. */

1824 if (get_bits1(gb)) {

1825 res = get_bits(gb, 4);

1826 skip_bits(gb, 10 * (res + 1));

1827 }

1828

1829 if (get_bits_left(gb) < 0) {

1830 wmavoice_flush(ctx);

1831 return AVERROR_INVALIDDATA;

1832 }

1833

1834 *got_frame_ptr = 1;

1835

1836 /* Update history */

1837 memcpy(s->prev_lsps, lsps[2],

1838 s->lsps * sizeof(*s->prev_lsps));

1839 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],

1840 s->lsps * sizeof(*synth));

1841 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],

1842 s->history_nsamples * sizeof(*excitation));

1843 if (s->do_apf)

1844 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],

1845 s->history_nsamples * sizeof(*s->zero_exc_pf));

1846

1847 return 0;

1848 }

1849

1850 /**

1851 * Parse the packet header at the start of each packet (input data to this

1852 * decoder).

1853 *

1854 * @param s WMA Voice decoding context private data

1855 * @return <0 on error, nb_superframes on success.

1856 */

1857 static int parse_packet_header(WMAVoiceContext *s)

1858 {

1859 GetBitContext *gb = &s->gb;

1860 unsigned int res, n_superframes = 0;

1861

1862 skip_bits(gb, 4); // packet sequence number

1863 s->has_residual_lsps = get_bits1(gb);

1864 do {

1865 if (get_bits_left(gb) < 6 + s->spillover_bitsize)

1866 return AVERROR_INVALIDDATA;

1867

1868 res = get_bits(gb, 6); // number of superframes per packet

1869 // (minus first one if there is spillover)

1870 n_superframes += res;

1871 } while (res == 0x3F);

1872 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);

1873

1874 return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;

1875 }

1876

1877 /**

1878 * Copy (unaligned) bits from gb/data/size to pb.

1879 *

1880 * @param pb target buffer to copy bits into

1881 * @param data source buffer to copy bits from

1882 * @param size size of the source data, in bytes

1883 * @param gb bit I/O context specifying the current position in the source.

1884 * data. This function might use this to align the bit position to

1885 * a whole-byte boundary before calling #ff_copy_bits() on aligned

1886 * source data

1887 * @param nbits the amount of bits to copy from source to target

1888 *

1889 * @note after calling this function, the current position in the input bit

1890 * I/O context is undefined.

1891 */

1892 static void copy_bits(PutBitContext *pb,

1893 const uint8_t *data, int size,

1894 GetBitContext *gb, int nbits)

1895 {

1896 int rmn_bytes, rmn_bits;

1897

1898 rmn_bits = rmn_bytes = get_bits_left(gb);

1899 if (rmn_bits < nbits)

1900 return;

1901 if (nbits > put_bits_left(pb))

1902 return;

1903 rmn_bits &= 7; rmn_bytes >>= 3;

1904 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)

1905 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));

1906 ff_copy_bits(pb, data + size - rmn_bytes,

1907 FFMIN(nbits - rmn_bits, rmn_bytes << 3));

1908 }

1909

1910 /**

1911 * Packet decoding: a packet is anything that the (ASF) demuxer contains,

1912 * and we expect that the demuxer / application provides it to us as such

1913 * (else you'll probably get garbage as output). Every packet has a size of

1914 * ctx->block_align bytes, starts with a packet header (see

1915 * #parse_packet_header()), and then a series of superframes. Superframe

1916 * boundaries may exceed packets, i.e. superframes can split data over

1917 * multiple (two) packets.

1918 *

1919 * For more information about frames, see #synth_superframe().

1920 */

1921 static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame,

1922 int *got_frame_ptr, AVPacket *avpkt)

1923 {

1924 WMAVoiceContext *s = ctx->priv_data;

1925 GetBitContext *gb = &s->gb;

1926 const uint8_t *buf = avpkt->data;

1927 uint8_t dummy[1];

1928 int size, res, pos;

1929

1930 /* Packets are sometimes a multiple of ctx->block_align, with a packet

1931 * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer

1932 * feeds us ASF packets, which may concatenate multiple "codec" packets

1933 * in a single "muxer" packet, so we artificially emulate that by

1934 * capping the packet size at ctx->block_align. */

1935 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);

1936 buf = size ? buf : dummy;

1937 res = init_get_bits8(&s->gb, buf, size);

1938 if (res < 0)

1939 return res;

1940

1941 /* size == ctx->block_align is used to indicate whether we are dealing with

1942 * a new packet or a packet of which we already read the packet header

1943 * previously. */

1944 if (!(size % ctx->block_align)) { // new packet header

1945 if (!size) {

1946 s->spillover_nbits = 0;

1947 s->nb_superframes = 0;

1948 } else {

1949 if ((res = parse_packet_header(s)) < 0)

1950 return res;

1951 s->nb_superframes = res;

1952 }

1953

1954 /* If the packet header specifies a s->spillover_nbits, then we want

1955 * to push out all data of the previous packet (+ spillover) before

1956 * continuing to parse new superframes in the current packet. */

1957 if (s->sframe_cache_size > 0) {

1958 int cnt = get_bits_count(gb);

1959 if (cnt + s->spillover_nbits > avpkt->size * 8) {

1960 s->spillover_nbits = avpkt->size * 8 - cnt;

1961 }

1962 copy_bits(&s->pb, buf, size, gb, s->spillover_nbits);

1963 flush_put_bits(&s->pb);

1964 s->sframe_cache_size += s->spillover_nbits;

1965 if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&

1966 *got_frame_ptr) {

1967 cnt += s->spillover_nbits;

1968 s->skip_bits_next = cnt & 7;

1969 res = cnt >> 3;

1970 return res;

1971 } else

1972 skip_bits_long (gb, s->spillover_nbits - cnt +

1973 get_bits_count(gb)); // resync

1974 } else if (s->spillover_nbits) {

1975 skip_bits_long(gb, s->spillover_nbits); // resync

1976 }

1977 } else if (s->skip_bits_next)

1978 skip_bits(gb, s->skip_bits_next);

1979

1980 /* Try parsing superframes in current packet */

1981 s->sframe_cache_size = 0;

1982 s->skip_bits_next = 0;

1983 pos = get_bits_left(gb);

1984 if (s->nb_superframes-- == 0) {

1985 *got_frame_ptr = 0;

1986 return size;

1987 } else if (s->nb_superframes > 0) {

1988 if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {

1989 return res;

1990 } else if (*got_frame_ptr) {

1991 int cnt = get_bits_count(gb);

1992 s->skip_bits_next = cnt & 7;

1993 res = cnt >> 3;

1994 return res;

1995 }

1996 } else if ((s->sframe_cache_size = pos) > 0) {

1997 /* ... cache it for spillover in next packet */

1998 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);

1999 copy_bits(&s->pb, buf, size, gb, s->sframe_cache_size);

2000 // FIXME bad - just copy bytes as whole and add use the

2001 // skip_bits_next field

2002 }

2003

2004 return size;

2005 }

2006

2007 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)

2008 {

2009 WMAVoiceContext *s = ctx->priv_data;

2010

2011 if (s->do_apf) {

2012 av_tx_uninit(&s->rdft);

2013 av_tx_uninit(&s->irdft);

2014 av_tx_uninit(&s->dct);

2015 av_tx_uninit(&s->dst);

2016 }

2017

2018 return 0;

2019 }

2020

2021 const FFCodec ff_wmavoice_decoder = {

2022 .p.name = "wmavoice",

2023 CODEC_LONG_NAME("Windows Media Audio Voice"),

2024 .p.type = AVMEDIA_TYPE_AUDIO,

2025 .p.id = AV_CODEC_ID_WMAVOICE,

2026 .priv_data_size = sizeof(WMAVoiceContext),

2027 .init = wmavoice_decode_init,

2028 .close = wmavoice_decode_end,

2029 FF_CODEC_DECODE_CB(wmavoice_decode_packet),

2030 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,

2031 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,

2032 .flush = wmavoice_flush,

2033 };

flags

const SwsFlags flags[]

Definition: swscale.c:61

WMAVoiceContext::has_residual_lsps

int has_residual_lsps

if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...

Definition: wmavoice.c:193

skip_bits_long

static void skip_bits_long(GetBitContext *s, int n)

Skips the specified number of bits.

Definition: get_bits.h:276

AMRFixed::x

int x[10]

Definition: acelp_vectors.h:55

wmavoice_std_codebook

static const float wmavoice_std_codebook[1000]

Definition: wmavoice_data.h:2585

interpol

static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)

Definition: vsrc_mandelbrot.c:186

ACB_TYPE_NONE

@ ACB_TYPE_NONE

no adaptive codebook (only hardcoded fixed)

Definition: wmavoice.c:70

MAX_LSPS

#define MAX_LSPS

maximum filter order

Definition: wmavoice.c:49

WMAVoiceContext::aw_next_pulse_off_cache

int aw_next_pulse_off_cache

the position (relative to start of the second block) at which pulses should start to be positioned,...

Definition: wmavoice.c:242

WMAVoiceContext::max_pitch_val

int max_pitch_val

max value + 1 for pitch parsing

Definition: wmavoice.c:165

av_clip

#define av_clip

Definition: common.h:100

aw_pulse_set2

static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)

Apply second set of pitch-adaptive window pulses.

Definition: wmavoice.c:1100

FF_CODEC_CAP_INIT_CLEANUP

#define FF_CODEC_CAP_INIT_CLEANUP

The codec allows calling the close function for deallocation even if the init function returned a fai...

Definition: codec_internal.h:42

acelp_vectors.h

get_bits_left

static int get_bits_left(GetBitContext *gb)

Definition: get_bits.h:689

AVERROR

Filter the word "frame" indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions

wmavoice_dq_lsp10i

static const uint8_t wmavoice_dq_lsp10i[0xf00]

Definition: wmavoice_data.h:33

mem_internal.h

WMAVoiceContext::tilted_lpcs_pf

float tilted_lpcs_pf[0x82]

aligned buffer for LPC tilting

Definition: wmavoice.c:279

out

FILE * out

Definition: movenc.c:55

thread.h

frame_descs

static const struct frame_type_desc frame_descs[17]

AVTXContext

Definition: tx_priv.h:235

wmavoice_dq_lsp16r3

static const uint8_t wmavoice_dq_lsp16r3[0x600]

Definition: wmavoice_data.h:1526

dequant_lsps

static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)

Dequantize LSPs.

Definition: wmavoice.c:872

init_put_bits

static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)

Initialize the PutBitContext s.

Definition: put_bits.h:62

WMAVoiceContext::excitation_history

float excitation_history[MAX_SIGNAL_HISTORY]

cache of the signal of

Definition: wmavoice.c:252

get_bits_count

static int get_bits_count(const GetBitContext *s)

Definition: get_bits.h:250

av_log2_16bit

int av_log2_16bit(unsigned v)

Definition: intmath.c:31

AVFrame

This structure describes decoded (raw) audio or video data.

Definition: frame.h:427

put_bits

static void put_bits(Jpeg2000EncoderContext *s, int val, int n)

put n times val bit

Definition: j2kenc.c:224

aw_pulse_set1

static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)

Apply first set of pitch-adaptive window pulses.

Definition: wmavoice.c:1190

ff_acelp_apply_order_2_transfer_function

void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)

Apply an order 2 rational transfer function in-place.

Definition: acelp_filters.c:121

#define u(width, name, range_min, range_max)

Definition: cbs_apv.c:68

AVPacket::data

uint8_t * data

Definition: packet.h:558

pRNG

static int pRNG(int frame_cntr, int block_num, int block_size)

Generate a random number from frame_cntr and block_idx, which will live in the range [0,...

Definition: wmavoice.c:1251

ff_wmavoice_decoder

const FFCodec ff_wmavoice_decoder

Definition: wmavoice.c:2021

table

static const uint16_t table[]

Definition: prosumer.c:203

data

const char data[16]

Definition: mxf.c:149

WMAVoiceContext::silence_gain

float silence_gain

set for use in blocks if ACB_TYPE_NONE

Definition: wmavoice.c:227

expf

#define expf(x)

Definition: libm.h:285

WMAVoiceContext::denoise_filter_cache_size

int denoise_filter_cache_size

samples in denoise_filter_cache

Definition: wmavoice.c:277

wmavoice_denoise_power_table

static const float wmavoice_denoise_power_table[12][64]

LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).

Definition: wmavoice_data.h:3064

wmavoice_gain_codebook_acb

static const float wmavoice_gain_codebook_acb[128]

Definition: wmavoice_data.h:2874

FFCodec

Definition: codec_internal.h:127

base

uint8_t base

Definition: vp3data.h:128

AVComplexFloat

Definition: tx.h:27

max

#define max(a, b)

Definition: cuda_runtime.h:33

FFMAX

#define FFMAX(a, b)

Definition: macros.h:47

ff_celp_lp_synthesis_filterf

void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)

LP synthesis filter.

Definition: celp_filters.c:86

WMAVoiceContext::aw_idx_is_ext

int aw_idx_is_ext

whether the AW index was encoded in 8 bits (instead of 6)

Definition: wmavoice.c:229

init_get_bits

static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)

Initialize GetBitContext.

Definition: get_bits.h:512

av_tx_init

av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)

Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...

Definition: tx.c:903

WMAVoiceContext::dc_level

int dc_level

Predicted amount of DC noise, based on which a DC removal filter is used.

Definition: wmavoice.c:156

wmavoice_dq_lsp16i1

static const uint8_t wmavoice_dq_lsp16i1[0x640]

Definition: wmavoice_data.h:420

WMAVoiceContext::block_conv_table

uint16_t block_conv_table[4]

boundaries for block pitch unit/scale conversion

Definition: wmavoice.c:177

frame_type_desc::log_n_blocks

uint8_t log_n_blocks

log2(n_blocks)

Definition: wmavoice.c:103

skip_bits

static void skip_bits(GetBitContext *s, int n)

Definition: get_bits.h:379

WMAVoiceContext::aw_pulse_range

int aw_pulse_range

the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...

Definition: wmavoice.c:231

static av_cold void close(AVCodecParserContext *s)

Definition: apv_parser.c:135

get_bits

static unsigned int get_bits(GetBitContext *s, int n)

Read 1-25 bits.

Definition: get_bits.h:333

ff_copy_bits

void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)

Copy the content of src to the bitstream.

Definition: bitstream.c:49

ACB_TYPE_ASYMMETRIC

@ ACB_TYPE_ASYMMETRIC

adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Definition: wmavoice.c:71

FFCodec::p

AVCodec p

The public AVCodec.

Definition: codec_internal.h:131

av_ceil_log2

#define av_ceil_log2

Definition: common.h:97

AMRFixed::pitch_fac

float pitch_fac

Definition: acelp_vectors.h:59

dummy

int dummy

Definition: motion.c:66

GetBitContext

Definition: get_bits.h:109

MULH

#define MULH

Definition: mathops.h:42

wmavoice_flush

static av_cold void wmavoice_flush(AVCodecContext *ctx)

Definition: wmavoice.c:326

put_bits_left

static int put_bits_left(PutBitContext *s)

Definition: put_bits.h:135

frame_type_desc::n_blocks

uint8_t n_blocks

amount of blocks per frame (each block (contains 160/n_blocks samples)

Definition: wmavoice.c:101

val

static double val(void *priv, double ch)

Definition: aeval.c:77

WMAVoiceContext::irdft_fn

av_tx_fn irdft_fn

postfilter (for denoise filter)

Definition: wmavoice.c:266

dequant_lsp10i

static void dequant_lsp10i(GetBitContext *gb, double *lsps)

Parse 10 independently-coded LSPs.

Definition: wmavoice.c:903

synth_block

static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)

Parse data in a single block.

Definition: wmavoice.c:1452

MAX_SFRAMESIZE

#define MAX_SFRAMESIZE

maximum number of samples per superframe

Definition: wmavoice.c:55

wmavoice_gain_codebook_fcb

static const float wmavoice_gain_codebook_fcb[128]

Definition: wmavoice_data.h:2893

static double a2(void *priv, double x, double y)

Definition: vf_xfade.c:2030

WMAVoiceContext::denoise_filter_cache

float denoise_filter_cache[MAX_FRAMESIZE]

Definition: wmavoice.c:276

fabsf

static __device__ float fabsf(float a)

Definition: cuda_runtime.h:181

WMAVoiceContext::sin

float sin[511]

Definition: wmavoice.c:269

calc_input_response

static void calc_input_response(WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)

Derive denoise filter coefficients (in real domain) from the LPCs.

Definition: wmavoice.c:610

AV_CODEC_ID_WMAVOICE

@ AV_CODEC_ID_WMAVOICE

Definition: codec_id.h:495

lrint

#define lrint

Definition: tablegen.h:53

MUL16

#define MUL16(ra, rb)

Definition: mathops.h:88

ff_thread_once

static int ff_thread_once(char *control, void(*routine)(void))

Definition: thread.h:205

AV_LOG_ERROR

#define AV_LOG_ERROR

Something went wrong and cannot losslessly be recovered.

Definition: log.h:210

FF_ARRAY_ELEMS

#define FF_ARRAY_ELEMS(a)

Definition: sinewin_tablegen.c:29

av_cold

#define av_cold

Definition: attributes.h:106

init_get_bits8

static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)

Initialize GetBitContext.

Definition: get_bits.h:539

MAX_LSPS_ALIGN16

#define MAX_LSPS_ALIGN16

same as MAX_LSPS; needs to be multiple

Definition: wmavoice.c:50

av_tx_fn

void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)

Function pointer to a function to perform the transform.

Definition: tx.h:151

av_memcpy_backptr

void av_memcpy_backptr(uint8_t *dst, int back, int cnt)

Overlapping memcpy() implementation.

Definition: mem.c:447

float

Definition: af_crystalizer.c:122

wmavoice_dq_lsp10r

static const uint8_t wmavoice_dq_lsp10r[0x1400]

Definition: wmavoice_data.h:749

FF_CODEC_DECODE_CB

#define FF_CODEC_DECODE_CB(func)

Definition: codec_internal.h:346

WMAVoiceContext::sframe_cache_size

int sframe_cache_size

set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...

Definition: wmavoice.c:205

WMAVoiceContext::dst

AVTXContext * dst

contexts for phase shift (in Hilbert

Definition: wmavoice.c:267

#define s(width, name)

Definition: cbs_vp9.c:198

WMAVoiceContext::lsp_q_mode

int lsp_q_mode

defines quantizer defaults [0, 1]

Definition: wmavoice.c:160

frame_type_desc::fcb_type

uint8_t fcb_type

Fixed codebook type (FCB_TYPE_*)

Definition: wmavoice.c:105

log_range

#define log_range(var, assign)

WMAVoiceContext::prev_lsps

double prev_lsps[MAX_LSPS]

LSPs of the last frame of the previous superframe.

Definition: wmavoice.c:221

AVMEDIA_TYPE_AUDIO

@ AVMEDIA_TYPE_AUDIO

Definition: avutil.h:201

WMAVoiceContext::aw_n_pulses

int aw_n_pulses[2]

number of AW-pulses in each block; note that this number can be negative (in which case it basically ...

Definition: wmavoice.c:237

AMRFixed

Sparse representation for the algebraic codebook (fixed) vector.

Definition: acelp_vectors.h:53

bits

uint8_t bits

Definition: vp3data.h:128

adaptive_gain_control

static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)

Adaptive gain control (as used in postfilter).

Definition: wmavoice.c:510

av_assert0

#define av_assert0(cond)

assert() equivalent, that is always enabled.

Definition: avassert.h:41

wmavoice_lsp16_intercoeff_a

static const float wmavoice_lsp16_intercoeff_a[32][2][16]

Definition: wmavoice_data.h:2047

FCB_TYPE_SILENCE

@ FCB_TYPE_SILENCE

comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...

Definition: wmavoice.c:85

ctx

AVFormatContext * ctx

Definition: movenc.c:49

decode.h

get_bits.h

wmavoice_mean_lsf10

static const double wmavoice_mean_lsf10[2][10]

Definition: wmavoice_data.h:2565

WMAVoiceContext::spillover_nbits

int spillover_nbits

number of bits of the previous packet's last superframe preceding this packet's first full superframe...

Definition: wmavoice.c:189

UMULH

static av_always_inline unsigned UMULH(unsigned a, unsigned b)

Definition: mathops.h:68

AMRFixed::y

float y[10]

Definition: acelp_vectors.h:56

WMAVoiceContext::denoise_coeffs_pf

float denoise_coeffs_pf[0x82]

aligned buffer for denoise coefficients

Definition: wmavoice.c:281

wmavoice_gain_silence

static const float wmavoice_gain_silence[256]

Definition: wmavoice_data.h:2788

PutBitContext

Definition: put_bits.h:50

WMAVoiceContext::vbm_tree

int8_t vbm_tree[25]

converts VLC codes to frame type

Definition: wmavoice.c:141

CODEC_LONG_NAME

#define CODEC_LONG_NAME(str)

Definition: codec_internal.h:331

tmp

static uint8_t tmp[40]

Definition: aes_ctr.c:52

WMAVoiceContext::dct_fn

av_tx_fn dct_fn

Definition: wmavoice.c:268

wmavoice_dq_lsp16i3

static const uint8_t wmavoice_dq_lsp16i3[0x300]

Definition: wmavoice_data.h:682

if(ret)

Definition: filter_design.txt:179

AMRFixed::no_repeat_mask

int no_repeat_mask

Definition: acelp_vectors.h:57

postfilter

static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)

Averaging projection filter, the postfilter used in WMAVoice.

Definition: wmavoice.c:818

AV_ONCE_INIT

#define AV_ONCE_INIT

Definition: thread.h:203

NULL

#define NULL

Definition: coverity.c:32

sizes

static const int sizes[][2]

Definition: img2dec.c:61

WMAVoiceContext::history_nsamples

int history_nsamples

number of samples in history for signal prediction (through ACB)

Definition: wmavoice.c:146

WMAVoiceContext::synth_history

float synth_history[MAX_LSPS]

see excitation_history

Definition: wmavoice.c:255

LOCAL_ALIGNED_32

#define LOCAL_ALIGNED_32(t, v,...)

Definition: mem_internal.h:132

AVERROR_PATCHWELCOME

#define AVERROR_PATCHWELCOME

Not yet implemented in FFmpeg, patches welcome.

Definition: error.h:64

last_coeff

static const uint8_t last_coeff[3]

Definition: qdm2data.h:187

WMAVoiceContext::denoise_strength

int denoise_strength

strength of denoising in Wiener filter [0-11]

Definition: wmavoice.c:152

MAX_SIGNAL_HISTORY

#define MAX_SIGNAL_HISTORY

maximum excitation signal history

Definition: wmavoice.c:54

WMAVoiceContext::sframe_cache

uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]

cache for superframe data split over multiple packets

Definition: wmavoice.c:202

get_bits1

static unsigned int get_bits1(GetBitContext *s)

Definition: get_bits.h:386

dequant_lsp10r

static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...

Definition: wmavoice.c:929

WMAVoiceContext::pitch_nbits

int pitch_nbits

number of bits used to specify the pitch value in the frame header

Definition: wmavoice.c:166

WMAVoiceContext::block_delta_pitch_nbits

int block_delta_pitch_nbits

number of bits used to specify the delta pitch between this and the last block's pitch value,...

Definition: wmavoice.c:171

kalman_smoothen

static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)

Kalman smoothing function.

Definition: wmavoice.c:551

WMAVoiceContext::skip_bits_next

int skip_bits_next

number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...

Definition: wmavoice.c:198

sqrtf

static __device__ float sqrtf(float a)

Definition: cuda_runtime.h:184

abs

#define abs(x)

Definition: cuda_runtime.h:35

WMAVoiceContext::dst_fn

av_tx_fn dst_fn

transform, part of postfilter)

Definition: wmavoice.c:268

WMAVoiceContext::rdft

AVTXContext * rdft

Definition: wmavoice.c:265

celp_filters.h

MAX_FRAMESIZE

#define MAX_FRAMESIZE

maximum number of samples per frame

Definition: wmavoice.c:53

av_clipf

Definition: af_crystalizer.c:122

MAX_FRAMES

#define MAX_FRAMES

maximum number of frames per superframe

Definition: wmavoice.c:52

get_vlc2

static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)

Parse a vlc code.

Definition: get_bits.h:646

decode_vbmtree

static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])

Set up the variable bit mode (VBM) tree from container extradata.

Definition: wmavoice.c:298

AVOnce

#define AVOnce

Definition: thread.h:202

aw_parse_coords

static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...

Definition: wmavoice.c:1048

wmavoice_init_static_data

static av_cold void wmavoice_init_static_data(void)

Definition: wmavoice.c:312

float_dsp.h

ff_scalarproduct_float_c

float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)

Return the scalar product of two vectors of floats.

Definition: float_scalarproduct.c:24

ACB_TYPE_HAMMING

@ ACB_TYPE_HAMMING

Per-block pitch with signal generation using a Hamming sinc window function.

Definition: wmavoice.c:76

WMAVoiceContext::dcf_mem

float dcf_mem[2]

DC filter history.

Definition: wmavoice.c:273

ff_get_buffer

int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)

Get a buffer for a frame.

Definition: decode.c:1720

init

int(* init)(AVBSFContext *ctx)

Definition: dts2pts.c:368

AV_CODEC_CAP_DR1

#define AV_CODEC_CAP_DR1

Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.

Definition: codec.h:52

parse_packet_header

static int parse_packet_header(WMAVoiceContext *s)

Parse the packet header at the start of each packet (input data to this decoder).

Definition: wmavoice.c:1857

AV_TX_FLOAT_DCT_I

@ AV_TX_FLOAT_DCT_I

Discrete Cosine Transform I.

Definition: tx.h:116

AVPacket::size

int size

Definition: packet.h:559

powf

#define powf(x, y)

Definition: libm.h:52

AVChannelLayout

An AVChannelLayout holds information about the channel layout of audio data.

Definition: channel_layout.h:319

codec_internal.h

DECLARE_ALIGNED

#define DECLARE_ALIGNED(n, t, v)

Definition: mem_internal.h:104

WMAVoiceContext::spillover_bitsize

int spillover_bitsize

number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...

Definition: wmavoice.c:143

for

for(k=2;k<=8;++k)

Definition: h264pred_template.c:424

FCB_TYPE_HARDCODED

@ FCB_TYPE_HARDCODED

hardcoded (fixed) codebook with per-block gain values

Definition: wmavoice.c:88

WMAVoiceContext::pb

PutBitContext pb

bitstream writer for sframe_cache

Definition: wmavoice.c:210

FCB_TYPE_AW_PULSES

@ FCB_TYPE_AW_PULSES

Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.

Definition: wmavoice.c:90

WMAVoiceContext::last_pitch_val

int last_pitch_val

pitch value of the previous frame

Definition: wmavoice.c:223

size

int size

Definition: twinvq_data.h:10344

wiener_denoise

static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

Definition: wmavoice.c:734

VLCElem

Definition: vlc.h:32

wmavoice_lsp10_intercoeff_b

static const float wmavoice_lsp10_intercoeff_b[32][2][10]

Definition: wmavoice_data.h:1852

range

enum AVColorRange range

Definition: mediacodec_wrapper.c:2594

dequant_lsp16i

static void dequant_lsp16i(GetBitContext *gb, double *lsps)

Parse 16 independently-coded LSPs.

Definition: wmavoice.c:965

wmavoice_dq_lsp16r1

static const uint8_t wmavoice_dq_lsp16r1[0x500]

Definition: wmavoice_data.h:1264

WMAVoiceContext::aw_first_pulse_off

int aw_first_pulse_off[2]

index of first sample to which to apply AW-pulses, or -0xff if unset

Definition: wmavoice.c:240

WMAVoiceContext::zero_exc_pf

float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]

zero filter output (i.e. excitation) by postfilter

Definition: wmavoice.c:275

sinewin.h

wmavoice_dq_lsp16r2

static const uint8_t wmavoice_dq_lsp16r2[0x500]

Definition: wmavoice_data.h:1395

offset

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset

Definition: writing_filters.txt:86

frame_type_desc

Description of frame types.

Definition: wmavoice.c:100

WMAVoiceContext::block_pitch_range

int block_pitch_range

range of the block pitch

Definition: wmavoice.c:170

stabilize_lsps

static void stabilize_lsps(double *lsps, int num)

Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...

Definition: wmavoice.c:1675

M_PI

#define M_PI

Definition: mathematics.h:67

ff_tilt_compensation

void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)

Apply tilt compensation filter, 1 - tilt * z-1.

Definition: acelp_filters.c:138

av_tx_uninit

av_cold void av_tx_uninit(AVTXContext **ctx)

Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.

Definition: tx.c:295

wmavoice_energy_table

static const float wmavoice_energy_table[128]

LUT for 1.071575641632 * pow(1.0331663, n - 127)

Definition: wmavoice_data.h:3026

ff_sine_window_init

void ff_sine_window_init(float *window, int n)

Generate a sine window.

Definition: sinewin_tablegen.h:59

wmavoice_decode_init

static av_cold int wmavoice_decode_init(AVCodecContext *ctx)

Set up decoder with parameters from demuxer (extradata etc.).

Definition: wmavoice.c:357

WMAVoiceContext::block_delta_pitch_hrange

int block_delta_pitch_hrange

1/2 range of the delta (full range is from -this to +this-1)

Definition: wmavoice.c:175

wmavoice_ipol2_coeffs

static const float wmavoice_ipol2_coeffs[32]

Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...

Definition: wmavoice_data.h:3012

WMAVoiceContext::pitch_diff_sh16

int pitch_diff_sh16

((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE

Definition: wmavoice.c:225

WMAVoiceContext::gain_pred_err

float gain_pred_err[6]

cache for gain prediction

Definition: wmavoice.c:251

WMAVoiceContext::rdft_fn

av_tx_fn rdft_fn

Definition: wmavoice.c:266

#define i(width, name, range_min, range_max)

Definition: cbs_h2645.c:256

WMAVoiceContext::nb_superframes

int nb_superframes

number of superframes in current packet

Definition: wmavoice.c:250

WMAVoiceContext::cos

float cos[511]

8-bit cosine/sine windows over [-pi,pi] range

Definition: wmavoice.c:269

WMAVoiceContext::denoise_tilt_corr

int denoise_tilt_corr

Whether to apply tilt correction to the Wiener filter coefficients (postfilter)

Definition: wmavoice.c:154

delta

float delta

Definition: vorbis_enc_data.h:430

wmavoice_lsp16_intercoeff_b

static const float wmavoice_lsp16_intercoeff_b[32][2][16]

Definition: wmavoice_data.h:2306

FFMIN

#define FFMIN(a, b)

Definition: macros.h:49

av_frame_unref

void av_frame_unref(AVFrame *frame)

Unreference all the buffers referenced by frame and reset the frame fields.

Definition: frame.c:496

acelp_filters.h

ff_weighted_vector_sumf

void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)

float implementation of weighted sum of two vectors.

Definition: acelp_vectors.c:182

WMAVoiceContext::lsp_def_mode

int lsp_def_mode

defines different sets of LSP defaults [0, 1]

Definition: wmavoice.c:161

wmavoice_gain_universal

static const float wmavoice_gain_universal[64]

Definition: wmavoice_data.h:2855

AVCodec::name

const char * name

Name of the codec implementation.

Definition: codec.h:179

len

int len

Definition: vorbis_enc_data.h:426

FCB_TYPE_EXC_PULSES

@ FCB_TYPE_EXC_PULSES

Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition: wmavoice.c:92

WMAVoiceContext::synth_filter_out_buf

float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]

aligned buffer for postfilter speech synthesis

Definition: wmavoice.c:283

tilt_factor

static float tilt_factor(const float *lpcs, int n_lpcs)

Get the tilt factor of a formant filter from its transfer function.

Definition: wmavoice.c:597

VLC_NBITS

#define VLC_NBITS

number of bits to read per VLC iteration

Definition: wmavoice.c:59

wmavoice_data.h

Windows Media Voice (WMAVoice) tables.

avcodec.h

WMAVoiceContext::min_pitch_val

int min_pitch_val

base value for pitch parsing code

Definition: wmavoice.c:164

WMAVoiceContext::last_acb_type

int last_acb_type

frame type [0-2] of the previous frame

Definition: wmavoice.c:224

WMAVoiceContext::dct

AVTXContext * dct

Definition: wmavoice.c:267

av_uninit

#define av_uninit(x)

Definition: attributes.h:174

ret

Definition: filter_design.txt:187

frame

these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame

Definition: filter_design.txt:265

lsp.h

ff_celp_lp_zero_synthesis_filterf

void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)

LP zero synthesis filter.

Definition: celp_filters.c:201

WMAVoiceContext::do_apf

int do_apf

whether to apply the averaged projection filter (APF)

Definition: wmavoice.c:150

pos

unsigned int pos

Definition: spdifenc.c:414

AMRFixed::n

int n

Definition: acelp_vectors.h:54

wmavoice_dq_lsp16i2

static const uint8_t wmavoice_dq_lsp16i2[0x3c0]

Definition: wmavoice_data.h:583

AV_INPUT_BUFFER_PADDING_SIZE

#define AV_INPUT_BUFFER_PADDING_SIZE

Definition: defs.h:40

wmavoice_mean_lsf16

static const double wmavoice_mean_lsf16[2][16]

Definition: wmavoice_data.h:2574

AV_RL32

uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32

Definition: bytestream.h:92

#define U(x)

Definition: vpx_arith.h:37

AV_TX_FLOAT_RDFT

@ AV_TX_FLOAT_RDFT

Real to complex and complex to real DFTs.

Definition: tx.h:90

wmavoice_decode_end

static av_cold int wmavoice_decode_end(AVCodecContext *ctx)

Definition: wmavoice.c:2007

WMAVoiceContext::lsps

int lsps

number of LSPs per frame [10 or 16]

Definition: wmavoice.c:159

AVCodecContext

main external API structure.

Definition: avcodec.h:431

wmavoice_decode_packet

static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...

Definition: wmavoice.c:1921

channel_layout.h

WMAVoiceContext::block_pitch_nbits

int block_pitch_nbits

number of bits used to specify the first block's pitch value

Definition: wmavoice.c:168

AV_TX_FLOAT_DST_I

@ AV_TX_FLOAT_DST_I

Discrete Sine Transform I.

Definition: tx.h:128

synth_superframe

static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)

Synthesize output samples for a single superframe.

Definition: wmavoice.c:1721

av_channel_layout_uninit

void av_channel_layout_uninit(AVChannelLayout *channel_layout)

Free any allocated data in the channel layout and reset the channel count to 0.

Definition: channel_layout.c:442

WMAVoiceContext::frame_cntr

int frame_cntr

current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()

Definition: wmavoice.c:248

wmavoice_ipol1_coeffs

static const float wmavoice_ipol1_coeffs[17 *9]

Definition: wmavoice_data.h:2960

values

Definition: filter_design.txt:264

ff_set_fixed_vector

void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)

Add fixed vector to an array from a sparse representation.

Definition: acelp_vectors.c:224

mean_lsf

static const float mean_lsf[10]

Definition: siprdata.h:27

AV_CODEC_CAP_DELAY

#define AV_CODEC_CAP_DELAY

Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...

Definition: codec.h:76

samples

Filter the word "frame" indicates either a video frame or a group of audio samples

Definition: filter_design.txt:8

copy_bits

static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)

Copy (unaligned) bits from gb/data/size to pb.

Definition: wmavoice.c:1892

synth_frame

static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)

Synthesize output samples for a single frame.

Definition: wmavoice.c:1494

mem.h

M_LN10

#define M_LN10

Definition: mathematics.h:49

WMAVoiceContext::gb

GetBitContext gb

packet bitreader.

Definition: wmavoice.c:137

avpriv_request_sample

#define avpriv_request_sample(...)

Definition: tableprint_vlc.h:37

synth_block_fcb_acb

static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)

Parse FCB/ACB signal for a single block.

Definition: wmavoice.c:1318

flush_put_bits

static void flush_put_bits(PutBitContext *s)

Pad the end of the output stream with zeros.

Definition: put_bits.h:153

AV_CHANNEL_LAYOUT_MONO

#define AV_CHANNEL_LAYOUT_MONO

Definition: channel_layout.h:394

VLC_INIT_STATIC_TABLE_FROM_LENGTHS

#define VLC_INIT_STATIC_TABLE_FROM_LENGTHS(vlc_table, nb_bits, nb_codes, lens, lens_wrap, syms, syms_wrap, syms_size, offset, flags)

Definition: vlc.h:288

scale

static void scale(int *out, const int *in, const int w, const int h, const int shift)

Definition: intra.c:273

alpha

static const int16_t alpha[]

Definition: ilbcdata.h:55

AVPacket

This structure stores compressed data.

Definition: packet.h:535

synth_block_hardcoded

static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)

Parse hardcoded signal for a single block.

Definition: wmavoice.c:1287

SFRAME_CACHE_MAXSIZE

#define SFRAME_CACHE_MAXSIZE

maximum cache size for frame data that

Definition: wmavoice.c:57

frame_type_vlc

static VLCElem frame_type_vlc[132]

Frame type VLC coding.

Definition: wmavoice.c:64

AMRFixed::pitch_lag

int pitch_lag