1 /*
2 * AMR wideband decoder
3 * Copyright (c) 2010 Marcelo Galvao Povoa
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A particular PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * AMR wideband decoder
25 */
26
31
40
41 #define AMR_USE_16BIT_TABLES
43
46
51 float isf_cur[
LP_ORDER];
///< working ISF vector from current frame
52 float isf_q_past[
LP_ORDER];
///< quantized ISF vector of the previous frame
53 float isf_past_final[
LP_ORDER];
///< final processed ISF vector of the previous frame
54 double isp[4][
LP_ORDER];
///< ISP vectors from current frame
55 double isp_sub4_past[
LP_ORDER];
///< ISP vector for the 4th subframe of the previous frame
56
57 float lp_coef[4][
LP_ORDER];
///< Linear Prediction Coefficients from ISP vector
58
61
63 float *
excitation;
///< points to current excitation in excitation_buf[]
64
65 float pitch_vector[
AMRWB_SFR_SIZE];
///< adaptive codebook (pitch) vector for current subframe
66 float fixed_vector[
AMRWB_SFR_SIZE];
///< algebraic codebook (fixed) vector for current subframe
67
68 float prediction_error[4];
///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes
69 float pitch_gain[6];
///< quantified pitch gains for the current and previous five subframes
70 float fixed_gain[2];
///< quantified fixed gains for the current and previous subframes
71
72 float tilt_coef;
///< {beta_1} related to the voicing of the previous subframe
73
76 float prev_tr_gain;
///< previous initial gain used by noise enhancer for threshold
77
81
82 float hpf_31_mem[2], hpf_400_mem[2];
///< previous values in the high pass filters
83 float demph_mem[1];
///< previous value in the de-emphasis filter
84 float bpf_6_7_mem[
HB_FIR_SIZE];
///< previous values in the high-band band pass filter
85 float lpf_7_mem[
HB_FIR_SIZE];
///< previous values in the high-band low pass filter
86
87 AVLFG prng;
///< random number generator for white noise excitation
93
95
97 {
99 int i;
100
104 }
105
111
113
116
119
120 for (i = 0; i < 4; i++)
122
127
128 return 0;
129 }
130
131 /**
132 * Decode the frame header in the "MIME/storage" format. This format
133 * is simpler and does not carry the auxiliary frame information.
134 *
135 * @param[in] ctx The Context
136 * @param[in] buf Pointer to the input buffer
137 *
138 * @return The decoded header length in bytes
139 */
141 {
142 /* Decode frame header (1st octet) */
145
146 return 1;
147 }
148
149 /**
150 * Decode quantized ISF vectors using 36-bit indexes (6K60 mode only).
151 *
152 * @param[in] ind Array of 5 indexes
153 * @param[out] isf_q Buffer for isf_q[LP_ORDER]
154 *
155 */
157 {
158 int i;
159
160 for (i = 0; i < 9; i++)
161 isf_q[i] =
dico1_isf[ind[0]][i] * (1.0f / (1 << 15));
162
163 for (i = 0; i < 7; i++)
164 isf_q[i + 9] =
dico2_isf[ind[1]][i] * (1.0f / (1 << 15));
165
166 for (i = 0; i < 5; i++)
168
169 for (i = 0; i < 4; i++)
171
172 for (i = 0; i < 7; i++)
174 }
175
176 /**
177 * Decode quantized ISF vectors using 46-bit indexes (except 6K60 mode).
178 *
179 * @param[in] ind Array of 7 indexes
180 * @param[out] isf_q Buffer for isf_q[LP_ORDER]
181 *
182 */
184 {
185 int i;
186
187 for (i = 0; i < 9; i++)
188 isf_q[i] =
dico1_isf[ind[0]][i] * (1.0f / (1 << 15));
189
190 for (i = 0; i < 7; i++)
191 isf_q[i + 9] =
dico2_isf[ind[1]][i] * (1.0f / (1 << 15));
192
193 for (i = 0; i < 3; i++)
194 isf_q[i] +=
dico21_isf[ind[2]][i] * (1.0f / (1 << 15));
195
196 for (i = 0; i < 3; i++)
197 isf_q[i + 3] +=
dico22_isf[ind[3]][i] * (1.0f / (1 << 15));
198
199 for (i = 0; i < 3; i++)
200 isf_q[i + 6] +=
dico23_isf[ind[4]][i] * (1.0f / (1 << 15));
201
202 for (i = 0; i < 3; i++)
203 isf_q[i + 9] +=
dico24_isf[ind[5]][i] * (1.0f / (1 << 15));
204
205 for (i = 0; i < 4; i++)
206 isf_q[i + 12] +=
dico25_isf[ind[6]][i] * (1.0f / (1 << 15));
207 }
208
209 /**
210 * Apply mean and past ISF values using the prediction factor.
211 * Updates past ISF vector.
212 *
213 * @param[in,out] isf_q Current quantized ISF
214 * @param[in,out] isf_past Past quantized ISF
215 *
216 */
218 {
219 int i;
220 float tmp;
221
223 tmp = isf_q[i];
224 isf_q[i] +=
isf_mean[i] * (1.0f / (1 << 15));
226 isf_past[i] = tmp;
227 }
228 }
229
230 /**
231 * Interpolate the fourth ISP vector from current and past frames
232 * to obtain an ISP vector for each subframe.
233 *
234 * @param[in,out] isp_q ISPs for each subframe
235 * @param[in] isp4_past Past ISP for subframe 4
236 */
238 {
239 int i, k;
240
241 for (k = 0; k < 3; k++) {
244 isp_q[k][i] = (1.0 - c) * isp4_past[i] + c * isp_q[3][i];
245 }
246 }
247
248 /**
249 * Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes).
250 * Calculate integer lag and fractional lag always using 1/4 resolution.
251 * In 1st and 3rd subframes the index is relative to last subframe integer lag.
252 *
253 * @param[out] lag_int Decoded integer pitch lag
254 * @param[out] lag_frac Decoded fractional pitch lag
255 * @param[in] pitch_index Adaptive codebook pitch index
256 * @param[in,out] base_lag_int Base integer lag used in relative subframes
257 * @param[in] subframe Current subframe index (0 to 3)
258 */
260 uint8_t *base_lag_int,
int subframe)
261 {
262 if (subframe == 0 || subframe == 2) {
263 if (pitch_index < 376) {
264 *lag_int = (pitch_index + 137) >> 2;
265 *lag_frac = pitch_index - (*lag_int << 2) + 136;
266 } else if (pitch_index < 440) {
267 *lag_int = (pitch_index + 257 - 376) >> 1;
268 *lag_frac = (pitch_index - (*lag_int << 1) + 256 - 376) << 1;
269 /* the actual resolution is 1/2 but expressed as 1/4 */
270 } else {
271 *lag_int = pitch_index - 280;
272 *lag_frac = 0;
273 }
274 /* minimum lag for next subframe */
275 *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0),
277 // XXX: the spec states clearly that *base_lag_int should be
278 // the nearest integer to *lag_int (minus 8), but the ref code
279 // actually always uses its floor, I'm following the latter
280 } else {
281 *lag_int = (pitch_index + 1) >> 2;
282 *lag_frac = pitch_index - (*lag_int << 2);
283 *lag_int += *base_lag_int;
284 }
285 }
286
287 /**
288 * Decode an adaptive codebook index into pitch lag for 8k85 and 6k60 modes.
289 * The description is analogous to decode_pitch_lag_high, but in 6k60 the
290 * relative index is used for all subframes except the first.
291 */
294 {
295 if (subframe == 0 || (subframe == 2 && mode !=
MODE_6k60)) {
296 if (pitch_index < 116) {
297 *lag_int = (pitch_index + 69) >> 1;
298 *lag_frac = (pitch_index - (*lag_int << 1) + 68) << 1;
299 } else {
300 *lag_int = pitch_index - 24;
301 *lag_frac = 0;
302 }
303 // XXX: same problem as before
304 *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0),
306 } else {
307 *lag_int = (pitch_index + 1) >> 1;
308 *lag_frac = (pitch_index - (*lag_int << 1)) << 1;
309 *lag_int += *base_lag_int;
310 }
311 }
312
313 /**
314 * Find the pitch vector by interpolating the past excitation at the
315 * pitch delay, which is obtained in this function.
316 *
317 * @param[in,out] ctx The context
318 * @param[in] amr_subframe Current subframe data
319 * @param[in] subframe Current subframe index (0 to 3)
320 */
323 const int subframe)
324 {
325 int pitch_lag_int, pitch_lag_frac;
326 int i;
329
333 } else
336
338 pitch_lag_int += pitch_lag_frac > 0;
339
340 /* Calculate the pitch vector by interpolating the past excitation at the
341 pitch lag using a hamming windowed sinc function */
343 exc + 1 - pitch_lag_int,
345 pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
347
348 /* Check which pitch signal path should be used
349 * 6k60 and 8k85 modes have the ltp flag set to 0 */
350 if (amr_subframe->
ltp) {
352 } else {
354 ctx->
pitch_vector[i] = 0.18 * exc[i - 1] + 0.64 * exc[i] +
355 0.18 * exc[i + 1];
356 memcpy(exc, ctx->
pitch_vector, AMRWB_SFR_SIZE *
sizeof(
float));
357 }
358 }
359
360 /** Get x bits in the index interval [lsb,lsb+len-1] inclusive */
361 #define BIT_STR(x,lsb,len) (((x) >> (lsb)) & ((1 << (len)) - 1))
362
363 /** Get the bit at specified position */
364 #define BIT_POS(x, p) (((x) >> (p)) & 1)
365
366 /**
367 * The next six functions decode_[i]p_track decode exactly i pulses
368 * positions and amplitudes (-1 or 1) in a subframe track using
369 * an encoded pulse indexing (TS 26.190 section 5.8.2).
370 *
371 * The results are given in out[], in which a negative number means
372 * amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) ).
373 *
374 * @param[out] out Output buffer (writes i elements)
375 * @param[in] code Pulse index (no. of bits varies, see below)
376 * @param[in] m (log2) Number of potential positions
377 * @param[in] off Offset for decoded positions
378 */
380 {
381 int pos =
BIT_STR(code, 0, m) +
off;
///code: m+1 bits
382
383 out[0] =
BIT_POS(code, m) ? -pos : pos;
384 }
385
387 {
390
391 out[0] =
BIT_POS(code, 2*m) ? -pos0 : pos0;
392 out[1] =
BIT_POS(code, 2*m) ? -pos1 : pos1;
393 out[1] = pos0 > pos1 ? -out[1] : out[1];
394 }
395
397 {
398 int half_2p =
BIT_POS(code, 2*m - 1) << (m - 1);
399
401 m - 1, off + half_2p);
403 }
404
406 {
407 int half_4p, subhalf_2p;
408 int b_offset = 1 << (m - 1);
409
410 switch (
BIT_STR(code, 4*m - 2, 2)) {
/* case ID (2 bits) */
411 case 0: /* 0 pulses in A, 4 pulses in B or vice versa */
412 half_4p =
BIT_POS(code, 4*m - 3) << (m - 1);
// which has 4 pulses
413 subhalf_2p =
BIT_POS(code, 2*m - 3) << (m - 2);
414
416 m - 2, off + half_4p + subhalf_2p);
418 m - 1, off + half_4p);
419 break;
420 case 1: /* 1 pulse in A, 3 pulses in B */
422 m - 1, off);
424 m - 1, off + b_offset);
425 break;
426 case 2: /* 2 pulses in each half */
428 m - 1, off);
430 m - 1, off + b_offset);
431 break;
432 case 3: /* 3 pulses in A, 1 pulse in B */
434 m - 1, off);
436 m - 1, off + b_offset);
437 break;
438 }
439 }
440
442 {
443 int half_3p =
BIT_POS(code, 5*m - 1) << (m - 1);
444
446 m - 1, off + half_3p);
447
449 }
450
452 {
453 int b_offset = 1 << (m - 1);
454 /* which half has more pulses in cases 0 to 2 */
455 int half_more =
BIT_POS(code, 6*m - 5) << (m - 1);
456 int half_other = b_offset - half_more;
457
458 switch (
BIT_STR(code, 6*m - 4, 2)) {
/* case ID (2 bits) */
459 case 0: /* 0 pulses in A, 6 pulses in B or vice versa */
461 m - 1, off + half_more);
463 m - 1, off + half_more);
464 break;
465 case 1: /* 1 pulse in A, 5 pulses in B or vice versa */
467 m - 1, off + half_other);
469 m - 1, off + half_more);
470 break;
471 case 2: /* 2 pulses in A, 4 pulses in B or vice versa */
473 m - 1, off + half_other);
475 m - 1, off + half_more);
476 break;
477 case 3: /* 3 pulses in A, 3 pulses in B */
479 m - 1, off);
481 m - 1, off + b_offset);
482 break;
483 }
484 }
485
486 /**
487 * Decode the algebraic codebook index to pulse positions and signs,
488 * then construct the algebraic codebook vector.
489 *
490 * @param[out] fixed_vector Buffer for the fixed codebook excitation
491 * @param[in] pulse_hi MSBs part of the pulse index array (higher modes only)
492 * @param[in] pulse_lo LSBs part of the pulse index array
493 * @param[in] mode Mode of the current frame
494 */
496 const uint16_t *pulse_lo,
const enum Mode mode)
497 {
498 /* sig_pos stores for each track the decoded pulse position indexes
499 * (1-based) multiplied by its corresponding amplitude (+1 or -1) */
500 int sig_pos[4][6];
501 int spacing = (mode ==
MODE_6k60) ? 2 : 4;
502 int i, j;
503
504 switch (mode) {
506 for (i = 0; i < 2; i++)
508 break;
510 for (i = 0; i < 4; i++)
512 break;
514 for (i = 0; i < 4; i++)
516 break;
518 for (i = 0; i < 2; i++)
520 for (i = 2; i < 4; i++)
522 break;
524 for (i = 0; i < 4; i++)
526 break;
528 for (i = 0; i < 4; i++)
530 ((int) pulse_hi[i] << 14), 4, 1);
531 break;
533 for (i = 0; i < 2; i++)
535 ((int) pulse_hi[i] << 10), 4, 1);
536 for (i = 2; i < 4; i++)
538 ((int) pulse_hi[i] << 14), 4, 1);
539 break;
542 for (i = 0; i < 4; i++)
544 ((int) pulse_hi[i] << 11), 4, 1);
545 break;
546 }
547
549
550 for (i = 0; i < 4; i++)
552 int pos = (
FFABS(sig_pos[i][j]) - 1) * spacing + i;
553
554 fixed_vector[pos] += sig_pos[i][j] < 0 ? -1.0 : 1.0;
555 }
556 }
557
558 /**
559 * Decode pitch gain and fixed gain correction factor.
560 *
561 * @param[in] vq_gain Vector-quantized index for gains
562 * @param[in] mode Mode of the current frame
563 * @param[out] fixed_gain_factor Decoded fixed gain correction factor
564 * @param[out] pitch_gain Decoded pitch gain
565 */
567 float *fixed_gain_factor, float *pitch_gain)
568 {
571
572 *pitch_gain = gains[0] * (1.0f / (1 << 14));
573 *fixed_gain_factor = gains[1] * (1.0f / (1 << 11));
574 }
575
576 /**
577 * Apply pitch sharpening filters to the fixed codebook vector.
578 *
579 * @param[in] ctx The context
580 * @param[in,out] fixed_vector Fixed codebook excitation
581 */
582 // XXX: Spec states this procedure should be applied when the pitch
583 // lag is less than 64, but this checking seems absent in reference and AMR-NB
585 {
586 int i;
587
588 /* Tilt part */
590 fixed_vector[i] -= fixed_vector[i - 1] * ctx->
tilt_coef;
591
592 /* Periodicity enhancement part */
594 fixed_vector[i] += fixed_vector[i - ctx->
pitch_lag_int] * 0.85;
595 }
596
597 /**
598 * Calculate the voicing factor (-1.0 = unvoiced to 1.0 = voiced).
599 *
600 * @param[in] p_vector, f_vector Pitch and fixed excitation vectors
601 * @param[in] p_gain, f_gain Pitch and fixed gains
602 * @param[in] ctx The context
603 */
604 // XXX: There is something wrong with the precision here! The magnitudes
605 // of the energies are not correct. Please check the reference code carefully
607 float *f_vector, float f_gain,
609 {
610 double p_ener = (double) ctx->
dot_productf(p_vector, p_vector,
612 p_gain * p_gain;
613 double f_ener = (double) ctx->
dot_productf(f_vector, f_vector,
615 f_gain * f_gain;
616
617 return (p_ener - f_ener) / (p_ener + f_ener);
618 }
619
620 /**
621 * Reduce fixed vector sparseness by smoothing with one of three IR filters,
622 * also known as "adaptive phase dispersion".
623 *
624 * @param[in] ctx The context
625 * @param[in,out] fixed_vector Unfiltered fixed vector
626 * @param[out] buf Space for modified vector if necessary
627 *
628 * @return The potentially overwritten filtered fixed vector address
629 */
631 float *fixed_vector,
float *
buf)
632 {
633 int ir_filter_nr;
634
636 return fixed_vector;
637
639 ir_filter_nr = 0; // strong filtering
641 ir_filter_nr = 1; // medium filtering
642 } else
643 ir_filter_nr = 2; // no filtering
644
645 /* detect 'onset' */
647 if (ir_filter_nr < 2)
648 ir_filter_nr++;
649 } else {
651
652 for (i = 0; i < 6; i++)
654 count++;
655
656 if (count > 2)
657 ir_filter_nr = 0;
658
660 ir_filter_nr--;
661 }
662
663 /* update ir filter strength history */
665
667
668 if (ir_filter_nr < 2) {
669 int i;
671
672 /* Circular convolution code in the reference
673 * decoder was modified to avoid using one
674 * extra array. The filtered vector is given by:
675 *
676 * c2(n) = sum(i,0,len-1){ c(i) * coef( (n - i + len) % len ) }
677 */
678
681 if (fixed_vector[i])
683 AMRWB_SFR_SIZE);
685 }
686
687 return fixed_vector;
688 }
689
690 /**
691 * Calculate a stability factor {teta} based on distance between
692 * current and past isf. A value of 1 shows maximum signal stability.
693 */
695 {
696 int i;
698
700 acc += (isf[i] - isf_past[i]) * (isf[i] - isf_past[i]);
701
702 // XXX: This part is not so clear from the reference code
703 // the result is more accurate changing the "/ 256" to "* 512"
704 return FFMAX(0.0, 1.25 - acc * 0.8 * 512);
705 }
706
707 /**
708 * Apply a non-linear fixed gain smoothing in order to reduce
709 * fluctuation in the energy of excitation.
710 *
711 * @param[in] fixed_gain Unsmoothed fixed gain
712 * @param[in,out] prev_tr_gain Previous threshold gain (updated)
713 * @param[in] voice_fac Frame voicing factor
714 * @param[in] stab_fac Frame stability factor
715 *
716 * @return The smoothed gain
717 */
719 float voice_fac, float stab_fac)
720 {
721 float sm_fac = 0.5 * (1 - voice_fac) * stab_fac;
722 float g0;
723
724 // XXX: the following fixed-point constants used to in(de)crement
725 // gain by 1.5dB were taken from the reference code, maybe it could
726 // be simpler
727 if (fixed_gain < *prev_tr_gain) {
728 g0 =
FFMIN(*prev_tr_gain, fixed_gain + fixed_gain *
729 (6226 * (1.0f / (1 << 15)))); // +1.5 dB
730 } else
731 g0 =
FFMAX(*prev_tr_gain, fixed_gain *
732 (27536 * (1.0f / (1 << 15)))); // -1.5 dB
733
734 *prev_tr_gain = g0; // update next frame threshold
735
736 return sm_fac * g0 + (1 - sm_fac) * fixed_gain;
737 }
738
739 /**
740 * Filter the fixed_vector to emphasize the higher frequencies.
741 *
742 * @param[in,out] fixed_vector Fixed codebook vector
743 * @param[in] voice_fac Frame voicing factor
744 */
746 {
747 int i;
748 float cpe = 0.125 * (1 + voice_fac);
749 float last = fixed_vector[0]; // holds c(i - 1)
750
751 fixed_vector[0] -= cpe * fixed_vector[1];
752
754 float cur = fixed_vector[i];
755
756 fixed_vector[i] -= cpe * (last + fixed_vector[i + 1]);
757 last = cur;
758 }
759
760 fixed_vector[AMRWB_SFR_SIZE - 1] -= cpe * last;
761 }
762
763 /**
764 * Conduct 16th order linear predictive coding synthesis from excitation.
765 *
766 * @param[in] ctx Pointer to the AMRWBContext
767 * @param[in] lpc Pointer to the LPC coefficients
768 * @param[out] excitation Buffer for synthesis final excitation
769 * @param[in] fixed_gain Fixed codebook gain for synthesis
770 * @param[in] fixed_vector Algebraic codebook vector
771 * @param[in,out] samples Pointer to the output samples and memory
772 */
774 float fixed_gain, const float *fixed_vector,
775 float *samples)
776 {
779
780 /* emphasize pitch vector contribution in low bitrate modes */
782 int i;
785
786 // XXX: Weird part in both ref code and spec. A unknown parameter
787 // {beta} seems to be identical to the current pitch gain
789
792
794 energy, AMRWB_SFR_SIZE);
795 }
796
799 }
800
801 /**
802 * Apply to synthesis a de-emphasis filter of the form:
803 * H(z) = 1 / (1 - m * z^-1)
804 *
805 * @param[out] out Output buffer
806 * @param[in] in Input samples array with in[-1]
807 * @param[in] m Filter coefficient
808 * @param[in,out] mem State from last filtering
809 */
811 {
812 int i;
813
814 out[0] = in[0] + m * mem[0];
815
817 out[i] = in[i] + out[i - 1] * m;
818
819 mem[0] = out[AMRWB_SFR_SIZE - 1];
820 }
821
822 /**
823 * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
824 * a FIR interpolation filter. Uses past data from before *in address.
825 *
826 * @param[out] out Buffer for interpolated signal
827 * @param[in] in Current signal data (length 0.8*o_size)
828 * @param[in] o_size Output signal length
829 * @param[in] ctx The context
830 */
832 {
834 int i, j, k;
835 int int_part = 0, frac_part;
836
837 i = 0;
838 for (j = 0; j < o_size / 5; j++) {
839 out[i] = in[int_part];
840 frac_part = 4;
841 i++;
842
843 for (k = 1; k < 5; k++) {
847 int_part++;
848 frac_part--;
849 i++;
850 }
851 }
852 }
853
854 /**
855 * Calculate the high-band gain based on encoded index (23k85 mode) or
856 * on the low-band speech signal and the Voice Activity Detection flag.
857 *
858 * @param[in] ctx The context
859 * @param[in] synth LB speech synthesis at 12.8k
860 * @param[in] hb_idx Gain index for mode 23k85 only
861 * @param[in] vad VAD flag for the frame
862 */
865 {
866 int wsp = (vad > 0);
867 float tilt;
868
871
874
875 /* return gain bounded by [0.1, 1.0] */
876 return av_clipf((1.0 -
FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
877 }
878
879 /**
880 * Generate the high-band excitation with the same energy from the lower
881 * one and scaled by the given gain.
882 *
883 * @param[in] ctx The context
884 * @param[out] hb_exc Buffer for the excitation
885 * @param[in] synth_exc Low-band excitation used for synthesis
886 * @param[in] hb_gain Wanted excitation gain
887 */
889 const float *synth_exc, float hb_gain)
890 {
891 int i;
894
895 /* Generate a white-noise excitation */
898
900 energy * hb_gain * hb_gain,
901 AMRWB_SFR_SIZE_16k);
902 }
903
904 /**
905 * Calculate the auto-correlation for the ISF difference vector.
906 */
908 {
909 int i;
910 float sum = 0.0;
911
912 for (i = 7; i <
LP_ORDER - 2; i++) {
913 float prod = (diff_isf[i] - mean) * (diff_isf[i - lag] - mean);
914 sum += prod * prod;
915 }
916 return sum;
917 }
918
919 /**
920 * Extrapolate a ISF vector to the 16kHz range (20th order LP)
921 * used at mode 6k60 LP filter for the high frequency band.
922 *
923 * @param[out] isf Buffer for extrapolated isf; contains LP_ORDER
924 * values on input
925 */
927 {
928 float diff_isf[
LP_ORDER - 2], diff_mean;
929 float corr_lag[3];
931 int i, j, i_max_corr;
932
933 isf[LP_ORDER_16k - 1] = isf[
LP_ORDER - 1];
934
935 /* Calculate the difference vector */
937 diff_isf[i] = isf[i + 1] - isf[i];
938
939 diff_mean = 0.0;
940 for (i = 2; i < LP_ORDER - 2; i++)
941 diff_mean += diff_isf[i] * (1.0f / (LP_ORDER - 4));
942
943 /* Find which is the maximum autocorrelation */
944 i_max_corr = 0;
945 for (i = 0; i < 3; i++) {
947
948 if (corr_lag[i] > corr_lag[i_max_corr])
949 i_max_corr = i;
950 }
951 i_max_corr++;
952
953 for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++)
954 isf[i] = isf[i - 1] + isf[i - 1 - i_max_corr]
955 - isf[i - 2 - i_max_corr];
956
957 /* Calculate an estimate for ISF(18) and scale ISF based on the error */
958 est = 7965 + (isf[2] - isf[3] - isf[4]) / 6.0;
959 scale = 0.5 * (
FFMIN(est, 7600) - isf[LP_ORDER - 2]) /
960 (isf[LP_ORDER_16k - 2] - isf[LP_ORDER - 2]);
961
962 for (i = LP_ORDER - 1, j = 0; i < LP_ORDER_16k - 1; i++, j++)
963 diff_isf[j] = scale * (isf[i] - isf[i - 1]);
964
965 /* Stability insurance */
966 for (i = 1; i < LP_ORDER_16k -
LP_ORDER; i++)
967 if (diff_isf[i] + diff_isf[i - 1] < 5.0) {
968 if (diff_isf[i] > diff_isf[i - 1]) {
969 diff_isf[i - 1] = 5.0 - diff_isf[i];
970 } else
971 diff_isf[i] = 5.0 - diff_isf[i - 1];
972 }
973
974 for (i = LP_ORDER - 1, j = 0; i < LP_ORDER_16k - 1; i++, j++)
975 isf[i] = isf[i - 1] + diff_isf[j] * (1.0f / (1 << 15));
976
977 /* Scale the ISF vector for 16000 Hz */
978 for (i = 0; i < LP_ORDER_16k - 1; i++)
979 isf[i] *= 0.8;
980 }
981
982 /**
983 * Spectral expand the LP coefficients using the equation:
984 * y[i] = x[i] * (gamma ** i)
985 *
986 * @param[out] out Output buffer (may use input array)
987 * @param[in] lpc LP coefficients array
988 * @param[in] gamma Weighting factor
989 * @param[in] size LP array size
990 */
992 {
993 int i;
994 float fac = gamma;
995
996 for (i = 0; i <
size; i++) {
997 out[i] = lpc[i] * fac;
998 fac *= gamma;
999 }
1000 }
1001
1002 /**
1003 * Conduct 20th order linear predictive coding synthesis for the high
1004 * frequency band excitation at 16kHz.
1005 *
1006 * @param[in] ctx The context
1007 * @param[in] subframe Current subframe index (0 to 3)
1008 * @param[in,out] samples Pointer to the output speech samples
1009 * @param[in] exc Generated white-noise scaled excitation
1010 * @param[in] isf Current frame isf vector
1011 * @param[in] isf_past Past frame final isf vector
1012 */
1014 const float *exc, const float *isf, const float *isf_past)
1015 {
1018
1020 float e_isf[
LP_ORDER_16k];
// ISF vector for extrapolation
1022
1025
1027
1031
1033 } else {
1035 }
1036
1039 }
1040
1041 /**
1042 * Apply a 15th order filter to high-band samples.
1043 * The filter characteristic depends on the given coefficients.
1044 *
1045 * @param[out] out Buffer for filtered output
1046 * @param[in] fir_coef Filter coefficients
1047 * @param[in,out] mem State from last filtering (updated)
1048 * @param[in] in Input speech data (high-band)
1049 *
1050 * @remark It is safe to pass the same array in in and out parameters
1051 */
1052
1053 #ifndef hb_fir_filter
1056 {
1057 int i, j;
1059
1060 memcpy(data,
mem, HB_FIR_SIZE *
sizeof(
float));
1062
1064 out[i] = 0.0;
1066 out[i] += data[i + j] * fir_coef[j];
1067 }
1068
1069 memcpy(
mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE *
sizeof(
float));
1070 }
1071 #endif /* hb_fir_filter */
1072
1073 /**
1074 * Update context state before the next subframe.
1075 */
1077 {
1080
1083
1090 }
1091
1093 int *got_frame_ptr,
AVPacket *avpkt)
1094 {
1099 int buf_size = avpkt->
size;
1100 int expected_fr_size, header_size;
1101 float *buf_out;
1102 float spare_vector[
AMRWB_SFR_SIZE];
// extra stack space to hold result from anti-sparseness processing
1103 float fixed_gain_factor; // fixed gain correction factor (gamma)
1104 float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
1105 float synth_fixed_gain; // the fixed gain that synthesis should use
1106 float voice_fac, stab_fac; // parameters used for gain smoothing
1107 float synth_exc[
AMRWB_SFR_SIZE];
// post-processed excitation for synthesis
1110 float hb_gain;
1112
1113 /* get output buffer */
1117 buf_out = (
float *)frame->
data[0];
1118
1124 }
1126
1127 if (buf_size < expected_fr_size) {
1129 "Frame too small (%d bytes). Truncated file?\n", buf_size);
1130 *got_frame_ptr = 0;
1132 }
1133
1136
1140 }
1141
1144
1145 /* Decode the quantized ISF vector */
1148 } else {
1150 }
1151
1154
1156
1159
1160 /* Generate a ISP vector for each subframe */
1164 }
1166
1167 for (sub = 0; sub < 4; sub++)
1169
1170 for (sub = 0; sub < 4; sub++) {
1173
1174 /* Decode adaptive codebook (pitch vector) */
1176 /* Decode innovative codebook (fixed vector) */
1179
1181
1184
1193
1194 /* Calculate voice factor and store tilt for next subframe */
1198 ctx->
tilt_coef = voice_fac * 0.25 + 0.25;
1199
1200 /* Construct current excitation */
1205 }
1206
1207 /* Post-processing of excitation elements */
1209 voice_fac, stab_fac);
1210
1212 spare_vector);
1213
1215
1218
1219 /* Synthesis speech post-processing */
1222
1226
1229
1230 /* High frequency band (6.4 - 7.0 kHz) generation part */
1234
1237
1239
1242
1243 /* High-band post-processing filters */
1246
1249 hb_samples);
1250
1251 /* Add the low and high frequency bands */
1253 sub_buf[i] = (sub_buf[i] + hb_samples[i]) * (1.0f / (1 << 15));
1254
1255 /* Update buffers and history */
1257 }
1258
1259 /* update state for next frame */
1262
1263 *got_frame_ptr = 1;
1264
1265 return expected_fr_size;
1266 }
1267
1279 };