1 /*
2 * Copyright 2002-2008 Xiph.org Foundation
3 * Copyright 2002-2008 Jean-Marc Valin
4 * Copyright 2005-2007 Analog Devices Inc.
5 * Copyright 2005-2008 Commonwealth Scientific and Industrial Research Organisation (CSIRO)
6 * Copyright 1993, 2002, 2006 David Rowe
7 * Copyright 2003 EpicGames
8 * Copyright 1992-1994 Jutta Degener, Carsten Bormann
9
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16
17 * - Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20
21 * - Neither the name of the Xiph.org Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * This file is part of FFmpeg.
38 *
39 * FFmpeg is free software; you can redistribute it and/or
40 * modify it under the terms of the GNU Lesser General Public
41 * License as published by the Free Software Foundation; either
42 * version 2.1 of the License, or (at your option) any later version.
43 *
44 * FFmpeg is distributed in the hope that it will be useful,
45 * but WITHOUT ANY WARRANTY; without even the implied warranty of
46 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
47 * Lesser General Public License for more details.
48 *
49 * You should have received a copy of the GNU Lesser General Public
50 * License along with FFmpeg; if not, write to the Free Software
51 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
52 */
53
64
65 #define SPEEX_NB_MODES 3
66 #define SPEEX_INBAND_STEREO 9
67
70 #define NB_FRAME_SIZE 160
72 #define NB_SUBMODE_BITS 4
73 #define SB_SUBMODE_BITS 3
74
75 #define NB_SUBFRAME_SIZE 40
76 #define NB_NB_SUBFRAMES 4
77 #define NB_PITCH_START 17
78 #define NB_PITCH_END 144
79
80 #define NB_DEC_BUFFER (NB_FRAME_SIZE + 2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 12)
81
/* Element-count wrappers around memset()/memcpy(): n is a number of
 * elements of type *dst, not a byte count (it is multiplied by
 * sizeof(*(dst))). Note that dst is expanded twice. */
82 #define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n) * sizeof(*(dst))))
83 #define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n) * sizeof(*(dst))))
84
/* Float helpers: two linear maps of an index (0.25 * i + 0.25 and
 * 0.3125 * i + 0.75) and three scalings by exact powers of two
 * (1/256, 1/512 and 1/1024), written as multiplications. */
85 #define LSP_LINEAR(i) (.25f * (i) + .25f)
86 #define LSP_LINEAR_HIGH(i) (.3125f * (i) + .75f)
87 #define LSP_DIV_256(x) (0.00390625f * (x))
88 #define LSP_DIV_512(x) (0.001953125f * (x))
89 #define LSP_DIV_1024(x) (0.0009765625f * (x))
90
95 } LtpParam;
96
101
109
118
119 /** Quantizes LSPs */
121
122 /** Decodes quantized LSPs */
124
125 /** Long-term predictor quantization */
127 float *, float *, float *,
128 const void *, int, int,
float, int, int,
130 float *, int, int, int, float *);
131
132 /** Long-term un-quantize */
134 float,
const void *, int,
int *,
137
138 /** Innovation quantization function */
140 float *, float *, const void *,
141 int, int, float *, float *,
143
144 /** Innovation unquantization function */
147
149 int lbr_pitch;
/**< Set to -1 for "normal" modes, otherwise encode pitch using
150 a global pitch and allowing a +- lbr_pitch variation (for
151 low bit-rates)*/
153 sub-frames */
155 gain */
157 quality (and higher bit-rate)*/
159
161 const void *
LtpParam;
/**< Pitch parameters (options) */
162
165
168
179
182 int modeID;
/**< ID of the decoder mode */
190 int lpc_size;
/**< Order of high-band LPC analysis */
192 float *
innov_save;
/**< If non-NULL, innovation is copied here */
193
194 /* This is used in packet loss concealment */
195 int last_pitch;
/**< Pitch of last correctly decoded frame */
197 uint32_t
seed;
/**< Seed used for random number generation */
198
203
204 /* Vocoder data */
209
212
213 float *
exc;
/**< Start of excitation frame */
214 float mem_hp[2];
/**< High-pass filter memory */
224
225 /* Default handler for user callbacks: skip it */
227 {
228 const int req_size =
get_bits(gb, 4);
230 return 0;
231 }
232
234 float balance;
/**< Left/right balance info */
235 float e_ratio;
/**< Ratio of energies: E(left+right)/[E(left)+E(right)] */
239
243
246 int32_t mode;
/**< Mode used (0 for narrowband, 1 for wideband) */
254
256
259
262
264 {
266
267 for (
int i = 0;
i < order;
i++)
269
271 for (
int i = 0;
i < 10;
i++)
273
275 for (
int i = 0;
i < 5;
i++)
277
279 for (
int i = 0;
i < 5;
i++)
281 }
282
284 float pitch_coef, const void *par, int nsf,
285 int *pitch_val,
float *gain_val,
GetBitContext *gb,
int count_lost,
286 int subframe_offset, float last_pitch_gain, int cdbk_offset)
287 {
289 pitch_coef =
fminf(pitch_coef, .99
f);
290 for (
int i = 0;
i < nsf;
i++) {
291 exc_out[
i] = exc[
i - start] * pitch_coef;
293 }
294 pitch_val[0] = start;
295 gain_val[0] = gain_val[2] = 0.f;
296 gain_val[1] = pitch_coef;
297 }
298
300 {
301 const uint32_t jflone = 0x3f800000;
302 const uint32_t jflmsk = 0x007fffff;
303 float fran;
305 seed[0] = 1664525 *
seed[0] + 1013904223;
306 ran = jflone | (jflmsk &
seed[0]);
308 fran -= 1.5f;
309 fran *= std;
310 return fran;
311 }
312
315 {
316 for (
int i = 0;
i < nsf;
i++)
318 }
319
322 {
323 int subvect_size, nb_subvect, have_sign, shape_bits;
325 const signed char *shape_cb;
326 int signs[10], ind[10];
327
328 params = par;
331
335
336 /* Decode codewords and gains */
337 for (
int i = 0;
i < nb_subvect;
i++) {
340 }
341 /* Compute decoded excitation */
342 for (
int i = 0;
i < nb_subvect;
i++) {
343 const float s = signs[
i] ? -1.f : 1.f;
344
345 for (int j = 0; j < subvect_size; j++)
346 exc[subvect_size *
i + j] +=
s * 0.03125
f * shape_cb[ind[
i] * subvect_size + j];
347 }
348 }
349
350 #define SUBMODE(x) st->submodes[st->submodeID]->x
351
352 #define gain_3tap_to_1tap(g) (FFABS(g[1]) + (g[0] > 0.f ? g[0] : -.5f * g[0]) + (g[2] > 0.f ? g[2] : -.5f * g[2]))
353
354 static void
356 const void *par,
int nsf,
int *pitch_val,
float *gain_val,
GetBitContext *gb,
357 int count_lost, int subframe_offset, float last_pitch_gain, int cdbk_offset)
358 {
359 int pitch, gain_index, gain_cdbk_size;
360 const int8_t *gain_cdbk;
361 const LtpParam *params;
362 float gain[3];
363
364 params = (const LtpParam *)par;
365 gain_cdbk_size = 1 << params->gain_bits;
366 gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset;
367
368 pitch =
get_bitsz(gb, params->pitch_bits);
369 pitch += start;
370 gain_index =
get_bitsz(gb, params->gain_bits);
371 gain[0] = 0.015625f * gain_cdbk[gain_index * 4] + .5f;
372 gain[1] = 0.015625f * gain_cdbk[gain_index * 4 + 1] + .5f;
373 gain[2] = 0.015625f * gain_cdbk[gain_index * 4 + 2] + .5f;
374
375 if (count_lost && pitch > subframe_offset) {
376 float tmp = count_lost < 4 ? last_pitch_gain : 0.5f * last_pitch_gain;
377 float gain_sum;
378
381
382 if (gain_sum >
tmp && gain_sum > 0.
f) {
384 for (
int i = 0;
i < 3;
i++)
386 }
387 }
388
389 pitch_val[0] = pitch;
390 gain_val[0] = gain[0];
391 gain_val[1] = gain[1];
392 gain_val[2] = gain[2];
394
395 for (
int i = 0;
i < 3;
i++) {
396 int tmp1, tmp3;
397 int pp = pitch + 1 -
i;
398 tmp1 = nsf;
399 if (tmp1 > pp)
400 tmp1 = pp;
401 for (int j = 0; j < tmp1; j++)
402 exc_out[j] += gain[2 -
i] * exc[j - pp];
403 tmp3 = nsf;
404 if (tmp3 > pp + pitch)
405 tmp3 = pp + pitch;
406 for (int j = tmp1; j < tmp3; j++)
407 exc_out[j] += gain[2 -
i] * exc[j - pp - pitch];
408 }
409 }
410
412 {
414
415 for (
int i = 0;
i < order;
i++)
417
419 for (
int i = 0;
i < 10;
i++)
421
423 for (
int i = 0;
i < 5;
i++)
425
427 for (
int i = 0;
i < 5;
i++)
429
431 for (
int i = 0;
i < 5;
i++)
433
435 for (
int i = 0;
i < 5;
i++)
437 }
438
440 {
442
443 for (
int i = 0;
i < order;
i++)
445
447 for (
int i = 0;
i < order;
i++)
449
451 for (
int i = 0;
i < order;
i++)
453 }
454
455 /* 2150 bps "vocoder-like" mode for comfort noise */
459 };
460
461 /* 5.95 kbps very low bit-rate mode */
465 };
466
467 /* 8 kbps low bit-rate mode */
471 };
472
473 /* 11 kbps medium bit-rate mode */
477 };
478
479 /* 15 kbps high bit-rate mode */
483 };
484
485 /* 18.2 kbps high bit-rate mode */
489 };
490
491 /* 24.6 kbps high bit-rate mode */
495 };
496
497 /* 3.95 kbps very low bit-rate mode */
501 };
502
506 };
507
511 };
512
516 };
517
521 };
522
525
527 {
533 .submodes = {
536 },
537 .default_submode = 5,
538 },
539 {
540 .modeID = 1,
544 .lpc_size = 8,
545 .folding_gain = 0.9f,
546 .submodes = {
548 },
549 .default_submode = 3,
550 },
551 {
552 .modeID = 2,
554 .frame_size = 320,
555 .subframe_size = 80,
556 .lpc_size = 8,
557 .folding_gain = 0.7f,
558 .submodes = {
560 },
561 .default_submode = 1,
562 },
563 };
564
566 {
567 float sum = 0.f;
568
569 for (
int i = 0;
i <
len;
i++)
571
574 }
575
/**
 * Scale each LPC coefficient by an increasing power of gamma.
 *
 * Computes lpc_out[i] = lpc_in[i] * gamma^(i + 1) for i in [0, order).
 *
 * @param gamma   per-coefficient weighting factor
 * @param lpc_in  input coefficients, at least `order` entries
 * @param lpc_out output coefficients, at least `order` entries; may alias
 *                lpc_in because each element is read before it is written
 * @param order   number of coefficients to process; nothing is written
 *                when it is <= 0
 */
static void bw_lpc(float gamma, const float *lpc_in,
                   float *lpc_out, int order)
{
    /* Running power of gamma: gamma^1 for i == 0, gamma^2 for i == 1, ... */
    float tmp = gamma;

    for (int i = 0; i < order; i++) {
        lpc_out[i] = tmp * lpc_in[i];
        tmp *= gamma;
    }
}
586
/**
 * All-pole (IIR) filter with persistent state.
 *
 * For every input sample the output is yi = x[i] + mem[0]; the state is
 * then shifted down by one slot while -yi * den[j] is added to slot j.
 *
 * @param x   input samples, N entries
 * @param den denominator coefficients, `ord` entries
 * @param y   output samples, N entries; may alias x because x[i] is read
 *            before y[i] is stored
 * @param N   number of samples to process
 * @param ord filter order; must be >= 1 since mem[ord - 1] is written
 * @param mem filter state, `ord` entries, updated in place so that
 *            consecutive calls continue the same filter
 */
static void iir_mem(const float *x, const float *den,
                    float *y, int N, int ord, float *mem)
{
    for (int i = 0; i < N; i++) {
        float yi  = x[i] + mem[0];
        float nyi = -yi;

        for (int j = 0; j < ord - 1; j++)
            mem[j] = mem[j + 1] + den[j] * nyi;
        mem[ord - 1] = den[ord - 1] * nyi;

        /* Store the filtered sample; without this the output buffer is
         * never written. */
        y[i] = yi;
    }
}
599
/**
 * Second-order high-pass filter with persistent state.
 *
 * Uses one of two fixed coefficient sets selected by `wide` and a two-tap
 * state: yi = num[0] * x[i] + mem[0], after which mem[0] and mem[1] are
 * updated from x[i] and yi.
 *
 * @param x    input samples, `len` entries
 * @param y    output samples, `len` entries; may alias x because x[i] is
 *             fully consumed before y[i] is stored
 * @param len  number of samples to process
 * @param mem  two-element filter state, updated in place
 * @param wide coefficient set selector; must be 0 or 1 (indexes a
 *             two-row table)
 */
static void highpass(const float *x, float *y, int len, float *mem, int wide)
{
    static const float Pcoef[2][3] = {{ 1.00000f, -1.92683f, 0.93071f }, { 1.00000f, -1.97226f, 0.97332f } };
    static const float Zcoef[2][3] = {{ 0.96446f, -1.92879f, 0.96446f }, { 0.98645f, -1.97277f, 0.98645f } };
    const float *den = Pcoef[wide];
    const float *num = Zcoef[wide];

    for (int i = 0; i < len; i++) {
        float yi = num[0] * x[i] + mem[0];

        mem[0] = mem[1] + num[1] * x[i] + -den[1] * yi;
        mem[1] = num[2] * x[i] + -den[2] * yi;

        /* Store the filtered sample; without this the output buffer is
         * never written. */
        y[i] = yi;
    }
}
615
/* Median (middle value) of three values, using only `<` comparisons.
 * Each argument can be evaluated more than once, so callers should pass
 * expressions without side effects. */
616 #define median3(a, b, c) \
617 ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) \
618 : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
619
621 {
624
627
628 return 0;
629 }
630
632 {
634
637 } else {
638 int adv;
639
640 if (id < 2)
641 adv = 1;
642 else if (id < 8)
643 adv = 4;
644 else if (id < 10)
645 adv = 8;
646 else if (id < 12)
647 adv = 16;
648 else if (id < 14)
649 adv = 32;
650 else
651 adv = 64;
653 }
654 return 0;
655 }
656
658 {
659 for (
int i = 0;
i <
len;
i++) {
660 if (!isnormal(vec[
i]) ||
fabsf(vec[
i]) < 1e-8
f)
662 else
664 }
665 }
666
668 {
669 for (
int i = 0;
i <
len;
i++)
671 }
672
674 {
675 float sum = 0.f;
676
677 for (
int i = 0;
i <
len;
i += 8) {
678 float part = 0.f;
679 part += x[
i + 0] * y[
i + 0];
680 part += x[
i + 1] * y[
i + 1];
681 part += x[
i + 2] * y[
i + 2];
682 part += x[
i + 3] * y[
i + 3];
683 part += x[
i + 4] * y[
i + 4];
684 part += x[
i + 5] * y[
i + 5];
685 part += x[
i + 6] * y[
i + 6];
686 part += x[
i + 7] * y[
i + 7];
687 sum += part;
688 }
689
690 return sum;
691 }
692
694 {
695 float corr[4][7], maxcorr;
696 int maxi, maxj;
697
698 for (
int i = 0;
i < 7;
i++)
700 for (
int i = 0;
i < 3;
i++) {
701 for (int j = 0; j < 7; j++) {
702 int i1, i2;
704
705 i1 = 3 - j;
706 if (i1 < 0)
707 i1 = 0;
708 i2 = 10 - j;
709 if (i2 > 7)
710 i2 = 7;
711 for (int k = i1; k < i2; k++)
713 corr[
i + 1][j] =
tmp;
714 }
715 }
716 maxi = maxj = 0;
717 maxcorr = corr[0][0];
718 for (
int i = 0;
i < 4;
i++) {
719 for (int j = 0; j < 7; j++) {
720 if (corr[
i][j] > maxcorr) {
721 maxcorr = corr[
i][j];
723 maxj = j;
724 }
725 }
726 }
727 for (
int i = 0;
i <
len;
i++) {
730 for (int k = 0; k < 7; k++)
731 tmp += exc[
i - (pitch - maxj + 3) + k - 3] *
shift_filt[maxi - 1][k];
732 } else {
733 tmp = exc[
i - (pitch - maxj + 3)];
734 }
736 }
737 return pitch - maxj + 3;
738 }
739
/*
 * Builds new_exc[0..nsf-1] from exc[] plus two weighted contributions taken
 * from a work buffer iexc[] (entries i and i + nsf).
 *
 * NOTE(review): several statements of this function are not present in the
 * visible text: the declaration and filling of iexc[], the computation of
 * corr0/corr1, exc_mag, iexc0_mag/iexc1_mag, old_ener/new_ener, the bodies
 * of the first if/else on corr_pitch, the else branch that should set c2
 * when comb_gain <= 0, and the body of the final loop. The parameters ak
 * and p are not referenced by any visible line. Confirm against the
 * complete source before relying on the notes below.
 */
740 static void multicomb(
const float *exc,
float *new_exc,
float *ak,
int p,
int nsf,
741 int pitch, int max_pitch, float comb_gain)
742 {
743 float old_ener, new_ener;
744 float iexc0_mag, iexc1_mag, exc_mag;
746 float corr0, corr1, gain0, gain1;
747 float pgain1, pgain2;
748 float c1,
c2, g1, g2;
749 float ngain, gg1, gg2;
750 int corr_pitch = pitch;
751
753 if (corr_pitch > max_pitch)
755 else
757
/* pgainN = corrN / (exc_mag * iexcN_mag), capped at 1. */
763 if (corr0 > iexc0_mag * exc_mag)
764 pgain1 = 1.f;
765 else
766 pgain1 = (corr0 / exc_mag) / iexc0_mag;
767 if (corr1 > iexc1_mag * exc_mag)
768 pgain2 = 1.f;
769 else
770 pgain2 = (corr1 / exc_mag) / iexc1_mag;
/* Magnitude ratios between exc and each iexc half. */
771 gg1 = exc_mag / iexc0_mag;
772 gg2 = exc_mag / iexc1_mag;
773 if (comb_gain > 0.
f) {
774 c1 = .4f * comb_gain + .07f;
775 c2 = .5f + 1.72f * (
c1 - .07f);
776 } else {
778 }
/* g1/g2 shrink as the corresponding pgain approaches 1. */
779 g1 = 1.f -
c2 * pgain1 * pgain1;
780 g2 = 1.f -
c2 * pgain2 * pgain2;
785
/* Weight the two contributions .7/.3 when corr_pitch exceeds max_pitch,
 * otherwise .6/.6. */
786 if (corr_pitch > max_pitch) {
787 gain0 = .7f * g1 * gg1;
788 gain1 = .3f * g2 * gg2;
789 } else {
790 gain0 = .6f * g1 * gg1;
791 gain1 = .6f * g2 * gg2;
792 }
793 for (
int i = 0;
i < nsf;
i++)
794 new_exc[
i] = exc[
i] + (gain0 * iexc[
i]) + (gain1 * iexc[
i + nsf]);
797
/* Both energies are floored at 1 and old_ener is limited to new_ener, so
 * ngain never exceeds 1. */
798 old_ener =
fmaxf(old_ener, 1.
f);
799 new_ener =
fmaxf(new_ener, 1.
f);
800 old_ener =
fminf(old_ener, new_ener);
801 ngain = old_ener / new_ener;
802
803 for (
int i = 0;
i < nsf;
i++)
805 }
806
808 float *lsp,
int len,
int subframe,
809 int nb_subframes, float margin)
810 {
811 const float tmp = (1.f + subframe) / nb_subframes;
812
813 for (
int i = 0;
i <
len;
i++) {
814 lsp[
i] = (1.f -
tmp) * old_lsp[
i] +
tmp * new_lsp[
i];
816 }
817 for (
int i = 1;
i <
len - 1;
i++) {
818 lsp[
i] =
fmaxf(lsp[
i], lsp[
i - 1] + margin);
819 if (lsp[
i] > lsp[
i + 1] - margin)
820 lsp[
i] = .5f * (lsp[
i] + lsp[
i + 1] - margin);
821 }
822 }
823
/*
 * Converts lpcrdr values in freq[] into lpcrdr coefficients written to
 * ak[0..lpcrdr-1].
 *
 * Each freq[i] is passed through cosf() and negated before use. The outer
 * loop runs lpcrdr + 1 times; the first pass (j == 0) produces no output
 * and ak[j - 1] is written on the remaining passes. xin1/xin2 start at 1
 * and are reset to 0 at the end of every pass, so the cascade is driven by
 * a single unit sample.
 *
 * NOTE(review): the declarations of Wp and x_freq, and the statement that
 * points n0 into the pw work area inside the inner loop, are not present in
 * the visible text; n0[0..5] is indexed below, so the missing assignment
 * must provide at least six floats. Confirm against the complete source.
 */
824 static void lsp_to_lpc(
const float *freq,
float *ak,
int lpcrdr)
825 {
826 float xout1, xout2, xin1, xin2;
827 float *pw, *n0;
/* m second-order sections, two entries of x_freq consumed per section. */
830 const int m = lpcrdr >> 1;
831
832 pw = Wp;
833
834 xin1 = xin2 = 1.f;
835
836 for (
int i = 0;
i < lpcrdr;
i++)
837 x_freq[
i] = -
cosf(freq[
i]);
838
839 /* reconstruct P(z) and Q(z) by cascading second order
840 * polynomials in form 1 - 2xz(-1) +z(-2), where x is the
841 * LSP coefficient
842 */
843 for (int j = 0; j <= lpcrdr; j++) {
844 int i2 = 0;
845 for (
int i = 0;
i < m;
i++, i2 += 2) {
847 xout1 = xin1 + 2.f * x_freq[i2 ] * n0[0] + n0[1];
848 xout2 = xin2 + 2.f * x_freq[i2 + 1] * n0[2] + n0[3];
/* Shift the two-sample delay lines of both chains. */
849 n0[1] = n0[0];
850 n0[3] = n0[2];
851 n0[0] = xin1;
852 n0[2] = xin2;
853 xin1 = xout1;
854 xin2 = xout2;
855 }
/* Final first-order stage: add the previous sample on one chain, subtract
 * it on the other, then average the two results. */
856 xout1 = xin1 + n0[4];
857 xout2 = xin2 - n0[5];
858 if (j > 0)
859 ak[j - 1] = (xout1 + xout2) * 0.5
f;
860 n0[4] = xin1;
861 n0[5] = xin2;
862
863 xin1 = 0.f;
864 xin2 = 0.f;
865 }
866 }
867
870 {
872 float ol_gain = 0, ol_pitch_coef = 0, best_pitch_gain = 0, pitch_average = 0;
873 int m, pitch, wideband, ol_pitch = 0, best_pitch = 40;
880 float pitch_gain[3] = { 0 };
881
883
885 do { /* Search for next narrowband block (handle requests, skip wideband blocks) */
889 if (wideband) /* Skip wideband block (for compatibility) */ {
890 int submode, advance;
891
895 if (advance < 0)
898
902 if (wideband) {
906 if (advance < 0)
910 if (wideband) {
913 }
914 }
915 }
919 if (m == 15) /* We found a terminator */ {
921 } else if (m == 14) /* Speex in-band request */ {
925 } else if (m == 13) /* User in-band request */ {
929 } else if (m > 8) /* Invalid mode */ {
931 }
932 } while (m > 8);
933
934 st->
submodeID = m;
/* Get the sub-mode that was used */
935 }
936
937 /* Shift all buffers by one frame */
939
940 /* If null mode (no transmission), just set a couple things to zero */
943 float innov_gain = 0.f;
944
949
950 /* Final signal synthesis from excitation */
953
954 return 0;
955 }
956
957 /* Unquantize LSPs */
959
960 /* Damp memory if a frame was lost and the LSP changed too much */
962 float fact, lsp_dist = 0;
963
969 }
970
971 /* Handle first frame and lost-packet case */
974
975 /* Get open-loop pitch estimation for low bit-rate pitch coding */
978
979 if (
SUBMODE(forced_pitch_gain))
980 ol_pitch_coef = 0.066667f *
get_bits(gb, 4);
981
982 /* Get global excitation gain */
984
987
990
991 for (
int sub = 0; sub <
NB_NB_SUBFRAMES; sub++) {
/* Loop on subframes */
992 float *exc, *innov_save =
NULL,
tmp, ener;
993 int pit_min, pit_max,
offset, q_energy;
994
999
1001
1002 /* Adaptive codebook contribution */
1004 /* Handle pitch constraints if any */
1005 if (
SUBMODE(lbr_pitch) != -1) {
1006 int margin =
SUBMODE(lbr_pitch);
1007
1008 if (margin) {
1009 pit_min = ol_pitch - margin + 1;
1011 pit_max = ol_pitch + margin;
1013 } else {
1014 pit_min = pit_max = ol_pitch;
1015 }
1016 } else {
1019 }
1020
1021 SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef,
SUBMODE(LtpParam),
1024
1026
1028
1029 pitch_average +=
tmp;
1030 if ((
tmp > best_pitch_gain &&
1031 FFABS(2 * best_pitch - pitch) >= 3 &&
1032 FFABS(3 * best_pitch - pitch) >= 4 &&
1033 FFABS(4 * best_pitch - pitch) >= 5) ||
1034 (
tmp > .6
f * best_pitch_gain &&
1035 (
FFABS(best_pitch - 2 * pitch) < 3 ||
1036 FFABS(best_pitch - 3 * pitch) < 4 ||
1037 FFABS(best_pitch - 4 * pitch) < 5)) ||
1038 ((.67
f *
tmp) > best_pitch_gain &&
1039 (
FFABS(2 * best_pitch - pitch) < 3 ||
1040 FFABS(3 * best_pitch - pitch) < 4 ||
1041 FFABS(4 * best_pitch - pitch) < 5))) {
1042 best_pitch = pitch;
1043 if (
tmp > best_pitch_gain)
1044 best_pitch_gain =
tmp;
1045 }
1046
1047 memset(innov, 0, sizeof(innov));
1048
1049 /* Decode sub-frame gain correction */
1050 if (
SUBMODE(have_subframe_gain) == 3) {
1053 }
else if (
SUBMODE(have_subframe_gain) == 1) {
1056 } else {
1057 ener = ol_gain;
1058 }
1059
1061 /* Fixed codebook contribution */
1063 /* De-normalize innovation and update excitation */
1064
1066
1067 /* Decode second codebook (only for some modes) */
1068 if (
SUBMODE(double_codebook)) {
1070
1074 innov[
i] += innov2[
i];
1075 }
1077 exc[
i] = exc32[
i] + innov[
i];
1078 if (innov_save)
1079 memcpy(innov_save, innov, sizeof(innov));
1080
1081 /* Vocoder mode */
1083 float g = ol_pitch_coef;
1084
1086
1092 }
1094
1096 float exci = exc[
i];
1097 exc[
i] = (.7f * exc[
i] + .3f * st->
voc_m1) + ((1.
f - .85
f *
g) * innov[
i]) - .15
f *
g * st->
voc_m2;
1102 }
1103 }
1104 }
1105
1112 } else {
1114 }
1115
1116 /* If the last packet was lost, re-scale the excitation to obtain the same
1117 * energy as encoded in ol_gain */
1119 float exc_ener, gain;
1120
1123 gain =
fminf(ol_gain / (exc_ener + 1.
f), 2.
f);
1127 }
1128 }
1129
1130 for (
int sub = 0; sub <
NB_NB_SUBFRAMES; sub++) {
/* Loop on subframes */
1132 float pi_g = 1.f, *sp =
out +
offset;
/* Original signal */
1133
1136
1137 for (
int i = 0;
i <
NB_ORDER;
i += 2)
/* Compute analysis filter at w=pi */
1138 pi_g += ak[
i + 1] - ak[
i];
1141
1143
1145 }
1146
1149
1150 /* Store the LSPs for interpolation in the next frame */
1152
1158
1159 return 0;
1160 }
1161
/*
 * Combines two inputs x1 and x2 into y using the taps in a[], keeping
 * state in mem1/mem2 between calls (a QMF synthesis step, going by the
 * function name).
 *
 * With N2 = N >> 1 and M2 = M >> 1:
 *  - x1 and x2 are read for N2 samples each and copied in reversed order
 *    into the scratch arrays xx1/xx2, followed by M2 state values;
 *  - y[0 .. 2*N2-1] is written, four outputs per outer iteration;
 *  - only the odd-indexed entries mem1[2*i+1] / mem2[2*i+1], i < M2, are
 *    read and written here.
 * The scratch arrays hold 352 floats, so N2 + M2 must not exceed 352.
 *
 * NOTE(review): a0 and a1 are used without a visible declaration or
 * assignment (the parameter a is never referenced in the visible text),
 * and x10/x20 are used without a visible assignment. Those statements
 * appear to be missing; confirm against the complete source.
 */
1162 static void qmf_synth(
const float *x1,
const float *x2,
const float *
a,
float *y,
int N,
int M,
float *mem1,
float *mem2)
1163 {
1164 const int M2 =
M >> 1,
N2 =
N >> 1;
1165 float xx1[352], xx2[352];
1166
/* Time-reverse each input and append the saved state after it. */
1167 for (
int i = 0;
i <
N2;
i++)
1168 xx1[
i] = x1[
N2-1-
i];
1169 for (
int i = 0;
i < M2;
i++)
1170 xx1[
N2+
i] = mem1[2*
i+1];
1171 for (
int i = 0;
i <
N2;
i++)
1172 xx2[
i] = x2[
N2-1-
i];
1173 for (
int i = 0;
i < M2;
i++)
1174 xx2[
N2+
i] = mem2[2*
i+1];
1175
/* Two positions of each input per iteration, four output samples. */
1176 for (
int i = 0;
i <
N2;
i += 2) {
1177 float y0, y1, y2, y3;
1178 float x10, x20;
1179
1180 y0 = y1 = y2 = y3 = 0.f;
1183
1184 for (int j = 0; j < M2; j += 2) {
1185 float x11, x21;
1187
1190 x11 = xx1[
N2-1+j-
i];
1191 x21 = xx2[
N2-1+j-
i];
1192
/* One tap multiplies the difference of the two inputs, the other tap
 * multiplies their sum. */
1193 y0 +=
a0 * (x11-x21);
1194 y1 +=
a1 * (x11+x21);
1195 y2 +=
a0 * (x10-x20);
1196 y3 +=
a1 * (x10+x20);
1201
1202 y0 +=
a0 * (x10-x20);
1203 y1 +=
a1 * (x10+x20);
1204 y2 +=
a0 * (x11-x21);
1205 y3 +=
a1 * (x11+x21);
1206 }
/* Each accumulated sum is doubled on output. */
1207 y[2 *
i ] = 2.f * y0;
1208 y[2 *
i+1] = 2.f * y1;
1209 y[2 *
i+2] = 2.f * y2;
1210 y[2 *
i+3] = 2.f * y3;
1211 }
1212
/* Save the first M2 entries of the reversed inputs as the next call's
 * state. */
1213 for (
int i = 0;
i < M2;
i++)
1214 mem1[2*
i+1] = xx1[
i];
1215 for (
int i = 0;
i < M2;
i++)
1216 mem2[2*
i+1] = xx2[
i];
1217 }
1218
1221 {
1228 float *low_innov_alias;
1232
1234
1237 s->st[st->
modeID - 1].innov_save = low_innov_alias;
1241 }
1242
1246 else
1247 wideband = 0;
1248 if (wideband) { /* Regular wideband frame, read the submode */
1251 } else { /* Was a narrowband frame, set "null submode" */
1253 }
1256 }
1257
1258 /* If null mode (no transmission), just set a couple things to zero */
1262
1264
1265 /* Final signal synthesis from excitation */
1267
1269
1270 return 0;
1271 }
1272
1273 memcpy(low_pi_gain,
s->st[st->
modeID - 1].pi_gain,
sizeof(low_pi_gain));
1274 memcpy(low_exc_rms,
s->st[st->
modeID - 1].exc_rms,
sizeof(low_exc_rms));
1275
1277
1280
1282 float filter_ratio, el, rl, rh;
1283 float *innov_save =
NULL, *sp;
1284 float exc[80];
1286
1289 /* Pointer for saving innovation */
1293 }
1294
1298
1299 /* Calculate response ratio between the low and high filter in the middle
1300 of the band (4000 Hz) */
1302 rh = 1.f;
1304 rh += ak[
i + 1] - ak[
i];
1306 }
1307
1308 rl = low_pi_gain[sub];
1309 filter_ratio = (rl + .01f) / (rh + .01
f);
1310
1312 if (!
SUBMODE(innovation_unquant)) {
1314 const float g =
expf(.125
f * (x - 10)) / filter_ratio;
1315
1317 exc[
i ] =
mode->folding_gain * low_innov_alias[
offset +
i ] *
g;
1318 exc[
i + 1] = -
mode->folding_gain * low_innov_alias[
offset +
i + 1] *
g;
1319 }
1320 } else {
1322
1323 el = low_exc_rms[sub];
1325
1328
1329 scale = (gc * el) / filter_ratio;
1333
1335 if (
SUBMODE(double_codebook)) {
1336 float innov2[80];
1337
1342 exc[
i] += innov2[
i];
1343 }
1344 }
1345
1348 innov_save[2 *
i] = exc[
i];
1349 }
1350
1352 memcpy(st->
exc_buf, exc,
sizeof(exc));
1355 }
1356
1359
1361
1362 return 0;
1363 }
1364
1366 {
1369
1374
1382
1384
1389
1394
1395 return 0;
1396 }
1397
1399 const uint8_t *extradata, int extradata_size)
1400 {
1402 const uint8_t *buf =
av_strnstr(extradata,
"Speex ", extradata_size);
1403
1404 if (!buf)
1406
1407 buf += 28;
1408
1409 s->version_id = bytestream_get_le32(&buf);
1410 buf += 4;
1411 s->rate = bytestream_get_le32(&buf);
1414 s->mode = bytestream_get_le32(&buf);
1417 s->bitstream_version = bytestream_get_le32(&buf);
1418 if (
s->bitstream_version != 4)
1420 s->nb_channels = bytestream_get_le32(&buf);
1421 if (
s->nb_channels <= 0 ||
s->nb_channels > 2)
1423 s->bitrate = bytestream_get_le32(&buf);
1424 s->frame_size = bytestream_get_le32(&buf);
1426 s->frame_size > INT32_MAX >> (
s->mode > 0))
1428 s->frame_size <<= (
s->mode > 0);
1429 s->vbr = bytestream_get_le32(&buf);
1430 s->frames_per_packet = bytestream_get_le32(&buf);
1431 if (
s->frames_per_packet <= 0 ||
1432 s->frames_per_packet > 64 ||
1433 s->frames_per_packet >= INT32_MAX /
s->nb_channels /
s->frame_size)
1435 s->extra_headers = bytestream_get_le32(&buf);
1436
1437 return 0;
1438 }
1439
1441 {
1444
1448
1453 } else {
1457
1459 if (
s->nb_channels <= 0 ||
s->nb_channels > 2)
1461
1463 case 8000:
s->mode = 0;
break;
1464 case 16000:
s->mode = 1;
break;
1465 case 32000:
s->mode = 2;
break;
1466 default:
s->mode = 2;
1467 }
1468
1469 s->frames_per_packet = 64;
1471 }
1472
1475
1479 }
1480
1485 }
1486
1487 s->pkt_size = ((
const uint8_t[]){ 5, 10, 15, 20, 20, 28, 28, 38, 38, 46, 62 })[
quality];
1488
1494 s->frames_per_packet = 1;
1496 }
1497
1505
1506 for (
int m = 0; m <=
s->mode; m++) {
1510 }
1511
1512 s->stereo.balance = 1.f;
1513 s->stereo.e_ratio = .5f;
1514 s->stereo.smooth_left = 1.f;
1515 s->stereo.smooth_right = 1.f;
1516
1517 return 0;
1518 }
1519
1521 {
1522 float balance, e_left, e_right, e_ratio;
1523
1526
1527 /* These two are Q14, with max value just below 2. */
1528 e_right = 1.f /
sqrtf(e_ratio * (1.
f + balance));
1529 e_left =
sqrtf(balance) * e_right;
1530
1537 }
1538 }
1539
1541 int *got_frame_ptr,
AVPacket *avpkt)
1542 {
1544 int frames_per_packet =
s->frames_per_packet;
1545 const float scale = 1.f / 32768.f;
1546 int buf_size = avpkt->
size;
1549
1550 if (
s->pkt_size && avpkt->
size == 62)
1551 buf_size =
s->pkt_size;
1554
1555 frame->nb_samples =
FFALIGN(
s->frame_size * frames_per_packet, 4);
1558
1559 dst = (
float *)
frame->extended_data[0];
1560 for (
int i = 0;
i < frames_per_packet;
i++) {
1568 frames_per_packet =
i + 1;
1569 break;
1570 }
1571 }
1572
1573 dst = (
float *)
frame->extended_data[0];
1575 frame->nb_samples =
s->frame_size * frames_per_packet;
1576
1577 *got_frame_ptr = 1;
1578
1580 }
1581
1583 {
1586 return 0;
1587 }
1588
1600 };