1 /*
2 * Copyright (c) 2018 Gregor Richards
3 * Copyright (c) 2017 Mozilla
4 * Copyright (c) 2005-2009 Xiph.Org Foundation
5 * Copyright (c) 2007-2008 CSIRO
6 * Copyright (c) 2008-2011 Octasic Inc.
7 * Copyright (c) Jean-Marc Valin
8 * Copyright (c) 2019 Paul B Mahol
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * - Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
45
46 #define FRAME_SIZE_SHIFT 2
47 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
48 #define WINDOW_SIZE (2*FRAME_SIZE)
49 #define FREQ_SIZE (FRAME_SIZE + 1)
50
51 #define PITCH_MIN_PERIOD 60
52 #define PITCH_MAX_PERIOD 768
53 #define PITCH_FRAME_SIZE 960
54 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
55
56 #define SQUARE(x) ((x)*(x))
57
59
61 #define NB_DELTA_CEPS 6
62
63 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
64
65 #define WEIGHTS_SCALE (1.f/256)
66
67 #define MAX_NEURONS 128
68
69 #define ACTIVATION_TANH 0
70 #define ACTIVATION_SIGMOID 1
71 #define ACTIVATION_RELU 2
72
74
82
91
95
98
101
104
107
111
118
135
138
141
144
147
149
152
153 #define F_ACTIVATION_TANH 0
154 #define F_ACTIVATION_SIGMOID 1
155 #define F_ACTIVATION_RELU 2
156
158 {
/*
 * Release helpers used while tearing a model down.
 *
 * FREE_MAYBE(ptr)  - hand ptr to free(); free(NULL) is already a no-op, so
 *                    no NULL test is needed and ptr is evaluated only once.
 * FREE_DENSE(name) - if model->name is set, release its input_weights and
 *                    bias arrays and then the layer object itself.
 * FREE_GRU(name)   - same as FREE_DENSE, and additionally releases the
 *                    recurrent_weights array.
 *
 * The (void *) casts let av_free() accept the layer members regardless of
 * their qualifiers.
 */
#define FREE_MAYBE(ptr) do { free(ptr); } while (0)
#define FREE_DENSE(name) do { \
    if (model->name) { \
        av_free((void *) model->name->input_weights); \
        av_free((void *) model->name->bias); \
        av_free((void *) model->name); \
    } \
} while (0)
#define FREE_GRU(name) do { \
    if (model->name) { \
        av_free((void *) model->name->input_weights); \
        av_free((void *) model->name->recurrent_weights); \
        av_free((void *) model->name->bias); \
        av_free((void *) model->name); \
    } \
} while (0)
175
176 if (!model)
177 return;
185 }
186
188 {
196 int in;
197
198 if (fscanf(
f,
"rnnoise-nu model file version %d\n", &in) != 1 || in != 1)
200
204
/*
 * Allocate one zero-initialised object of `type`, keep it in the local
 * variable `name` and publish it as ret->name.
 *
 * If the allocation fails, the partially built model `ret` is handed to
 * rnnoise_model_free() and the enclosing function returns AVERROR(ENOMEM).
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and stays correct under an unbraced if/else.
 */
#define ALLOC_LAYER(type, name) do { \
    name = av_calloc(1, sizeof(type)); \
    if (!name) { \
        rnnoise_model_free(ret); \
        return AVERROR(ENOMEM); \
    } \
    ret->name = name; \
} while (0)
212
219
/*
 * Parse one decimal integer from `f` into the scratch variable `in` and
 * assign it to `name`.
 *
 * A failed conversion, a negative value or a value above 128 is treated as
 * a malformed file: the model `ret` is released and the enclosing function
 * returns AVERROR(EINVAL).
 */
#define INPUT_VAL(name) do { \
    const int got = fscanf(f, "%d", &in); \
    if (got != 1 || in < 0 || in > 128) { \
        rnnoise_model_free(ret); \
        return AVERROR(EINVAL); \
    } \
    name = in; \
} while (0)
227
/*
 * Read an activation code with INPUT_VAL and translate it from the
 * file-level F_ACTIVATION_* numbering to the internal ACTIVATION_* one.
 * Any code that is neither SIGMOID nor RELU is mapped to ACTIVATION_TANH.
 */
#define INPUT_ACTIVATION(name) do { \
    int kind; \
    INPUT_VAL(kind); \
    if (kind == F_ACTIVATION_SIGMOID) \
        name = ACTIVATION_SIGMOID; \
    else if (kind == F_ACTIVATION_RELU) \
        name = ACTIVATION_RELU; \
    else \
        name = ACTIVATION_TANH; \
} while (0)
242
/*
 * Allocate a zeroed array of `len` floats, attach it to `name`, then fill
 * it with `len` integers parsed from `f` (each converted to float).
 *
 * The array is attached to `name` before parsing starts, so the error
 * paths only need to release the model `ret`. Allocation failure returns
 * AVERROR(ENOMEM); a failed conversion returns AVERROR(EINVAL).
 */
#define INPUT_ARRAY(name, len) do { \
    float *cells = av_calloc((len), sizeof(float)); \
    int filled = 0; \
    if (!cells) { \
        rnnoise_model_free(ret); \
        return AVERROR(ENOMEM); \
    } \
    name = cells; \
    while (filled < (len)) { \
        if (fscanf(f, "%d", &in) != 1) { \
            rnnoise_model_free(ret); \
            return AVERROR(EINVAL); \
        } \
        cells[filled] = in; \
        filled++; \
    } \
} while (0)
258
/*
 * Read a len0 x len2 x len1 block of integers from `f` into a freshly
 * allocated, zeroed float array attached to `name`.
 *
 * The allocation holds FFALIGN(len0, 4) * FFALIGN(len1, 4) * len2 floats.
 * Values arrive with the len0 index outermost, then the len2 index, then
 * the len1 index innermost, and the value for (k, i, j) is stored at
 *     (j * len2 + i) * FFALIGN(len0, 4) + k
 * so cells that are never written keep their zero fill.
 *
 * Allocation failure returns AVERROR(ENOMEM); a failed conversion returns
 * AVERROR(EINVAL). Both release the model `ret` first.
 */
#define INPUT_ARRAY3(name, len0, len1, len2) do { \
    const int row = FFALIGN((len0), 4); \
    float *cells = av_calloc(row * FFALIGN((len1), 4) * (len2), sizeof(float)); \
    if (!cells) { \
        rnnoise_model_free(ret); \
        return AVERROR(ENOMEM); \
    } \
    name = cells; \
    for (int outer = 0; outer < (len0); outer++) { \
        for (int mid = 0; mid < (len2); mid++) { \
            for (int inner = 0; inner < (len1); inner++) { \
                if (fscanf(f, "%d", &in) != 1) { \
                    rnnoise_model_free(ret); \
                    return AVERROR(EINVAL); \
                } \
                cells[(inner * (len2) + mid) * row + outer] = in; \
            } \
        } \
    } \
} while (0)
278
/*
 * Discard characters from `f` up to and including the next '\n', or up to
 * end of file if no newline remains.
 */
#define NEW_LINE() do { \
    int ch; \
    do { \
        ch = fgetc(f); \
    } while (ch != EOF && ch != '\n'); \
} while (0)
286
/*
 * Parse one dense layer record from `f` into the layer object `name`.
 *
 * The reads happen in a fixed order:
 *   1. nb_inputs and nb_neurons (through INPUT_VAL); nb_neurons is also
 *      copied into ret-><name>_size,
 *   2. the activation code (through INPUT_ACTIVATION),
 *   3. nb_inputs * nb_neurons input weights,
 *   4. nb_neurons bias values.
 * After steps 2, 3 and 4 the remainder of the current line is skipped with
 * NEW_LINE(). Any failure inside the helper macros returns from the
 * enclosing function.
 */
287 #define INPUT_DENSE(name) do { \
288 INPUT_VAL(name->nb_inputs); \
289 INPUT_VAL(name->nb_neurons); \
290 ret->name ## _size = name->nb_neurons; \
291 INPUT_ACTIVATION(name->activation); \
292 NEW_LINE(); \
293 INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \
294 NEW_LINE(); \
295 INPUT_ARRAY(name->bias, name->nb_neurons); \
296 NEW_LINE(); \
297 } while (0)
298
/*
 * Parse one GRU layer record from `f` into the layer object `name`.
 *
 * The reads happen in a fixed order:
 *   1. nb_inputs and nb_neurons (through INPUT_VAL); nb_neurons is also
 *      copied into ret-><name>_size,
 *   2. the activation code (through INPUT_ACTIVATION),
 *   3. the input weights, read with INPUT_ARRAY3 as an
 *      nb_inputs x nb_neurons x 3 block,
 *   4. the recurrent weights, read with INPUT_ARRAY3 as an
 *      nb_neurons x nb_neurons x 3 block,
 *   5. 3 * nb_neurons bias values.
 * After steps 2 to 5 the remainder of the current line is skipped with
 * NEW_LINE(). Any failure inside the helper macros returns from the
 * enclosing function.
 */
299 #define INPUT_GRU(name) do { \
300 INPUT_VAL(name->nb_inputs); \
301 INPUT_VAL(name->nb_neurons); \
302 ret->name ## _size = name->nb_neurons; \
303 INPUT_ACTIVATION(name->activation); \
304 NEW_LINE(); \
305 INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \
306 NEW_LINE(); \
307 INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \
308 NEW_LINE(); \
309 INPUT_ARRAY(name->bias, name->nb_neurons * 3); \
310 NEW_LINE(); \
311 } while (0)
312
319
323 }
324
326
327 return 0;
328 }
329
333 {
337 };
339
343
345 }
346
348 {
352
353 s->channels =
inlink->ch_layout.nb_channels;
354
359
360 for (
int i = 0;
i <
s->channels;
i++) {
362
371 }
372
373 for (
int i = 0;
i <
s->channels;
i++) {
376
381
386 }
387
389 }
390
/**
 * Run a second-order recursive filter section over N samples.
 *
 * Each output sample is the input sample plus the first state word, i.e.
 * the leading feed-forward coefficient is implicitly 1. The two state
 * words are then refreshed from the current input and output samples.
 *
 * @param y   output samples, N entries; may be the same buffer as x because
 *            x[i] is read before y[i] is written
 * @param mem two-word filter state, read and updated in place so the filter
 *            can continue across calls
 * @param x   input samples, N entries
 * @param b   two coefficients applied to the input sample in the state updates
 * @param a   two coefficients applied to the output sample (subtracted) in
 *            the state updates
 * @param N   number of samples to process; nothing is done when N <= 0
 */
static void biquad(float *y, float mem[2], const float *x,
                   const float *b, const float *a, int N)
{
    for (int i = 0; i < N; i++) {
        /* Capture the sample pair first: the state updates need both. */
        const float xi = x[i];
        const float yi = xi + mem[0];

        mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
        mem[1] = (b[1]*xi - a[1]*yi);
        y[i] = yi;
    }
}
404
/*
 * Element-count wrappers around memmove / memset / memcpy: `n` is a number
 * of elements of *dst, not a number of bytes.
 *
 * The zero-weighted pointer difference in RNN_MOVE and RNN_COPY adds
 * nothing to the byte count; subtracting the two pointers only compiles
 * when dst and src have compatible pointer types, which turns a type
 * mix-up into a build error.
 */
#define RNN_MOVE(dst, src, n) \
    (memmove((dst), (src), 0*((dst)-(src)) + (n)*sizeof(*(dst))))
#define RNN_CLEAR(dst, n) \
    (memset((dst), 0, sizeof(*(dst))*(n)))
#define RNN_COPY(dst, src, n) \
    (memcpy((dst), (src), 0*((dst)-(src)) + (n)*sizeof(*(dst))))
408
410 {
413
417 }
418
420
422 }
423
425 {
428
430
434 }
435
437
440 }
441
443 /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/
444 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
445 };
446
448 {
450
452 int band_size;
453
455 for (int j = 0; j < band_size; j++) {
456 float tmp, frac = (
float)j / band_size;
457
460 sum[
i] += (1.f - frac) *
tmp;
461 sum[
i + 1] += frac *
tmp;
462 }
463 }
464
465 sum[0] *= 2;
467
470 }
471
473 {
475
477 int band_size;
478
480 for (int j = 0; j < band_size; j++) {
481 float tmp, frac = (
float)j / band_size;
482
485 sum[
i] += (1 - frac) *
tmp;
486 sum[
i + 1] += frac *
tmp;
487 }
488 }
489
490 sum[0] *= 2;
492
495 }
496
498 {
500
507 }
508
510 {
513 const float mix =
s->mix;
514 const float imix = 1.f -
FFMAX(
mix, 0.
f);
515
521
524 }
525
526 static inline void xcorr_kernel(
const float *x,
const float *y,
float sum[4],
int len)
527 {
528 float y_0, y_1, y_2, y_3 = 0;
529 int j;
530
531 y_0 = *y++;
532 y_1 = *y++;
533 y_2 = *y++;
534
535 for (j = 0; j <
len - 3; j += 4) {
537
539 y_3 = *y++;
545 y_0 = *y++;
551 y_1 = *y++;
557 y_2 = *y++;
562 }
563
566
567 y_3 = *y++;
572 }
573
576
577 y_0 = *y++;
582 }
583
586
587 y_1 = *y++;
592 }
593 }
594
596 const float *y,
int N)
597 {
598 float xy = 0.f;
599
600 for (
int i = 0;
i <
N;
i++)
602
603 return xy;
604 }
605
607 float *xcorr,
int len,
int max_pitch)
608 {
610
611 for (
i = 0;
i < max_pitch - 3;
i += 4) {
612 float sum[4] = { 0, 0, 0, 0};
613
615
617 xcorr[
i + 1] = sum[1];
618 xcorr[
i + 2] = sum[2];
619 xcorr[
i + 3] = sum[3];
620 }
621 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
622 for (;
i < max_pitch;
i++) {
624 }
625 }
626
628 float *ac, /* out: [0...lag-1] ac values */
630 int overlap,
631 int lag,
632 int n)
633 {
634 int fastN = n - lag;
636 const float *xptr;
638
639 if (overlap == 0) {
640 xptr = x;
641 } else {
642 for (
int i = 0;
i < n;
i++)
644 for (
int i = 0;
i < overlap;
i++) {
647 }
648 xptr = xx;
649 }
650
653
654 for (int k = 0; k <= lag; k++) {
655 float d = 0.f;
656
657 for (
int i = k + fastN;
i < n;
i++)
658 d += xptr[
i] * xptr[
i-k];
659 ac[k] += d;
660 }
661
663 }
664
665 static void celt_lpc(
float *lpc,
/* out: [0...p-1] LPC coefficients */
666 const float *ac, /* in: [0...p] autocorrelation values */
668 {
670
672 if (ac[0] != 0) {
673 for (
int i = 0;
i <
p;
i++) {
674 /* Sum up this iteration's reflection coefficient */
675 float rr = 0;
676 for (
int j = 0; j <
i; j++)
677 rr += (lpc[j] * ac[
i - j]);
680 /* Update LPC coefficients and total error */
682 for (
int j = 0; j < (
i + 1) >> 1; j++) {
683 float tmp1, tmp2;
684 tmp1 = lpc[j];
686 lpc[j] = tmp1 + (
r*tmp2);
687 lpc[
i-1-j] = tmp2 + (
r*tmp1);
688 }
689
691 /* Bail out once we get 30 dB gain */
692 if (
error < .001
f * ac[0])
693 break;
694 }
695 }
696 }
697
699 const float *num,
700 float *y,
702 float *mem)
703 {
704 float num0, num1, num2, num3, num4;
705 float mem0, mem1, mem2, mem3, mem4;
706
707 num0 = num[0];
708 num1 = num[1];
709 num2 = num[2];
710 num3 = num[3];
711 num4 = num[4];
712 mem0 = mem[0];
713 mem1 = mem[1];
714 mem2 = mem[2];
715 mem3 = mem[3];
716 mem4 = mem[4];
717
718 for (
int i = 0;
i <
N;
i++) {
720
721 sum += (num0*mem0);
722 sum += (num1*mem1);
723 sum += (num2*mem2);
724 sum += (num3*mem3);
725 sum += (num4*mem4);
726 mem4 = mem3;
727 mem3 = mem2;
728 mem2 = mem1;
729 mem1 = mem0;
732 }
733
734 mem[0] = mem0;
735 mem[1] = mem1;
736 mem[2] = mem2;
737 mem[3] = mem3;
738 mem[4] = mem4;
739 }
740
743 {
744 float ac[5];
746 float lpc[4], mem[5]={0,0,0,0,0};
747 float lpc2[5];
749
750 for (
int i = 1; i < len >> 1;
i++)
751 x_lp[
i] = .5
f * (.5
f * (x[0][(2*
i-1)]+x[0][(2*
i+1)])+x[0][2*
i]);
752 x_lp[0] = .5f * (.5f * (x[0][1])+x[0][0]);
754 for (
int i = 1; i < len >> 1;
i++)
755 x_lp[
i] += (.5
f * (.5
f * (x[1][(2*
i-1)]+x[1][(2*
i+1)])+x[1][2*
i]));
756 x_lp[0] += .5f * (.5f * (x[1][1])+x[1][0]);
757 }
758
760
761 /* Noise floor -40 dB */
762 ac[0] *= 1.0001f;
763 /* Lag windowing */
764 for (
int i = 1;
i <= 4;
i++) {
765 /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
766 ac[
i] -= ac[
i]*(.008f*
i)*(.008
f*
i);
767 }
768
770 for (
int i = 0;
i < 4;
i++) {
772 lpc[
i] = (lpc[
i] *
tmp);
773 }
774 /* Add a zero */
775 lpc2[0] = lpc[0] + .8f;
776 lpc2[1] = lpc[1] + (
c1 * lpc[0]);
777 lpc2[2] = lpc[2] + (
c1 * lpc[1]);
778 lpc2[3] = lpc[3] + (
c1 * lpc[2]);
779 lpc2[4] = (
c1 * lpc[3]);
781 }
782
/**
 * Compute two dot products that share the same left-hand vector.
 *
 * @param x    common vector, N entries
 * @param y01  first right-hand vector, N entries
 * @param y02  second right-hand vector, N entries
 * @param N    number of entries to accumulate; both results are 0 when N <= 0
 * @param xy1  receives the sum of x[i] * y01[i]
 * @param xy2  receives the sum of x[i] * y02[i]
 */
static inline void dual_inner_prod(const float *x, const float *y01, const float *y02,
                                   int N, float *xy1, float *xy2)
{
    float acc_first = 0;
    float acc_second = 0;
    int n = 0;

    /* One pass over x feeds both accumulators. */
    while (n < N) {
        acc_first  += (x[n] * y01[n]);
        acc_second += (x[n] * y02[n]);
        n++;
    }

    *xy1 = acc_first;
    *xy2 = acc_second;
}
796
798 {
799 return xy /
sqrtf(1.
f + xx * yy);
800 }
801
/*
 * Sixteen-entry lookup table of small multipliers.
 * NOTE(review): the line that reads this table is not visible in this
 * listing; it is presumably indexed by the divisor k of the "pitch at T/k"
 * search that follows it - confirm against the use site.
 */
static const uint8_t second_check[16] = {
    0, 0, 3, 2,
    3, 2, 5, 2,
    3, 2, 3, 2,
    5, 2, 3, 2,
};
804 int *T0_, int prev_period, float prev_gain)
805 {
808 float pg;
809 float xy,xx,yy,xy2;
810 float xcorr[3];
811 float best_xy, best_yy;
813 int minperiod0;
815
816 minperiod0 = minperiod;
817 maxperiod /= 2;
818 minperiod /= 2;
819 *T0_ /= 2;
820 prev_period /= 2;
822 x += maxperiod;
823 if (*T0_>=maxperiod)
824 *T0_=maxperiod-1;
825
828 yy_lookup[0] = xx;
829 yy=xx;
830 for (
i = 1;
i <= maxperiod;
i++) {
831 yy = yy+(x[-
i] * x[-
i])-(x[
N-
i] * x[
N-
i]);
832 yy_lookup[
i] =
FFMAX(0, yy);
833 }
834 yy = yy_lookup[T0];
835 best_xy = xy;
836 best_yy = yy;
838 /* Look for any pitch at T/k */
839 for (k = 2; k <= 15; k++) {
840 int T1, T1b;
841 float g1;
842 float cont=0;
843 float thresh;
844 T1 = (2*T0+k)/(2*k);
845 if (T1 < minperiod)
846 break;
847 /* Look for another strong correlation at T1b */
848 if (k==2)
849 {
850 if (T1+T0>maxperiod)
851 T1b = T0;
852 else
853 T1b = T0+T1;
854 } else
855 {
857 }
859 xy = .5f * (xy + xy2);
860 yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
862 if (
FFABS(T1-prev_period)<=1)
863 cont = prev_gain;
864 else if (
FFABS(T1-prev_period)<=2 && 5 * k * k < T0)
865 cont = prev_gain * .5f;
866 else
867 cont = 0;
868 thresh =
FFMAX(.3
f, (.7
f * g0) - cont);
869 /* Bias against very high pitch (very short period) to avoid false-positives
870 due to short-term correlation */
871 if (T1<3*minperiod)
872 thresh =
FFMAX(.4
f, (.85
f * g0) - cont);
873 else if (T1<2*minperiod)
874 thresh =
FFMAX(.5
f, (.9
f * g0) - cont);
875 if (g1 > thresh)
876 {
877 best_xy = xy;
878 best_yy = yy;
881 }
882 }
883 best_xy =
FFMAX(0, best_xy);
884 if (best_yy <= best_xy)
886 else
887 pg = best_xy/(best_yy + 1);
888
889 for (k = 0; k < 3; k++)
891 if ((xcorr[2]-xcorr[0]) > .7f * (xcorr[1]-xcorr[0]))
893 else if ((xcorr[0]-xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
895 else
900
901 if (*T0_<minperiod0)
902 *T0_=minperiod0;
903 return pg;
904 }
905
907 int max_pitch, int *best_pitch)
908 {
909 float best_num[2];
910 float best_den[2];
911 float Syy = 1.f;
912
913 best_num[0] = -1;
914 best_num[1] = -1;
915 best_den[0] = 0;
916 best_den[1] = 0;
917 best_pitch[0] = 0;
918 best_pitch[1] = 1;
919
920 for (
int j = 0; j <
len; j++)
921 Syy += y[j] * y[j];
922
923 for (
int i = 0;
i < max_pitch;
i++) {
925 float num;
926 float xcorr16;
927
929 /* Considering the range of xcorr16, this should avoid both underflows
930 and overflows (inf) when squaring xcorr16 */
932 num = xcorr16 * xcorr16;
933 if ((num * best_den[1]) > (best_num[1] * Syy)) {
934 if ((num * best_den[0]) > (best_num[0] * Syy)) {
935 best_num[1] = best_num[0];
936 best_den[1] = best_den[0];
937 best_pitch[1] = best_pitch[0];
938 best_num[0] = num;
939 best_den[0] = Syy;
941 } else {
942 best_num[1] = num;
943 best_den[1] = Syy;
945 }
946 }
947 }
950 }
951 }
952
954 int len,
int max_pitch,
int *pitch)
955 {
956 int lag;
957 int best_pitch[2]={0,0};
959
963
965
966 /* Downsample by 2 again */
967 for (int j = 0; j < len >> 2; j++)
968 x_lp4[j] = x_lp[2*j];
969 for (int j = 0; j < lag >> 2; j++)
970 y_lp4[j] = y[2*j];
971
972 /* Coarse search with 4x decimation */
973
975
977
978 /* Finer search with 2x decimation */
979 for (
int i = 0; i < max_pitch >> 1;
i++) {
980 float sum;
982 if (
FFABS(
i-2*best_pitch[0])>2 &&
FFABS(
i-2*best_pitch[1])>2)
983 continue;
985 xcorr[
i] =
FFMAX(-1, sum);
986 }
987
989
990 /* Refine by pseudo-interpolation */
991 if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
993
994 a = xcorr[best_pitch[0] - 1];
995 b = xcorr[best_pitch[0]];
996 c = xcorr[best_pitch[0] + 1];
997 if (
c -
a > .7
f * (
b -
a))
999 else if (
a -
c > .7
f * (
b-
c))
1001 else
1003 } else {
1005 }
1006
1007 *pitch = 2 * best_pitch[0] -
offset;
1008 }
1009
1011 {
1013 float sum;
1014
1017 }
1018 }
1019
1021 float *Ex, float *Ep, float *Exp, float *features, const float *in)
1022 {
1024 float *ceps_0, *ceps_1, *ceps_2;
1025 float spec_variability = 0;
1029 int pitch_index;
1030 float gain;
1031 float *(pre[1]);
1033 float follow, logMax;
1034
1043
1048
1051
1056
1059
1061
1064
1068 logMax = -2;
1069 follow = -2;
1070
1074 logMax =
FFMAX(logMax, Ly[
i]);
1075 follow =
FFMAX(follow-1.5, Ly[
i]);
1077 }
1078
1080 /* If there's no audio, avoid messing up the state. */
1082 return 1;
1083 }
1084
1085 dct(
s, features, Ly);
1086 features[0] -= 12;
1087 features[1] -= 4;
1091
1093 ceps_0[
i] = features[
i];
1094
1097 features[
i] = ceps_0[
i] + ceps_1[
i] + ceps_2[
i];
1100 }
1101 /* Spectral variability features. */
1104
1106 float mindist = 1e15f;
1107 for (
int j = 0; j <
CEPS_MEM; j++) {
1108 float dist = 0.f;
1109 for (
int k = 0; k <
NB_BANDS; k++) {
1111
1114 }
1115
1117 mindist =
FFMIN(mindist, dist);
1118 }
1119
1120 spec_variability += mindist;
1121 }
1122
1124
1125 return 0;
1126 }
1127
1129 {
1131
1134
1135 for (int j = 0; j < band_size; j++) {
1136 float frac = (
float)j / band_size;
1137
1139 }
1140 }
1141 }
1142
1144 const float *Exp,
const float *
g)
1145 {
1151
1153 if (Exp[
i]>
g[
i])
r[
i] = 1;
1157 }
1160 X[
i].re += rf[
i]*
P[
i].re;
1161 X[
i].im += rf[
i]*
P[
i].im;
1162 }
1165 norm[
i] =
sqrtf(Ex[
i] / (1e-8+newE[
i]));
1166 }
1169 X[
i].re *= normf[
i];
1170 X[
i].im *= normf[
i];
1171 }
1172 }
1173
1175 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
1176 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
1177 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
1178 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
1179 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
1180 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
1181 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
1182 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
1183 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
1184 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
1185 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
1186 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
1187 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
1188 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
1189 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
1190 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
1191 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
1192 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
1193 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
1194 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
1195 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
1196 0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
1197 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
1198 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
1199 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
1200 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
1201 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
1202 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
1203 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
1204 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
1205 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
1206 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
1207 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
1208 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
1209 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
1210 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
1211 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1212 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
1213 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1214 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
1215 1.000000f,
1216 };
1217
1219 {
1220 float y, dy;
1221 float sign=1;
1223
1224 /* Tests are reversed to catch NaNs */
1225 if (!(x<8))
1226 return 1;
1227 if (!(x>-8))
1228 return -1;
1229 /* Another check in case of -ffast-math */
1230
1232 return 0;
1233
1234 if (x < 0) {
1235 x=-x;
1236 sign=-1;
1237 }
1241 dy = 1-y*y;
1242 y = y + x*dy*(1 - y*x);
1243 return sign*y;
1244 }
1245
1247 {
1249 }
1250
1252 {
1254
1255 for (
int i = 0;
i <
N;
i++) {
1256 /* Compute update gate. */
1257 float sum = layer->
bias[
i];
1258
1259 for (
int j = 0; j <
M; j++)
1261
1263 }
1264
1266 for (
int i = 0;
i <
N;
i++)
1269 for (
int i = 0;
i <
N;
i++)
1272 for (
int i = 0;
i <
N;
i++)
1274 } else {
1276 }
1277 }
1278
1280 {
1288 const int stride = 3 * AN, istride = 3 * AM;
1289
1290 for (
int i = 0;
i <
N;
i++) {
1291 /* Compute update gate. */
1292 float sum = gru->
bias[
i];
1293
1297 }
1298
1299 for (
int i = 0;
i <
N;
i++) {
1300 /* Compute reset gate. */
1301 float sum = gru->
bias[
N +
i];
1302
1306 }
1307
1308 for (
int i = 0;
i <
N;
i++) {
1309 /* Compute output. */
1310 float sum = gru->
bias[2 *
N +
i];
1311
1313 for (
int j = 0; j <
N; j++)
1315
1322 else
1325 }
1326
1328 }
1329
1330 #define INPUT_SIZE 42
1331
1333 {
1337
1341
1347
1349
1355
1358 }
1359
1361 int disabled)
1362 {
1371 float vad_prob = 0;
1373 static const float a_hp[2] = {-1.99599, 0.99600};
1374 static const float b_hp[2] = {-2, 1};
1375 int silence;
1376
1379
1380 if (!silence && !disabled) {
1385
1388 }
1389
1391
1395 }
1396 }
1397
1399 memcpy(history, in,
FRAME_SIZE *
sizeof(*history));
1400
1401 return vad_prob;
1402 }
1403
1407
1409 {
1414 const int start = (
out->ch_layout.nb_channels * jobnr) / nb_jobs;
1415 const int end = (
out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1416
1417 for (int ch = start; ch < end; ch++) {
1419 (
float *)
out->extended_data[ch],
1422 }
1423
1424 return 0;
1425 }
1426
1428 {
1433
1438 }
1440
1444
1447 }
1448
1450 {
1455
1457
1461
1464
1467
1469 }
1470
1472 {
1476
1483 }
1484
1487 if (!*model ||
ret < 0)
1489
1490 return 0;
1491 }
1492
1494 {
1497
1501
1505
1509 }
1510
1512 for (
int j = 0; j <
NB_BANDS; j++) {
1514 if (j == 0)
1515 s->dct_table[j][
i] *=
sqrtf(.5);
1516 }
1517 }
1518
1519 return 0;
1520 }
1521
1523 {
1525
1528
1529 for (
int ch = 0; ch <
s->channels &&
s->st; ch++) {
1530 av_freep(&
s->st[ch].rnn[n].vad_gru_state);
1531 av_freep(&
s->st[ch].rnn[n].noise_gru_state);
1532 av_freep(&
s->st[ch].rnn[n].denoise_gru_state);
1533 }
1534 }
1535
1537 char *res,
int res_len,
int flags)
1538 {
1541
1545
1549
1551 for (
int ch = 0; ch <
s->channels; ch++)
1553
1556 for (
int ch = 0; ch <
s->channels; ch++)
1560 }
1561
1563 return 0;
1564 }
1565
1567 {
1569
1572 for (
int ch = 0; ch <
s->channels &&
s->st; ch++) {
1575 }
1577 }
1578
1580 {
1584 },
1585 };
1586
1587 #define OFFSET(x) offsetof(AudioRNNContext, x)
1588 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
1589
1595 };
1596
1598
1601 .p.description =
NULL_IF_CONFIG_SMALL(
"Reduce noise from speech using Recurrent Neural Networks."),
1602 .p.priv_class = &arnndn_class,
1613 };