1 /*
2 * Dynamic Audio Normalizer
3 * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * Dynamic Audio Normalizer
25 */
26
28
31
32 #define MIN_FILTER_SIZE 3
33 #define MAX_FILTER_SIZE 301
34
35 #define FF_BUFQUEUE_SIZE (MAX_FILTER_SIZE + 1)
37
42
47
54
57
59
66
76
80
85
88
/* Byte offset of option field x inside the filter's private context. */
#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
/*
 * Option flags shared by this filter's options. The expansion is wrapped in
 * parentheses so that it stays a single operand wherever it is used: without
 * them, an expression such as `FLAGS & mask` would bind `&` to the last flag
 * only, because `&` has higher precedence than `|`.
 */
#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
91
114 };
115
117
119 {
121
122 if (!(
s->filter_size & 1)) {
125 }
126
127 return 0;
128 }
129
131 {
134 }
135
137 {
139
142
144 if (!q)
146
150
155 }
156
157 return q;
158 }
159
161 {
162 if (q)
165 }
166
168 {
170 }
171
173 {
175 }
176
178 {
180
183
184 return 0;
185 }
186
188 {
191 }
192
194 {
196
200
201 return 0;
202 }
203
205 {
207
210
211 return 0;
212 }
213
215 {
218
221
223 for (
int i = 0;
i < side;
i++)
226 } else {
227 int count = (q->
size - new_size + 1) / 2;
228
229 while (count-- > 0)
231 }
232
234 }
235
237 {
238 double total_weight = 0.0;
239 const double sigma = (((
s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
242
243 // Pre-compute constants
244 const int offset =
s->filter_size / 2;
245 const double c1 = 1.0 / (sigma * sqrt(2.0 *
M_PI));
246 const double c2 = 2.0 * sigma * sigma;
247
248 // Compute weights
249 for (
i = 0;
i <
s->filter_size;
i++) {
251
253 total_weight +=
s->weights[
i];
254 }
255
256 // Adjust weights
257 adjust = 1.0 / total_weight;
258 for (
i = 0;
i <
s->filter_size;
i++) {
260 }
261 }
262
264 {
267
271
272 for (
c = 0;
c <
s->channels;
c++) {
273 if (
s->gain_history_original)
275 if (
s->gain_history_minimum)
277 if (
s->gain_history_smoothed)
279 if (
s->threshold_history)
281 }
282
287
289 s->is_enabled =
NULL;
290
292
294 }
295
297 {
301
303
304 s->channels =
inlink->channels;
307
309 s->dc_correction_value =
av_calloc(
inlink->channels,
sizeof(*
s->dc_correction_value));
310 s->compress_threshold =
av_calloc(
inlink->channels,
sizeof(*
s->compress_threshold));
311 s->gain_history_original =
av_calloc(
inlink->channels,
sizeof(*
s->gain_history_original));
312 s->gain_history_minimum =
av_calloc(
inlink->channels,
sizeof(*
s->gain_history_minimum));
313 s->gain_history_smoothed =
av_calloc(
inlink->channels,
sizeof(*
s->gain_history_smoothed));
314 s->threshold_history =
av_calloc(
inlink->channels,
sizeof(*
s->threshold_history));
317 if (!
s->prev_amplification_factor || !
s->dc_correction_value ||
318 !
s->compress_threshold ||
319 !
s->gain_history_original || !
s->gain_history_minimum ||
320 !
s->gain_history_smoothed || !
s->threshold_history ||
321 !
s->is_enabled || !
s->weights)
323
325 s->prev_amplification_factor[
c] = 1.0;
326
331
332 if (!
s->gain_history_original[
c] || !
s->gain_history_minimum[
c] ||
333 !
s->gain_history_smoothed[
c] || !
s->threshold_history[
c])
335 }
336
338
339 return 0;
340 }
341
/**
 * Linearly blend between two values across a span of positions.
 *
 * The weight applied to next is (pos + 1) / length and prev receives the
 * remaining weight, so at pos == length - 1 the result is next (up to
 * floating-point rounding).
 *
 * @param prev   value the blend starts from
 * @param next   value the blend moves towards
 * @param pos    zero-based position within the span
 * @param length number of positions in the span; used as a divisor
 * @return       the blended value for position pos
 */
static inline double fade(double prev, double next, int pos, int length)
{
    const double per_step    = 1.0 / length;
    const double weight_prev = 1.0 - per_step * (pos + 1.0);
    const double weight_next = 1.0 - weight_prev;

    return weight_prev * prev + weight_next * next;
}
349
351 {
353 }
354
355 static inline double bound(
const double threshold,
const double val)
356 {
357 const double CONST = 0.8862269254527580136490837416705725913987747280611935;
//sqrt(PI) / 2.0
359 }
360
362 {
363 double max = DBL_EPSILON;
365
367 for (
c = 0;
c <
frame->channels;
c++) {
368 double *data_ptr = (
double *)
frame->extended_data[
c];
369
372 }
373 } else {
374 double *data_ptr = (
double *)
frame->extended_data[
channel];
375
378 }
379
381 }
382
384 {
385 double rms_value = 0.0;
387
389 for (
c = 0;
c <
frame->channels;
c++) {
390 const double *data_ptr = (
double *)
frame->extended_data[
c];
391
393 rms_value +=
pow_2(data_ptr[
i]);
394 }
395 }
396
397 rms_value /=
frame->nb_samples *
frame->channels;
398 } else {
399 const double *data_ptr = (
double *)
frame->extended_data[
channel];
401 rms_value +=
pow_2(data_ptr[
i]);
402 }
403
404 rms_value /=
frame->nb_samples;
405 }
406
407 return FFMAX(sqrt(rms_value), DBL_EPSILON);
408 }
409
412 {
414 const double maximum_gain =
s->peak_value / peak_magnitude;
417
418 gain.
threshold = peak_magnitude >
s->threshold;
419 gain.max_gain =
bound(
s->max_amplification,
FFMIN(maximum_gain, rms_gain));
420
421 return gain;
422 }
423
425 {
426 double min = DBL_MAX;
428
431 }
432
434 }
435
437 {
438 double result = 0.0, tsum = 0.0;
440
444 }
445
446 if (tsum == 0.0)
448
450 }
451
454 {
456 const int pre_fill_size =
s->filter_size / 2;
458
459 s->prev_amplification_factor[
channel] = initial_value;
460
464 }
465 }
466
468
471
473 const int pre_fill_size =
s->filter_size / 2;
474 double initial_value =
s->alt_boundary_mode ?
cqueue_peek(
s->gain_history_original[
channel], 0) : 1.0;
475 int input = pre_fill_size;
476
481 }
482 }
483
485
487
489
491 }
492
494 double smoothed,
limit;
495
499
501
504 }
505 }
506
/**
 * Blend a new value into an existing one using a fixed mixing weight.
 *
 * The result is aggressiveness * new + (1 - aggressiveness) * old, so a
 * weight of 0.0 keeps old unchanged and a weight of 1.0 returns new.
 *
 * @param new            the incoming value
 * @param old            the value to be updated
 * @param aggressiveness weight given to new; asserted to lie in [0.0, 1.0]
 * @return               the weighted combination of new and old
 */
static inline double update_value(double new, double old, double aggressiveness)
{
    double retained;

    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));

    /* Share of the previous value that survives this update. */
    retained = 1.0 - aggressiveness;
    return aggressiveness * new + retained * old;
}
512
514 {
515 const double diff = 1.0 /
frame->nb_samples;
516 int is_first_frame =
cqueue_empty(
s->gain_history_original[0]);
518
519 for (
c = 0;
c <
s->channels;
c++) {
520 double *dst_ptr = (
double *)
frame->extended_data[
c];
521 double current_average_value = 0.0;
522 double prev_value;
523
525 current_average_value += dst_ptr[
i] *
diff;
526
527 prev_value = is_first_frame ? current_average_value :
s->dc_correction_value[
c];
528 s->dc_correction_value[
c] = is_first_frame ? current_average_value :
update_value(current_average_value,
s->dc_correction_value[
c], 0.1);
529
530 for (
i = 0;
i <
frame->nb_samples;
i++) {
531 dst_ptr[
i] -=
fade(prev_value,
s->dc_correction_value[
c],
i,
frame->nb_samples);
532 }
533 }
534 }
535
537 {
538 if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
539 double current_threshold = threshold;
540 double step_size = 1.0;
541
542 while (step_size > DBL_EPSILON) {
543 while ((
llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
544 llrint(current_threshold * (UINT64_C(1) << 63))) &&
545 (
bound(current_threshold + step_size, 1.0) <= threshold)) {
546 current_threshold += step_size;
547 }
548
549 step_size /= 2.0;
550 }
551
552 return current_threshold;
553 } else {
554 return threshold;
555 }
556 }
557
560 {
561 double variance = 0.0;
563
565 for (
c = 0;
c <
s->channels;
c++) {
566 const double *data_ptr = (
double *)
frame->extended_data[
c];
567
569 variance +=
pow_2(data_ptr[
i]);
// Assume that MEAN is *zero*
570 }
571 }
572 variance /= (
s->channels *
frame->nb_samples) - 1;
573 } else {
574 const double *data_ptr = (
double *)
frame->extended_data[
channel];
575
577 variance +=
pow_2(data_ptr[
i]);
// Assume that MEAN is *zero*
578 }
579 variance /=
frame->nb_samples - 1;
580 }
581
582 return FFMAX(sqrt(variance), DBL_EPSILON);
583 }
584
586 {
587 int is_first_frame =
cqueue_empty(
s->gain_history_original[0]);
589
590 if (
s->channels_coupled) {
592 const double current_threshold =
FFMIN(1.0,
s->compress_factor * standard_deviation);
593
594 const double prev_value = is_first_frame ? current_threshold :
s->compress_threshold[0];
595 double prev_actual_thresh, curr_actual_thresh;
596 s->compress_threshold[0] = is_first_frame ? current_threshold :
update_value(current_threshold,
s->compress_threshold[0], (1.0/3.0));
597
600
601 for (
c = 0;
c <
s->channels;
c++) {
602 double *
const dst_ptr = (
double *)
frame->extended_data[
c];
604 const double localThresh =
fade(prev_actual_thresh, curr_actual_thresh,
i,
frame->nb_samples);
606 }
607 }
608 } else {
609 for (
c = 0;
c <
s->channels;
c++) {
612
613 const double prev_value = is_first_frame ? current_threshold :
s->compress_threshold[
c];
614 double prev_actual_thresh, curr_actual_thresh;
615 double *dst_ptr;
616 s->compress_threshold[
c] = is_first_frame ? current_threshold :
update_value(current_threshold,
s->compress_threshold[
c], 1.0/3.0);
617
620
621 dst_ptr = (
double *)
frame->extended_data[
c];
623 const double localThresh =
fade(prev_actual_thresh, curr_actual_thresh,
i,
frame->nb_samples);
625 }
626 }
627 }
628 }
629
631 {
632 if (
s->dc_correction) {
634 }
635
636 if (
s->compress_factor > DBL_EPSILON) {
638 }
639
640 if (
s->channels_coupled) {
643
644 for (
c = 0;
c <
s->channels;
c++)
646 } else {
648
649 for (
c = 0;
c <
s->channels;
c++)
651 }
652 }
653
656 {
658
659 for (
c = 0;
c <
s->channels;
c++) {
661 double *dst_ptr = (
double *)
frame->extended_data[
c];
662 double current_amplification_factor;
663
665
666 for (
i = 0;
i <
frame->nb_samples && enabled;
i++) {
667 const double amplification_factor =
fade(
s->prev_amplification_factor[
c],
668 current_amplification_factor,
i,
670
671 dst_ptr[
i] = src_ptr[
i] * amplification_factor;
672 }
673
674 s->prev_amplification_factor[
c] = current_amplification_factor;
675 }
676 }
677
679 {
684
685 while (((
s->queue.available >=
s->filter_size) ||
686 (
s->eof &&
s->queue.available)) &&
690 double is_enabled;
691
693
696 } else {
701 }
703 }
704
711 }
712
717 } else {
719 }
720
722 }
723
726 {
729
732
733 for (
c = 0;
c <
s->channels;
c++) {
734 double *dst_ptr = (
double *)
out->extended_data[
c];
735
737 dst_ptr[
i] =
s->alt_boundary_mode ? DBL_EPSILON : ((
s->target_rms > DBL_EPSILON) ?
FFMIN(
s->peak_value,
s->target_rms) :
s->peak_value);
738 if (
s->dc_correction) {
739 dst_ptr[
i] *= ((
i % 2) == 1) ? -1 : 1;
740 dst_ptr[
i] +=
s->dc_correction_value[
c];
741 }
742 }
743 }
744
746 }
747
749 {
754
757 }
else if (
s->queue.available) {
759
763 }
764
766 }
767
769 {
776
778
787 }
788
791 return 0;
792 }
793 }
794
798 }
799
800 if (
s->eof &&
s->queue.available)
801 return flush(outlink);
802
803 if (
s->eof && !
s->queue.available) {
805 return 0;
806 }
807
810
812 }
813
815 char *res,
int res_len,
int flags)
816 {
819 int prev_filter_size =
s->filter_size;
821
825
827 if (prev_filter_size !=
s->filter_size) {
829
830 for (
int c = 0;
c <
s->channels;
c++) {
834 }
835 }
836
838
839 return 0;
840 }
841
843 {
847 },
848 };
849
851 {
854 },
855 };
856
858 .
name =
"dynaudnorm",
867 .priv_class = &dynaudnorm_class,
870 };