1 /*
2 * Copyright (c) 2017 Richard Ling
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /*
22 * Normalize RGB video (aka histogram stretching, contrast stretching).
23 * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
24 *
25 * For each channel of each frame, the filter computes the input range and maps
26 * it linearly to the user-specified output range. The output range defaults
27 * to the full dynamic range from pure black to pure white.
28 *
29 * Naively maximising the dynamic range of each frame of video in isolation
30 * may cause flickering (rapid changes in brightness of static objects in the
31 * scene) when small dark or bright objects enter or leave the scene. This
32 * filter can apply temporal smoothing to the input range to reduce flickering.
33 * Temporal smoothing is similar to the auto-exposure (automatic gain control)
34 * on a video camera, which performs the same function; and, like a video
35 * camera, it may cause a period of over- or under-exposure of the video.
36 *
37 * The filter can normalize the R,G,B channels independently, which may cause
38 * color shifting, or link them together as a single channel, which prevents
39 * color shifting. More precisely, linked normalization preserves hue (as it's
40 * defined in HSV/HSL color spaces) while independent normalization does not.
41 * Independent normalization can be used to remove color casts, such as the
42 * blue cast from underwater video, restoring more natural colors. The filter
43 * can also combine independent and linked normalization in any ratio.
44 *
45 * Finally the overall strength of the filter can be adjusted, from no effect
46 * to full normalization.
47 *
48 * The 5 AVOptions are:
49 * blackpt, whitept: Colors which define the output range. The minimum
50 * input value is mapped to the blackpt. The maximum input value is mapped
51 * to the whitept. The defaults are black and white respectively.
52 * Specifying white for blackpt and black for whitept will give
53 * color-inverted, normalized video. Shades of grey can be used
54 * to reduce the dynamic range (contrast). Specifying saturated
55 * colors here can create some interesting effects.
56 *
57 * smoothing The amount of temporal smoothing, expressed in frames (>=0).
58 * The minimum and maximum input values of each channel are
59 * smoothed using a rolling average over the current frame and
60 * that many previous frames of video. Defaults to 0 (no temporal
61 * smoothing).
62 *
63 * independence
64 * Controls the ratio of independent (color shifting) channel
65 * normalization to linked (color preserving) normalization. 0.0
66 * is fully linked, 1.0 is fully independent. Defaults to fully
67 * independent.
68 *
69 * strength Overall strength of the filter. 1.0 is full strength. 0.0 is
70 * a rather expensive no-op. Values in between can give a gentle
71 * boost to low-contrast video without creating an artificial
72 * over-processed look. The default is full strength.
73 */
74
84
89
91 uint16_t
in;
// Original input byte value for this frame.
92 float smoothed;
// Smoothed input value [0,255].
93 float out;
// Output value [0,255]
95
98
99 // Storage for the corresponding AVOptions
105
106 uint8_t
co[4];
// Offsets to R,G,B,A bytes respectively in each pixel
112 int history_len;
// Number of frames to average; based on smoothing factor
113 int frame_num;
// Increments on each frame, starting from 0.
114
115 // Per-extremum, per-channel history, for temporal smoothing.
117 uint16_t *
history_mem;
// Single allocation for above history entries
118
119 uint16_t
lut[3][65536];
// Lookup table
120
124
125 #define OFFSET(x) offsetof(NormalizeContext, x)
126 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
127 #define FLAGSR AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
128
136 };
137
139
141 {
142 for (
int c = 0;
c < 3;
c++)
146 for (
int x = 0; x < in->
width; x++) {
147 for (
int c = 0;
c < 3;
c++) {
150 }
152 }
153 }
154 }
155
157 {
158 for (
int y = 0; y < in->
height; y++) {
160 uint8_t *outp =
out->data[0] + y *
out->linesize[0];
161 for (
int x = 0; x < in->
width; x++) {
162 for (
int c = 0;
c < 3;
c++)
163 outp[
s->co[
c]] =
s->lut[
c][inp[
s->co[
c]]];
164 if (
s->num_components == 4)
165 // Copy alpha as-is.
166 outp[
s->co[3]] = inp[
s->co[3]];
169 }
170 }
171 }
172
174 {
178 for (
int y = 0; y < in->
height; y++) {
182 for (
int x = 0; x < in->
width; x++) {
189 }
190 }
191 }
192
194 {
195 for (
int y = 0; y < in->
height; y++) {
200 uint8_t *outrp =
out->data[2] + y *
out->linesize[2];
201 uint8_t *outgp =
out->data[0] + y *
out->linesize[0];
202 uint8_t *outbp =
out->data[1] + y *
out->linesize[1];
203 uint8_t *outap =
out->data[3] + y *
out->linesize[3];
204 for (
int x = 0; x < in->
width; x++) {
205 outrp[x] =
s->lut[0][inrp[x]];
206 outgp[x] =
s->lut[1][ingp[x]];
207 outbp[x] =
s->lut[2][inbp[x]];
208 if (
s->num_components == 4)
209 outap[x] = inap[x];
210 }
211 }
212 }
213
215 {
216 for (
int c = 0;
c < 3;
c++)
218 for (
int y = 0; y < in->
height; y++) {
219 uint16_t *inp = (uint16_t *)(in->
data[0] + y * in->
linesize[0]);
220 for (
int x = 0; x < in->
width; x++) {
221 for (
int c = 0;
c < 3;
c++) {
224 }
226 }
227 }
228 }
229
231 {
232 for (
int y = 0; y < in->
height; y++) {
233 uint16_t *inp = (uint16_t *)(in->
data[0] + y * in->
linesize[0]);
234 uint16_t *outp = (uint16_t *)(
out->data[0] + y *
out->linesize[0]);
235 for (
int x = 0; x < in->
width; x++) {
236 for (
int c = 0;
c < 3;
c++)
237 outp[
s->co[
c]] =
s->lut[
c][inp[
s->co[
c]]];
238 if (
s->num_components == 4)
239 // Copy alpha as-is.
240 outp[
s->co[3]] = inp[
s->co[3]];
243 }
244 }
245 }
246
248 {
252 for (
int y = 0; y < in->
height; y++) {
253 uint16_t *inrp = (uint16_t *)(in->
data[2] + y * in->
linesize[2]);
254 uint16_t *ingp = (uint16_t *)(in->
data[0] + y * in->
linesize[0]);
255 uint16_t *inbp = (uint16_t *)(in->
data[1] + y * in->
linesize[1]);
256 for (
int x = 0; x < in->
width; x++) {
263 }
264 }
265 }
266
268 {
269 for (
int y = 0; y < in->
height; y++) {
270 uint16_t *inrp = (uint16_t *)(in->
data[2] + y * in->
linesize[2]);
271 uint16_t *ingp = (uint16_t *)(in->
data[0] + y * in->
linesize[0]);
272 uint16_t *inbp = (uint16_t *)(in->
data[1] + y * in->
linesize[1]);
273 uint16_t *inap = (uint16_t *)(in->
data[3] + y * in->
linesize[3]);
274 uint16_t *outrp = (uint16_t *)(
out->data[2] + y *
out->linesize[2]);
275 uint16_t *outgp = (uint16_t *)(
out->data[0] + y *
out->linesize[0]);
276 uint16_t *outbp = (uint16_t *)(
out->data[1] + y *
out->linesize[1]);
277 uint16_t *outap = (uint16_t *)(
out->data[3] + y *
out->linesize[3]);
278 for (
int x = 0; x < in->
width; x++) {
279 outrp[x] =
s->lut[0][inrp[x]];
280 outgp[x] =
s->lut[1][ingp[x]];
281 outbp[x] =
s->lut[2][inbp[x]];
282 if (
s->num_components == 4)
283 outap[x] = inap[x];
284 }
285 }
286 }
287
288 // This function is the main guts of the filter. Normalizes the input frame
289 // into the output frame. The frames are known to have the same dimensions
290 // and pixel format.
292 {
293 // Per-extremum, per-channel local variables.
295
296 float rgb_min_smoothed; // Min input range for linked normalization
297 float rgb_max_smoothed; // Max input range for linked normalization
299
300 // First, scan the input frame to find, for each channel, the minimum
301 // (min.in) and maximum (max.in) values present in the channel.
303
304 // Next, for each channel, push min.in and max.in into their respective
305 // histories, to determine the min.smoothed and max.smoothed for this frame.
306 {
307 int history_idx =
s->frame_num %
s->history_len;
308 // Assume the history is not yet full; num_history_vals is the number
309 // of frames received so far including the current frame.
310 int num_history_vals =
s->frame_num + 1;
311 if (
s->frame_num >=
s->history_len) {
312 // The history is full; drop oldest value and cap num_history_vals.
313 for (
c = 0;
c < 3;
c++) {
314 s->min[
c].history_sum -=
s->min[
c].history[history_idx];
315 s->max[
c].history_sum -=
s->max[
c].history[history_idx];
316 }
317 num_history_vals =
s->history_len;
318 }
319 // For each extremum, update history_sum and calculate smoothed value
320 // as the rolling average of the history entries.
321 for (
c = 0;
c < 3;
c++) {
322 s->min[
c].history_sum += (
s->min[
c].history[history_idx] =
min[
c].in);
323 min[
c].smoothed =
s->min[
c].history_sum / (float)num_history_vals;
324 s->max[
c].history_sum += (
s->max[
c].history[history_idx] =
max[
c].in);
325 max[
c].smoothed =
s->max[
c].history_sum / (float)num_history_vals;
326 }
327 }
328
329 // Determine the input range for linked normalization. This is simply the
330 // minimum of the per-channel minimums, and the maximum of the per-channel
331 // maximums.
332 rgb_min_smoothed =
FFMIN3(
min[0].smoothed,
min[1].smoothed,
min[2].smoothed);
333 rgb_max_smoothed =
FFMAX3(
max[0].smoothed,
max[1].smoothed,
max[2].smoothed);
334
335 // Now, process each channel to determine the input and output range and
336 // build the lookup tables.
337 for (
c = 0;
c < 3;
c++) {
338 int in_val;
339 // Adjust the input range for this channel [min.smoothed,max.smoothed]
340 // by mixing in the correct proportion of the linked normalization
341 // input range [rgb_min_smoothed,rgb_max_smoothed].
342 min[
c].smoothed = (
min[
c].smoothed *
s->independence)
343 + (rgb_min_smoothed * (1.0
f -
s->independence));
344 max[
c].smoothed = (
max[
c].smoothed *
s->independence)
345 + (rgb_max_smoothed * (1.0
f -
s->independence));
346
347 // Calculate the output range [min.out,max.out] as a ratio of the full-
348 // strength output range [blackpt,whitept] and the original input range
349 // [min.in,max.in], based on the user-specified filter strength.
350 min[
c].out = (
s->sblackpt[
c] *
s->strength)
351 + (
min[
c].in * (1.0
f -
s->strength));
352 max[
c].out = (
s->swhitept[
c] *
s->strength)
353 + (
max[
c].in * (1.0
f -
s->strength));
354
355 // Now, build a lookup table which linearly maps the adjusted input range
356 // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
357 // Perform the linear interpolation for each x:
358 // lut[x] = (int)(float(x - min.smoothed) * scale + min.out + 0.5)
359 // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
360 if (
min[
c].smoothed ==
max[
c].smoothed) {
361 // There is no dynamic range to expand. No mapping for this channel.
362 for (in_val =
min[
c].in; in_val <=
max[
c].in; in_val++)
363 s->lut[
c][in_val] =
min[
c].out;
364 } else {
365 // We must set lookup values for all values in the original input
366 // range [min.in,max.in]. Since the original input range may be
367 // larger than [min.smoothed,max.smoothed], some output values may
368 // fall outside the [0,255] dynamic range. We need to clamp them.
370 for (in_val =
min[
c].in; in_val <=
max[
c].in; in_val++) {
373 s->lut[
c][in_val] = out_val;
374 }
375 }
376 }
377
378 // Finally, process the pixels of the input frame using the lookup tables.
379 s->process(
s, in,
out);
380
382 }
383
384 // Now we define all the functions accessible from the ff_vf_normalize class,
385 // which is ffmpeg's interface to our filter. See doc/filter_design.txt and
386 // doc/writing_filters.txt for descriptions of what these interface functions
387 // are expected to do.
388
389 // The pixel formats that our filter supports. We should be able to process
390 // any 8-bit RGB formats; this file also has 16-bit-per-sample processing paths.
408 };
409
410 // At this point we know the pixel format used for both input and output. We
411 // can also access the frame rate of the input video and allocate some memory
412 // appropriately
414 {
416 // Store offsets to R,G,B,A bytes respectively in each pixel
419
421 s->depth =
desc->comp[0].depth;
422 scale = 1 << (
s->depth - 8);
423 s->num_components =
desc->nb_components;
425 // Convert smoothing value to history_len (a count of frames to average,
426 // must be at least 1). Currently this is a direct assignment, but the
427 // smoothing value was originally envisaged as a number of seconds. In
428 // future it would be nice to set history_len using a number of seconds,
429 // but VFR video is currently an obstacle to doing so.
430 s->history_len =
s->smoothing + 1;
431 // Allocate the history buffers -- there are 6 -- one for each extremum.
432 // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
433 // can't overflow on 32bit causing a too-small allocation.
434 s->history_mem =
av_malloc(
s->history_len * 6 *
sizeof(*
s->history_mem));
435 if (
s->history_mem ==
NULL)
437
438 for (
c = 0;
c < 3;
c++) {
439 s->min[
c].history =
s->history_mem + (
c*2) *
s->history_len;
440 s->max[
c].history =
s->history_mem + (
c*2+1) *
s->history_len;
441 s->sblackpt[
c] =
scale *
s->blackpt[
c] + (
s->blackpt[
c] & (1 << (
s->depth - 8)));
442 s->swhitept[
c] =
scale *
s->whitept[
c] + (
s->whitept[
c] & (1 << (
s->depth - 8)));
443 }
444
446
450 } else {
453 }
454
455 return 0;
456 }
457
458 // Free any memory allocations here
460 {
462
464 }
465
466 // This function is pretty much standard from doc/writing_filters.txt. It
467 // tries to do in-place filtering where possible, only allocating a new output
468 // frame when absolutely necessary.
470 {
475 // Set 'direct' if we can modify the input frame in-place. Otherwise we
476 // need to retrieve a new frame from the output link.
478
481 } else {
486 }
488 }
489
490 // Now we've got the input and output frames (which may be the same frame)
491 // perform the filtering with our custom function.
493
494 if (
ctx->is_disabled) {
497 }
498
501
503 }
504
506 {
511 },
512 };
513
515 {
518 },
519 };
520
525 .priv_class = &normalize_class,
532 };