/*
 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Calculate VMAF Motion score.
 */

#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "vmaf_motion.h"

#define BIT_SHIFT 15

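/* 5-tap low-pass filter used to blur each frame before the SAD is taken;
 * the floating-point taps are converted to BIT_SHIFT-bit fixed point in
 * ff_vmafmotion_init(). */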
static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};

typedef struct VMAFMotionContext {
    const AVClass *class;
    VMAFMotionData data;
    FILE *stats_file;
    char *stats_file_str;
} VMAFMotionContext;

#define OFFSET(x) offsetof(VMAFMotionContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption vmafmotion_options[] = {
    { "stats_file", "Set file where to store per-frame difference information",
        OFFSET(stats_file_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 1, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(vmafmotion);

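/* Sum of absolute differences between two blurred 16-bit planes. Strides are
 * passed in bytes and converted to element counts inside the function. */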
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
    ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
    uint64_t sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            sum += abs(img1[j] - img2[j]);
        }
        img1 += img1_stride;
        img2 += img2_stride;
    }

    return sum;
}

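/* Horizontal pass of the separable blur: every output sample is a fixed-point
 * dot product of filt_w taps with one source row, with the taps mirrored at
 * the left and right picture edges; the accumulator is scaled back down by
 * BIT_SHIFT. */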
static void convolution_x(const uint16_t *filter, int filt_w,
                          const uint16_t *src, uint16_t *dst, int w, int h,
                          ptrdiff_t _src_stride, ptrdiff_t _dst_stride)
{
    ptrdiff_t src_stride = _src_stride / sizeof(*src);
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
    int radius = filt_w / 2;
    int borders_left = radius;
    int borders_right = w - (filt_w - radius);
    int i, j, k;
    int sum = 0;

    for (i = 0; i < h; i++) {
        for (j = 0; j < borders_left; j++) {
            sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_left; j < borders_right; j++) {
            int sum = 0;
            for (k = 0; k < filt_w; k++) {
                sum += filter[k] * src[i * src_stride + j - radius + k];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }

        for (j = borders_right; j < w; j++) {
            sum = 0;
            for (k = 0; k < filt_w; k++) {
                int j_tap = FFABS(j - radius + k);
                if (j_tap >= w) {
                    j_tap = w - (j_tap - w + 1);
                }
                sum += filter[k] * src[i * src_stride + j_tap];
            }
            dst[i * dst_stride + j] = sum >> BIT_SHIFT;
        }
    }
}

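/* Vertical pass of the separable blur, instantiated further down for 8-bit
 * and 10-bit sources so the input can be read at its native depth; the
 * fixed-point accumulator is shifted right by that depth before being stored
 * as 16-bit samples. */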
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    ptrdiff_t src_stride = _src_stride / sizeof(*src); \
    ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
    int radius = filt_w / 2; \
    int borders_top = radius; \
    int borders_bottom = h - (filt_w - radius); \
    int i, j, k; \
    int sum = 0; \
    \
    for (i = 0; i < borders_top; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_top; i < borders_bottom; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
    for (i = borders_bottom; i < h; i++) { \
        for (j = 0; j < w; j++) { \
            sum = 0; \
            for (k = 0; k < filt_w; k++) { \
                int i_tap = FFABS(i - radius + k); \
                if (i_tap >= h) { \
                    i_tap = h - (i_tap - h + 1); \
                } \
                sum += filter[k] * src[i_tap * src_stride + j]; \
            } \
            dst[i * dst_stride + j] = sum >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8);
conv_y_fn(uint16_t, 10);

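/* Install the C implementations of the DSP hooks; the bit depth only selects
 * which vertical convolution variant is used. */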
static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
    dsp->convolution_x = convolution_x;
    dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
    dsp->sad = image_sad;
}

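/* Blur the new frame, then score it as the mean absolute difference against
 * the previously blurred frame (0.0 for the very first frame), normalized to
 * an 8-bit scale. */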
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
{
    double score;

    s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
                             s->width, s->height, ref->linesize[0], s->stride);
    s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
                             s->width, s->height, s->stride, s->stride);

    if (!s->nb_frames) {
        score = 0.0;
    } else {
        uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
                                      s->width, s->height, s->stride, s->stride);
        // the output score is always normalized to 8 bits
        score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
    }

    FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
    s->nb_frames++;
    s->motion_sum += score;

    return score;
}

static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char value[128];
    snprintf(value, sizeof(value), "%0.2f", d);
    av_dict_set(metadata, key, value, 0);
}

static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
{
    VMAFMotionContext *s = ctx->priv;
    double score;

    score = ff_vmafmotion_process(&s->data, ref);
    set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
    if (s->stats_file) {
        fprintf(s->stats_file, "n:%"PRIu64" motion:%0.2f\n",
                s->data.nb_frames, score);
    }
}

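/* Allocate the two blur buffers and the temporary plane, convert the filter
 * taps to fixed point and pick the DSP functions for the input bit depth. */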
int ff_vmafmotion_init(VMAFMotionData *s,
                       int w, int h, enum AVPixelFormat fmt)
{
    size_t data_sz;
    int i;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    s->width = w;
    s->height = h;
    s->stride = FFALIGN(w * sizeof(uint16_t), 32);

    data_sz = (size_t) s->stride * h;
    if (!(s->blur_data[0] = av_malloc(data_sz)) ||
        !(s->blur_data[1] = av_malloc(data_sz)) ||
        !(s->temp_data    = av_malloc(data_sz))) {
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < 5; i++) {
        s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
    }

    vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);

    return 0;
}

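/* Accept any planar (or single-component) format at native endianness with
 * 8- or 10-bit depth; RGB, paletted, hardware and bitstream formats are
 * excluded. */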
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *fmts_list = NULL;
    int format, ret;

    for (format = 0; av_pix_fmt_desc_get(format); format++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL |
                             AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM)) &&
            (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN ||
             desc->comp[0].depth == 8) &&
            (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
            (ret = ff_add_format(&fmts_list, format)) < 0)
            return ret;
    }

    return ff_set_common_formats(ctx, fmts_list);
}

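/* Propagate the negotiated input dimensions and pixel format to the shared
 * VMAF motion state. */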
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VMAFMotionContext *s = ctx->priv;

    return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
                              ctx->inputs[0]->h, ctx->inputs[0]->format);
}

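/* Free the blur buffers and return the average motion score over all frames
 * processed so far. */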
double ff_vmafmotion_uninit(VMAFMotionData *s)
{
    av_free(s->blur_data[0]);
    av_free(s->blur_data[1]);
    av_free(s->temp_data);

    return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
}

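/* Score each incoming frame and pass it through unchanged. */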
static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
{
    AVFilterContext *ctx = inlink->dst;
    do_vmafmotion(ctx, ref);
    return ff_filter_frame(ctx->outputs[0], ref);
}

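/* Open the optional per-frame stats file ("-" selects stdout). */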
static av_cold int init(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;

    if (s->stats_file_str) {
        if (!strcmp(s->stats_file_str, "-")) {
            s->stats_file = stdout;
        } else {
            s->stats_file = fopen(s->stats_file_str, "w");
            if (!s->stats_file) {
                int err = AVERROR(errno);
                char buf[128];
                av_strerror(err, buf, sizeof(buf));
                av_log(ctx, AV_LOG_ERROR,
                       "Could not open stats file %s: %s\n", s->stats_file_str, buf);
                return err;
            }
        }
    }

    return 0;
}

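/* Log the average motion score and close the stats file if one was opened. */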
static av_cold void uninit(AVFilterContext *ctx)
{
    VMAFMotionContext *s = ctx->priv;
    double avg_motion = ff_vmafmotion_uninit(&s->data);

    if (s->data.nb_frames > 0) {
        av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
    }

    if (s->stats_file && s->stats_file != stdout)
        fclose(s->stats_file);
}

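/* One video input (the reference stream being scored) and one pass-through
 * video output. */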
static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};

static const AVFilterPad vmafmotion_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};

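/* Filter definition registered as "vmafmotion". */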
AVFilter ff_vf_vmafmotion = {
    .name          = "vmafmotion",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(VMAFMotionContext),
    .priv_class    = &vmafmotion_class,
    .inputs        = vmafmotion_inputs,
    .outputs       = vmafmotion_outputs,
};