1 /*
2 * Copyright (c) 2020 Yaroslav Pogrebnyak <yyyaroslav@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * Overlay one video on top of another using cuda hardware acceleration
24 */
25
33
37
39
40 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
41 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
42
45
48
53 };
54
60 };
61
70 #if FF_API_FRAME_PKT
72 #endif
75 };
76
81 };
82
84 "main_w", "W", ///< width of the main video
85 "main_h", "H", ///< height of the main video
86 "overlay_w", "w", ///< width of the overlay video
87 "overlay_h", "h", ///< height of the overlay video
88 "x",
89 "y",
90 "n", ///< number of frame
91 #if FF_API_FRAME_PKT
92 "pos", ///< position in the file
93 #endif
94 "t", ///< timestamp expressed in seconds
96 };
97
98 /**
99 * OverlayCUDAContext
100 */
103
106
109
114
116
120
123
126
127 /**
128 * Helper to find out whether the provided format is supported by the filter
129 */
131 {
134 return 1;
135 return 0;
136 }
137
139 {
141 return INT_MAX;
142 return (int)d & ~((1 << chroma_sub) - 1);
143 }
144
146 {
148
151 /* necessary if x is expressed from y */
153
155
156 /* the cuda pixel format is using hwaccel, normalizing y is unnecessary */
157 s->y_position =
s->var_values[
VAR_Y];
158 }
159
161 {
164
165 if (*pexpr)
166 old = *pexpr;
171 "Error when evaluating the expression '%s' for %s\n",
173 *pexpr = old;
175 }
176
178 return 0;
179 }
180
181 /**
182 * Helper to check whether the main and overlay pixel formats can be processed together
183 */
185 switch(format_main) {
191 default:
192 return 0;
193 }
194 }
195
196 /**
197 * Call the overlay kernel for a single plane
198 */
201 int x_position, int y_position,
202 uint8_t* main_data, int main_linesize,
203 int main_width, int main_height,
204 uint8_t* overlay_data, int overlay_linesize,
205 int overlay_width, int overlay_height,
206 uint8_t* alpha_data, int alpha_linesize,
207 int alpha_adj_x, int alpha_adj_y) {
208
209 CudaFunctions *cu =
ctx->hwctx->internal->cuda_dl;
210
211 void* kernel_args[] = {
212 &x_position, &y_position,
213 &main_data, &main_linesize,
214 &overlay_data, &overlay_linesize,
215 &overlay_width, &overlay_height,
216 &alpha_data, &alpha_linesize,
217 &alpha_adj_x, &alpha_adj_y,
218 };
219
224 0,
ctx->cu_stream, kernel_args,
NULL));
225 }
226
227 /**
228 * Blend the overlay picture over the main picture
229 */
231 {
233
239
240 CudaFunctions *cu =
ctx->hwctx->internal->cuda_dl;
241 CUcontext
dummy, cuda_ctx =
ctx->hwctx->cuda_ctx;
242
243 AVFrame *input_main, *input_overlay;
244
245 ctx->cu_ctx = cuda_ctx;
246
247 // read main and overlay frames from inputs
251
252 if (!input_main)
254
255 if (!input_overlay)
257
262 }
263
264 // push cuda context
265
270 }
271
276
277 #if FF_API_FRAME_PKT
279 {
282 }
284 #endif
285
290
292
297 }
298
299 // overlay first plane
300
302 ctx->x_position,
ctx->y_position,
307 input_overlay->
data[3], input_overlay->
linesize[3], 1, 1);
308
309 // overlay the remaining planes depending on pixel format
310
311 switch(
ctx->in_format_overlay) {
314 ctx->x_position,
ctx->y_position / 2,
319 0, 0, 0, 0);
320 break;
324 ctx->x_position / 2 ,
ctx->y_position / 2,
328 input_overlay->
width / 2, input_overlay->
height / 2,
329 input_overlay->
data[3], input_overlay->
linesize[3], 2, 2);
330
332 ctx->x_position / 2 ,
ctx->y_position / 2,
336 input_overlay->
width / 2, input_overlay->
height / 2,
337 input_overlay->
data[3], input_overlay->
linesize[3], 2, 2);
338 break;
339 default:
344 }
345
347
349 }
350
352 {
356
357
358 /* Finish the configuration by evaluating the expressions
359 now when both inputs are configured. */
368 #if FF_API_FRAME_PKT
370 #endif
371
375
379 s->var_values[
VAR_X],
s->x_position,
380 s->var_values[
VAR_Y],
s->y_position);
381 }
382
383 return 0;
384 }
385
386 /**
387 * Initialize overlay_cuda
388 */
390 {
393
394 return 0;
395 }
396
397 /**
398 * Uninitialize overlay_cuda
399 */
401 {
403
405
406 if (
ctx->hwctx &&
ctx->cu_module) {
408 CudaFunctions *cu =
ctx->hwctx->internal->cuda_dl;
412 }
413
418 }
419
420 /**
421 * Activate overlay_cuda
422 */
424 {
426
428 }
429
430 /**
431 * Configure output
432 */
434 {
435 extern const unsigned char ff_vf_overlay_cuda_ptx_data[];
436 extern const unsigned int ff_vf_overlay_cuda_ptx_len;
437
438 int err;
442
446
450
451 CUcontext
dummy, cuda_ctx;
452 CudaFunctions *cu;
453
454 // check main input formats
455
459 }
460
466 }
467
468 // check overlay input formats
469
470 if (!frames_ctx_overlay) {
473 }
474
480 }
481
482 // check we can overlay pictures with those pixel formats
483
488 }
489
490 // initialize
491
493 if (!
ctx->hw_device_ctx)
496
497 cuda_ctx =
ctx->hwctx->cuda_ctx;
499
500 ctx->cu_stream =
ctx->hwctx->stream;
501
505
506 // load functions
507
508 cu =
ctx->hwctx->internal->cuda_dl;
509
510 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
511 if (err < 0) {
512 return err;
513 }
514
516 if (err < 0) {
518 return err;
519 }
520
521 err =
CHECK_CU(cu->cuModuleGetFunction(&
ctx->cu_func,
ctx->cu_module,
"Overlay_Cuda"));
522 if (err < 0) {
524 return err;
525 }
526
528
529 // init dual input
530
532 if (err < 0) {
533 return err;
534 }
535
537 }
538
539
540 #define OFFSET(x) offsetof(OverlayCUDAContext, x)
541 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
542
546 { "eof_action", "Action to take when encountering EOF from secondary input ",
555 {
"shortest",
"force termination when the shortest input terminates",
OFFSET(
fs.opt_shortest),
AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1,
FLAGS },
558 };
559
561
563 {
566 },
567 {
568 .name = "overlay",
571 },
572 };
573
575 {
579 },
580 };
581
583 .
name =
"overlay_cuda",
586 .priv_class = &overlay_cuda_class,
593 .preinit = overlay_cuda_framesync_preinit,
595 };