1 /*
2 * Copyright (c) 2022 Mohamed Khaled <Mohamed_Khaled_Kamal@outlook.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
22 #include <stdio.h>
23 #include <string.h>
24
33
39
44 };
45
/* Integer division of n by d, rounded up. Both arguments are evaluated more
 * than once, so they must be free of side effects. */
#define DIV_UP(n, d) (((n) + (d) - 1) / (d))

/* Forwards a CUDA driver call result to FF_CUDA_CHECK_DL. The expansion refers
 * to `ctx` and `s` by name, so both must be in scope wherever this is used. */
#define CHECK_CU(call) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, call)
50
53
55
61
67
71
78
80 {
82
86
90
91 return 0;
92 }
93
95 {
97
98 if (
s->hwctx &&
s->cu_module)
99 {
100 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
102
103 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
104 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
107 }
108
112 }
113
115 {
119
121 if (!out_ref)
124
129
133
138
140 s->frames_ctx = out_ref;
141
142 return 0;
146 }
147
149 {
151
154 return 1;
155 return 0;
156 }
157
159 {
162
163 s->in_fmt = in_format;
164 s->out_fmt = out_format;
165
170
171 // find maximum step of each component of each plane
172 // For our subset of formats, this should accurately tell us how many channels CUDA needs
173 // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
174
175 for (
i = 0;
i <
s->in_desc->nb_components;
i++)
176 {
177 d = (
s->in_desc->comp[
i].depth + 7) / 8;
178 p =
s->in_desc->comp[
i].plane;
179 s->in_plane_channels[p] =
FFMAX(
s->in_plane_channels[p],
s->in_desc->comp[
i].step /
d);
180
181 s->in_plane_depths[p] =
s->in_desc->comp[
i].depth;
182 }
183 }
184
186 {
190
191 /* check that we have a hw context */
192 if (!
ctx->inputs[0]->hw_frames_ctx)
193 {
196 }
198
200 {
203 }
204
206
210
212 if (!
ctx->outputs[0]->hw_frames_ctx)
214
215 return 0;
216 }
217
219 {
221 CUcontext
context, cuda_ctx =
s->hwctx->cuda_ctx;
222 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
224
225 extern const unsigned char ff_vf_chromakey_cuda_ptx_data[];
226 extern const unsigned int ff_vf_chromakey_cuda_ptx_len;
227
231
233 ff_vf_chromakey_cuda_ptx_data, ff_vf_chromakey_cuda_ptx_len);
236
237 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module,
"Process_uchar"));
239 {
242 }
243
244 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module,
"Process_uchar2"));
246 {
249 }
250
253
255 }
256
257 #define FIXNUM(x) lrint((x) * (1 << 10))
258 #define RGB_TO_U(rgb) (((-FIXNUM(0.16874) * rgb[0] - FIXNUM(0.33126) * rgb[1] + FIXNUM(0.50000) * rgb[2] + (1 << 9) - 1) >> 10) + 128)
259 #define RGB_TO_V(rgb) (((FIXNUM(0.50000) * rgb[0] - FIXNUM(0.41869) * rgb[1] - FIXNUM(0.08131) * rgb[2] + (1 << 9) - 1) >> 10) + 128)
260
262 {
269
270 s->hwctx = device_hwctx;
271 s->cu_stream =
s->hwctx->stream;
272
274 s->chromakey_uv[0] =
s->chromakey_rgba[1];
275 s->chromakey_uv[1] =
s->chromakey_rgba[2];
276 } else {
277 s->chromakey_uv[0] =
RGB_TO_U(
s->chromakey_rgba);
278 s->chromakey_uv[1] =
RGB_TO_V(
s->chromakey_rgba);
279 }
280
284
286
290
291 return 0;
292 }
293
295 CUtexObject src_tex[3],
AVFrame *out_frame,
297 int width_uv, int height_uv, int pitch_uv,
298 float u_key, float v_key, float similarity,
299 float blend)
300 {
302 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
303
304 CUdeviceptr dst_devptr[4] = {
305 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1],
306 (CUdeviceptr)out_frame->
data[2], (CUdeviceptr)out_frame->
data[3]
307 };
308
309 void *args_uchar[] = {
310 &src_tex[0], &src_tex[1], &src_tex[2],
311 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
313 &width_uv, &height_uv, &pitch_uv,
314 &u_key, &v_key, &similarity, &blend
315 };
316
320 }
321
324 {
326 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
327 CUcontext
context, cuda_ctx =
s->hwctx->cuda_ctx;
329
330 CUtexObject tex[3] = {0, 0, 0};
331
335
336 for (
i = 0;
i <
s->in_planes;
i++)
337 {
338 CUDA_TEXTURE_DESC tex_desc = {
339 .filterMode = CU_TR_FILTER_MODE_LINEAR,
340 .flags = 0, // CU_TRSF_READ_AS_INTEGER to get raw ints instead of normalized floats from tex2D
341 };
342
343 CUDA_RESOURCE_DESC res_desc = {
344 .resType = CU_RESOURCE_TYPE_PITCH2D,
345 .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8,
346 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
347 .res.pitch2D.pitchInBytes = in->
linesize[
i],
348 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
349 };
350
351 if (
i == 1 ||
i == 2)
352 {
355 }
356 else
357 {
358 res_desc.res.pitch2D.width = in->
width;
359 res_desc.res.pitch2D.height = in->
height;
360 }
361
364 goto exit;
365 }
366
369 out->width,
out->height,
out->linesize[0],
373 s->chromakey_uv[0],
s->chromakey_uv[1],
374 s->similarity,
s->blend);
376 goto exit;
377
378 exit:
379 for (
i = 0;
i <
s->in_planes;
i++)
381 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
382
384
386 }
387
389 {
393
397
402
405
409
410 return 0;
411 }
412
414 {
418 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
419
423
426 {
429 }
430
431 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
434
436
440
447 }
448
/* Byte offset of `field` inside ChromakeyCUDAContext. */
#define OFFSET(field) offsetof(ChromakeyCUDAContext, field)
/* Option flag set: a video parameter combined with a filtering parameter. */
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
457 };
458
464 };
465
467 {
471 },
472 };
473
475 {
479 },
480 };
481
483 .
name =
"chromakey_cuda",
485
488
491
494
496
498 };