1 /*
2 * Copyright (c) 2022 Mohamed Khaled <Mohamed_Khaled_Kamal@outlook.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
22 #include <stdio.h>
23
31
35
40 };
41
/* Integer division of a by b, rounded up (for non-negative a and positive b).
 * b appears twice in the expansion, so avoid arguments with side effects. */
42 #define DIV_UP(a, b) (((a) + (b)-1) / (b))
/* Hands the call expression x to FF_CUDA_CHECK_DL together with `ctx` and the
 * cuda_dl table reached through `s`; both `ctx` and `s` must therefore be in
 * scope wherever this macro is used. */
45 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
46
49
51
57
63
67
74
76 {
78
82
86
87 return 0;
88 }
89
91 {
93
94 if (
s->hwctx &&
s->cu_module)
95 {
96 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
98
99 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
100 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
103 }
104
108 }
109
111 {
115
117 if (!out_ref)
120
125
129
134
136 s->frames_ctx = out_ref;
137
138 return 0;
142 }
143
145 {
147
150 return 1;
151 return 0;
152 }
153
155 {
158
159 s->in_fmt = in_format;
160 s->out_fmt = out_format;
161
166
167 // For each plane, find the largest component step, counted in components rather than bytes.
168 // For our subset of formats, this should accurately tell us how many channels CUDA needs,
169 // e.g. 1 for the Y plane, 2 for the UV plane of NV12, 4 for the single plane of RGB0 formats.
170
171 for (
i = 0;
i <
s->in_desc->nb_components;
i++)
172 {
173 d = (
s->in_desc->comp[
i].depth + 7) / 8;
174 p =
s->in_desc->comp[
i].plane;
175 s->in_plane_channels[p] =
FFMAX(
s->in_plane_channels[p],
s->in_desc->comp[
i].step /
d);
176
177 s->in_plane_depths[p] =
s->in_desc->comp[
i].depth;
178 }
179 }
180
182 {
186
187 /* check that we have a hw context */
188 if (!
ctx->inputs[0]->hw_frames_ctx)
189 {
192 }
194
196 {
199 }
200
202
206
208 if (!
ctx->outputs[0]->hw_frames_ctx)
210
211 return 0;
212 }
213
215 {
217 CUcontext
context, cuda_ctx =
s->hwctx->cuda_ctx;
218 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
220
221 extern const unsigned char ff_vf_chromakey_cuda_ptx_data[];
222 extern const unsigned int ff_vf_chromakey_cuda_ptx_len;
223
227
229 ff_vf_chromakey_cuda_ptx_data, ff_vf_chromakey_cuda_ptx_len);
232
233 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module,
"Process_uchar"));
235 {
238 }
239
240 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module,
"Process_uchar2"));
242 {
245 }
246
249
251 }
252
253 #define FIXNUM(x) lrint((x) * (1 << 10))
254 #define RGB_TO_U(rgb) (((-FIXNUM(0.16874) * rgb[0] - FIXNUM(0.33126) * rgb[1] + FIXNUM(0.50000) * rgb[2] + (1 << 9) - 1) >> 10) + 128)
255 #define RGB_TO_V(rgb) (((FIXNUM(0.50000) * rgb[0] - FIXNUM(0.41869) * rgb[1] - FIXNUM(0.08131) * rgb[2] + (1 << 9) - 1) >> 10) + 128)
256
258 {
265
266 s->hwctx = device_hwctx;
267 s->cu_stream =
s->hwctx->stream;
268
270 s->chromakey_uv[0] =
s->chromakey_rgba[1];
271 s->chromakey_uv[1] =
s->chromakey_rgba[2];
272 } else {
273 s->chromakey_uv[0] =
RGB_TO_U(
s->chromakey_rgba);
274 s->chromakey_uv[1] =
RGB_TO_V(
s->chromakey_rgba);
275 }
276
280
282
286
287 return 0;
288 }
289
291 CUtexObject src_tex[3],
AVFrame *out_frame,
293 int width_uv, int height_uv, int pitch_uv,
294 float u_key, float v_key, float similarity,
295 float blend)
296 {
298 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
299
300 CUdeviceptr dst_devptr[4] = {
301 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1],
302 (CUdeviceptr)out_frame->
data[2], (CUdeviceptr)out_frame->
data[3]
303 };
304
305 void *args_uchar[] = {
306 &src_tex[0], &src_tex[1], &src_tex[2],
307 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
309 &width_uv, &height_uv, &pitch_uv,
310 &u_key, &v_key, &similarity, &blend
311 };
312
316 }
317
320 {
322 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
323 CUcontext
context, cuda_ctx =
s->hwctx->cuda_ctx;
325
326 CUtexObject tex[3] = {0, 0, 0};
327
331
332 for (
i = 0;
i <
s->in_planes;
i++)
333 {
334 CUDA_TEXTURE_DESC tex_desc = {
335 .filterMode = CU_TR_FILTER_MODE_LINEAR,
336 .flags = 0, // CU_TRSF_READ_AS_INTEGER to get raw ints instead of normalized floats from tex2D
337 };
338
339 CUDA_RESOURCE_DESC res_desc = {
340 .resType = CU_RESOURCE_TYPE_PITCH2D,
341 .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8,
342 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
343 .res.pitch2D.pitchInBytes = in->
linesize[
i],
344 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
345 };
346
347 if (
i == 1 ||
i == 2)
348 {
351 }
352 else
353 {
354 res_desc.res.pitch2D.width = in->
width;
355 res_desc.res.pitch2D.height = in->
height;
356 }
357
360 goto exit;
361 }
362
365 out->width,
out->height,
out->linesize[0],
369 s->chromakey_uv[0],
s->chromakey_uv[1],
370 s->similarity,
s->blend);
372 goto exit;
373
374 exit:
375 for (
i = 0;
i <
s->in_planes;
i++)
377 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
378
380
382 }
383
385 {
389
393
398
401
405
406 return 0;
407 }
408
410 {
414 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
415
419
422 {
425 }
426
427 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
430
432
436
443 }
444
/* Byte offset of member x inside ChromakeyCUDAContext, obtained via offsetof. */
445 #define OFFSET(x) offsetof(ChromakeyCUDAContext, x)
/* Bitwise OR of AV_OPT_FLAG_FILTERING_PARAM and AV_OPT_FLAG_VIDEO_PARAM.
 * NOTE(review): presumably consumed by the option table, which is not visible here - confirm. */
446 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
453 };
454
460 };
461
463 {
467 },
468 };
469
471 {
475 },
476 };
477
479 .
name =
"chromakey_cuda",
481
484
487
490
492
494 };