1 /*
2 * Copyright (c) 2022 Mohamed Khaled <Mohamed_Khaled_Kamal@outlook.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
22 #include <stdio.h>
23
31
35
37
42 };
43
/* Integer division of a by b with the quotient rounded up: (a + b - 1) / b. */
44 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
47
/*
 * Passes expression x to FF_CUDA_CHECK_DL together with `ctx` and the
 * function table s->hwctx->internal->cuda_dl. Both `ctx` and `s` must be
 * in scope wherever this macro is expanded.
 */
48 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
49
50
54
58
62
69
71 {
73
/* Only act when both the hardware context and the loaded module are set. */
74 if (
s->hwctx &&
s->cu_module) {
75 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
/* Receives the context handle returned by the pop below; not read afterwards. */
76 CUcontext bilateral;
77
/*
 * Push the device's CUDA context, unload the module, then pop the context
 * again. The results of the CHECK_CU calls are not acted on here.
 */
78 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
79 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
81 CHECK_CU(cu->cuCtxPopCurrent(&bilateral));
82 }
83 }
84
86 {
88
91 return 1;
92 return 0;
93 }
94
96 {
99
102
103 // find maximum step of each component of each plane
104 // For our subset of formats, this should accurately tell us how many channels CUDA needs
105 // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
106
/* Walk every component listed in the input pixel-format descriptor. */
107 for (
i = 0;
i <
s->in_desc->nb_components;
i++) {
/* Component bit depth rounded up to a whole number of bytes. */
108 d = (
s->in_desc->comp[
i].depth + 7) / 8;
/* Index of the plane this component is stored in. */
109 p =
s->in_desc->comp[
i].plane;
/*
 * step / d for this component; the running maximum per plane is kept in
 * s->in_plane_channels[p].
 */
110 s->in_plane_channels[
p] =
FFMAX(
s->in_plane_channels[
p],
s->in_desc->comp[
i].step / d);
111 }
112 }
113
115 {
119
120 /* check that we have a hw context */
124 }
126
130 }
131
133
137
138 return 0;
139 }
140
142 {
144 CUcontext bilateral, cuda_ctx =
s->hwctx->cuda_ctx;
145 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
147
/*
 * Data blob and its length, both defined in another translation unit
 * (judging by the names, the compiled PTX for this filter's kernels).
 */
148 extern const unsigned char ff_vf_bilateral_cuda_ptx_data[];
149 extern const unsigned int ff_vf_bilateral_cuda_ptx_len;
150
154
156 ff_vf_bilateral_cuda_ptx_data, ff_vf_bilateral_cuda_ptx_len);
159
/* Resolve the "Process_uchar" entry point from the module into s->cu_func. */
160 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module,
"Process_uchar"));
164 }
165
/* Resolve the "Process_uchar2" entry point from the module into s->cu_func_uv. */
166 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module,
"Process_uchar2"));
170 }
171
/* Pop the current CUDA context; the popped handle is stored in `bilateral`. */
173 CHECK_CU(cu->cuCtxPopCurrent(&bilateral));
174
176 }
177
179 {
187
191
194
/* Remember the device hardware context and adopt its stream as this filter's stream. */
195 s->hwctx = device_hwctx;
196 s->cu_stream =
s->hwctx->stream;
197
199
200 // the window_size makes more sense when it is odd, so add 1 if even
202
206
207 return 0;
208 }
209
211 CUtexObject src_tex[3],
AVFrame *out_frame,
213 int width_uv, int height_uv, int pitch_uv,
214 int window_size, float sigmaS, float sigmaR)
215 {
217 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
219
/* out_frame->data[0..2] reinterpreted as CUdeviceptr values, one per output plane. */
220 CUdeviceptr dst_devptr[3] = {
221 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1], (CUdeviceptr)out_frame->
data[2]
222 };
223
/*
 * Argument array for the kernel launch: each entry is the address of the
 * value to pass. NOTE(review): the entry order presumably has to match the
 * kernel's parameter order -- verify against the kernel source.
 */
224 void *args_uchar[] = {
225 &src_tex[0], &src_tex[1], &src_tex[2],
226 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2],
228 &width_uv, &height_uv, &pitch_uv,
229 &window_size, &sigmaS, &sigmaR
230 };
231
237
239 }
240
243 {
245 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
246 CUcontext bilateral, cuda_ctx =
s->hwctx->cuda_ctx;
248
/*
 * One texture handle per plane, zero-initialized. The cleanup loop at `exit`
 * calls cuTexObjectDestroy on entries 0..s->in_planes-1.
 */
249 CUtexObject tex[3] = { 0, 0, 0 };
250
254
/* Build a texture/resource description for each input plane. */
255 for (
i = 0;
i <
s->in_planes;
i++) {
/* Linear filtering; flags stay 0 (the inline note names the alternative flag). */
256 CUDA_TEXTURE_DESC tex_desc = {
257 .filterMode = CU_TR_FILTER_MODE_LINEAR,
258 .flags = 0, // CU_TRSF_READ_AS_INTEGER to get raw ints instead of normalized floats from tex2D
259 };
260
/*
 * Pitched 2D resource with unsigned 8-bit channels. The channel count,
 * pitch and base pointer come from plane i of the input frame.
 */
261 CUDA_RESOURCE_DESC res_desc = {
262 .resType = CU_RESOURCE_TYPE_PITCH2D,
263 .res.pitch2D.format = CU_AD_FORMAT_UNSIGNED_INT8,
264 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
265 .res.pitch2D.pitchInBytes = in->
linesize[
i],
266 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
267 };
268
/* Planes 1 and 2 take a separate branch; any other plane uses in->width and in->height. */
269 if (
i == 1 ||
i == 2) {
272 } else {
273 res_desc.res.pitch2D.width = in->
width;
274 res_desc.res.pitch2D.height = in->
height;
275 }
276
279 goto exit;
280 }
281
284 out->width,
out->height,
out->linesize[0],
/* Plane-1 line size, shifted right by one when plane 1 holds more than one channel. */
287 out->linesize[1] >> ((
s->in_plane_channels[1] > 1) ? 1 : 0),
288 s->window_size,
s->sigmaS,
s->sigmaR);
289
/* Common cleanup: destroy the per-plane texture objects, then pop the CUDA context. */
290 exit:
291 for (
i = 0;
i <
s->in_planes;
i++)
293 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
294
295 CHECK_CU(cu->cuCtxPopCurrent(&bilateral));
296
298 }
299
301 {
305 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
306
308 CUcontext bilateral;
310
315 }
316
317 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
320
324
328
332
339 }
340
/* Byte offset of member x inside CUDABilateralContext. */
341 #define OFFSET(x) offsetof(CUDABilateralContext, x)
/* Flag set combining the filtering-parameter and video-parameter option flags. */
342 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
348 };
349
355 };
356
358 {
362 },
363 };
364
366 {
370 },
371 };
372
374 .
p.
name =
"bilateral_cuda",
383 };