1 /*
2 * Copyright (C) 2019 Philip Langdale <philipl@overt.org>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
25
28
30
33
36
41
48
/*
 * DIV_UP(a, b): divide a by b, rounding the quotient up instead of down.
 * The "+ (b) - 1" bias gives the ceiling only when both operands are
 * integers, a is non-negative and b is positive.
 * Note that b is expanded twice, so it must not have side effects.
 */
49 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/*
 * ALIGN_UP(a, b): round a up to the next multiple of b.
 * The result is produced by masking with ~((b) - 1), which is only a valid
 * "multiple of b" mask when b is a power of two; other values of b give
 * wrong results. b is expanded twice, so it must not have side effects.
 */
50 #define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1))
53
/*
 * CHECK_CU(x): forward the CUDA driver call x to FF_CUDA_CHECK_DL.
 * The expansion refers to `ctx` and to `s->hwctx->internal->cuda_dl`, so it
 * can only be used where a variable named `ctx` and a variable `s` with a
 * valid `hwctx` are in scope. Call sites in this file assign its value to
 * `ret`.
 * NOTE(review): FF_CUDA_CHECK_DL is defined outside this file; presumably it
 * logs a failure against `ctx` and yields a negative error code - confirm
 * against its definition.
 */
54 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
55
57 CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
59 int src_width, // Width is pixels per channel
60 int src_height, // Height is pixels per channel
61 int src_pitch, // Pitch is bytes
63 int dst_width, // Width is pixels per channel
64 int dst_height, // Height is pixels per channel
65 int dst_pitch, // Pitch is pixels per channel
66 int parity,
int tff,
int clip_max)
67 {
70 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
71 CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0;
74
75 void *args[] = { &
dst, &tex_prev, &tex_cur, &tex_next,
76 &dst_width, &dst_height, &dst_pitch,
77 &src_width, &src_height, &
parity, &tff,
78 &is_field_end, &clip_max };
79
80 CUDA_TEXTURE_DESC tex_desc = {
81 .filterMode = CU_TR_FILTER_MODE_POINT,
82 .flags = CU_TRSF_READ_AS_INTEGER,
83 };
84
85 CUDA_RESOURCE_DESC res_desc = {
86 .resType = CU_RESOURCE_TYPE_PITCH2D,
87 .res.pitch2D.format =
format,
89 .res.pitch2D.width = src_width,
90 .res.pitch2D.height = src_height,
91 .res.pitch2D.pitchInBytes = src_pitch,
92 };
93
94 res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev;
95 ret =
CHECK_CU(cu->cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc,
NULL));
97 goto exit;
98
99 res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur;
100 ret =
CHECK_CU(cu->cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc,
NULL));
102 goto exit;
103
104 res_desc.res.pitch2D.devPtr = (CUdeviceptr)next;
105 ret =
CHECK_CU(cu->cuTexObjectCreate(&tex_next, &res_desc, &tex_desc,
NULL));
107 goto exit;
108
112 0,
s->hwctx->stream, args,
NULL));
113
114 exit:
115 if (tex_prev)
116 CHECK_CU(cu->cuTexObjectDestroy(tex_prev));
117 if (tex_cur)
118 CHECK_CU(cu->cuTexObjectDestroy(tex_cur));
119 if (tex_next)
120 CHECK_CU(cu->cuTexObjectDestroy(tex_next));
121
123 }
124
127 {
130 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
133
134 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
136 return;
137
143
144 if (
comp->plane <
i) {
145 // We process planes as a whole, so don't reprocess
146 // them for additional components
147 continue;
148 }
149
150 pixel_size = (
comp->depth +
comp->shift) / 8;
152 if (pixel_size > 2 ||
channels > 2) {
154 goto exit;
155 }
156 switch (pixel_size) {
157 case 1:
159 format = CU_AD_FORMAT_UNSIGNED_INT8;
160 break;
161 case 2:
163 format = CU_AD_FORMAT_UNSIGNED_INT16;
164 break;
165 default:
167 goto exit;
168 }
169
170 clip_max = (1 << (
comp->depth +
comp->shift)) - 1;
171
173 "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
183 (CUdeviceptr)
dst->data[
i],
188 }
189
192 }
193
194 exit:
196 return;
197 }
198
200 {
203
204 if (
s->hwctx &&
s->cu_module) {
205 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
206 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
207 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
209 }
210
212
216 s->input_frames =
NULL;
217 }
218
220 {
224
227 "required to associate the processing device.\n");
229 }
230
232 if (!
s->input_frames_ref) {
234 "failed.\n");
236 }
238
239 return 0;
240 }
241
243 {
249 CudaFunctions *cu;
252
255 if (!
s->device_ref) {
257 "failed.\n");
259 }
261 cu =
s->hwctx->internal->cuda_dl;
262
266 "for output.\n");
268 goto exit;
269 }
270
272
274 output_frames->
sw_format =
s->input_frames->sw_format;
275 output_frames->
width =
ctx->inputs[0]->w;
276 output_frames->
height =
ctx->inputs[0]->h;
277
279
282 goto exit;
283
287 "context for output: %d\n",
ret);
288 goto exit;
289 }
290
293 goto exit;
294
297
301 goto exit;
302 }
303
304 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
306 goto exit;
307
310 goto exit;
311
312 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar,
s->cu_module,
"bwdif_uchar"));
314 goto exit;
315
316 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar2,
s->cu_module,
"bwdif_uchar2"));
318 goto exit;
319
320 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort,
s->cu_module,
"bwdif_ushort"));
322 goto exit;
323
324 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort2,
s->cu_module,
"bwdif_ushort2"));
326 goto exit;
327
328 exit:
330
332 }
333
340 };
341
343 {
348 },
349 };
350
352 {
357 },
358 };
359
361 .
name =
"bwdif_cuda",
371 };