1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
24 #include <stdio.h>
25
33
38
41
53 };
54
/*
 * Ceiling division: computes (a + b - 1) / b, which for integer operands with
 * a >= 0 and b > 0 is a / b rounded up to the next whole number.
 * Note: `b` is expanded twice, so avoid arguments with side effects.
 */
55 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
58
/*
 * Forwards the CUDA driver call expression `x` to FF_CUDA_CHECK_DL together
 * with `ctx` and the function table s->hwctx->internal->cuda_dl.
 * The macro refers to the names `ctx` and `s` directly, so both must be in
 * scope wherever it is expanded. Its result is used as a status value by
 * callers in this file (e.g. `ret = CHECK_CU(...)`).
 * NOTE(review): FF_CUDA_CHECK_DL is defined outside this file; presumably it
 * reports failures and yields an error code -- confirm against its header.
 */
59 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
60
61 enum {
63
68
70 };
71
74
76
82
85
88
89 /**
90 * Output software pixel format. AV_PIX_FMT_NONE means no format conversion.
91 */
93
94 char *
w_expr;
///< width expression string
95 char *
h_expr;
///< height expression string
96
99
105
109
112
114 {
116
120
124
125 return 0;
126 }
127
129 {
131
132 if (
s->hwctx &&
s->cu_module) {
133 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
135
136 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
137 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
140 }
141
145 }
146
148 {
152
154 if (!out_ref)
157
162
166
171
174
176 s->frames_ctx = out_ref;
177
178 return 0;
182 }
183
185 {
187
190 return 1;
191 return 0;
192 }
193
195 {
198
199 s->in_fmt = in_format;
200 s->out_fmt = out_format;
201
206
207 // find maximum step of each component of each plane
208 // For our subset of formats, this should accurately tell us how many channels CUDA needs
209 // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
210
211 for (
i = 0;
i <
s->in_desc->nb_components;
i++) {
212 d = (
s->in_desc->comp[
i].depth + 7) / 8;
213 p =
s->in_desc->comp[
i].plane;
214 s->in_plane_channels[p] =
FFMAX(
s->in_plane_channels[p],
s->in_desc->comp[
i].step / d);
215
216 s->in_plane_depths[p] =
s->in_desc->comp[
i].depth;
217 }
218 }
219
221 int out_width, int out_height)
222 {
226
228
232
233 /* check that we have a hw context */
237 }
241
246 }
251 }
252
254
255 if (
s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
259 } else {
261
265
266 if (in_width == out_width && in_height == out_height &&
269 }
270
274
275 return 0;
276 }
277
279 {
281 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
282 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
283 char buf[128];
285
288
289 const char *function_infix = "";
290
291 extern const unsigned char ff_vf_scale_cuda_ptx_data[];
292 extern const unsigned int ff_vf_scale_cuda_ptx_len;
293
294 switch(
s->interp_algo) {
296 function_infix = "Nearest";
297 s->interp_use_linear = 0;
298 s->interp_as_integer = 1;
299 break;
301 function_infix = "Bilinear";
302 s->interp_use_linear = 1;
303 s->interp_as_integer = 1;
304 break;
307 function_infix = "Bicubic";
308 s->interp_use_linear = 0;
309 s->interp_as_integer = 0;
310 break;
312 function_infix = "Lanczos";
313 s->interp_use_linear = 0;
314 s->interp_as_integer = 0;
315 break;
316 default:
319 }
320
324
326 ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
329
330 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
331 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module, buf));
336 }
337
338 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
339 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module, buf));
342
345
347 }
348
350 {
359
361 s->w_expr,
s->h_expr,
365
367 s->force_original_aspect_ratio,
s->force_divisible_by);
368
372
375
379
382
383 s->hwctx = device_hwctx;
384 s->cu_stream =
s->hwctx->stream;
385
386 if (
inlink->sample_aspect_ratio.num) {
389 inlink->sample_aspect_ratio);
390 } else {
392 }
393
397 s->passthrough ?
" (passthrough)" :
"");
398
402
403 return 0;
404
407 }
408
410 CUtexObject src_tex[4], int src_width, int src_height,
411 AVFrame *out_frame,
int dst_width,
int dst_height,
int dst_pitch)
412 {
414 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
415
416 CUdeviceptr dst_devptr[4] = {
417 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1],
418 (CUdeviceptr)out_frame->
data[2], (CUdeviceptr)out_frame->
data[3]
419 };
420
421 void *args_uchar[] = {
422 &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
423 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
424 &dst_width, &dst_height, &dst_pitch,
425 &src_width, &src_height, &
s->param
426 };
427
431 }
432
435 {
437 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
438 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
440
441 CUtexObject tex[4] = { 0, 0, 0, 0 };
442
446
447 for (
i = 0;
i <
s->in_planes;
i++) {
448 CUDA_TEXTURE_DESC tex_desc = {
449 .filterMode =
s->interp_use_linear ?
450 CU_TR_FILTER_MODE_LINEAR :
451 CU_TR_FILTER_MODE_POINT,
452 .flags =
s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
453 };
454
455 CUDA_RESOURCE_DESC res_desc = {
456 .resType = CU_RESOURCE_TYPE_PITCH2D,
457 .res.pitch2D.format =
s->in_plane_depths[
i] <= 8 ?
458 CU_AD_FORMAT_UNSIGNED_INT8 :
459 CU_AD_FORMAT_UNSIGNED_INT16,
460 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
461 .res.pitch2D.pitchInBytes = in->
linesize[
i],
462 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
463 };
464
465 if (
i == 1 ||
i == 2) {
468 } else {
469 res_desc.res.pitch2D.width = in->
width;
470 res_desc.res.pitch2D.height = in->
height;
471 }
472
475 goto exit;
476 }
477
478 // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
483 goto exit;
484
485 if (
s->out_planes > 1) {
486 // scale UV plane(s). The scale function writes both the U and V planes, or a single interleaved plane.
495 goto exit;
496 }
497
498 exit:
499 for (
i = 0;
i <
s->in_planes;
i++)
501 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
502
504
506 }
507
509 {
514
518
523
526
527 s->frame->width = outlink->
w;
528 s->frame->height = outlink->
h;
529
533
534 return 0;
535 }
536
538 {
542 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
543
547
550
555 }
556
557 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
560
562
566
567 av_reduce(&
out->sample_aspect_ratio.num, &
out->sample_aspect_ratio.den,
570 INT_MAX);
571
578 }
579
581 {
583
584 return s->passthrough ?
587 }
588
/* Byte offset of member `x` inside CUDAScaleContext; used by the option entries in this file. */
589 #define OFFSET(x) offsetof(CUDAScaleContext, x)
/*
 * Bitwise OR of AV_OPT_FLAG_FILTERING_PARAM and AV_OPT_FLAG_VIDEO_PARAM,
 * passed as the flags field of the option entries in this file.
 */
590 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
600 {
"passthrough",
"Do not process frames at all if parameters match",
OFFSET(passthrough),
AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1,
FLAGS },
602 {
"force_original_aspect_ratio",
"decrease or increase w/h if necessary to keep the original AR",
OFFSET(force_original_aspect_ratio),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2,
FLAGS, .unit =
"force_oar" },
606 {
"force_divisible_by",
"enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used",
OFFSET(force_divisible_by),
AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256,
FLAGS },
608 };
609
615 };
616
618 {
623 },
624 };
625
627 {
631 },
632 };
633
635 .
name =
"scale_cuda",
637
640
643
646
648
650 };