1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
24 #include <stdio.h>
25 #include <string.h>
26
35
41
44
54 };
55
/*
 * Divide a by b after adding (b - 1) to a. With integer operands, a >= 0 and
 * b > 0, the result is a / b rounded up to the next whole number.
 * Both arguments are evaluated more than once (b twice), so avoid passing
 * expressions with side effects.
 */
56 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
59
/*
 * Pass expression x through FF_CUDA_CHECK_DL together with `ctx` and the
 * function table s->hwctx->internal->cuda_dl.
 * The expansion refers to `ctx` and `s` by name, so both must be in scope at
 * every use site. In this file the result is sometimes assigned to `ret` and
 * sometimes discarded.
 */
60 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
61
62 enum {
64
69
71 };
72
75
77
83
86
89
90 /**
91 * Output sw format. AV_PIX_FMT_NONE for no conversion.
92 */
94
95 char *
w_expr;
///< width expression string
96 char *
h_expr;
///< height expression string
97
100
106
110
113
115 {
117
121
125
126 return 0;
127 }
128
130 {
132
/* Only touch CUDA state when both a device context and a loaded module exist. */
133 if (
s->hwctx &&
s->cu_module) {
134 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
136
/*
 * Push the filter's CUDA context, then unload the module. The results of
 * these two checked calls are not assigned to anything here.
 */
137 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
138 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
141 }
142
146 }
147
149 {
153
155 if (!out_ref)
158
163
167
172
175
177 s->frames_ctx = out_ref;
178
179 return 0;
183 }
184
186 {
188
191 return 1;
192 return 0;
193 }
194
196 {
199
/* Record the requested input and output pixel formats on the filter state. */
200 s->in_fmt = in_format;
201 s->out_fmt = out_format;
202
207
208 // find maximum step of each component of each plane
209 // For our subset of formats, this should accurately tell us how many channels CUDA needs
210 // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
211
212 for (
i = 0;
i <
s->in_desc->nb_components;
i++) {
/* d: component bit depth rounded up to a whole number of bytes. */
213 d = (
s->in_desc->comp[
i].depth + 7) / 8;
/* p: index of the plane that carries component i. */
214 p =
s->in_desc->comp[
i].plane;
/* Keep the largest (step / d) seen so far for plane p as its channel count. */
215 s->in_plane_channels[p] =
FFMAX(
s->in_plane_channels[p],
s->in_desc->comp[
i].step /
d);
216
/*
 * Plain assignment, not a maximum: plane p ends up with the depth of the
 * last component in the descriptor that maps to it.
 */
217 s->in_plane_depths[p] =
s->in_desc->comp[
i].depth;
218 }
219 }
220
222 int out_width, int out_height)
223 {
225
227
231
232 /* check that we have a hw context */
233 if (!
ctx->inputs[0]->hw_frames_ctx) {
236 }
240
245 }
250 }
251
253
254 if (
s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
258 } else {
260
264
265 if (in_width == out_width && in_height == out_height &&
268 }
269
271 if (!
ctx->outputs[0]->hw_frames_ctx)
273
274 return 0;
275 }
276
278 {
280 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
281 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
/* Scratch buffer for the kernel names formatted with snprintf below. */
282 char buf[128];
284
287
/* Algorithm part of the kernel name; stays empty unless a case below sets it. */
288 const char *function_infix = "";
289
290 extern const unsigned char ff_vf_scale_cuda_ptx_data[];
291 extern const unsigned int ff_vf_scale_cuda_ptx_len;
292
/*
 * Per interpolation algorithm, pick the kernel-name infix and two flags:
 *   Nearest  -> interp_use_linear = 0, interp_as_integer = 1
 *   Bilinear -> interp_use_linear = 1, interp_as_integer = 1
 *   Bicubic  -> interp_use_linear = 0, interp_as_integer = 0
 *   Lanczos  -> interp_use_linear = 0, interp_as_integer = 0
 * Both flags are read again where the texture descriptors are filled in
 * (filterMode and the CU_TRSF_READ_AS_INTEGER flag).
 */
293 switch(
s->interp_algo) {
295 function_infix = "Nearest";
296 s->interp_use_linear = 0;
297 s->interp_as_integer = 1;
298 break;
300 function_infix = "Bilinear";
301 s->interp_use_linear = 1;
302 s->interp_as_integer = 1;
303 break;
306 function_infix = "Bicubic";
307 s->interp_use_linear = 0;
308 s->interp_as_integer = 0;
309 break;
311 function_infix = "Lanczos";
312 s->interp_use_linear = 0;
313 s->interp_as_integer = 0;
314 break;
315 default:
318 }
319
323
325 ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
328
/*
 * Build the name "Subsample_<infix>_<in_fmt_name>_<out_fmt_name>" and look it
 * up in s->cu_module; the function handle is stored in s->cu_func.
 */
329 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
330 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module, buf));
335 }
336
/* Same lookup with a "_uv" suffix on the name; the handle goes to s->cu_func_uv. */
337 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
338 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module, buf));
341
344
346 }
347
349 {
357
358 s->hwctx = device_hwctx;
359 s->cu_stream =
s->hwctx->stream;
360
362 s->w_expr,
s->h_expr,
366
368 s->force_original_aspect_ratio,
s->force_divisible_by);
369
370 if (((int64_t)
h *
inlink->w) > INT_MAX ||
371 ((int64_t)
w *
inlink->h) > INT_MAX)
373
376
380
381 if (
inlink->sample_aspect_ratio.num) {
384 inlink->sample_aspect_ratio);
385 } else {
387 }
388
392 s->passthrough ?
" (passthrough)" :
"");
393
397
398 return 0;
399
402 }
403
405 CUtexObject src_tex[4], int src_width, int src_height,
406 AVFrame *out_frame,
int dst_width,
int dst_height,
int dst_pitch)
407 {
409 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
410
/* The four data slots of the output frame, cast to device pointers. */
411 CUdeviceptr dst_devptr[4] = {
412 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1],
413 (CUdeviceptr)out_frame->
data[2], (CUdeviceptr)out_frame->
data[3]
414 };
415
/*
 * One pointer per argument, in this order: four source textures, four
 * destination pointers, destination width / height / pitch, source
 * width / height, and s->param.
 * NOTE(review): presumably this is the parameter list handed to the kernel
 * launch; the launch call itself is not visible in this excerpt - confirm
 * the order against the kernel signatures.
 */
416 void *args_uchar[] = {
417 &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
418 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
419 &dst_width, &dst_height, &dst_pitch,
420 &src_width, &src_height, &
s->param
421 };
422
426 }
427
430 {
432 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
433 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
435
/* Up to four texture handles, all starting out as 0. */
436 CUtexObject tex[4] = { 0, 0, 0, 0 };
437
441
/* Fill in a texture descriptor and a resource descriptor for each input plane. */
442 for (
i = 0;
i <
s->in_planes;
i++) {
/*
 * Linear or point filtering depending on interp_use_linear; the
 * read-as-integer flag is set only when interp_as_integer is non-zero.
 */
443 CUDA_TEXTURE_DESC tex_desc = {
444 .filterMode =
s->interp_use_linear ?
445 CU_TR_FILTER_MODE_LINEAR :
446 CU_TR_FILTER_MODE_POINT,
447 .flags =
s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
448 };
449
/*
 * Pitched 2D resource over plane i of the input frame: 8-bit elements when
 * the recorded plane depth is <= 8, otherwise 16-bit; channel count from
 * in_plane_channels; pitch and base pointer from the frame's linesize/data.
 */
450 CUDA_RESOURCE_DESC res_desc = {
451 .resType = CU_RESOURCE_TYPE_PITCH2D,
452 .res.pitch2D.format =
s->in_plane_depths[
i] <= 8 ?
453 CU_AD_FORMAT_UNSIGNED_INT8 :
454 CU_AD_FORMAT_UNSIGNED_INT16,
455 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
456 .res.pitch2D.pitchInBytes = in->
linesize[
i],
457 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
458 };
459
/*
 * Planes 1 and 2 take their own branch (its body is not visible in this
 * excerpt); every other plane uses the full frame width and height.
 */
460 if (
i == 1 ||
i == 2) {
463 } else {
464 res_desc.res.pitch2D.width = in->
width;
465 res_desc.res.pitch2D.height = in->
height;
466 }
467
470 goto exit;
471 }
472
473 // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
478 goto exit;
479
480 if (
s->out_planes > 1) {
481 // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
490 goto exit;
491 }
492
/*
 * Shared cleanup label reached by the gotos above and by normal fall-through:
 * loops over the input planes calling cuTexObjectDestroy on tex[i].
 */
493 exit:
494 for (
i = 0;
i <
s->in_planes;
i++)
496 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
497
499
501 }
502
504 {
509
513
518
521
522 s->frame->width = outlink->
w;
523 s->frame->height = outlink->
h;
524
528
529 return 0;
530 }
531
533 {
537 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
538
542
545
550 }
551
552 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
555
557
561
562 av_reduce(&
out->sample_aspect_ratio.num, &
out->sample_aspect_ratio.den,
565 INT_MAX);
566
573 }
574
576 {
578
579 return s->passthrough ?
582 }
583
/* Byte offset of member x within CUDAScaleContext, used by the option entries. */
584 #define OFFSET(x) offsetof(CUDAScaleContext, x)
/* Flag combination shared by the option entries: filtering parameter | video parameter. */
585 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
595 {
"passthrough",
"Do not process frames at all if parameters match",
OFFSET(passthrough),
AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1,
FLAGS },
597 {
"force_original_aspect_ratio",
"decrease or increase w/h if necessary to keep the original AR",
OFFSET(force_original_aspect_ratio),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2,
FLAGS,
"force_oar" },
601 {
"force_divisible_by",
"enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used",
OFFSET(force_divisible_by),
AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256,
FLAGS },
603 };
604
610 };
611
613 {
618 },
619 };
620
622 {
626 },
627 };
628
630 .
name =
"scale_cuda",
632
635
638
641
643
645 };