1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
24 #include <stdio.h>
25
33
38
41
53 };
54
/*
 * Integer division of a by b, rounded up instead of truncated
 * (valid for non-negative a and positive b).
 * Note: b is expanded twice, so do not pass an expression with side effects.
 */
55 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
58
/*
 * Routes a CUDA driver call x through FF_CUDA_CHECK_DL, supplying the
 * caller's `ctx` and the function table at s->hwctx->internal->cuda_dl.
 * Both `ctx` and `s` must therefore be in scope wherever this macro is used.
 * The expansion yields a value that callers in this file assign to `ret`.
 * NOTE(review): the exact error mapping/logging is defined by
 * FF_CUDA_CHECK_DL, which is not visible here -- confirm there.
 */
59 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
60
61 enum {
63
68
70 };
71
74
76
82
85
88
89 /**
90 * Output sw format. AV_PIX_FMT_NONE for no conversion.
91 */
93
94 char *
w_expr;
///< width expression string
95 char *
h_expr;
///< height expression string
96
99
105
109
112
114 {
116
120
124
125 return 0;
126 }
127
129 {
131
132 if (
s->hwctx &&
s->cu_module) {
133 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
135
136 CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
137 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
140 }
141
145 }
146
148 {
152
154 if (!out_ref)
157
162
166
171
174
176 s->frames_ctx = out_ref;
177
178 return 0;
182 }
183
185 {
187
190 return 1;
191 return 0;
192 }
193
195 {
198
199 s->in_fmt = in_format;
200 s->out_fmt = out_format;
201
206
207 // find maximum step of each component of each plane
208 // For our subset of formats, this should accurately tell us how many channels CUDA needs
209 // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
210
211 for (
i = 0;
i <
s->in_desc->nb_components;
i++) {
212 d = (
s->in_desc->comp[
i].depth + 7) / 8;
213 p =
s->in_desc->comp[
i].plane;
214 s->in_plane_channels[p] =
FFMAX(
s->in_plane_channels[p],
s->in_desc->comp[
i].step /
d);
215
216 s->in_plane_depths[p] =
s->in_desc->comp[
i].depth;
217 }
218 }
219
221 int out_width, int out_height)
222 {
224
226
230
231 /* check that we have a hw context */
232 if (!
ctx->inputs[0]->hw_frames_ctx) {
235 }
239
244 }
249 }
250
252
253 if (
s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
257 } else {
259
263
264 if (in_width == out_width && in_height == out_height &&
267 }
268
270 if (!
ctx->outputs[0]->hw_frames_ctx)
272
273 return 0;
274 }
275
277 {
279 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
280 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
281 char buf[128];
283
286
287 const char *function_infix = "";
288
289 extern const unsigned char ff_vf_scale_cuda_ptx_data[];
290 extern const unsigned int ff_vf_scale_cuda_ptx_len;
291
292 switch(
s->interp_algo) {
294 function_infix = "Nearest";
295 s->interp_use_linear = 0;
296 s->interp_as_integer = 1;
297 break;
299 function_infix = "Bilinear";
300 s->interp_use_linear = 1;
301 s->interp_as_integer = 1;
302 break;
305 function_infix = "Bicubic";
306 s->interp_use_linear = 0;
307 s->interp_as_integer = 0;
308 break;
310 function_infix = "Lanczos";
311 s->interp_use_linear = 0;
312 s->interp_as_integer = 0;
313 break;
314 default:
317 }
318
322
324 ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
327
328 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
329 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func,
s->cu_module, buf));
334 }
335
336 snprintf(buf,
sizeof(buf),
"Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
337 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uv,
s->cu_module, buf));
340
343
345 }
346
348 {
356
357 s->hwctx = device_hwctx;
358 s->cu_stream =
s->hwctx->stream;
359
361 s->w_expr,
s->h_expr,
365
367 s->force_original_aspect_ratio,
s->force_divisible_by);
368
372
375
379
380 if (
inlink->sample_aspect_ratio.num) {
383 inlink->sample_aspect_ratio);
384 } else {
386 }
387
391 s->passthrough ?
" (passthrough)" :
"");
392
396
397 return 0;
398
401 }
402
404 CUtexObject src_tex[4], int src_width, int src_height,
405 AVFrame *out_frame,
int dst_width,
int dst_height,
int dst_pitch)
406 {
408 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
409
410 CUdeviceptr dst_devptr[4] = {
411 (CUdeviceptr)out_frame->
data[0], (CUdeviceptr)out_frame->
data[1],
412 (CUdeviceptr)out_frame->
data[2], (CUdeviceptr)out_frame->
data[3]
413 };
414
415 void *args_uchar[] = {
416 &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
417 &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
418 &dst_width, &dst_height, &dst_pitch,
419 &src_width, &src_height, &
s->param
420 };
421
425 }
426
429 {
431 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
432 CUcontext
dummy, cuda_ctx =
s->hwctx->cuda_ctx;
434
435 CUtexObject tex[4] = { 0, 0, 0, 0 };
436
440
441 for (
i = 0;
i <
s->in_planes;
i++) {
442 CUDA_TEXTURE_DESC tex_desc = {
443 .filterMode =
s->interp_use_linear ?
444 CU_TR_FILTER_MODE_LINEAR :
445 CU_TR_FILTER_MODE_POINT,
446 .flags =
s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
447 };
448
449 CUDA_RESOURCE_DESC res_desc = {
450 .resType = CU_RESOURCE_TYPE_PITCH2D,
451 .res.pitch2D.format =
s->in_plane_depths[
i] <= 8 ?
452 CU_AD_FORMAT_UNSIGNED_INT8 :
453 CU_AD_FORMAT_UNSIGNED_INT16,
454 .res.pitch2D.numChannels =
s->in_plane_channels[
i],
455 .res.pitch2D.pitchInBytes = in->
linesize[
i],
456 .res.pitch2D.devPtr = (CUdeviceptr)in->
data[
i],
457 };
458
459 if (
i == 1 ||
i == 2) {
462 } else {
463 res_desc.res.pitch2D.width = in->
width;
464 res_desc.res.pitch2D.height = in->
height;
465 }
466
469 goto exit;
470 }
471
472 // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
477 goto exit;
478
479 if (
s->out_planes > 1) {
480 // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
489 goto exit;
490 }
491
492 exit:
493 for (
i = 0;
i <
s->in_planes;
i++)
495 CHECK_CU(cu->cuTexObjectDestroy(tex[
i]));
496
498
500 }
501
503 {
508
512
517
520
521 s->frame->width = outlink->
w;
522 s->frame->height = outlink->
h;
523
527
528 return 0;
529 }
530
532 {
536 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
537
541
544
549 }
550
551 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
554
556
560
561 av_reduce(&
out->sample_aspect_ratio.num, &
out->sample_aspect_ratio.den,
564 INT_MAX);
565
572 }
573
575 {
577
578 return s->passthrough ?
581 }
582
/* Byte offset of member x inside CUDAScaleContext; used by the option entries below. */
583 #define OFFSET(x) offsetof(CUDAScaleContext, x)
/* Option flags shared by the option entries below: filtering parameter + video parameter. */
584 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
594 {
"passthrough",
"Do not process frames at all if parameters match",
OFFSET(passthrough),
AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1,
FLAGS },
596 {
"force_original_aspect_ratio",
"decrease or increase w/h if necessary to keep the original AR",
OFFSET(force_original_aspect_ratio),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2,
FLAGS, .unit =
"force_oar" },
600 {
"force_divisible_by",
"enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used",
OFFSET(force_divisible_by),
AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256,
FLAGS },
602 };
603
609 };
610
612 {
617 },
618 };
619
621 {
625 },
626 };
627
629 .
name =
"scale_cuda",
631
634
637
640
642
644 };