1 /*
2 * HW decode acceleration through NVDEC
3 *
4 * Copyright (c) 2016 Anton Khirnov
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #include "config_components.h"
25
33
38
/*
 * Fallback taken when NVDECAPI_CHECK_VERSION(9, 0) evaluates false: the two
 * YUV444 surface-format names are supplied as plain integer macros (2 and 3),
 * presumably so that later references to them still compile.
 */
39 #if !NVDECAPI_CHECK_VERSION(9, 0)
40 #define cudaVideoSurfaceFormat_YUV444 2
41 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
42 #endif
43
46
51
55
60
/*
 * Shorthand for FF_CUDA_CHECK_DL: forwards the call expression x together
 * with `logctx` and `decoder->cudl`, so both identifiers must be in scope
 * wherever CHECK_CU is expanded.
 */
61 #define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
62
64 {
/*
 * NOTE(review): the signature and the individual case labels of this
 * function are not present in this view. What is visible: a switch on `id`
 * containing a section that is compiled only when CONFIG_AV1_NVDEC_HWACCEL
 * is set, followed by a result of -1 when the switch did not return.
 */
65 switch (id) {
66 #if CONFIG_AV1_NVDEC_HWACCEL
68 #endif
79 }
/* Reached only when nothing inside the switch returned. */
80 return -1;
81 }
82
84 {
/*
 * NOTE(review): the signature and the statement that fills shift_h/shift_v
 * are missing from this view. The visible logic maps the pair
 * (shift_h, shift_v) to a cudaVideoChromaFormat_* value:
 *   (1, 1) -> 420, (1, 0) -> 422, (0, 0) -> 444, anything else -> -1.
 * An earlier return yields cudaVideoChromaFormat_Monochrome; the condition
 * guarding it is not visible here.
 */
85 int shift_h = 0, shift_v = 0;
86
88 return cudaVideoChromaFormat_Monochrome;
89
91
92 if (shift_h == 1 && shift_v == 1)
93 return cudaVideoChromaFormat_420;
94 else if (shift_h == 1 && shift_v == 0)
95 return cudaVideoChromaFormat_422;
96 else if (shift_h == 0 && shift_v == 0)
97 return cudaVideoChromaFormat_444;
98
/* No shift combination matched. */
99 return -1;
100 }
101
103 CUVIDDECODECREATEINFO *params, void *logctx)
104 {
/*
 * NOTE(review): the first line of the signature, the capability query itself
 * and most av_log calls are missing from this view; only trailing argument
 * lines of the log calls survive. Visible behaviour:
 *  - caps is zero-initialised, then its codec type, chroma format and
 *    bit-depth-minus-8 fields are copied from params;
 *  - if the cuvidGetDecoderCaps entry reached through decoder->cvdl is NULL,
 *    a warning is logged and 0 is returned without further checks;
 *  - params->ulWidth and params->ulHeight are compared against the min/max
 *    limits in caps, and (ulWidth * ulHeight) / 256 against caps.nMaxMBCount;
 *  - the last statement returns 0. What the failing branches return is not
 *    visible here.
 */
106 CUVIDDECODECAPS caps = { 0 };
107
108 caps.eCodecType = params->CodecType;
109 caps.eChromaFormat = params->ChromaFormat;
110 caps.nBitDepthMinus8 = params->bitDepthMinus8;
111
/* Function pointer absent: the check cannot be performed, so carry on. */
112 if (!
decoder->cvdl->cuvidGetDecoderCaps) {
113 av_log(logctx,
AV_LOG_WARNING,
"Used Nvidia driver is too old to perform a capability check.\n");
/*
 * NOTE(review): the opening of the second log call (original line 114) is
 * missing here; the string pieces below are its platform-dependent version
 * number and its tail.
 */
115 #if defined(_WIN32) || defined(__CYGWIN__)
116 "378.66"
117 #else
118 "378.13"
119 #endif
120 ". Continuing blind.\n");
121 return 0;
122 }
123
127
130 caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
132 caps.nMinWidth, caps.nMaxWidth);
134 caps.nMinHeight, caps.nMaxHeight);
135
136 if (!caps.bIsSupported) {
139 }
140
/* Width must lie within [caps.nMinWidth, caps.nMaxWidth]. */
141 if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
143 (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
145 }
146
/* Height must lie within [caps.nMinHeight, caps.nMaxHeight]. */
147 if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
149 (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
151 }
152
/*
 * Area divided by 256 is compared with caps.nMaxMBCount; 256 is presumably
 * the pixel count of a 16x16 macroblock - TODO confirm.
 * NOTE(review): in the log argument below the (int) cast binds to the
 * product, and the division by 256 is applied to the already-cast value.
 */
153 if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
155 (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
157 }
158
159 return 0;
160 }
161
163 {
/*
 * NOTE(review): the signature and most statements are missing from this
 * view. Visible: a log context is taken from decoder->hw_device_ref->data
 * inside a nested scope, and &decoder->cvdl is handed to
 * cuvid_free_functions.
 */
165
167 void *logctx =
decoder->hw_device_ref->data;
172 }
173
176
177 cuvid_free_functions(&
decoder->cvdl);
178
180 }
181
183 CUVIDDECODECREATEINFO *params, void *logctx)
184 {
/*
 * NOTE(review): the first signature line, the allocations and all error
 * paths are missing from this view. Visible: a NULL check on decoder_ref,
 * three decoder fields copied from device_hwctx, a call to
 * cuvid_load_functions whose result is stored in ret, and a return of 0 on
 * the path that reaches the end.
 */
187
190
193
197
200 if (!decoder_ref) {
203 }
204
209 }
/* Copy the CUDA context, loader table and stream out of the device context. */
210 decoder->cuda_ctx = device_hwctx->cuda_ctx;
211 decoder->cudl = device_hwctx->internal->cuda_dl;
212 decoder->stream = device_hwctx->stream;
213
/* Fill decoder->cvdl; logctx is passed along to the loader. */
214 ret = cuvid_load_functions(&
decoder->cvdl, logctx);
218 }
219
223
228 }
229
231
233
236 }
237
239
240 return 0;
244 }
245
247 {
250
253
257
259
261 }
262
264 {
/*
 * NOTE(review): the signature and the statements between the assignments
 * are missing from this view. Visible: the bitstream length, the bitstream
 * allocated size and the slice-offset allocated size in ctx are reset to 0,
 * and 0 is returned.
 */
266
269 ctx->bitstream_len = 0;
270 ctx->bitstream_allocated = 0;
271
274 ctx->slice_offsets_allocated = 0;
275
278
279 return 0;
280 }
281
283 {
285 }
286
288 {
290 }
291
293 {
/*
 * NOTE(review): the signature and nearly all statements are missing from
 * this view. Visible: a call ending in the argument out_frames_ref, an
 * if/else whose first branch contains a NULL check on frames_ctx->pool, and
 * a return of 0 at the end. The condition selecting between the two branches
 * is not visible.
 */
296
300 out_frames_ref);
303
305
307 // Copied from ff_decode_get_hw_frames_ctx for compatibility
309
312
313 if (!frames_ctx->
pool) {
316 }
317 } else {
318 // This is normally not used to actually allocate frames from
320 }
321
326 }
327
328 return 0;
329 }
330
332 {
/*
 * NOTE(review): the signature, most calls and every error path are missing
 * from this view. Visible: a zero-initialised CUVIDDECODECREATEINFO is
 * filled with bit depth, output format, codec type and chroma format; a
 * warning is logged when more than 32 decode surfaces are requested; the
 * frames reference is stored in the decoder; 0 is returned at the end of the
 * visible success path.
 */
334
340
341 CUVIDDECODECREATEINFO params = { 0 };
342
343 cudaVideoSurfaceFormat output_format;
344 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
346
348 if (!sw_desc)
350
/* Negative values of the two mapped identifiers are treated specially. */
352 if (cuvid_codec_type < 0) {
355 }
356
358 if (cuvid_chroma_format < 0) {
361 }
/* Non-zero only when the context allows 4:4:4 and the stream is 4:4:4. */
362 chroma_444 =
ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
363
368
372 } else {
374 if (!real_hw_frames_ref)
376 }
377
/*
 * NOTE(review): the switch header and the start of each assignment are
 * missing; only the tails of the expressions survive. Depth 8 ends in
 * cudaVideoSurfaceFormat_NV12, depths 10 and 12 share an expression ending
 * in cudaVideoSurfaceFormat_P016.
 */
379 case 8:
381 cudaVideoSurfaceFormat_NV12;
382 break;
383 case 10:
384 case 12:
386 cudaVideoSurfaceFormat_P016;
387 break;
388 default:
392 }
393
395
/* Bit depth is taken from the first component of the software format. */
400 params.bitDepthMinus8 = sw_desc->
comp[0].
depth - 8;
401 params.OutputFormat = output_format;
402 params.CodecType = cuvid_codec_type;
403 params.ChromaFormat = cuvid_chroma_format;
406
/* Only a warning is visible for this case. */
409 if (params.ulNumDecodeSurfaces > 32) {
410 av_log(avctx,
AV_LOG_WARNING,
"Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
411 (int)params.ulNumDecodeSurfaces);
414 }
417 }
418
/*
 * The reference is stored in the decoder and the local is cleared,
 * presumably so that later cleanup of the local does not release it -
 * TODO confirm against the missing cleanup code.
 */
420 decoder->real_hw_frames_ref = real_hw_frames_ref;
421 real_hw_frames_ref =
NULL;
422
424 if (!pool) {
427 }
429
432 if (!
ctx->decoder_pool) {
435 }
436
437 return 0;
441 }
442
444 {
/*
 * NOTE(review): the signature and the release statements are missing from
 * this view. Visible: the function returns immediately, doing nothing, when
 * cf is NULL.
 */
446
447 if (!cf)
448 return;
449
453
455 }
456
458 {
/*
 * NOTE(review): the signature and all calls are missing from this view.
 * Visible: the log context comes from decoder->hw_device_ref->data, and the
 * `opaque` value is reinterpreted as a CUdeviceptr.
 */
461 void *logctx =
decoder->hw_device_ref->data;
462 CUdeviceptr devptr = (CUdeviceptr)opaque;
465
469
471
473
479 }
480
482 {
/*
 * NOTE(review): the signature, the mapping call head, the allocations and
 * the cleanup bodies are missing from this view. Visible:
 *  - vpp is zero-initialised, marked progressive and given decoder->stream
 *    as its output stream;
 *  - a call receives cf->idx, &devptr, &pitch and &vpp;
 *  - three NULL checks (unmap_data, frame->buf[1], frame->hw_frames_ctx)
 *    each jump to copy_fail;
 *  - every non-zero entry of frame->linesize up to the first zero entry is
 *    overwritten with pitch;
 *  - at copy_fail the code branches on whether frame->buf[1] is NULL.
 */
486
488
489 CUVIDPROCPARAMS vpp = { 0 };
491
493 CUdeviceptr devptr;
494
495 unsigned int pitch,
i;
497 int shift_h = 0, shift_v = 0;
499
500 vpp.progressive_frame = 1;
501 vpp.output_stream =
decoder->stream;
502
506
508 cf->idx, &devptr,
509 &pitch, &vpp));
512
514 if (!unmap_data) {
516 goto copy_fail;
517 }
518
522 if (!
frame->buf[1]) {
524 goto copy_fail;
525 }
526
529 if (!
frame->hw_frames_ctx) {
531 goto copy_fail;
532 }
533
534 unmap_data->idx = cf->idx;
537
/* The loop stops at the first linesize entry that is zero. */
539 for (
i = 0;
frame->linesize[
i];
i++) {
541 frame->linesize[
i] = pitch;
543 }
544
546
/* The two branches differ by whether frame->buf[1] was set; bodies not visible. */
547 copy_fail:
548 if (!
frame->buf[1]) {
551 } else {
553 }
554
558 }
559
561 {
/*
 * NOTE(review): the signature, the allocations and the error paths are
 * missing from this view. Visible: ctx->bitstream_len is reset to 0; there
 * is an early return of 0 whose condition is not visible; cf,
 * cf->decoder_ref and cf->idx_ref are each NULL-checked; cf->idx and
 * cf->ref_idx are both set from the unsigned int stored in cf->idx_ref->data;
 * 0 is returned afterwards.
 */
566
567 ctx->bitstream_len = 0;
569
571 return 0;
572
574 if (!cf)
576
578 if (!cf->decoder_ref) {
581 }
582
584 if (!cf->idx_ref) {
588 }
/* Both indices start out equal to the value held by idx_ref. */
589 cf->ref_idx = cf->idx = *(unsigned int*)cf->idx_ref->data;
590
594
595 return 0;
599
600 }
601
603 {
/*
 * NOTE(review): the signature and the setup/allocation statements are
 * missing from this view. Visible: when has_sep_ref is non-zero,
 * cf->ref_idx is read from the unsigned int in cf->ref_idx_ref->data
 * (cf->ref_idx_ref is NULL-checked twice, presumably before and after an
 * allocation that is not visible); otherwise cf->ref_idx is set to cf->idx.
 * 0 is returned afterwards.
 */
608
612
614
615 if (has_sep_ref) {
616 if (!cf->ref_idx_ref) {
618 if (!cf->ref_idx_ref) {
622 }
623 }
624 cf->ref_idx = *(unsigned int*)cf->ref_idx_ref->data;
625 } else {
/* No separate reference: reuse the frame's own index. */
627 cf->ref_idx = cf->idx;
628 }
629
630 return 0;
634 }
635
637 {
/*
 * NOTE(review): the signature, the decode call and the return statement are
 * missing from this view. Visible: avctx is used as the log context, and the
 * picture parameters in ctx->pic_params receive the bitstream length,
 * bitstream pointer, slice count and slice-offset pointer held in ctx.
 */
640 void *logctx = avctx;
641 CUVIDPICPARAMS *pp = &
ctx->pic_params;
642
644
646
647 pp->nBitstreamDataLen =
ctx->bitstream_len;
648 pp->pBitstreamData =
ctx->bitstream;
649 pp->nNumSlices =
ctx->nb_slices;
650 pp->pSliceDataOffsets =
ctx->slice_offsets;
651
655
659
662
664 }
665
667 {
672 }
673
676 {
/*
 * NOTE(review): the signature and every complete statement except the final
 * return are missing from this view. Visible: a size argument computed as
 * (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets), i.e. room for one more
 * slice-offset element than the current count, and a return of 0.
 */
679
681 (
ctx->nb_slices + 1) *
sizeof(*
ctx->slice_offsets));
685
688
692
693 return 0;
694 }
695
699 int supports_444)
700 {
/*
 * NOTE(review): the leading signature lines, the assignments and the bodies
 * of every switch case are missing from this view. Visible: NULL/negative
 * checks on sw_desc, cuvid_codec_type and cuvid_chroma_format; chroma_444
 * is non-zero only when supports_444 is set and the chroma format is
 * cudaVideoChromaFormat_444; a switch on the depth of the first component
 * with cases 8, 10, 12 and a default; a return of 0 at the end.
 */
703 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
704
706 if (!sw_desc)
708
710 if (cuvid_codec_type < 0) {
713 }
714
716 if (cuvid_chroma_format < 0) {
719 }
720 chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
721
725 /*
726 * We add two extra frames to the pool to account for deinterlacing filters
727 * holding onto their frames.
728 */
730
731 switch (sw_desc->comp[0].depth) {
732 case 8:
734 break;
735 case 10:
737 break;
738 case 12:
740 break;
741 default:
743 }
744
745 return 0;
746 }
747
749 {
/*
 * NOTE(review): the signature, the first guard's condition and the final
 * return are missing from this view. Visible: -1 is returned from an early
 * guard, and -1 is returned again when cf is NULL.
 */
752
754 return -1;
755
758 if (!cf)
759 return -1;
760
762 }