1 /*
2 * HW decode acceleration through NVDEC
3 *
4 * Copyright (c) 2016 Anton Khirnov
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #include "config_components.h"
25
34
40
41 #if !NVDECAPI_CHECK_VERSION(9, 0)
42 #define cudaVideoSurfaceFormat_YUV444 2
43 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
44 #endif
45
48
53
56
59
64
65 #define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
66
68 {
69 switch (id) {
70 #if CONFIG_AV1_NVDEC_HWACCEL
72 #endif
83 }
84 return -1;
85 }
86
88 {
89 int shift_h = 0, shift_v = 0;
90
92 return cudaVideoChromaFormat_Monochrome;
93
95
96 if (shift_h == 1 && shift_v == 1)
97 return cudaVideoChromaFormat_420;
98 else if (shift_h == 1 && shift_v == 0)
99 return cudaVideoChromaFormat_422;
100 else if (shift_h == 0 && shift_v == 0)
101 return cudaVideoChromaFormat_444;
102
103 return -1;
104 }
105
107 CUVIDDECODECREATEINFO *params, void *logctx)
108 {
110 CUVIDDECODECAPS caps = { 0 };
111
112 caps.eCodecType = params->CodecType;
113 caps.eChromaFormat = params->ChromaFormat;
114 caps.nBitDepthMinus8 = params->bitDepthMinus8;
115
116 if (!
decoder->cvdl->cuvidGetDecoderCaps) {
117 av_log(logctx,
AV_LOG_WARNING,
"Used Nvidia driver is too old to perform a capability check.\n");
119 #if defined(_WIN32) || defined(__CYGWIN__)
120 "378.66"
121 #else
122 "378.13"
123 #endif
124 ". Continuing blind.\n");
125 return 0;
126 }
127
131
134 caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
136 caps.nMinWidth, caps.nMaxWidth);
138 caps.nMinHeight, caps.nMaxHeight);
139
140 if (!caps.bIsSupported) {
143 }
144
145 if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
147 (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
149 }
150
151 if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
153 (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
155 }
156
157 if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
159 (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
161 }
162
163 return 0;
164 }
165
167 {
169
171 void *logctx =
decoder->hw_device_ref->data;
176 }
177
180
181 cuvid_free_functions(&
decoder->cvdl);
182 }
183
185 CUVIDDECODECREATEINFO *params, void *logctx)
186 {
189
191
194
199
204 }
205 decoder->cuda_ctx = device_hwctx->cuda_ctx;
206 decoder->cudl = device_hwctx->internal->cuda_dl;
207 decoder->stream = device_hwctx->stream;
208
209 ret = cuvid_load_functions(&
decoder->cvdl, logctx);
213 }
214
218
223 }
224
226
228
231 }
232
234
235 return 0;
239 }
240
242 {
244 unsigned int *intp = obj;
245
248
250
251 return 0;
252 }
253
255 {
257 }
258
260 {
262
265 ctx->bitstream_len = 0;
266 ctx->bitstream_allocated = 0;
267
270 ctx->slice_offsets_allocated = 0;
271
274
275 return 0;
276 }
277
279 {
281 }
282
284 {
286 }
287
289 {
292
296 out_frames_ref);
299
301
303 // Copied from ff_decode_get_hw_frames_ctx for compatibility
305
308
309 if (!frames_ctx->
pool) {
312 }
313 } else {
314 // This is normally not used to actually allocate frames from
316 }
317
322 }
323
324 return 0;
325 }
326
328 {
330
336
337 CUVIDDECODECREATEINFO params = { 0 };
338
340 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
342
344
346 if (!sw_desc)
348
350 if (cuvid_codec_type < 0) {
353 }
354
356 if (cuvid_chroma_format < 0) {
359 }
360 chroma_444 =
ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
361
366
370 } else {
372 if (!real_hw_frames_ref)
374 }
375
377 case 8:
379 cudaVideoSurfaceFormat_NV12;
380 break;
381 case 10:
382 case 12:
384 cudaVideoSurfaceFormat_P016;
385 break;
386 default:
390 }
391
393
398 params.bitDepthMinus8 = sw_desc->
comp[0].
depth - 8;
400 params.CodecType = cuvid_codec_type;
401 params.ChromaFormat = cuvid_chroma_format;
404
407 if (params.ulNumDecodeSurfaces > 32) {
408 av_log(avctx,
AV_LOG_WARNING,
"Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
409 (int)params.ulNumDecodeSurfaces);
412 }
415 }
416
418 decoder->unsafe_output = unsafe_output;
419 decoder->real_hw_frames_ref = real_hw_frames_ref;
420 real_hw_frames_ref =
NULL;
421
423 if (!pool) {
426 }
428
432 if (!
ctx->decoder_pool) {
435 }
436
437 return 0;
441 }
442
444 {
446
447 if (!cf)
448 return;
449
453
455 }
456
458 {
461 void *logctx =
decoder->hw_device_ref->data;
462 CUdeviceptr devptr = (CUdeviceptr)opaque;
465
469
471
473
479 }
480
482 {
486
488
489 CUVIDPROCPARAMS vpp = { 0 };
491
493 CUdeviceptr devptr;
494
495 unsigned int pitch,
i;
497 int shift_h = 0, shift_v = 0;
499
500 vpp.progressive_frame = 1;
501 vpp.output_stream =
decoder->stream;
502
506
508 cf->idx, &devptr,
509 &pitch, &vpp));
512
514 if (!unmap_data) {
516 goto copy_fail;
517 }
518
522 if (!
frame->buf[1]) {
524 goto copy_fail;
525 }
526
529 goto copy_fail;
530
531 unmap_data->idx = cf->idx;
534
536 for (
i = 0;
frame->linesize[
i];
i++) {
538 frame->linesize[
i] = pitch;
540 }
541
543
544 copy_fail:
545 if (!
frame->buf[1]) {
548 } else {
550 }
551
554
555 if (ret < 0 || decoder->unsafe_output)
557
559 }
560
562 {
567
568 ctx->bitstream_len = 0;
570
572 return 0;
573
575 if (!cf)
577
579
581 if (!cf->idx_ref) {
585 }
586 cf->ref_idx = cf->idx = *cf->idx_ref;
587
591
592 return 0;
596
597 }
598
600 {
605
609
611
612 if (has_sep_ref) {
613 if (!cf->ref_idx_ref) {
615 if (!cf->ref_idx_ref) {
619 }
620 }
621 cf->ref_idx = *cf->ref_idx_ref;
622 } else {
624 cf->ref_idx = cf->idx;
625 }
626
627 return 0;
631 }
632
634 {
637 void *logctx = avctx;
638 CUVIDPICPARAMS *pp = &
ctx->pic_params;
639
641
643
644 pp->nBitstreamDataLen =
ctx->bitstream_len;
645 pp->pBitstreamData =
ctx->bitstream;
646 pp->nNumSlices =
ctx->nb_slices;
647 pp->pSliceDataOffsets =
ctx->slice_offsets;
648
652
656
659
661 }
662
664 {
668 ctx->bitstream_len = 0;
671 }
672
675 {
678
680 (
ctx->nb_slices + 1) *
sizeof(*
ctx->slice_offsets));
684
687
691
692 return 0;
693 }
694
698 int supports_444)
699 {
702 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
703
705 if (!sw_desc)
707
709 if (cuvid_codec_type < 0) {
712 }
713
715 if (cuvid_chroma_format < 0) {
718 }
719 chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
720
724 /*
725 * We add two extra frames to the pool to account for deinterlacing filters
726 * holding onto their frames.
727 */
729
730 switch (sw_desc->comp[0].depth) {
731 case 8:
733 break;
734 case 10:
736 break;
737 case 12:
739 break;
740 default:
742 }
743
744 return 0;
745 }
746
748 {
751
753 return -1;
754
757 if (!cf)
758 return -1;
759
761 }