1 /*
2 * HW decode acceleration through NVDEC
3 *
4 * Copyright (c) 2016 Anton Khirnov
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "config.h"
24 #include "config_components.h"
25
34
40
/* Fallback values for the two 4:4:4 surface-format names. They are defined
 * here only when NVDECAPI_CHECK_VERSION(9, 0) evaluates false. */
41 #if !NVDECAPI_CHECK_VERSION(9, 0)
42 #define cudaVideoSurfaceFormat_YUV444 2
43 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
44 #endif
45
48
53
56
59
64
/* Routes a CUDA call through FF_CUDA_CHECK_DL. The macro takes only the call
 * itself as an argument: it expands to references to `logctx` and
 * `decoder->cudl`, so both names must be in scope wherever it is used. */
65 #define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
66
68 {
/* Dispatch on `id`. The case labels are not visible in this excerpt; the
 * entry guarded below is compiled only when CONFIG_AV1_NVDEC_HWACCEL is set. */
69 switch (id) {
70 #if CONFIG_AV1_NVDEC_HWACCEL
72 #endif
83 }
/* Reached when the switch did not return: report failure as a negative value. */
84 return -1;
85 }
86
88 {
/* Horizontal and vertical shift values, zero-initialised. The call that fills
 * them in is not visible in this excerpt (presumably the chroma subsampling
 * shifts of the pixel format -- TODO confirm). */
89 int shift_h = 0, shift_v = 0;
90
92 return cudaVideoChromaFormat_Monochrome;
93
95
/* Map the (shift_h, shift_v) pair to a chroma format:
 *   (1, 1) -> cudaVideoChromaFormat_420
 *   (1, 0) -> cudaVideoChromaFormat_422
 *   (0, 0) -> cudaVideoChromaFormat_444 */
96 if (shift_h == 1 && shift_v == 1)
97 return cudaVideoChromaFormat_420;
98 else if (shift_h == 1 && shift_v == 0)
99 return cudaVideoChromaFormat_422;
100 else if (shift_h == 0 && shift_v == 0)
101 return cudaVideoChromaFormat_444;
102
/* Any other shift combination has no mapping here. */
103 return -1;
104 }
105
107 CUVIDDECODECREATEINFO *params, void *logctx)
108 {
110 CUVIDDECODECAPS caps = { 0 };
111
/* Describe what is being asked about: codec, chroma format and bit depth are
 * copied from the requested decoder creation parameters. */
112 caps.eCodecType = params->CodecType;
113 caps.eChromaFormat = params->ChromaFormat;
114 caps.nBitDepthMinus8 = params->bitDepthMinus8;
115
/* If the cuvidGetDecoderCaps entry point is absent from the loaded function
 * table, no check is possible: log a warning and return 0 so the caller
 * continues unchecked. The version string in the message is selected per
 * platform ("378.66" on Windows/Cygwin, "378.13" otherwise). */
116 if (!
decoder->cvdl->cuvidGetDecoderCaps) {
117 av_log(logctx,
AV_LOG_WARNING,
"Used Nvidia driver is too old to perform a capability check.\n");
119 #if defined(_WIN32) || defined(__CYGWIN__)
120 "378.66"
121 #else
122 "378.13"
123 #endif
124 ". Continuing blind.\n");
125 return 0;
126 }
127
131
134 caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
136 caps.nMinWidth, caps.nMaxWidth);
138 caps.nMinHeight, caps.nMaxHeight);
139
/* Validation of the request against the reported capabilities. The bodies of
 * the failing branches are not visible in this excerpt. */
140 if (!caps.bIsSupported) {
143 }
144
/* Requested width must lie within [nMinWidth, nMaxWidth]. */
145 if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
147 (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
149 }
150
/* Requested height must lie within [nMinHeight, nMaxHeight]. */
151 if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
153 (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
155 }
156
/* width * height / 256 is compared against nMaxMBCount; 256 is presumably the
 * pixel count of a 16x16 macroblock -- TODO confirm. Note that in the logged
 * value the (int) cast applies to the product, before the division. */
157 if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
159 (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
161 }
162
/* No visible check rejected the request. */
163 return 0;
164 }
165
167 {
169
/* Log context: the data pointer of the decoder's hardware device reference. */
171 void *logctx =
decoder->hw_device_ref->data;
176 }
177
180
/* Hand the decoder's cuvid function table (decoder->cvdl) to
 * cuvid_free_functions(). */
181 cuvid_free_functions(&
decoder->cvdl);
182 }
183
185 CUVIDDECODECREATEINFO *params, void *logctx)
186 {
189
191
194
199
204 }
/* Copy the CUDA context, the CUDA function table and the stream from the
 * device hardware context into the decoder. */
205 decoder->cuda_ctx = device_hwctx->cuda_ctx;
206 decoder->cudl = device_hwctx->internal->cuda_dl;
207 decoder->stream = device_hwctx->stream;
208
/* Populate decoder->cvdl via cuvid_load_functions(); the handling of `ret`
 * is not visible in this excerpt. */
209 ret = cuvid_load_functions(&
decoder->cvdl, logctx);
213 }
214
218
223 }
224
226
228
231 }
232
234
235 return 0;
239 }
240
242 {
244 unsigned int *intp = obj;
245
248
250
251 return 0;
252 }
253
255 {
257 }
258
260 {
262
/* Zero the length and allocated-size fields of the bitstream buffer. */
265 ctx->bitstream_len = 0;
266 ctx->bitstream_allocated = 0;
267
/* Zero the allocated-size field of the slice-offset array. */
270 ctx->slice_offsets_allocated = 0;
271
274
275 return 0;
276 }
277
279 {
281 }
282
284 {
286 }
287
289 {
292
296 out_frames_ref);
299
301
303 // Copied from ff_decode_get_hw_frames_ctx for compatibility
305
308
309 if (!frames_ctx->
pool) {
312 }
313 } else {
314 // This is normally not used to actually allocate frames from
316 }
317
322 }
323
324 return 0;
325 }
326
328 {
330
336
/* Decoder creation parameters, zero-initialised and filled in further down. */
337 CUVIDDECODECREATEINFO params = { 0 };
338
340 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
342
344
346 if (!sw_desc)
348
/* A negative codec type or chroma format is handled as a failure case; the
 * assignments and the handling bodies are not visible in this excerpt. */
350 if (cuvid_codec_type < 0) {
353 }
354
356 if (cuvid_chroma_format < 0) {
359 }
/* 4:4:4 handling is enabled only when the context reports support for it
 * (ctx->supports_444) and the chroma format is cudaVideoChromaFormat_444. */
360 chroma_444 =
ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
361
366
370 } else {
372 if (!real_hw_frames_ref)
374 }
375
/* Per-depth selection (the switch expression itself is not visible here).
 * Depth 8 has its own branch; depths 10 and 12 share one. Within each, the
 * choice is 4:4:4, then 4:2:2 (only when built with NVDEC_HAVE_422_SUPPORT),
 * then the remaining default. */
377 case 8:
378 if (chroma_444) {
380 #ifdef NVDEC_HAVE_422_SUPPORT
381 } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
383 #endif
384 } else {
386 }
387 break;
388 case 10:
389 case 12:
390 if (chroma_444) {
392 #ifdef NVDEC_HAVE_422_SUPPORT
393 } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
395 #endif
396 } else {
398 }
399 break;
400 default:
404 }
405
407
/* Bit depth is taken from the first component of the software pixel format
 * descriptor and stored relative to 8. */
412 params.bitDepthMinus8 = sw_desc->
comp[0].
depth - 8;
414 params.CodecType = cuvid_codec_type;
415 params.ChromaFormat = cuvid_chroma_format;
418
/* Log a warning when more than 32 decode surfaces are requested. */
421 if (params.ulNumDecodeSurfaces > 32) {
422 av_log(avctx,
AV_LOG_WARNING,
"Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
423 (int)params.ulNumDecodeSurfaces);
426 }
429 }
430
/* Store the frames reference in the decoder, then clear the local pointer --
 * presumably so that later cleanup in this function does not release a
 * reference the decoder now holds (TODO confirm against the error path). */
432 decoder->unsafe_output = unsafe_output;
433 decoder->real_hw_frames_ref = real_hw_frames_ref;
434 real_hw_frames_ref =
NULL;
435
437 if (!pool) {
440 }
442
446 if (!
ctx->decoder_pool) {
449 }
450
451 return 0;
455 }
456
458 {
460
461 if (!cf)
462 return;
463
467
469 }
470
472 {
/* Log context: the data pointer of the decoder's hardware device reference. */
475 void *logctx =
decoder->hw_device_ref->data;
/* The opaque argument carries the device pointer itself (not a pointer to
 * it); it is cast straight back to CUdeviceptr. */
476 CUdeviceptr devptr = (CUdeviceptr)opaque;
479
483
485
487
493 }
494
496 {
500
502
503 CUVIDPROCPARAMS vpp = { 0 };
505
507 CUdeviceptr devptr;
508
509 unsigned int pitch,
i;
511 int shift_h = 0, shift_v = 0;
513
/* Processing parameters: the frame is flagged as progressive and the
 * decoder's stream is used as the output stream. */
514 vpp.progressive_frame = 1;
515 vpp.output_stream =
decoder->stream;
516
520
523 &pitch, &vpp));
526
/* Each of the checks below bails out to the copy_fail label. */
528 if (!unmap_data) {
530 goto copy_fail;
531 }
532
536 if (!
frame->buf[1]) {
538 goto copy_fail;
539 }
540
543 goto copy_fail;
544
545 unmap_data->idx = cf->
idx;
548
/* Walk frame->linesize[] up to the first zero entry and overwrite every
 * visited entry with `pitch` (written through &pitch by the call above). */
550 for (
i = 0;
frame->linesize[
i];
i++) {
552 frame->linesize[
i] = pitch;
554 }
555
557
/* Exit path used by the gotos above. What is done here depends on whether
 * frame->buf[1] was set; the two branch bodies are not visible in this
 * excerpt. */
558 copy_fail:
559 if (!
frame->buf[1]) {
562 } else {
564 }
565
568
569 if (ret < 0 || decoder->unsafe_output)
571
573 }
574
576 {
581
582 ctx->bitstream_len = 0;
584
586 return 0;
587
589 if (!cf)
591
593
599 }
601
605
606 return 0;
610
611 }
612
614 {
619
623
625
626 if (has_sep_ref) {
633 }
634 }
636 } else {
639 }
640
641 return 0;
645 }
646
648 {
/* The codec context doubles as the log context here. */
651 void *logctx = avctx;
652 CUVIDPICPARAMS *pp = &
ctx->pic_params;
653
655
657
/* Point the picture parameters at the bitstream buffer and slice-offset
 * table held in the context, together with their current length and count. */
658 pp->nBitstreamDataLen =
ctx->bitstream_len;
659 pp->pBitstreamData =
ctx->bitstream;
660 pp->nNumSlices =
ctx->nb_slices;
661 pp->pSliceDataOffsets =
ctx->slice_offsets;
662
666
670
673
675 }
676
678 {
682 ctx->bitstream_len = 0;
685 }
686
689 {
/* The size expression below covers (nb_slices + 1) slice-offset entries,
 * i.e. one more than the current slice count. The call it is passed to is
 * not visible in this excerpt. */
692
694 (
ctx->nb_slices + 1) *
sizeof(*
ctx->slice_offsets));
698
701
705
706 return 0;
707 }
708
712 int supports_444)
713 {
716 int cuvid_codec_type, cuvid_chroma_format, chroma_444;
717
719 if (!sw_desc)
721
/* A negative codec type or chroma format is handled as a failure case; the
 * assignments and the handling bodies are not visible in this excerpt. */
723 if (cuvid_codec_type < 0) {
726 }
727
729 if (cuvid_chroma_format < 0) {
732 }
/* 4:4:4 handling is enabled only when the caller passes a non-zero
 * supports_444 and the chroma format is cudaVideoChromaFormat_444. */
733 chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
734
738 /*
739 * We add two extra frames to the pool to account for deinterlacing filters
740 * holding onto their frames.
741 */
743
/* Select by the bit depth of the first component. Each depth chooses between
 * 4:4:4, 4:2:2 (only when built with NVDEC_HAVE_422_SUPPORT) and the remaining
 * default. Where FF_API_NVDEC_OLD_PIX_FMTS appears, it switches between two
 * alternatives whose contents are not visible in this excerpt. */
744 switch (sw_desc->comp[0].depth) {
745 case 8:
746 if (chroma_444) {
748 #ifdef NVDEC_HAVE_422_SUPPORT
749 } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
751 #endif
752 } else {
754 }
755 break;
756 case 10:
757 if (chroma_444) {
758 #if FF_API_NVDEC_OLD_PIX_FMTS
760 #else
762 #endif
763 #ifdef NVDEC_HAVE_422_SUPPORT
764 } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
766 #endif
767 } else {
769 }
770 break;
771 case 12:
772 if (chroma_444) {
773 #if FF_API_NVDEC_OLD_PIX_FMTS
775 #else
777 #endif
778 #ifdef NVDEC_HAVE_422_SUPPORT
779 } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
780 #if FF_API_NVDEC_OLD_PIX_FMTS
782 #else
784 #endif
785 #endif
786 } else {
787 #if FF_API_NVDEC_OLD_PIX_FMTS
789 #else
791 #endif
792 }
793 break;
794 default:
796 }
797
798 return 0;
799 }
800
802 {
805
807 return -1;
808
809 fdd =
frame->private_ref;
812 return -1;
813
815 }