1 /*
2 * H.264 hardware encoding using nvidia nvenc
3 * Copyright (c) 2014 Timo Rothenpieler <timo@rothenpieler.org>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #if defined(_WIN32)
23 #include <windows.h>
24 #else
25 #include <dlfcn.h>
26 #endif
27
28 #include <nvEncodeAPI.h>
29
38
39 #if defined(_WIN32)
40 #define CUDAAPI __stdcall
41 #else
43 #endif
44
45 #if defined(_WIN32)
46 #define LOAD_FUNC(l, s) GetProcAddress(l, s)
47 #define DL_CLOSE_FUNC(l) FreeLibrary(l)
48 #else
49 #define LOAD_FUNC(l, s) dlsym(l, s)
50 #define DL_CLOSE_FUNC(l) dlclose(l)
51 #endif
52
53 typedef enum cudaError_enum {
58
67
69
70 #if NVENCAPI_MAJOR_VERSION < 5
71 static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
72 #endif
73
75 {
79
81
84
86 {
89
91
94
96 {
97 union {
100 };
102
104 {
106
111
113 {
122
126
127 #if defined(_WIN32)
130 #else
133 #endif
135
137 {
139
141
145
149
154
156
162
164 {
166 uint32_t read_pos;
167
171
174
/* Size is always a power of two, so (size - 1) can be used as an index mask */
176 mask = queue->
size - 1;
177 read_pos = (queue->
pos - queue->
count) & mask;
179
180 return &queue->
data[read_pos];
181 }
182
184 {
188
/* size always has to be a power of two */
194
196
200 }
201 }
202
208
211
214
216 *queue = new_queue;
217 }
218
219 mask = queue->
size - 1;
220
222 queue->
pos = (queue->
pos + 1) & mask;
224
225 return 0;
226 }
227
229 {
232
234 }
235
237 {
239
240 if (!res)
242
244 }
245
247 {
250
252 }
253
255 {
257
258 if (!res)
260
262 }
263
264 #define CHECK_LOAD_FUNC(t, f, s) \
265 do { \
266 (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
267 if (!(f)) { \
268 av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
269 goto error; \
270 } \
271 } while (0)
272
274 {
277
279 return 1;
280
281 #if defined(_WIN32)
283 #else
284 dl_fn->
cuda_lib = dlopen(
"libcuda.so", RTLD_LAZY);
285 #endif
286
289 goto error;
290 }
291
300
301 return 1;
302
303 error:
304
307
309
310 return 0;
311 }
312
314 {
317 return 0;
318 }
319 return 1;
320 }
321 #define check_cuda_errors(f) if (!check_cuda_errors(avctx, f, #f)) goto error
322
324 {
325 int device_count = 0;
327 char gpu_name[128];
328 int smminor = 0, smmajor = 0;
329 int i, smver;
330
333
335 return 0;
336
338 return 1;
339
341
343
344 if (!device_count) {
346 goto error;
347 }
348
350
352
353 for (i = 0; i < device_count; ++i) {
357
358 smver = (smmajor << 4) | smminor;
359
360 av_log(avctx,
AV_LOG_VERBOSE,
"[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= 0x30) ?
"Available" :
"Not Available");
361
362 if (smver >= 0x30)
364 }
365
368 goto error;
369 }
370
371 return 1;
372
373 error:
374
376
377 return 0;
378 }
379
381 {
383 NVENCSTATUS nvstatus;
384
387
389 return 0;
390
392 return 1;
393
394 #if defined(_WIN32)
395 if (sizeof(void*) == 8) {
397 } else {
399 }
400 #else
401 dl_fn->
nvenc_lib = dlopen(
"libnvidia-encode.so.1", RTLD_LAZY);
402 #endif
403
406 goto error;
407 }
408
410
411 if (!nvEncodeAPICreateInstance) {
413 goto error;
414 }
415
416 dl_fn->
nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
417
418 nvstatus = nvEncodeAPICreateInstance(&dl_fn->
nvenc_funcs);
419
420 if (nvstatus != NV_ENC_SUCCESS) {
422 goto error;
423 }
424
426
427 return 1;
428
429 error:
432
434
435 return 0;
436 }
437
439 {
442
445
447
450
459
461 }
462
464 {
465 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
466 NV_ENC_PRESET_CONFIG preset_config = { 0 };
469 GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
470 NVENCSTATUS nv_status = NV_ENC_SUCCESS;
471 int surfaceCount = 0;
472 int i, num_mbs;
473 int isLL = 0;
474 int res = 0;
475 int dw, dh;
476
477 #if NVENCAPI_MAJOR_VERSION < 5
479 #endif
480
483 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
484
487
491 goto error;
492 }
493
495
498 preset_config.version = NV_ENC_PRESET_CONFIG_VER;
499 preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
500 encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
501 encode_session_params.apiVersion = NVENCAPI_VERSION;
502
503 #if NVENCAPI_MAJOR_VERSION < 5
504 encode_session_params.clientKeyPtr = &license;
505 #endif
506
510 goto error;
511 }
512
515
517 av_log(avctx,
AV_LOG_FATAL,
"Failed creating CUDA context for NVENC: 0x%x\n", (
int)cu_res);
519 goto error;
520 }
521
523
527 goto error;
528 }
529
530 encode_session_params.device = ctx->
cu_context;
531 encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
532
533 nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->
nvencoder);
534 if (nv_status != NV_ENC_SUCCESS) {
536 av_log(avctx,
AV_LOG_FATAL,
"OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (
int)nv_status);
538 goto error;
539 }
540
542 if (!strcmp(ctx->
preset,
"hp")) {
543 encoder_preset = NV_ENC_PRESET_HP_GUID;
544 }
else if (!strcmp(ctx->
preset,
"hq")) {
545 encoder_preset = NV_ENC_PRESET_HQ_GUID;
546 }
else if (!strcmp(ctx->
preset,
"bd")) {
547 encoder_preset = NV_ENC_PRESET_BD_GUID;
548 }
else if (!strcmp(ctx->
preset,
"ll")) {
549 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
550 isLL = 1;
551 }
else if (!strcmp(ctx->
preset,
"llhp")) {
552 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
553 isLL = 1;
554 }
else if (!strcmp(ctx->
preset,
"llhq")) {
555 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
556 isLL = 1;
557 }
else if (!strcmp(ctx->
preset,
"default")) {
558 encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
559 } else {
560 av_log(avctx,
AV_LOG_FATAL,
"Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, default\n", ctx->
preset);
562 goto error;
563 }
564 }
565
566 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->
nvencoder, NV_ENC_CODEC_H264_GUID, encoder_preset, &preset_config);
567 if (nv_status != NV_ENC_SUCCESS) {
570 goto error;
571 }
572
576
582 1024 * 1024);
585 } else {
588 }
589
590 // De-compensate for hardware, dubiously, trying to compensate for
591 // playback at 704 pixel width.
592 if (avctx->
width == 720 &&
1024 * 1024); /* same 1024 * 1024 bound as the preceding reduction */
600 }
601
604
605 num_mbs = ((avctx->
width + 15) >> 4) * ((avctx->
height + 15) >> 4);
607
610
612
616
617 if (avctx->
refs >= 0) {
618 /* 0 means "let the hardware decide" */
620 }
621
624 /* 0 is intra-only, 1 is I/P only, 2 is one B Frame, 3 two B frames, and so on. */
626 }
627
633 ctx->
encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
634 }
635
/* when there are B-frames, set the dts offset */
639
642
645
648 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
649 }
else if (ctx->
twopass == 1 || isLL) {
650 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
651
652 ctx->
encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
653 ctx->
encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
654
655 if (!isLL)
656 av_log(avctx,
AV_LOG_WARNING,
"Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
657 } else {
658 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
659 }
661 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
665
668 }
else if (avctx->
qmin >= 0 && avctx->
qmax >= 0) {
669 ctx->
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
670
673
677
681 }
682
685
687 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
688 } else {
689 ctx->
encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
690 }
691
694 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
695 break;
697 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
698 break;
701 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
702 break;
703 default:
705 ctx->
encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
706 break;
707 }
708
709 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
710 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
711
714 ctx->
encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->
color_trc;
715
717
720
722 if (nv_status != NV_ENC_SUCCESS) {
725 goto error;
726 }
727
729
732 goto error;
733 }
734
736
739 goto error;
740 }
741
743 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
744 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
745 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
746 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
747
748 allocSurf.width = (avctx->
width + 31) & ~31;
749 allocSurf.height = (avctx->
height + 31) & ~31;
750
751 allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
752
755 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
756 break;
757
759 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
760 break;
761
763 allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
764 break;
765
766 default:
769 goto error;
770 }
771
772 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->
nvencoder, &allocSurf);
773 if (nv_status = NV_ENC_SUCCESS){
776 goto error;
777 }
778
784
/* 1MB is large enough to hold most output frames. NVENC increases this automatically if it's not enough. */
786 allocOut.size = 1024 * 1024;
787
788 allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
789
790 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->
nvencoder, &allocOut);
791 if (nv_status = NV_ENC_SUCCESS) {
795 goto error;
796 }
797
801 }
802
804 uint32_t outSize = 0;
805 char tmpHeader[256];
806 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
807 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
808
809 payload.spsppsBuffer = tmpHeader;
810 payload.inBufferSize = sizeof(tmpHeader);
811 payload.outSPSPPSPayloadSize = &outSize;
812
813 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->
nvencoder, &payload);
814 if (nv_status != NV_ENC_SUCCESS) {
816 goto error;
817 }
818
821
824 goto error;
825 }
826
827 memcpy(avctx->
extradata, tmpHeader, outSize);
828 }
829
832
835
836 return 0;
837
838 error:
839
840 for (i = 0; i < surfaceCount; ++i) {
844 }
845
847 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
848
851
853
855
858
859 return res;
860 }
861
863 {
866 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
867 int i;
868
872
876 }
878
879 p_nvenc->nvEncDestroyEncoder(ctx->
nvencoder);
881
884
886
888
889 return 0;
890 }
891
893 {
896 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
897
898 uint32_t *slice_offsets =
av_mallocz(ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData *
sizeof(*slice_offsets));
899 NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
900 NVENCSTATUS nv_status;
901 int res = 0;
902
903 if (!slice_offsets)
905
906 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
907
908 lock_params.doNotWait = 0;
910 lock_params.sliceOffsets = slice_offsets;
911
912 nv_status = p_nvenc->nvEncLockBitstream(ctx->
nvencoder, &lock_params);
913 if (nv_status != NV_ENC_SUCCESS) {
916 goto error;
917 }
918
921 goto error;
922 }
923
924 memcpy(pkt->
data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
925
927 if (nv_status != NV_ENC_SUCCESS)
928 av_log(avctx,
AV_LOG_ERROR,
"Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
929
930 switch (lock_params.pictureType) {
931 case NV_ENC_PIC_TYPE_IDR:
933 case NV_ENC_PIC_TYPE_I:
935 break;
936 case NV_ENC_PIC_TYPE_P:
938 break;
939 case NV_ENC_PIC_TYPE_B:
941 break;
942 case NV_ENC_PIC_TYPE_BI:
944 break;
945 default:
946 av_log(avctx,
AV_LOG_ERROR,
"Unknown picture type encountered, expect the output to be broken.\n");
947 av_log(avctx,
AV_LOG_ERROR,
"Please report this error and include as much information on how to reproduce it as possible.\n");
949 goto error;
950 }
951
952 pkt->
pts = lock_params.outputTimeStamp;
954
/* when there are B-frames, set the dts offset */
958
961
964
966
968
969 return 0;
970
971 error:
972
975
976 return res;
977 }
978
981 {
982 NVENCSTATUS nv_status;
984 int res, i = 0;
985
988 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->
nvenc_funcs;
989
990 NV_ENC_PIC_PARAMS pic_params = { 0 };
991 pic_params.version = NV_ENC_PIC_PARAMS_VER;
992
993 if (frame) {
994 NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
996
1000 break;
1001 }
1002 }
1003
1005
1007
1008 lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1010
1011 nv_status = p_nvenc->nvEncLockInputBuffer(ctx->
nvencoder, &lockBufferParams);
1012 if (nv_status != NV_ENC_SUCCESS) {
1014 return 0;
1015 }
1016
1018 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1019
1023
1024 buf += inSurf->
height * lockBufferParams.pitch;
1025
1029
1030 buf += (inSurf->
height * lockBufferParams.pitch) >> 2;
1031
1036 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1037
1041
1042 buf += inSurf->
height * lockBufferParams.pitch;
1043
1048 uint8_t *
buf = lockBufferParams.bufferDataPtr;
1049
1053
1054 buf += inSurf->
height * lockBufferParams.pitch;
1055
1059
1060 buf += inSurf->
height * lockBufferParams.pitch;
1061
1065 } else {
1068 }
1069
1071 if (nv_status != NV_ENC_SUCCESS) {
1074 }
1075
1078 break;
1079
1084 }
1085
1087
1089 pic_params.bufferFmt = inSurf->
format;
1090 pic_params.inputWidth = avctx->
width;
1091 pic_params.inputHeight = avctx->
height;
1093 pic_params.completionEvent = 0;
1094
1097 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1098 } else {
1099 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1100 }
1101 } else {
1102 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1103 }
1104
1105 pic_params.encodePicFlags = 0;
1106 pic_params.inputTimeStamp = frame->
pts;
1107 pic_params.inputDuration = 0;
1108 pic_params.codecPicParams.h264PicParams.sliceMode = ctx->
encode_config.encodeCodecConfig.h264Config.sliceMode;
1109 pic_params.codecPicParams.h264PicParams.sliceModeData = ctx->
encode_config.encodeCodecConfig.h264Config.sliceModeData;
1110
1111 #if NVENCAPI_MAJOR_VERSION < 5
1112 memcpy(&pic_params.rcParams, &ctx->
encode_config.rcParams,
sizeof(NV_ENC_RC_PARAMS));
1113 #endif
1114
1116
1117 if (res)
1118 return res;
1119 } else {
1120 pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1121 }
1122
1123 nv_status = p_nvenc->nvEncEncodePicture(ctx->
nvencoder, &pic_params);
1124
1125 if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
1127
1128 if (res)
1129 return res;
1130
1132 }
1133
1134 if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1137 }
1138
1139 if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1143
1144 if (res)
1145 return res;
1146 }
1147
1148 if (frame) {
1150
1151 if (res)
1152 return res;
1153
1155 }
1156 }
1157
1160
1162
1163 if (res)
1164 return res;
1165
1166 tmpoutsurf->
busy = 0;
1169
1170 *got_packet = 1;
1171 } else {
1172 *got_packet = 0;
1173 }
1174
1175 return 0;
1176 }
1177
1181 };
1182
/* Byte offset of member x inside NvencContext, for use in option entries. */
#define OFFSET(x) offsetof(NvencContext, x)
/* Option flags used by the entries that follow: video + encoding parameter.
 * Parenthesized so the macro always expands as a single operand. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1186 {
"preset",
"Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)",
OFFSET(
preset),
AV_OPT_TYPE_STRING, { .str =
"hq" }, 0, 0,
VE },
1188 {
"2pass",
"Use 2pass cbr encoding mode (low latency mode only)",
OFFSET(twopass),
AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1,
VE },
1189 {
"gpu",
"Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
OFFSET(gpu),
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX,
VE },
1191 };
1192
1198 };
1199
1201 { "b", "0" },
1202 { "qmin", "-1" },
1203 { "qmax", "-1" },
1204 { "qdiff", "-1" },
1205 { "qblur", "-1" },
1206 { "qcomp", "-1" },
1208 };
1209
1220 .priv_class = &nvenc_class,
1223 };