1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
21
28 #include "hevc.h"
30
31 enum {
34 };
35
38
40
42
44
50
52
56
61
66
67
70 {
78 int max_dec_pic_buffering = 0;
80
83
86
87 ptl = &
vps->profile_tier_level;
88 max_dec_pic_buffering =
vps->vps_max_dec_pic_buffering_minus1[0] + 1;
89
90 if (
vps->vps_num_hrd_parameters > 0)
91 hrd = &
vps->hrd_parameters[0];
92
95
96 ptl = &
sps->profile_tier_level;
97 max_dec_pic_buffering =
sps->sps_max_dec_pic_buffering_minus1[0] + 1;
98
99 width =
sps->pic_width_in_luma_samples;
100 height =
sps->pic_height_in_luma_samples;
101
102 if (
sps->vui.vui_hrd_parameters_present_flag)
103 hrd = &
sps->vui.hrd_parameters;
104
107
108 if (
pps->tiles_enabled_flag) {
111 }
112 }
113 }
114
115 if (hrd) {
122 // Adjust for VCL vs. NAL limits.
123 bit_rate = bit_rate * 11 / 10;
124 }
125 }
126
129 max_dec_pic_buffering);
132 "level %s.\n",
desc->name);
133 ctx->level_guess =
desc->level_idc;
134 }
135 }
136
139 {
141
144 if (
ctx->level_guess) {
146 } else {
147 if (!
ctx->level_warned) {
149 "of stream: using level 8.5.\n");
150 ctx->level_warned = 1;
151 }
153 }
154 } else {
156 }
157 }
158 }
159
162 {
164
165 if (
ctx->tick_rate.num &&
ctx->tick_rate.den) {
166 int num, den;
167
169 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
170
171 vps->vps_time_scale = num;
172 vps->vps_num_units_in_tick = den;
173
174 vps->vps_timing_info_present_flag = 1;
175
176 if (
ctx->num_ticks_poc_diff_one > 0) {
177 vps->vps_num_ticks_poc_diff_one_minus1 =
178 ctx->num_ticks_poc_diff_one - 1;
179 vps->vps_poc_proportional_to_timing_flag = 1;
180 }
else if (
ctx->num_ticks_poc_diff_one == 0) {
181 vps->vps_poc_proportional_to_timing_flag = 0;
182 }
183 }
184
186
187 return 0;
188 }
189
192 {
194 int need_vui = 0;
195 int crop_unit_x, crop_unit_y;
196
197 if (
ctx->sample_aspect_ratio.num &&
ctx->sample_aspect_ratio.den) {
199
201 ctx->sample_aspect_ratio.den, 65535);
202
206 break;
207 }
209 sps->vui.aspect_ratio_idc = 255;
210 sps->vui.sar_width = num;
211 sps->vui.sar_height = den;
212 } else {
213 sps->vui.aspect_ratio_idc =
i;
214 }
215 sps->vui.aspect_ratio_info_present_flag = 1;
216 need_vui = 1;
217 }
218
/*
 * SET_OR_INFER(field, value, present_flag, infer)
 *
 * - value >= 0: store value in field and set need_vui to 1.
 * - value < 0 and present_flag is zero: store infer in field.
 * - value < 0 and present_flag is nonzero: field is left untouched.
 *
 * Expands to a single do { ... } while (0) statement and requires a
 * variable named need_vui to be in scope at the expansion site.
 *
 * NOTE(review): the arguments are expanded without parentheses and value
 * is expanded twice, so only simple, side-effect-free expressions are safe
 * to pass. The call sites are not visible in this listing -- confirm how
 * the macro is invoked before tightening it.
 */
219 #define SET_OR_INFER(field, value, present_flag, infer) do { \
220 if (value >= 0) { \
221 field = value; \
222 need_vui = 1; \
223 } else if (!present_flag) \
224 field = infer; \
225 } while (0)
226
227 if (
ctx->video_format >= 0 ||
228 ctx->video_full_range_flag >= 0 ||
229 ctx->colour_primaries >= 0 ||
230 ctx->transfer_characteristics >= 0 ||
231 ctx->matrix_coefficients >= 0) {
232
234 sps->vui.video_signal_type_present_flag, 5);
235
237 ctx->video_full_range_flag,
238 sps->vui.video_signal_type_present_flag, 0);
239
240 if (
ctx->colour_primaries >= 0 ||
241 ctx->transfer_characteristics >= 0 ||
242 ctx->matrix_coefficients >= 0) {
243
245 ctx->colour_primaries,
246 sps->vui.colour_description_present_flag, 2);
247
249 ctx->transfer_characteristics,
250 sps->vui.colour_description_present_flag, 2);
251
253 ctx->matrix_coefficients,
254 sps->vui.colour_description_present_flag, 2);
255
256 sps->vui.colour_description_present_flag = 1;
257 }
258 sps->vui.video_signal_type_present_flag = 1;
259 need_vui = 1;
260 }
261
262 if (
ctx->chroma_sample_loc_type >= 0) {
263 sps->vui.chroma_sample_loc_type_top_field =
264 ctx->chroma_sample_loc_type;
265 sps->vui.chroma_sample_loc_type_bottom_field =
266 ctx->chroma_sample_loc_type;
267 sps->vui.chroma_loc_info_present_flag = 1;
268 need_vui = 1;
269 }
270
271 if (
ctx->tick_rate.num &&
ctx->tick_rate.den) {
272 int num, den;
273
275 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
276
277 sps->vui.vui_time_scale = num;
278 sps->vui.vui_num_units_in_tick = den;
279
280 sps->vui.vui_timing_info_present_flag = 1;
281 need_vui = 1;
282
283 if (
ctx->num_ticks_poc_diff_one > 0) {
284 sps->vui.vui_num_ticks_poc_diff_one_minus1 =
285 ctx->num_ticks_poc_diff_one - 1;
286 sps->vui.vui_poc_proportional_to_timing_flag = 1;
287 }
else if (
ctx->num_ticks_poc_diff_one == 0) {
288 sps->vui.vui_poc_proportional_to_timing_flag = 0;
289 }
290 }
291
292 if (
sps->separate_colour_plane_flag ||
sps->chroma_format_idc == 0) {
293 crop_unit_x = 1;
294 crop_unit_y = 1;
295 } else {
296 crop_unit_x = 1 + (
sps->chroma_format_idc < 3);
297 crop_unit_y = 1 + (
sps->chroma_format_idc < 2);
298 }
/*
 * CROP(border, unit)
 *
 * Handles one crop option, selected by token-pasting border into the
 * member names ctx->crop_<border> and sps->conf_win_<border>_offset.
 *
 * - ctx->crop_<border> < 0: nothing happens.
 * - ctx->crop_<border> not a multiple of unit: an error is logged through
 *   av_log() and the ENCLOSING FUNCTION returns AVERROR(EINVAL) -- the
 *   return statement is part of the macro body.
 * - otherwise: sps->conf_win_<border>_offset is set to
 *   ctx->crop_<border> / unit and sps->conformance_window_flag is set to 1.
 *
 * Requires ctx, sps and bsf to be in scope at the expansion site. The
 * visible invocations pass crop_unit_x or crop_unit_y as unit.
 *
 * NOTE(review): unit is expanded without parentheses, so pass a plain
 * identifier or constant rather than a compound expression.
 */
299 #define CROP(border, unit) do { \
300 if (ctx->crop_ ## border >= 0) { \
301 if (ctx->crop_ ## border % unit != 0) { \
302 av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
303 "must be a multiple of %d.\n", #border, unit); \
304 return AVERROR(EINVAL); \
305 } \
306 sps->conf_win_ ## border ## _offset = \
307 ctx->crop_ ## border / unit; \
308 sps->conformance_window_flag = 1; \
309 } \
310 } while (0)
312 CROP(right, crop_unit_x);
313 CROP(top, crop_unit_y);
314 CROP(bottom, crop_unit_y);
315 #undef CROP
316
317 if (need_vui)
318 sps->vui_parameters_present_flag = 1;
319
321
322 return 0;
323 }
324
327 {
330
331 // If an AUD is present, it must be the first NAL unit.
335 } else {
338 int pic_type = 0, temporal_id = 8, layer_id = 0;
339
342 if (!nal)
343 continue;
346
351 pic_type < 2)
352 pic_type = 2;
354 pic_type < 1)
355 pic_type = 1;
356 }
357 }
358
361 .nuh_layer_id = layer_id,
362 .nuh_temporal_id_plus1 = temporal_id + 1,
363 };
364 aud->pic_type = pic_type;
365
367 if (err < 0) {
369 return err;
370 }
371 }
372 }
373
376
380 if (err < 0)
381 return err;
382 }
385 if (err < 0)
386 return err;
387 }
388 }
389
390 return 0;
391 }
392
395 .fragment_name = "access unit",
396 .unit_name = "NAL unit",
398 };
399
401 {
403 }
404
/* OFFSET(x): byte offset of member x within H265MetadataContext, via offsetof(). */
405 #define OFFSET(x) offsetof(H265MetadataContext, x)
/*
 * FLAGS: bitwise OR of AV_OPT_FLAG_VIDEO_PARAM and AV_OPT_FLAG_BSF_PARAM;
 * the option entries visible in the table that follows all carry this value.
 */
406 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
410
411 { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
413 { .dbl = 0.0 }, 0, 65535,
FLAGS },
414
415 { "video_format", "Set video format (table E-2)",
417 { .i64 = -1 }, -1, 7,
FLAGS },
418 { "video_full_range_flag", "Set video full range flag",
420 { .i64 = -1 }, -1, 1,
FLAGS },
421 { "colour_primaries", "Set colour primaries (table E-3)",
423 { .i64 = -1 }, -1, 255,
FLAGS },
424 { "transfer_characteristics", "Set transfer characteristics (table E-4)",
426 { .i64 = -1 }, -1, 255,
FLAGS },
427 { "matrix_coefficients", "Set matrix coefficients (table E-5)",
429 { .i64 = -1 }, -1, 255,
FLAGS },
430
431 { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
433 { .i64 = -1 }, -1, 5,
FLAGS },
434
435 { "tick_rate",
436 "Set VPS and VUI tick rate (time_scale / num_units_in_tick)",
438 { .dbl = 0.0 }, 0, UINT_MAX,
FLAGS },
439 { "num_ticks_poc_diff_one",
440 "Set VPS and VUI number of ticks per POC increment",
442 { .i64 = -1 }, -1, INT_MAX,
FLAGS },
443
444 { "crop_left", "Set left border crop offset",
447 { "crop_right", "Set right border crop offset",
450 { "crop_top", "Set top border crop offset",
453 { "crop_bottom", "Set bottom border crop offset",
456
457 { "level", "Set level (tables A.6 and A.7)",
460 { "auto", "Attempt to guess level from stream properties",
/*
 * LEVEL(name, value): expands to the initializer fields of one
 * AV_OPT_TYPE_CONST entry in the "level" unit -- name, NULL, 0,
 * AV_OPT_TYPE_CONST, a default of { .i64 = value }, .flags = FLAGS and
 * .unit = "level". It is meant to be wrapped in braces by the caller,
 * as in { LEVEL("2.1", 63) }.
 *
 * In every entry visible below, value equals 30 times the level number
 * (e.g. "2.1" -> 63, "5.2" -> 156, "8.5" -> 255).
 */
463 #define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
464 { .i64 = value }, .flags = FLAGS, .unit = "level"
467 {
LEVEL(
"2.1", 63) },
469 {
LEVEL(
"3.1", 93) },
471 {
LEVEL(
"4.1", 123) },
473 {
LEVEL(
"5.1", 153) },
474 {
LEVEL(
"5.2", 156) },
476 {
LEVEL(
"6.1", 183) },
477 {
LEVEL(
"6.2", 186) },
478 {
LEVEL(
"8.5", 255) },
479 #undef LEVEL
480
482 };
483
489 };
490
493 };
494
496 .
p.
name =
"hevc_metadata",
503 };
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left