Go to the documentation of this file. 1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
21
24 #include "cbs.h"
29
31
32 enum {
35 };
36
39
41
43
45
51
53
57
64
69
70
73 {
81 int max_dec_pic_buffering = 0;
83
86
89
90 ptl = &
vps->profile_tier_level;
91 max_dec_pic_buffering =
vps->vps_max_dec_pic_buffering_minus1[0] + 1;
92
93 if (
vps->vps_num_hrd_parameters > 0)
94 hrd = &
vps->hrd_parameters[0];
95
98
99 ptl = &
sps->profile_tier_level;
100 max_dec_pic_buffering =
sps->sps_max_dec_pic_buffering_minus1[0] + 1;
101
102 width =
sps->pic_width_in_luma_samples;
103 height =
sps->pic_height_in_luma_samples;
104
105 if (
sps->vui.vui_hrd_parameters_present_flag)
106 hrd = &
sps->vui.hrd_parameters;
107
110
111 if (
pps->tiles_enabled_flag) {
114 }
115 }
116 }
117
118 if (hrd) {
125 // Adjust for VCL vs. NAL limits.
126 bit_rate = bit_rate * 11 / 10;
127 }
128 }
129
132 max_dec_pic_buffering);
135 "level %s.\n",
desc->name);
136 ctx->level_guess =
desc->level_idc;
137 }
138 }
139
142 {
144
147 if (
ctx->level_guess) {
149 } else {
150 if (!
ctx->level_warned) {
152 "of stream: using level 8.5.\n");
153 ctx->level_warned = 1;
154 }
156 }
157 } else {
159 }
160 }
161 }
162
165 {
167
168 if (
ctx->tick_rate.num &&
ctx->tick_rate.den) {
169 int num, den;
170
172 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
173
174 vps->vps_time_scale = num;
175 vps->vps_num_units_in_tick = den;
176
177 vps->vps_timing_info_present_flag = 1;
178
179 if (
ctx->num_ticks_poc_diff_one > 0) {
180 vps->vps_num_ticks_poc_diff_one_minus1 =
181 ctx->num_ticks_poc_diff_one - 1;
182 vps->vps_poc_proportional_to_timing_flag = 1;
183 }
else if (
ctx->num_ticks_poc_diff_one == 0) {
184 vps->vps_poc_proportional_to_timing_flag = 0;
185 }
186 }
187
189
190 return 0;
191 }
192
194 int *crop_left, int *crop_right,
195 int *crop_top, int *crop_bottom)
196 {
199 int right =
ctx->crop_right;
200 int top =
ctx->crop_top;
201 int bottom =
ctx->crop_bottom;
202
203 if (
ctx->width > 0) {
204 if (
ctx->width >
sps->pic_width_in_luma_samples) {
206 "The width option value %d is larger than picture width %d\n",
207 ctx->width,
sps->pic_width_in_luma_samples);
209 }
210
212 if (right > 0)
213 left =
sps->pic_width_in_luma_samples -
ctx->width - right;
214 else
216 }
217
218 if (right < 0)
219 right =
sps->pic_width_in_luma_samples -
ctx->width -
left;
220
221 if (
left < 0 || right < 0 || (
left + right +
ctx->width) !=
222 sps->pic_width_in_luma_samples) {
224 "Invalid value for crop_left %d, crop_right %d, width after "
225 "crop %d, with picture width %d\n",
226 ctx->crop_left,
ctx->crop_right,
ctx->width,
227 sps->pic_width_in_luma_samples);
229 }
230 }
231
232 if (
ctx->height > 0) {
233 if (
ctx->height >
sps->pic_height_in_luma_samples) {
235 "The height option value %d is larger than picture height %d\n",
236 ctx->height,
sps->pic_height_in_luma_samples);
238 }
239
240 if (top < 0) {
241 if (bottom > 0)
242 top =
sps->pic_height_in_luma_samples -
ctx->height - bottom;
243 else
244 top = 0;
245 }
246
247 if (bottom < 0)
248 bottom =
sps->pic_height_in_luma_samples -
ctx->height - top;
249
250 if (top < 0 || bottom < 0 || (top + bottom +
ctx->height) !=
251 sps->pic_height_in_luma_samples) {
253 "Invalid value for crop_top %d, crop_bottom %d, height after "
254 "crop %d, with picture height %d\n",
255 ctx->crop_top,
ctx->crop_bottom,
ctx->height,
256 sps->pic_height_in_luma_samples);
258 }
259 }
260
262 *crop_right = right;
263 *crop_top = top;
264 *crop_bottom = bottom;
265
266 return 0;
267 }
268
271 {
273 int need_vui = 0;
274 int crop_unit_x, crop_unit_y;
275 /* Use local variables to avoid modifying context fields in case the video
276 * resolution changes. Cropping doesn't work well with resolution changes;
277 * this is the best we can do.
278 */
279 int crop_left, crop_right, crop_top, crop_bottom;
281
282 if (
ctx->sample_aspect_ratio.num &&
ctx->sample_aspect_ratio.den) {
284
286 ctx->sample_aspect_ratio.den, 65535);
287
291 break;
292 }
294 sps->vui.aspect_ratio_idc = 255;
295 sps->vui.sar_width = num;
296 sps->vui.sar_height = den;
297 } else {
298 sps->vui.aspect_ratio_idc =
i;
299 }
300 sps->vui.aspect_ratio_info_present_flag = 1;
301 need_vui = 1;
302 }
303
/*
 * SET_OR_INFER(field, value, present_flag, infer)
 *
 * Assign a user-supplied override to a bitstream field, or fall back to an
 * inferred default:
 *  - value >= 0: store value in field and set need_vui to 1;
 *  - value < 0 and present_flag is false: store infer in field;
 *  - value < 0 and present_flag is true: leave field untouched.
 *
 * The macro writes to a variable named need_vui, which must be in scope at
 * the point of use. The arguments are expanded without parentheses and
 * field/value can be evaluated more than once, so pass only simple
 * expressions without side effects.
 */
304 #define SET_OR_INFER(field, value, present_flag, infer) do { \
305 if (value >= 0) { \
306 field = value; \
307 need_vui = 1; \
308 } else if (!present_flag) \
309 field = infer; \
310 } while (0)
311
312 if (
ctx->video_format >= 0 ||
313 ctx->video_full_range_flag >= 0 ||
314 ctx->colour_primaries >= 0 ||
315 ctx->transfer_characteristics >= 0 ||
316 ctx->matrix_coefficients >= 0) {
317
319 sps->vui.video_signal_type_present_flag, 5);
320
322 ctx->video_full_range_flag,
323 sps->vui.video_signal_type_present_flag, 0);
324
325 if (
ctx->colour_primaries >= 0 ||
326 ctx->transfer_characteristics >= 0 ||
327 ctx->matrix_coefficients >= 0) {
328
330 ctx->colour_primaries,
331 sps->vui.colour_description_present_flag, 2);
332
334 ctx->transfer_characteristics,
335 sps->vui.colour_description_present_flag, 2);
336
338 ctx->matrix_coefficients,
339 sps->vui.colour_description_present_flag, 2);
340
341 sps->vui.colour_description_present_flag = 1;
342 }
343 sps->vui.video_signal_type_present_flag = 1;
344 need_vui = 1;
345 }
346
347 if (
ctx->chroma_sample_loc_type >= 0) {
348 sps->vui.chroma_sample_loc_type_top_field =
349 ctx->chroma_sample_loc_type;
350 sps->vui.chroma_sample_loc_type_bottom_field =
351 ctx->chroma_sample_loc_type;
352 sps->vui.chroma_loc_info_present_flag = 1;
353 need_vui = 1;
354 }
355
356 if (
ctx->tick_rate.num &&
ctx->tick_rate.den) {
357 int num, den;
358
360 UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
361
362 sps->vui.vui_time_scale = num;
363 sps->vui.vui_num_units_in_tick = den;
364
365 sps->vui.vui_timing_info_present_flag = 1;
366 need_vui = 1;
367
368 if (
ctx->num_ticks_poc_diff_one > 0) {
369 sps->vui.vui_num_ticks_poc_diff_one_minus1 =
370 ctx->num_ticks_poc_diff_one - 1;
371 sps->vui.vui_poc_proportional_to_timing_flag = 1;
372 }
else if (
ctx->num_ticks_poc_diff_one == 0) {
373 sps->vui.vui_poc_proportional_to_timing_flag = 0;
374 }
375 }
376
378 &crop_top, &crop_bottom);
381
382 if (
sps->separate_colour_plane_flag ||
sps->chroma_format_idc == 0) {
383 crop_unit_x = 1;
384 crop_unit_y = 1;
385 } else {
386 crop_unit_x = 1 + (
sps->chroma_format_idc < 3);
387 crop_unit_y = 1 + (
sps->chroma_format_idc < 2);
388 }
/*
 * CROP(border, unit)
 *
 * Apply the local variable crop_<border> to the SPS conformance window:
 *  - crop_<border> < 0: do nothing;
 *  - crop_<border> not a multiple of unit: log an error and return
 *    AVERROR(EINVAL) from the ENCLOSING function;
 *  - otherwise: store crop_<border> / unit in sps->conf_win_<border>_offset
 *    and set sps->conformance_window_flag to 1.
 *
 * The macro relies on bsf, sps and crop_<border> being in scope at the point
 * of use. Note that it contains a return statement, so it can only be used
 * inside a function returning an error code. unit is expanded without
 * parentheses; pass a plain variable or constant.
 */
389 #define CROP(border, unit) do { \
390 if (crop_ ## border >= 0) { \
391 if (crop_ ## border % unit != 0) { \
392 av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
393 "must be a multiple of %d.\n", #border, unit); \
394 return AVERROR(EINVAL); \
395 } \
396 sps->conf_win_ ## border ## _offset = \
397 crop_ ## border / unit; \
398 sps->conformance_window_flag = 1; \
399 } \
400 } while (0)
402 CROP(right, crop_unit_x);
403 CROP(top, crop_unit_y);
404 CROP(bottom, crop_unit_y);
405 #undef CROP
406
407 if (need_vui)
408 sps->vui_parameters_present_flag = 1;
409
411
412 return 0;
413 }
414
417 {
420
421 // If an AUD is present, it must be the first NAL unit.
424 ff_cbs_delete_unit(au, 0);
425 } else {
428 int pic_type = 0, temporal_id = 8, layer_id = 0;
429
432 if (!nal)
433 continue;
436
441 pic_type < 2)
442 pic_type = 2;
444 pic_type < 1)
445 pic_type = 1;
446 }
447 }
448
451 .nuh_layer_id = layer_id,
452 .nuh_temporal_id_plus1 = temporal_id + 1,
453 };
454 aud->pic_type = pic_type;
455
457 if (err < 0) {
459 return err;
460 }
461 }
462 }
463
466
470 if (err < 0)
471 return err;
472 }
475 if (err < 0)
476 return err;
477 }
478 }
479
480 return 0;
481 }
482
485 .fragment_name = "access unit",
486 .unit_name = "NAL unit",
488 };
489
491 {
493 }
494
/* Byte offset of member x inside H265MetadataContext. */
495 #define OFFSET(x) offsetof(H265MetadataContext, x)
/* Flag bits shared by the option entries that follow: the bitwise OR of
 * AV_OPT_FLAG_VIDEO_PARAM and AV_OPT_FLAG_BSF_PARAM. */
496 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
500
501 { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
503 { .dbl = 0.0 }, 0, 65535,
FLAGS },
504
505 { "video_format", "Set video format (table E-2)",
507 { .i64 = -1 }, -1, 7,
FLAGS },
508 { "video_full_range_flag", "Set video full range flag",
510 { .i64 = -1 }, -1, 1,
FLAGS },
511 { "colour_primaries", "Set colour primaries (table E-3)",
513 { .i64 = -1 }, -1, 255,
FLAGS },
514 { "transfer_characteristics", "Set transfer characteristics (table E-4)",
516 { .i64 = -1 }, -1, 255,
FLAGS },
517 { "matrix_coefficients", "Set matrix coefficients (table E-5)",
519 { .i64 = -1 }, -1, 255,
FLAGS },
520
521 { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
523 { .i64 = -1 }, -1, 5,
FLAGS },
524
525 { "tick_rate",
526 "Set VPS and VUI tick rate (time_scale / num_units_in_tick)",
528 { .dbl = 0.0 }, 0, UINT_MAX,
FLAGS },
529 { "num_ticks_poc_diff_one",
530 "Set VPS and VUI number of ticks per POC increment",
532 { .i64 = -1 }, -1, INT_MAX,
FLAGS },
533
534 { "crop_left", "Set left border crop offset",
537 { "crop_right", "Set right border crop offset",
540 { "crop_top", "Set top border crop offset",
543 { "crop_bottom", "Set bottom border crop offset",
546 { "width", "Set width after crop",
549 { "height", "Set height after crop",
552
553 { "level", "Set level (tables A.6 and A.7)",
556 { "auto", "Attempt to guess level from stream properties",
/*
 * LEVEL(name, value)
 *
 * Expands to the initializer fields of one named constant in the "level"
 * unit: the name string, NULL, 0, AV_OPT_TYPE_CONST, a default of
 * .i64 = value, .flags = FLAGS and .unit = "level". It is meant to be used
 * inside braces, e.g. { LEVEL("2.1", 63) }.
 *
 * In the entries visible here, value is 30 times the level number
 * ("2.1" -> 63, "8.5" -> 255).
 */
559 #define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
560 { .i64 = value }, .flags = FLAGS, .unit = "level"
563 {
LEVEL(
"2.1", 63) },
565 {
LEVEL(
"3.1", 93) },
567 {
LEVEL(
"4.1", 123) },
569 {
LEVEL(
"5.1", 153) },
570 {
LEVEL(
"5.2", 156) },
572 {
LEVEL(
"6.1", 183) },
573 {
LEVEL(
"6.2", 186) },
574 {
LEVEL(
"8.5", 255) },
575 #undef LEVEL
576
578 };
579
585 };
586
589 };
590
592 .
p.
name =
"hevc_metadata",
599 };
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left