1 /*
2 * Copyright (C) 2016 Open Broadcast Systems Ltd.
3 * Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
30
33
34 /* The limited size resolution of each slice forces us to do this.
 * Expands to FFALIGN(b, s->size_scaler) + 4 + s->prefix_bytes, so it can only
 * be used where a context pointer named `s` is in scope. */
35 #define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes)
36
37 /* Cutoff point, in number of slices, for distributing the leftover bytes */
38 #define SLICE_REDIST_TOTAL 150
39
46
48 { 0 }, /* Custom format, here just to make indexing equal to base_vf */
55
58
65
68
71
74
77 };
79
84
86 };
87
94
95 typedef struct Plane {
104
116
125
132
135
136 /* For conversion from unsigned pixel values to signed */
140
141 /* Picture number */
143
144 /* Base video format */
148
149 /* Quantization matrix */
152
153 /* Division LUT */
155
156 int num_x;
/* #slices horizontally */
162
163 /* Rate control stuff */
169
170 /* Options */
179
180 /* Parse code state */
184
186 {
189 unsigned topbit = 1, maxval = 1;
190 uint64_t pbits = 0;
191
194 return;
195 }
196
197 while (
val > maxval) {
198 topbit <<= 1;
199 maxval <<= 1;
200 maxval |= 1;
201 }
202
204
206 topbit >>= 1;
208 pbits <<= 2;
210 pbits |= 0x1;
211 }
212
214 }
215
217 {
218 int topbit = 1, maxval = 1;
219
221 return 1;
222
223 while (
val > maxval) {
224 topbit <<= 1;
225 maxval <<= 1;
226 maxval |= 1;
227 }
228
230 }
231
232 /* VC-2 10.4 - parse_info() */
234 {
235 uint32_t cur_pos, dist;
236
238
240
241 /* Magic string */
243
244 /* Parse code */
246
247 /* Next parse offset */
248 dist = cur_pos -
s->next_parse_offset;
249 AV_WB32(
s->pb.buf +
s->next_parse_offset + 5, dist);
250 s->next_parse_offset = cur_pos;
252
253 /* Last parse offset */
255
256 s->last_parse_code = pcode;
257 }
258
259 /* VC-2 11.1 - parse_parameters()
260 * The level dictates what the decoder should expect in terms of resolution
261 * and allows it to quickly reject whatever it can't support. Remember,
262 * this codec kinda targets cheapo FPGAs without much memory. Unfortunately
263 * it also limits us greatly in our choice of formats, hence the flag to disable
264 * strict_compliance */
266 {
271 }
272
273 /* VC-2 11.3 - frame_size() */
275 {
277 if (!
s->strict_compliance) {
281 }
282 }
283
284 /* VC-2 11.3.3 - color_diff_sampling_format() */
286 {
288 if (!
s->strict_compliance) {
289 int idx;
290 if (
s->chroma_x_shift == 1 &&
s->chroma_y_shift == 0)
291 idx = 1; /* 422 */
292 else if (
s->chroma_x_shift == 1 &&
s->chroma_y_shift == 1)
293 idx = 2; /* 420 */
294 else
295 idx = 0; /* 444 */
297 }
298 }
299
300 /* VC-2 11.3.4 - scan_format() */
302 {
304 if (!
s->strict_compliance)
306 }
307
308 /* VC-2 11.3.5 - frame_rate() */
310 {
312 if (!
s->strict_compliance) {
317 }
318 }
319
320 /* VC-2 11.3.6 - aspect_ratio() */
322 {
324 if (!
s->strict_compliance) {
329 }
330 }
331
332 /* VC-2 11.3.7 - clean_area() */
334 {
336 }
337
338 /* VC-2 11.3.8 - signal_range() */
340 {
342 if (!
s->strict_compliance)
344 }
345
346 /* VC-2 11.3.9 - color_spec() */
348 {
351 if (!
s->strict_compliance) {
354
355 /* primaries */
363 else
366
367 /* color matrix */
375 else
378
379 /* transfer function */
385 else
388 }
389 }
390
391 /* VC-2 11.3 - source_parameters() */
393 {
402 }
403
404 /* VC-2 11 - sequence_header() */
406 {
412 }
413
414 /* VC-2 12.1 - picture_header() */
416 {
419 }
420
421 /* VC-2 12.3.4.1 - slice_parameters() */
423 {
428 }
429
430 /* 1st idx = LL, second - vertical, third - horizontal, fourth - total */
432 {20, 9, 15, 4},
433 { 0, 6, 6, 4},
434 { 0, 3, 3, 5},
435 { 0, 3, 5, 1},
436 { 0, 11, 10, 11}
437 };
438
440 { 0, 0, 0, 0},
441 { 0, 0, 0, 0},
442 { 0, 0, 0, 0},
443 { 0, 0, 0, 0},
444 { 0, 0, 0, 0}
445 };
446
448 {
449 int level, orientation;
450
451 if (
s->wavelet_depth <= 4 &&
s->quant_matrix ==
VC2_QM_DEF) {
452 s->custom_quant_matrix = 0;
458 }
459 return;
460 }
461
462 s->custom_quant_matrix = 1;
463
466 for (orientation = 0; orientation < 4; orientation++) {
469 else
471 }
472 }
475 for (orientation = 0; orientation < 4; orientation++) {
477 }
478 }
479 } else {
481 for (orientation = 0; orientation < 4; orientation++) {
483 }
484 }
485 }
486 }
487
488 /* VC-2 12.3.4.2 - quant_matrix() */
490 {
493 if (
s->custom_quant_matrix) {
499 }
500 }
501 }
502
503 /* VC-2 12.3 - transform_parameters() */
505 {
508
511 }
512
513 /* VC-2 12.2 - wavelet_transform() */
515 {
518 }
519
520 /* VC-2 12 - picture_parse() */
522 {
527 }
528
/* Evaluates ((mul) * (c) + (add)) >> (shift); every argument is parenthesized
 * in the expansion. The call site visible in this file passes the absolute
 * coefficient value as c, with mul and add taken from s->qmagic_lut. */
529 #define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift))
530
531 /* VC-2 13.5.5.2 - slice_band() */
534 {
535 int x, y;
536
537 const int left =
b->width * (sx+0) /
s->num_x;
538 const int right =
b->width * (sx+1) /
s->num_x;
539 const int top =
b->height * (sy+0) /
s->num_y;
540 const int bottom =
b->height * (sy+1) /
s->num_y;
541
543 const uint64_t q_m = ((uint64_t)(
s->qmagic_lut[
quant][0])) << 2;
544 const uint64_t q_a =
s->qmagic_lut[
quant][1];
546
547 for (y = top; y < bottom; y++) {
548 for (x =
left; x < right; x++) {
551 if (c_abs)
553 }
555 }
556 }
557
559 {
560 int x, y;
564
565 if (slice->
cache[quant_idx])
566 return slice->
cache[quant_idx];
567
568 bits += 8*
s->prefix_bytes;
569 bits += 8;
/* quant_idx */
570
572 for (orientation = !!
level; orientation < 4; orientation++)
573 quants[
level][orientation] =
FFMAX(quant_idx -
s->quant[
level][orientation], 0);
574
575 for (p = 0; p < 3; p++) {
576 int bytes_start, bytes_len, pad_s, pad_c;
577 bytes_start =
bits >> 3;
580 for (orientation = !!
level; orientation < 4; orientation++) {
582
583 const int q_idx = quants[
level][orientation];
584 const uint64_t q_m = ((uint64_t)
s->qmagic_lut[q_idx][0]) << 2;
585 const uint64_t q_a =
s->qmagic_lut[q_idx][1];
587
588 const int left =
b->width * slice->
x /
s->num_x;
589 const int right =
b->width *(slice->
x+1) /
s->num_x;
590 const int top =
b->height * slice->
y /
s->num_y;
591 const int bottom =
b->height *(slice->
y+1) /
s->num_y;
592
594
595 for (y = top; y < bottom; y++) {
596 for (x =
left; x < right; x++) {
597 uint32_t c_abs =
QUANT(
FFABS(buf[x]), q_m, q_a, q_s);
600 }
602 }
603 }
604 }
606 bytes_len = (
bits >> 3) - bytes_start - 1;
607 pad_s =
FFALIGN(bytes_len,
s->size_scaler)/
s->size_scaler;
608 pad_c = (pad_s*
s->size_scaler) - bytes_len;
610 }
611
613
615 }
616
617 /* Approaches the best possible quantizer asymptotically, it's kinda exhaustive
618 * but we have a LUT to get the coefficient size in bits. Guaranteed to never
619 * overshoot, which is apparently very important when streaming */
621 {
626 int quant_buf[2] = {-1, -1};
629 while ((
bits > top) || (
bits < bottom)) {
633 if (quant_buf[1] ==
quant) {
636 break;
637 }
639 quant_buf[1] = quant_buf[0];
640 quant_buf[0] =
quant;
642 }
645 return 0;
646 }
647
649 {
650 int i, j, slice_x, slice_y, bytes_left = 0;
652 int64_t total_bytes_needed = 0;
656
658
659 for (slice_y = 0; slice_y <
s->num_y; slice_y++) {
660 for (slice_x = 0; slice_x <
s->num_x; slice_x++) {
661 SliceArgs *args = &enc_args[
s->num_x*slice_y + slice_x];
667 memset(args->
cache, 0,
s->q_ceil*
sizeof(*args->
cache));
668 }
669 }
670
671 /* First pass - determine baseline slice sizes w.r.t. max_slice_size */
674
675 for (
i = 0;
i <
s->num_x*
s->num_y;
i++) {
677 bytes_left += args->
bytes;
678 for (j = 0; j < slice_redist_range; j++) {
679 if (args->
bytes > bytes_top[j]) {
680 bytes_top[j] = args->
bytes;
681 top_loc[j] = args;
682 break;
683 }
684 }
685 }
686
687 bytes_left =
s->frame_max_bytes - bytes_left;
688
689 /* Second pass - distribute leftover bytes */
690 while (bytes_left > 0) {
691 int distributed = 0;
692 for (
i = 0;
i < slice_redist_range;
i++) {
694 int bits, bytes,
diff, prev_bytes, new_idx;
695 if (bytes_left <= 0)
696 break;
697 if (!top_loc[
i] || !top_loc[
i]->quant_idx)
698 break;
700 prev_bytes = args->
bytes;
704 diff = bytes - prev_bytes;
705 if ((bytes_left -
diff) > 0) {
709 distributed++;
710 }
711 }
712 if (!distributed)
713 break;
714 }
715
716 for (
i = 0;
i <
s->num_x*
s->num_y;
i++) {
718 total_bytes_needed += args->
bytes;
720 }
721
722 return total_bytes_needed;
723 }
724
725 /* VC-2 13.5.3 - hq_slice */
727 {
731 const int slice_x = slice_dat->
x;
732 const int slice_y = slice_dat->
y;
733 const int quant_idx = slice_dat->
quant_idx;
734 const int slice_bytes_max = slice_dat->
bytes;
736 int p,
level, orientation;
737
738 /* The reference decoder ignores it, and its typical length is 0 */
741
743
744 /* Slice quantization (slice_quantizers() in the specs) */
746 for (orientation = !!
level; orientation < 4; orientation++)
747 quants[
level][orientation] =
FFMAX(quant_idx -
s->quant[
level][orientation], 0);
748
749 /* Luma + 2 Chroma planes */
750 for (p = 0; p < 3; p++) {
751 int bytes_start, bytes_len, pad_s, pad_c;
755 for (orientation = !!
level; orientation < 4; orientation++) {
757 &
s->plane[p].band[
level][orientation],
758 quants[
level][orientation]);
759 }
760 }
763 if (p == 2) {
765 pad_s =
FFALIGN((bytes_len + len_diff),
s->size_scaler)/
s->size_scaler;
766 pad_c = (pad_s*
s->size_scaler) - bytes_len;
767 } else {
768 pad_s =
FFALIGN(bytes_len,
s->size_scaler)/
s->size_scaler;
769 pad_c = (pad_s*
s->size_scaler) - bytes_len;
770 }
771 pb->
buf[bytes_start] = pad_s;
772 /* vc2-reference uses that padding that decodes to '0' coeffs */
775 }
776
777 return 0;
778 }
779
780 /* VC-2 13.5.1 - low_delay_transform_data() */
782 {
783 uint8_t *buf;
784 int slice_x, slice_y,
skip = 0;
786
789
790 for (slice_y = 0; slice_y <
s->num_y; slice_y++) {
791 for (slice_x = 0; slice_x <
s->num_x; slice_x++) {
792 SliceArgs *args = &enc_args[
s->num_x*slice_y + slice_x];
795 }
796 }
797
800
802
803 return 0;
804 }
805
806 /*
807 * Transform basics for a 3 level transform
808 * |---------------------------------------------------------------------|
809 * | LL-0 | HL-0 | | |
810 * |--------|-------| HL-1 | |
811 * | LH-0 | HH-0 | | |
812 * |----------------|-----------------| HL-2 |
813 * | | | |
814 * | LH-1 | HH-1 | |
815 * | | | |
816 * |----------------------------------|----------------------------------|
817 * | | |
818 * | | |
819 * | | |
820 * | LH-2 | HH-2 |
821 * | | |
822 * | | |
823 * | | |
824 * |---------------------------------------------------------------------|
825 *
826 * DWT transforms are generally applied by splitting the image in two vertically
827 * and applying a low pass transform on the left part and a corresponding high
828 * pass transform on the right hand side. This is known as the horizontal filter
829 * stage.
830 * After that, the same operation is performed except the image is divided
831 * horizontally, with the high pass on the lower and the low pass on the higher
832 * side.
833 * Therefore, you're left with 4 subdivisions - known as low-low, low-high,
834 * high-low and high-high. They're referred to as orientations in the decoder
835 * and encoder.
836 *
837 * The LL (low-low) area contains the original image downsampled by the amount
838 * of levels. The rest of the areas can be thought of as the details needed
839 * to restore the image perfectly to its original size.
840 */
842 {
846 const ptrdiff_t linesize = transform_dat->
istride;
851 const int idx =
s->wavelet_idx;
852 const int skip = 1 +
s->interlaced;
853
855 ptrdiff_t pix_stride = linesize >> (
s->bpp - 1);
856
859 pix_stride <<= 1;
860 }
else if (
field == 2) {
862 pix_stride <<= 1;
863 } else {
865 }
866
870 for (x = 0; x < p->
width; x++) {
871 buf[x] = pix[x] -
s->diff_offset;
872 }
875 pix += pix_stride;
876 }
877 } else {
880 for (x = 0; x < p->
width; x++) {
881 buf[x] = pix[x] -
s->diff_offset;
882 }
885 pix += pix_stride;
886 }
887 }
888
890
894 b->width,
b->height);
895 }
896
897 return 0;
898 }
899
901 const char *aux_data,
const int header_size,
int field)
902 {
905
906 /* Threaded DWT transform */
907 for (
i = 0;
i < 3;
i++) {
908 s->transform_args[
i].ctx =
s;
909 s->transform_args[
i].field =
field;
910 s->transform_args[
i].plane = &
s->plane[
i];
913 }
916
917 /* Calculate per-slice quantizers and sizes */
919
922 max_frame_bytes <<
s->interlaced, 0);
926 }
928 }
929
930 /* Sequence header */
933
934 /* Encoder version */
935 if (aux_data) {
938 }
939
940 /* Picture header */
943
944 /* Encode slices */
946
947 /* End sequence */
949
950 return 0;
951 }
952
955 {
957 int slice_ceil, sig_size = 256;
961 const int aux_data_size = bitexact ?
sizeof(
"Lavc") :
sizeof(
LIBAVCODEC_IDENT);
962 const int header_size = 100 + aux_data_size;
964
968 s->last_parse_code = 0;
969 s->next_parse_offset = 0;
970
971 /* Rate control */
972 s->frame_max_bytes = (
av_rescale(r_bitrate,
s->avctx->time_base.num,
973 s->avctx->time_base.den) >> 3) - header_size;
974 s->slice_max_bytes = slice_ceil =
av_rescale(
s->frame_max_bytes, 1,
s->num_x*
s->num_y);
975
976 /* Find an appropriate size scaler */
977 while (sig_size > 255) {
979 if (r_size > slice_ceil) {
980 s->slice_max_bytes -= r_size - slice_ceil;
982 }
983 sig_size = r_size/
s->size_scaler;
/* Signalled slice size */
984 s->size_scaler <<= 1;
985 }
986
987 s->slice_min_bytes =
s->slice_max_bytes -
s->slice_max_bytes*(
s->tolerance/100.0f);
988 if (
s->slice_min_bytes < 0)
990
998 }
999
1002
1003 *got_packet = 1;
1004
1005 return 0;
1006 }
1007
1009 {
1012
1014
1015 for (
i = 0;
i < 3;
i++) {
1018 }
1019
1021
1022 return 0;
1023 }
1024
1026 {
1033
1034 s->picture_number = 0;
1035
1036 /* Total allowed quantization range */
1038
1043
1045 s->strict_compliance = 1;
1046
1048 s->slice_max_bytes = 0;
1049 s->slice_min_bytes = 0;
1050
1051 /* Mark unknown as progressive */
1054
1058 continue;
1060 continue;
1062 continue;
1064 continue;
1066 continue;
1068 continue;
1071 break;
1072 }
1073
1076
1077 if ((
s->slice_width & (
s->slice_width - 1)) ||
1078 (
s->slice_height & (
s->slice_height - 1))) {
1081 }
1082
1083 if ((
s->slice_width > avctx->
width) ||
1084 (
s->slice_height > avctx->
height)) {
1087 }
1088
1089 if (
s->base_vf <= 0) {
1091 s->strict_compliance =
s->base_vf = 0;
1093 } else {
1095 "the specifications, decrease strictness to use it.\n");
1097 }
1098 } else {
1101 }
1102
1103 /* Chroma subsampling */
1107
1108 /* Bit depth and color range index */
1112 s->diff_offset = 128;
1117 s->diff_offset = 128;
1118 } else if (depth == 10) {
1121 s->diff_offset = 512;
1122 } else {
1125 s->diff_offset = 2048;
1126 }
1127
1128 /* Planes initialization */
1129 for (
i = 0;
i < 3;
i++) {
1132 p->
width = avctx->
width >> (
i ?
s->chroma_x_shift : 0);
1145 for (o = 0; o < 4; o++) {
1150 shift = (o > 1)*
b->height*
b->stride + (o & 1)*
b->width;
1152 }
1153 }
1154
1155 /* DWT init */
1157 s->plane[
i].coef_stride,
1158 s->plane[
i].dwt_height,
1159 s->slice_width,
s->slice_height))
1161 }
1162
1163 /* Slices */
1164 s->num_x =
s->plane[0].dwt_width/
s->slice_width;
1165 s->num_y =
s->plane[0].dwt_height/
s->slice_height;
1166
1170
1171 for (
i = 0;
i < 116;
i++) {
1173 const uint32_t m =
av_log2(qf);
1174 const uint32_t t = (1ULL << (m + 32)) / qf;
1175 const uint32_t
r = (t*qf + qf) & UINT32_MAX;
1176 if (!(qf & (qf - 1))) {
1177 s->qmagic_lut[
i][0] = 0xFFFFFFFF;
1178 s->qmagic_lut[
i][1] = 0xFFFFFFFF;
1179 }
else if (
r <= 1 << m) {
1180 s->qmagic_lut[
i][0] = t + 1;
1181 s->qmagic_lut[
i][1] = 0;
1182 } else {
1183 s->qmagic_lut[
i][0] = t;
1184 s->qmagic_lut[
i][1] = t;
1185 }
1186 }
1187
1188 return 0;
1189 }
1190
/* Combined encoding-parameter and video-parameter option flags; referenced by
 * each option entry that follows. */
1191 #define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1193 {
"tolerance",
"Max undershoot in percent", offsetof(
VC2EncContext, tolerance),
AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f,
VC2ENC_FLAGS, .unit =
"tolerance"},
1194 {
"slice_width",
"Slice width", offsetof(
VC2EncContext, slice_width),
AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024,
VC2ENC_FLAGS, .unit =
"slice_width"},
1195 {
"slice_height",
"Slice height", offsetof(
VC2EncContext, slice_height),
AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024,
VC2ENC_FLAGS, .unit =
"slice_height"},
1196 {
"wavelet_depth",
"Transform depth", offsetof(
VC2EncContext, wavelet_depth),
AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5,
VC2ENC_FLAGS, .unit =
"wavelet_depth"},
1197 {
"wavelet_type",
"Transform type", offsetof(
VC2EncContext, wavelet_idx),
AV_OPT_TYPE_INT, {.i64 =
VC2_TRANSFORM_9_7}, 0,
VC2_TRANSFORMS_NB,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
1198 {
"9_7",
"Deslauriers-Dubuc (9,7)", 0,
AV_OPT_TYPE_CONST, {.i64 =
VC2_TRANSFORM_9_7}, INT_MIN, INT_MAX,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
1199 {
"5_3",
"LeGall (5,3)", 0,
AV_OPT_TYPE_CONST, {.i64 =
VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
1202 {
"qm",
"Custom quantization matrix", offsetof(
VC2EncContext, quant_matrix),
AV_OPT_TYPE_INT, {.i64 =
VC2_QM_DEF}, 0,
VC2_QM_NB,
VC2ENC_FLAGS, .unit =
"quant_matrix"},
1207 };
1208
1215 };
1216
1218 { "b", "600000000" },
1220 };
1221
1227 };
1228
1244 };