1 /*
2 * Copyright (C) 2016 Open Broadcast Systems Ltd.
3 * Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
31
34
35 /* The limited size resolution of each slice forces us to do this.
 * Aligns b to s->size_scaler via FFALIGN, then adds 4 plus s->prefix_bytes.
 * NOTE: the expansion reads `s` from the surrounding scope, so the macro is
 * only usable where a context pointer named s is visible.
 * NOTE(review): the constant 4 is presumably the quant index byte plus the
 * three per-plane length bytes of a slice - confirm. */
36 #define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes)
37
38 /* Decides the cutoff point in # of slices to distribute the leftover bytes */
39 #define SLICE_REDIST_TOTAL 150
40
48
50 { 0 }, /* Custom format, here just to make indexing equal to base_vf */
57
60
67
70
73
76
79 };
81
86
88 };
89
96
97 typedef struct Plane {
106
109 union {
112 };
120
129
136
139
140 /* For conversion from unsigned pixel values to signed */
144
145 /* Picture number */
147
148 /* Base video format */
152
153 /* Quantization matrix */
156
157 /* Division LUT */
159
160 int num_x;
/* #slices horizontally */
166
167 /* Rate control stuff */
173
174 /* Options */
183
184 /* Parse code state */
188
/* Body of a routine that encodes the unsigned value val into pbits.
 * The signature and several statements are not visible in this excerpt, so
 * the notes below describe only the statements that are. */
190 {
193 unsigned topbit = 1, maxval = 1;
194 uint64_t pbits = 0;
195
/* Early exit; the guarding condition is not visible here. */
198 return;
199 }
200
/* Grow maxval through the all-ones values 1, 3, 7, ... until it is >= val.
 * topbit doubles on every step, so it stays equal to the top bit of maxval. */
201 while (
val > maxval) {
202 topbit <<= 1;
203 maxval <<= 1;
204 maxval |= 1;
205 }
206
208
/* Remainder of a loop whose header is not visible: each pass moves topbit
 * down one bit, shifts pbits left by two and sets its low bit.
 * NOTE(review): presumably builds an interleaved exp-Golomb code - confirm
 * against the missing lines. */
210 topbit >>= 1;
212 pbits <<= 2;
214 pbits |= 0x1;
215 }
216
218 }
219
/* Body of a routine that returns an int derived from val; the signature and
 * the final return statement are not visible in this excerpt. */
221 {
/* NOTE(review): topbit and maxval are signed int here. If val can exceed
 * INT_MAX the shifts below would overflow - confirm val's type and range. */
222 int topbit = 1, maxval = 1;
223
/* Early exit with 1; the guarding condition is not visible here. */
225 return 1;
226
/* Grow maxval through the all-ones values 1, 3, 7, ... until it is >= val,
 * doubling topbit on every step. */
227 while (
val > maxval) {
228 topbit <<= 1;
229 maxval <<= 1;
230 maxval |= 1;
231 }
232
234 }
235
236 /* VC-2 10.4 - parse_info() */
238 {
239 uint32_t cur_pos, dist;
240
242
244
245 /* Magic string */
247
248 /* Parse code */
250
251 /* Next parse offset */
252 dist = cur_pos -
s->next_parse_offset;
253 AV_WB32(
s->pb.buf +
s->next_parse_offset + 5, dist);
254 s->next_parse_offset = cur_pos;
256
257 /* Last parse offset */
259
260 s->last_parse_code = pcode;
261 }
262
263 /* VC-2 11.1 - parse_parameters()
264 * The level dictates what the decoder should expect in terms of resolution
265 * and allows it to quickly reject whatever it can't support. Remember,
266 * this codec mostly targets low-cost FPGAs without much memory. Unfortunately,
267 * it also greatly limits our choice of formats, hence the flag to disable
268 * strict_compliance. */
270 {
275 }
276
277 /* VC-2 11.3 - frame_size() */
279 {
281 if (!
s->strict_compliance) {
285 }
286 }
287
288 /* VC-2 11.3.3 - color_diff_sampling_format() */
290 {
292 if (!
s->strict_compliance) {
293 int idx;
294 if (
s->chroma_x_shift == 1 &&
s->chroma_y_shift == 0)
295 idx = 1; /* 422 */
296 else if (
s->chroma_x_shift == 1 &&
s->chroma_y_shift == 1)
297 idx = 2; /* 420 */
298 else
299 idx = 0; /* 444 */
301 }
302 }
303
304 /* VC-2 11.3.4 - scan_format() */
306 {
308 if (!
s->strict_compliance)
310 }
311
312 /* VC-2 11.3.5 - frame_rate() */
314 {
316 if (!
s->strict_compliance) {
321 }
322 }
323
324 /* VC-2 11.3.6 - aspect_ratio() */
326 {
328 if (!
s->strict_compliance) {
333 }
334 }
335
336 /* VC-2 11.3.7 - clean_area() */
338 {
340 }
341
342 /* VC-2 11.3.8 - signal_range() */
344 {
346 if (!
s->strict_compliance)
348 }
349
350 /* VC-2 11.3.9 - color_spec() */
352 {
355 if (!
s->strict_compliance) {
358
359 /* primaries */
367 else
370
371 /* color matrix */
379 else
382
383 /* transfer function */
389 else
392 }
393 }
394
395 /* VC-2 11.3 - source_parameters() */
397 {
406 }
407
408 /* VC-2 11 - sequence_header() */
410 {
416 }
417
418 /* VC-2 12.1 - picture_header() */
420 {
423 }
424
425 /* VC-2 12.3.4.1 - slice_parameters() */
427 {
432 }
433
434 /* 1st idx = LL, second - vertical, third - horizontal, fourth - total */
436 {20, 9, 15, 4},
437 { 0, 6, 6, 4},
438 { 0, 3, 3, 5},
439 { 0, 3, 5, 1},
440 { 0, 11, 10, 11}
441 };
442
444 { 0, 0, 0, 0},
445 { 0, 0, 0, 0},
446 { 0, 0, 0, 0},
447 { 0, 0, 0, 0},
448 { 0, 0, 0, 0}
449 };
450
452 {
453 int level, orientation;
454
455 if (
s->wavelet_depth <= 4 &&
s->quant_matrix ==
VC2_QM_DEF) {
456 s->custom_quant_matrix = 0;
462 }
463 return;
464 }
465
466 s->custom_quant_matrix = 1;
467
470 for (orientation = 0; orientation < 4; orientation++) {
473 else
475 }
476 }
479 for (orientation = 0; orientation < 4; orientation++) {
481 }
482 }
483 } else {
485 for (orientation = 0; orientation < 4; orientation++) {
487 }
488 }
489 }
490 }
491
492 /* VC-2 12.3.4.2 - quant_matrix() */
494 {
497 if (
s->custom_quant_matrix) {
503 }
504 }
505 }
506
507 /* VC-2 12.3 - transform_parameters() */
509 {
512
515 }
516
517 /* VC-2 12.2 - wavelet_transform() */
519 {
522 }
523
524 /* VC-2 12 - picture_parse() */
526 {
531 }
532
/* Fixed-point multiply-add-shift: ((mul * c) + add) >> shift.
 * Every argument is parenthesized and used exactly once in the expansion.
 * The visible callers pass mul and add taken from s->qmagic_lut.
 * NOTE(review): presumably this replaces a division by the quantization
 * factor with a multiplication by a precomputed reciprocal - confirm. */
533 #define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift))
534
535 /* VC-2 13.5.5.2 - slice_band() */
538 {
539 int x, y;
540
541 const int left =
b->width * (sx+0) /
s->num_x;
542 const int right =
b->width * (sx+1) /
s->num_x;
543 const int top =
b->height * (sy+0) /
s->num_y;
544 const int bottom =
b->height * (sy+1) /
s->num_y;
545
547 const uint64_t q_m = ((uint64_t)(
s->qmagic_lut[
quant][0])) << 2;
548 const uint64_t q_a =
s->qmagic_lut[
quant][1];
550
551 for (y = top; y < bottom; y++) {
552 for (x =
left; x < right; x++) {
555 if (c_abs)
557 }
559 }
560 }
561
563 {
564 int x, y;
568
569 if (slice->
cache[quant_idx])
570 return slice->
cache[quant_idx];
571
572 bits += 8*
s->prefix_bytes;
573 bits += 8;
/* quant_idx */
574
576 for (orientation = !!
level; orientation < 4; orientation++)
577 quants[
level][orientation] =
FFMAX(quant_idx -
s->quant[
level][orientation], 0);
578
579 for (p = 0; p < 3; p++) {
580 int bytes_start, bytes_len, pad_s, pad_c;
581 bytes_start =
bits >> 3;
584 for (orientation = !!
level; orientation < 4; orientation++) {
586
587 const int q_idx = quants[
level][orientation];
588 const uint64_t q_m = ((uint64_t)
s->qmagic_lut[q_idx][0]) << 2;
589 const uint64_t q_a =
s->qmagic_lut[q_idx][1];
591
592 const int left =
b->width * slice->
x /
s->num_x;
593 const int right =
b->width *(slice->
x+1) /
s->num_x;
594 const int top =
b->height * slice->
y /
s->num_y;
595 const int bottom =
b->height *(slice->
y+1) /
s->num_y;
596
598
599 for (y = top; y < bottom; y++) {
600 for (x =
left; x < right; x++) {
601 uint32_t c_abs =
QUANT(
FFABS(buf[x]), q_m, q_a, q_s);
604 }
606 }
607 }
608 }
610 bytes_len = (
bits >> 3) - bytes_start - 1;
611 pad_s =
FFALIGN(bytes_len,
s->size_scaler)/
s->size_scaler;
612 pad_c = (pad_s*
s->size_scaler) - bytes_len;
614 }
615
617
619 }
620
621 /* Approaches the best possible quantizer asymptotically. It's somewhat exhaustive,
622 * but we have a LUT to get the coefficient size in bits. Guaranteed to never
623 * overshoot, which is apparently very important when streaming */
625 {
630 int quant_buf[2] = {-1, -1};
633 while ((
bits > top) || (
bits < bottom)) {
637 if (quant_buf[1] ==
quant) {
640 break;
641 }
643 quant_buf[1] = quant_buf[0];
644 quant_buf[0] =
quant;
646 }
649 return 0;
650 }
651
653 {
654 int i, j, slice_x, slice_y, bytes_left = 0;
656 int64_t total_bytes_needed = 0;
660
662
663 for (slice_y = 0; slice_y <
s->num_y; slice_y++) {
664 for (slice_x = 0; slice_x <
s->num_x; slice_x++) {
665 SliceArgs *args = &enc_args[
s->num_x*slice_y + slice_x];
671 memset(args->
cache, 0,
s->q_ceil*
sizeof(*args->
cache));
672 }
673 }
674
675 /* First pass - determine baseline slice sizes w.r.t. max_slice_size */
678
679 for (
i = 0;
i <
s->num_x*
s->num_y;
i++) {
681 bytes_left += args->
bytes;
682 for (j = 0; j < slice_redist_range; j++) {
683 if (args->
bytes > bytes_top[j]) {
684 bytes_top[j] = args->
bytes;
685 top_loc[j] = args;
686 break;
687 }
688 }
689 }
690
691 bytes_left =
s->frame_max_bytes - bytes_left;
692
693 /* Second pass - distribute leftover bytes */
694 while (bytes_left > 0) {
695 int distributed = 0;
696 for (
i = 0;
i < slice_redist_range;
i++) {
698 int bits, bytes,
diff, prev_bytes, new_idx;
699 if (bytes_left <= 0)
700 break;
701 if (!top_loc[
i] || !top_loc[
i]->quant_idx)
702 break;
704 prev_bytes = args->
bytes;
708 diff = bytes - prev_bytes;
709 if ((bytes_left -
diff) > 0) {
713 distributed++;
714 }
715 }
716 if (!distributed)
717 break;
718 }
719
720 for (
i = 0;
i <
s->num_x*
s->num_y;
i++) {
722 total_bytes_needed += args->
bytes;
724 }
725
726 return total_bytes_needed;
727 }
728
729 /* VC-2 13.5.3 - hq_slice */
731 {
735 const int slice_x = slice_dat->
x;
736 const int slice_y = slice_dat->
y;
737 const int quant_idx = slice_dat->
quant_idx;
738 const int slice_bytes_max = slice_dat->
bytes;
740 int p,
level, orientation;
741
742 /* The reference decoder ignores it, and its typical length is 0 */
743 memset(slice_dat->
buf, 0,
s->prefix_bytes);
744
746
748
749 /* Slice quantization (slice_quantizers() in the specs) */
751 for (orientation = !!
level; orientation < 4; orientation++)
752 quants[
level][orientation] =
FFMAX(quant_idx -
s->quant[
level][orientation], 0);
753
754 /* Luma + 2 Chroma planes */
755 for (p = 0; p < 3; p++) {
756 int bytes_start, bytes_len, pad_s, pad_c;
760 for (orientation = !!
level; orientation < 4; orientation++) {
762 &
s->plane[p].band[
level][orientation],
763 quants[
level][orientation]);
764 }
765 }
768 if (p == 2) {
770 pad_s =
FFALIGN((bytes_len + len_diff),
s->size_scaler)/
s->size_scaler;
771 pad_c = (pad_s*
s->size_scaler) - bytes_len;
772 } else {
773 pad_s =
FFALIGN(bytes_len,
s->size_scaler)/
s->size_scaler;
774 pad_c = (pad_s*
s->size_scaler) - bytes_len;
775 }
776 pb->
buf[bytes_start] = pad_s;
777 /* vc2-reference uses padding that decodes to '0' coeffs */
780 }
781
782 return 0;
783 }
784
785 /* VC-2 13.5.1 - low_delay_transform_data() */
787 {
788 uint8_t *buf;
789 int slice_x, slice_y,
skip = 0;
791
794
795 for (slice_y = 0; slice_y <
s->num_y; slice_y++) {
796 for (slice_x = 0; slice_x <
s->num_x; slice_x++) {
797 SliceArgs *args = &enc_args[
s->num_x*slice_y + slice_x];
800 }
801 }
802
805
807
808 return 0;
809 }
810
811 /*
812 * Transform basics for a 3 level transform
813 * |---------------------------------------------------------------------|
814 * | LL-0 | HL-0 | | |
815 * |--------|-------| HL-1 | |
816 * | LH-0 | HH-0 | | |
817 * |----------------|-----------------| HL-2 |
818 * | | | |
819 * | LH-1 | HH-1 | |
820 * | | | |
821 * |----------------------------------|----------------------------------|
822 * | | |
823 * | | |
824 * | | |
825 * | LH-2 | HH-2 |
826 * | | |
827 * | | |
828 * | | |
829 * |---------------------------------------------------------------------|
830 *
831 * DWT transforms are generally applied by splitting the image in two vertically
832 * and applying a low pass transform on the left part and a corresponding high
833 * pass transform on the right hand side. This is known as the horizontal filter
834 * stage.
835 * After that, the same operation is performed except the image is divided
836 * horizontally, with the high pass on the lower side and the low pass on the
837 * upper side.
838 * Therefore, you're left with 4 subdivisions - known as low-low, low-high,
839 * high-low and high-high. They're referred to as orientations in the decoder
840 * and encoder.
841 *
842 * The LL (low-low) area contains the original image downsampled by the number
843 * of levels. The rest of the areas can be thought of as the details needed
844 * to restore the image perfectly to its original size.
845 */
847 {
851 const ptrdiff_t linesize = transform_dat->
istride;
856 const int idx =
s->wavelet_idx;
857 const int skip = 1 +
s->interlaced;
858
860 ptrdiff_t pix_stride = linesize >> (
s->bpp - 1);
861
864 pix_stride <<= 1;
865 }
else if (
field == 2) {
867 pix_stride <<= 1;
868 } else {
870 }
871
875 for (x = 0; x < p->
width; x++) {
876 buf[x] = pix[x] -
s->diff_offset;
877 }
880 pix += pix_stride;
881 }
882 } else {
885 for (x = 0; x < p->
width; x++) {
886 buf[x] = pix[x] -
s->diff_offset;
887 }
890 pix += pix_stride;
891 }
892 }
893
895
899 b->width,
b->height);
900 }
901
902 return 0;
903 }
904
906 const char *aux_data,
const int header_size,
int field)
907 {
910
911 /* Threaded DWT transform */
912 for (
i = 0;
i < 3;
i++) {
913 s->transform_args[
i].ctx =
s;
914 s->transform_args[
i].field =
field;
915 s->transform_args[
i].plane = &
s->plane[
i];
916 s->transform_args[
i].idata =
frame->data[
i];
917 s->transform_args[
i].istride =
frame->linesize[
i];
918 }
921
922 /* Calculate per-slice quantizers and sizes */
924
927 max_frame_bytes <<
s->interlaced, 0);
931 }
932
933 /* Sequence header */
936
937 /* Encoder version */
938 if (aux_data) {
941 }
942
943 /* Picture header */
946
947 /* Encode slices */
949
950 /* End sequence */
952
953 return 0;
954 }
955
958 {
960 int slice_ceil, sig_size = 256;
964 const int aux_data_size = bitexact ?
sizeof(
"Lavc") :
sizeof(
LIBAVCODEC_IDENT);
965 const int header_size = 100 + aux_data_size;
967
971 s->last_parse_code = 0;
972 s->next_parse_offset = 0;
973
974 /* Rate control */
975 s->frame_max_bytes = (
av_rescale(r_bitrate,
s->avctx->time_base.num,
976 s->avctx->time_base.den) >> 3) - header_size;
977 s->slice_max_bytes = slice_ceil =
av_rescale(
s->frame_max_bytes, 1,
s->num_x*
s->num_y);
978
979 /* Find an appropriate size scaler */
980 while (sig_size > 255) {
982 if (r_size > slice_ceil) {
983 s->slice_max_bytes -= r_size - slice_ceil;
985 }
986 sig_size = r_size/
s->size_scaler;
/* Signalled slice size */
987 s->size_scaler <<= 1;
988 }
989
990 s->slice_min_bytes =
s->slice_max_bytes -
s->slice_max_bytes*(
s->tolerance/100.0f);
991 if (
s->slice_min_bytes < 0 ||
s->slice_max_bytes > INT_MAX >> 3)
993
1001 }
1002
1005
1006 *got_packet = 1;
1007
1008 return 0;
1009 }
1010
1012 {
1015
1017
1018 for (
i = 0;
i < 3;
i++) {
1021 }
1022
1024
1025 return 0;
1026 }
1027
1029 {
1034 int depth;
1036
1037 s->picture_number = 0;
1038
1039 /* Total allowed quantization range */
1041
1046
1048 s->strict_compliance = 1;
1049
1051 s->slice_max_bytes = 0;
1052 s->slice_min_bytes = 0;
1053
1054 /* Mark unknown as progressive */
1057
1061 continue;
1063 continue;
1065 continue;
1067 continue;
1069 continue;
1071 continue;
1074 break;
1075 }
1076
1079
1080 if ((
s->slice_width & (
s->slice_width - 1)) ||
1081 (
s->slice_height & (
s->slice_height - 1))) {
1084 }
1085
1086 if ((
s->slice_width > avctx->
width) ||
1087 (
s->slice_height > avctx->
height)) {
1090 }
1091
1092 if (
s->base_vf <= 0) {
1094 s->strict_compliance =
s->base_vf = 0;
1096 } else {
1098 "the specifications, decrease strictness to use it.\n");
1100 }
1101 } else {
1104 }
1105
1107 /* Chroma subsampling */
1110
1111 /* Bit depth and color range index */
1116 s->diff_offset = 128;
1121 s->diff_offset = 128;
1122 } else if (depth == 10) {
1125 s->diff_offset = 512;
1126 } else {
1129 s->diff_offset = 2048;
1130 }
1131
1132 /* Planes initialization */
1133 for (
i = 0;
i < 3;
i++) {
1136 p->
width = avctx->
width >> (
i ?
s->chroma_x_shift : 0);
1149 for (o = 0; o < 4; o++) {
1154 shift = (o > 1)*
b->height*
b->stride + (o & 1)*
b->width;
1156 }
1157 }
1158
1159 /* DWT init */
1161 s->plane[
i].coef_stride,
1162 s->plane[
i].dwt_height,
1163 s->slice_width,
s->slice_height))
1165 }
1166
1167 /* Slices */
1168 s->num_x =
s->plane[0].dwt_width/
s->slice_width;
1169 s->num_y =
s->plane[0].dwt_height/
s->slice_height;
1170
1174
1175 for (
i = 0;
i < 116;
i++) {
1177 const uint32_t m =
av_log2(qf);
1178 const uint32_t t = (1ULL << (m + 32)) / qf;
1179 const uint32_t
r = (t*qf + qf) & UINT32_MAX;
1180 if (!(qf & (qf - 1))) {
1181 s->qmagic_lut[
i][0] = 0xFFFFFFFF;
1182 s->qmagic_lut[
i][1] = 0xFFFFFFFF;
1183 }
else if (
r <= 1 << m) {
1184 s->qmagic_lut[
i][0] = t + 1;
1185 s->qmagic_lut[
i][1] = 0;
1186 } else {
1187 s->qmagic_lut[
i][0] = t;
1188 s->qmagic_lut[
i][1] = t;
1189 }
1190 }
1191
1192 return 0;
1193 }
1194
1195 #define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Entries of the encoder's option table; the array declaration and some
 * entries are not visible in this excerpt. Each visible entry lists a name,
 * a help string, a field offset into VC2EncContext (0 for named constants),
 * a type, a default value, two numeric bounds, VC2ENC_FLAGS and a unit.
 * NOTE(review): the two bounds are presumably min and max - confirm against
 * the AVOption layout. */
1197 {
"tolerance",
"Max undershoot in percent", offsetof(
VC2EncContext, tolerance),
AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f,
VC2ENC_FLAGS, .unit =
"tolerance"},
1198 {
"slice_width",
"Slice width", offsetof(
VC2EncContext, slice_width),
AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024,
VC2ENC_FLAGS, .unit =
"slice_width"},
1199 {
"slice_height",
"Slice height", offsetof(
VC2EncContext, slice_height),
AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024,
VC2ENC_FLAGS, .unit =
"slice_height"},
1200 {
"wavelet_depth",
"Transform depth", offsetof(
VC2EncContext, wavelet_depth),
AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5,
VC2ENC_FLAGS, .unit =
"wavelet_depth"},
/* NOTE(review): the upper bound is VC2_TRANSFORMS_NB itself. If that name is
 * a count of transforms, the value VC2_TRANSFORMS_NB would be accepted even
 * though it is one past the last valid index - confirm and consider
 * VC2_TRANSFORMS_NB - 1. */
1201 {
"wavelet_type",
"Transform type", offsetof(
VC2EncContext, wavelet_idx),
AV_OPT_TYPE_INT, {.i64 =
VC2_TRANSFORM_9_7}, 0,
VC2_TRANSFORMS_NB,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
/* Named constants selectable for the "wavelet_idx" unit. */
1202 {
"9_7",
"Deslauriers-Dubuc (9,7)", 0,
AV_OPT_TYPE_CONST, {.i64 =
VC2_TRANSFORM_9_7}, INT_MIN, INT_MAX,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
1203 {
"5_3",
"LeGall (5,3)", 0,
AV_OPT_TYPE_CONST, {.i64 =
VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX,
VC2ENC_FLAGS, .unit =
"wavelet_idx"},
/* NOTE(review): same pattern as "wavelet_type" - the upper bound is
 * VC2_QM_NB; if that is a count, the bound is one too large - confirm. */
1206 {
"qm",
"Custom quantization matrix", offsetof(
VC2EncContext, quant_matrix),
AV_OPT_TYPE_INT, {.i64 =
VC2_QM_DEF}, 0,
VC2_QM_NB,
VC2ENC_FLAGS, .unit =
"quant_matrix"},
1211 };
1212
1219 };
1220
1222 { "b", "600000000" },
1224 };
1225
1231 };
1232
1249 };