1 /*
2 * NewTek SpeedHQ codec
3 * Copyright 2017 Steinar H. Gunderson
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * NewTek SpeedHQ decoder.
25 */
26
27 #define BITSTREAM_READER_LE
28
31
44
45 #define MAX_INDEX (64 - 1)
46
47 /*
48 * 5 bits makes for very small tables, with no more than two lookups needed
49 * for the longest (10-bit) codes.
50 */
51 #define ALPHA_VLC_BITS 5
52
64
65 /* NOTE: The first element is always 16, unscaled. */
67 16, 16, 19, 22, 26, 27, 29, 34,
68 16, 16, 22, 24, 27, 29, 34, 37,
69 19, 22, 26, 27, 29, 34, 34, 38,
70 22, 22, 26, 27, 29, 34, 37, 40,
71 22, 26, 27, 29, 32, 35, 40, 48,
72 26, 27, 29, 32, 35, 40, 48, 58,
73 26, 27, 29, 34, 38, 46, 56, 69,
74 27, 29, 35, 38, 46, 56, 69, 83
75 };
76
81
83
85 {
87
88 if (component == 0 || component == 3) {
90 } else {
92 }
95 } else {
97 }
99 }
100
102 {
/*
 * NOTE(review): the signature, the local declarations and most of the
 * entropy-decoding loop are missing from this excerpt. Only the statements
 * that are actually visible are described below.
 */
105

107
108 {
110

111 for ( ;; ) {
113

116

121

125 }
126

128 }
129

/*
 * Write 8 output rows of 16 bytes each. block[] is read as 8 rows of 16
 * values; each value is subtracted from the running last_alpha[] entry of
 * its column, so every row is reconstructed as a difference from the row
 * above it. The updated last_alpha[] is then copied out as the row, and
 * dest moves down by linesize bytes.
 */
130 for (y = 0; y < 8; y++) {
131 for (x = 0; x < 16; x++) {
132 last_alpha[x] -=
block[y * 16 + x];
133 }
134 memcpy(dest, last_alpha, 16);
135 dest += linesize;
136 }
137

138 return 0;
139 }
140
142 {
/*
 * NOTE(review): the signature, the declaration of block, the assignment of
 * dc_offset and most of the AC loop are missing from this excerpt. Only the
 * statements that are actually visible are described below.
 */
143 const int *quant_matrix =
s->quant_matrix;
144 const uint8_t *scantable =
s->permutated_intra_scantable;
146 int dc_offset;
147

148 s->bdsp.clear_block(
block);
149

/*
 * DC prediction is tracked per component in last_dc[]. The decoded offset
 * is subtracted from the predictor, and the updated predictor is stored
 * unmodified at the first scan position.
 */
151 last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */
152 block[scantable[0]] = last_dc[component];
/* quant_matrix[0] is always 16. */
153

154 /* Read AC coefficients. */
155 {
158 for ( ;; ) {
164 break;
169 /* If next bit is 1, level = -level */
173 } else {
174 /* Escape. */
/*
 * Compile-time guard: the build fails if the bit cache cannot be relied on
 * to hold 6 + 6 + 12 bits, which the escape code needs.
 */
175 #if MIN_CACHE_BITS < 6 + 6 + 12
176 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
177 #endif
182

186 }
187

189 }
191 }
192

/*
 * Hand the coefficient block to the IDCT routine held in s->idsp.
 * NOTE(review): presumably this writes the 8x8 result at dest with linesize
 * as the row pitch; confirm against the idsp API.
 */
193 s->idsp.idct_put(dest, linesize,
block);
194

195 return 0;
196 }
197
199 {
/*
 * NOTE(review): the signature and every block-decoding call are missing
 * from this excerpt; only the visible pointer and stride setup is described.
 */
/* Per-plane row pitch: the frame's linesize scaled by line_stride. */
200 int linesize_y =
frame->linesize[0] * line_stride;
201 int linesize_cb =
frame->linesize[1] * line_stride;
202 int linesize_cr =
frame->linesize[2] * line_stride;
203 int linesize_a;
205

/* linesize_a is only assigned when the stream carries an alpha plane. */
206 if (
s->alpha_type != SHQ_NO_ALPHA)
207 linesize_a =
frame->linesize[3] * line_stride;
208

/*
 * Walk down the frame in steps of 16 * line_stride rows. The DC predictors
 * are re-created at 1024 on every iteration.
 */
209 for (
int y = 0; y <
frame->height; y += 16 * line_stride) {
210 int last_dc[4] = { 1024, 1024, 1024, 1024 };
211 uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
212 uint8_t last_alpha[16];
/* Column that starts 8 pixels before the right edge of the frame. */
213 int x =
frame->width - 8;
214

215 dest_y =
frame->data[0] +
frame->linesize[0] * (y + field_number) + x;
/*
 * Chroma always uses half the luma column (x / 2). In the 4:2:0 case the
 * row index is halved as well; otherwise chroma uses the luma row index.
 */
216 if (
s->subsampling == SHQ_SUBSAMPLING_420) {
217 dest_cb =
frame->data[1] +
frame->linesize[1] * (y/2 + field_number) + x / 2;
218 dest_cr =
frame->data[2] +
frame->linesize[2] * (y/2 + field_number) + x / 2;
219 } else {
221 dest_cb =
frame->data[1] +
frame->linesize[1] * (y + field_number) + x / 2;
222 dest_cr =
frame->data[2] +
frame->linesize[2] * (y + field_number) + x / 2;
223 }
/* With alpha present, the row predictor starts at 255 for all 16 columns. */
224 if (
s->alpha_type != SHQ_NO_ALPHA) {
225 memset(last_alpha, 255, sizeof(last_alpha));
226 dest_a =
frame->data[3] +
frame->linesize[3] * (y + field_number) + x;
227 }
228

241

242 if (
s->subsampling != SHQ_SUBSAMPLING_420) {
247 }
248

249 if (
s->alpha_type == SHQ_RLE_ALPHA) {
250 /* Alpha coded using 16x8 RLE blocks. */
255 }
else if (
s->alpha_type == SHQ_DCT_ALPHA) {
256 /* Alpha encoded exactly like luma. */
265 }
266 }
267

268 return 0;
269 }
270
272 {
/*
 * NOTE(review): the signature, the bit-reader setup, most block-decoding
 * calls and the statements guarded by several of the ifs below are missing
 * from this excerpt. Only the visible statements are described.
 */
273 int ret, x, y, slice_offsets[5];
/* Per-plane row pitch: the frame's linesize scaled by line_stride. */
275 int linesize_y =
frame->linesize[0] * line_stride;
276 int linesize_cb =
frame->linesize[1] * line_stride;
277 int linesize_cr =
frame->linesize[2] * line_stride;
278 int linesize_a;
280

/* linesize_a is only assigned when the stream carries an alpha plane. */
281 if (
s->alpha_type != SHQ_NO_ALPHA)
282 linesize_a =
frame->linesize[3] * line_stride;
283

/* Range check on [start, end): ordered, at least 3 bytes, inside the buffer. */
284 if (end < start || end - start < 3 || end > buf_size)
286

/*
 * Build the boundaries of four consecutive slices inside [start, end).
 * The first three slices each begin with a 24-bit value read via AV_RL24,
 * which is added to that slice's start offset to give the start of the next
 * slice. Each such value must be at least 3 and must leave at least 3 bytes
 * before end.
 */
287 slice_offsets[0] = start;
288 slice_offsets[4] = end;
289 for (x = 1; x < 4; x++) {
290 uint32_t last_offset, slice_len;
291

292 last_offset = slice_offsets[x - 1];
293 slice_len =
AV_RL24(buf + last_offset);
294 slice_offsets[x] = last_offset + slice_len;
295

296 if (slice_len < 3 || slice_offsets[x] > end - 3)
298 }
299

300 slice_begin = slice_offsets[slice_number];
301 slice_end = slice_offsets[slice_number + 1];
302

305

/*
 * The slices interleave vertically: slice k starts at row
 * k * 16 * line_stride and then advances by 64 * line_stride rows.
 * The DC and alpha predictors are re-created for every such band.
 */
306 for (y = slice_number * 16 * line_stride; y <
frame->height; y += line_stride * 64) {
307 uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
308 int last_dc[4] = { 1024, 1024, 1024, 1024 };
309 uint8_t last_alpha[16];
310

311 memset(last_alpha, 255, sizeof(last_alpha));
312

313 dest_y =
frame->data[0] +
frame->linesize[0] * (y + field_number);
/* 4:2:0 halves the chroma row index; the other modes use the luma row index. */
314 if (
s->subsampling == SHQ_SUBSAMPLING_420) {
315 dest_cb =
frame->data[1] +
frame->linesize[1] * (y/2 + field_number);
316 dest_cr =
frame->data[2] +
frame->linesize[2] * (y/2 + field_number);
317 } else {
318 dest_cb =
frame->data[1] +
frame->linesize[1] * (y + field_number);
319 dest_cr =
frame->data[2] +
frame->linesize[2] * (y + field_number);
320 }
321 if (
s->alpha_type != SHQ_NO_ALPHA) {
322 dest_a =
frame->data[3] +
frame->linesize[3] * (y + field_number);
323 }
324

/*
 * Step across the row 16 luma pixels at a time. For every mode except
 * 4:4:4 the loop bound is reduced by 8 pixels.
 */
325 for (x = 0; x <
frame->width - 8 * (
s->subsampling != SHQ_SUBSAMPLING_444); x += 16) {
326 /* Decode the four luma blocks. */
335

336 /*
337 * Decode the first chroma block. For 4:2:0, this is the only one;
338 * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
339 */
344

345 if (
s->subsampling != SHQ_SUBSAMPLING_420) {
346 /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
351

352 if (
s->subsampling == SHQ_SUBSAMPLING_444) {
353 /* Top-right and bottom-right blocks. */
358 if ((
ret =
decode_dct_block(
s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0)
360 if ((
ret =
decode_dct_block(
s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
362

/*
 * Extra 8-byte chroma advance for 4:4:4 only; together with the
 * unconditional += 8 further down, chroma then moves 16 bytes per
 * iteration, the same as luma.
 */
363 dest_cb += 8;
364 dest_cr += 8;
365 }
366 }
367 dest_y += 16;
368 dest_cb += 8;
369 dest_cr += 8;
370

371 if (
s->alpha_type == SHQ_RLE_ALPHA) {
372 /* Alpha coded using 16x8 RLE blocks. */
377 dest_a += 16;
378 }
else if (
s->alpha_type == SHQ_DCT_ALPHA) {
379 /* Alpha encoded exactly like luma. */
388 dest_a += 16;
389 }
390 }
391 }
392

/*
 * Condition: the mode is not 4:4:4, the width is not a multiple of 16, and
 * this is slice 3. The statement it guards is not visible in this excerpt.
 */
393 if (
s->subsampling != SHQ_SUBSAMPLING_444 && (
frame->width & 15) && slice_number == 3)
395

396 return 0;
397 }
398
400 {
402 (void)threadnr;
403
405 }
406
408 {
/*
 * Job numbers are split field-major: jobnr / 4 selects the field and
 * jobnr % 4 selects the slice within it, i.e. four slice jobs per field.
 * NOTE(review): the signature and both branch bodies are missing from this
 * excerpt.
 */
410 int field_number = jobnr / 4;
411 int slice_number = jobnr % 4;
/* threadnr is deliberately unused. */
412 (void)threadnr;
413

414 if (field_number == 0)
416 else
418 }
419
421 {
424 }
425
428 {
/*
 * NOTE(review): the signature, several declarations and every statement
 * guarded by the ifs below are missing from this excerpt. Only the visible
 * conditions and assignments are described.
 */
430 const uint8_t *buf = avpkt->
data;
431 int buf_size = avpkt->
size;
434

/*
 * Sanity conditions on the input: a packet shorter than 4 bytes, a width
 * below 8 or not a multiple of 8, or a packet smaller than
 * width * height / 64 / 4 bytes.
 */
435 if (buf_size < 4 || avctx->width < 8 || avctx->
width % 8 != 0)
437 if (buf_size < avctx->
width*avctx->
height / 64 / 4)
439

443 }
444

447

449

/*
 * The 24-bit value read at byte offset 1 is kept as the second field's
 * offset and is tested against buf_size - 3.
 */
450 s->second_field_offset =
AV_RL24(buf + 1);
451 if (
s->second_field_offset >= buf_size - 3) {
453 }
454

457

460 }
461

463

464 if (
s->second_field_offset == 4 ||
s->second_field_offset == (buf_size-4)) {
465 /*
466 * Overlapping first and second fields is used to signal
467 * encoding only a single field. In this case, "height"
468 * is ambiguous; it could mean either the height of the
469 * frame as a whole, or of the field. The former would make
470 * more sense for compatibility with legacy decoders,
471 * but this matches the convention used in NDI, which is
472 * the primary user of this trick.
473 */
476 } else {
479 }
480

/* Signal that a frame was produced and report the whole packet as consumed. */
481 *got_frame = 1;
482 return buf_size;
483 }
484
485 /*
486 * Alpha VLC. Run and level are independently coded, and would be
487 * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
488 * bother with combining them into one table.
489 */
491 {
/*
 * Table sizes follow from the code families built below:
 *   run:   1 ("0") + 4 ("10xx") + 128 ("111xxxxxxx") + 1 ("110") = 134
 *   level: 2 signs * (1 ("1s") + 4 ("01sxx")) + 256 ("00xxxxxxxx") = 266
 *
 * Code words are stored with the first bit of the written pattern in the
 * least-significant position; for example, "10xx" is built as (i << 2) | 1
 * and "111xxxxxxx" as (i << 3) | 7.
 *
 * NOTE(review): the signature, several assignments and the table-building
 * calls are missing from this excerpt.
 */
492 uint16_t run_code[134], level_code[266];
493 uint8_t
run_bits[134], level_bits[266];
494 int16_t run_symbols[134], level_symbols[266];
496

497 /* Initialize VLC for alpha run. */
499

500 /* 0 -> 0. */
503 run_symbols[
entry] = 0;
505

506 /* 10xx -> xx plus 1. */
507 for (
i = 0;
i < 4; ++
i) {
508 run_code[
entry] = (
i << 2) | 1;
510 run_symbols[
entry] =
i + 1;
512 }
513

514 /* 111xxxxxxx -> xxxxxxx. */
515 for (
i = 0;
i < 128; ++
i) {
516 run_code[
entry] = (
i << 3) | 7;
520 }
521

522 /* 110 -> EOB. */
/* The end-of-block code is given the symbol -1. */
525 run_symbols[
entry] = -1;
527

529

533 run_code, 2, 2,
535

536 /* Initialize VLC for alpha level. */
538

/* Both code families below are emitted twice, once per sign bit value. */
539 for (sign = 0; sign <= 1; ++sign) {
540 /* 1s -> -1 or +1 (depending on sign bit). */
541 level_code[
entry] = (sign << 1) | 1;
542 level_bits[
entry] = 2;
543 level_symbols[
entry] = sign ? -1 : 1;
545

546 /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
547 for (
i = 0;
i < 4; ++
i) {
548 level_code[
entry] = (
i << 3) | (sign << 2) | 2;
549 level_bits[
entry] = 5;
550 level_symbols[
entry] = sign ? -(
i + 2) : (
i + 2);
552 }
553 }
554

555 /*
556 * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
557 * here that would better be encoded in other ways (e.g. 0 would be
558 * encoded by increasing run, and +/- 1 would be encoded with a
559 * shorter code), but it doesn't hurt to allow everything.
560 */
561 for (
i = 0;
i < 256; ++
i) {
562 level_code[
entry] =
i << 2;
563 level_bits[
entry] = 10;
566 }
567

569

572 level_bits, 1, 1,
573 level_code, 2, 2,
575 }
576
578 {
579 /* Exactly the same as MPEG-2, except for a little-endian reader. */
588
592
594 }
595
597 {
/*
 * NOTE(review): the signature, the setup calls, the switch header and one
 * line in every case are missing from this excerpt. Only the visible
 * assignments are described.
 */
601

605

609 s->idsp.idct_permutation);
610

/*
 * Tag -> (chroma subsampling, alpha coding) as set by the cases below:
 *   SHQ0: 4:2:0, none      SHQ1: 4:2:0, RLE
 *   SHQ2: 4:2:2, none      SHQ3: 4:2:2, RLE
 *   SHQ4: 4:4:4, none      SHQ5: 4:4:4, RLE
 *   SHQ7: 4:2:2, DCT       SHQ9: 4:4:4, DCT
 * No case for SHQ6 or SHQ8 is visible, so those tags would reach default.
 */
612 case MKTAG(
'S',
'H',
'Q',
'0'):
613 s->subsampling = SHQ_SUBSAMPLING_420;
614 s->alpha_type = SHQ_NO_ALPHA;
616 break;
617 case MKTAG(
'S',
'H',
'Q',
'1'):
618 s->subsampling = SHQ_SUBSAMPLING_420;
619 s->alpha_type = SHQ_RLE_ALPHA;
621 break;
622 case MKTAG(
'S',
'H',
'Q',
'2'):
623 s->subsampling = SHQ_SUBSAMPLING_422;
624 s->alpha_type = SHQ_NO_ALPHA;
626 break;
627 case MKTAG(
'S',
'H',
'Q',
'3'):
628 s->subsampling = SHQ_SUBSAMPLING_422;
629 s->alpha_type = SHQ_RLE_ALPHA;
631 break;
632 case MKTAG(
'S',
'H',
'Q',
'4'):
633 s->subsampling = SHQ_SUBSAMPLING_444;
634 s->alpha_type = SHQ_NO_ALPHA;
636 break;
637 case MKTAG(
'S',
'H',
'Q',
'5'):
638 s->subsampling = SHQ_SUBSAMPLING_444;
639 s->alpha_type = SHQ_RLE_ALPHA;
641 break;
642 case MKTAG(
'S',
'H',
'Q',
'7'):
643 s->subsampling = SHQ_SUBSAMPLING_422;
644 s->alpha_type = SHQ_DCT_ALPHA;
646 break;
647 case MKTAG(
'S',
'H',
'Q',
'9'):
648 s->subsampling = SHQ_SUBSAMPLING_444;
649 s->alpha_type = SHQ_DCT_ALPHA;
651 break;
652 default:
656 }
657

658 /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
661

662 return 0;
663 }
664
674 };