Go to the documentation of this file. 1 /*
2 * WAV demuxer
3 * Copyright (c) 2001, 2002 Fabrice Bellard
4 *
5 * Sony Wave64 demuxer
6 * RF64 demuxer
7 * Copyright (c) 2009 Daniel Verkamp
8 *
9 * BW64 demuxer
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28 #include <stdint.h>
29
30 #include "config_components.h"
48
65 int unaligned;
// e.g. if an odd number of bytes ID3 tag was prepended
66 int rifx;
// RIFX: integer byte order for parameters is big endian
68
69 #define OFFSET(x) offsetof(WAVDemuxContext, x)
70 #define DEC AV_OPT_FLAG_DECODING_PARAM
72 #define W64_DEMUXER_OPTIONS_OFFSET (1 * CONFIG_WAV_DEMUXER)
73 #if CONFIG_WAV_DEMUXER
75 #endif
78 };
79
81 {
82 if (CONFIG_SPDIF_DEMUXER &&
s->streams[0]->codecpar->codec_tag == 1) {
86
89 if (!buf) {
91 } else {
97 s->streams[0]->codecpar->codec_id = codec;
99 }
100 }
103 }
104 }
105
108 }
109 }
110
111 #if CONFIG_WAV_DEMUXER
112
113 static int64_t next_tag(
AVIOContext *pb, uint32_t *
tag,
int big_endian)
114 {
116 if (!big_endian) {
118 } else {
120 }
121 }
122
123 /* RIFF chunks are always at even offsets relative to where they start. */
125 {
127
129 }
130
131 /* return the size of the found tag */
133 {
136
137 for (;;) {
142 break;
143 wav_seek_tag(wav, pb,
size, SEEK_CUR);
144 }
146 }
147
149 {
150 /* check file header */
152 return 0;
153 if (!memcmp(p->
buf + 8,
"WAVE", 4)) {
154 if (!memcmp(p->
buf,
"RIFF", 4) || !memcmp(p->
buf,
"RIFX", 4))
155 /* Since the ACT demuxer has a standard WAV header at the top of
156 * its own, the returned score is decreased to avoid a probe
157 * conflict between ACT and WAV. */
159 else if ((!memcmp(p->
buf,
"RF64", 4) ||
160 !memcmp(p->
buf,
"BW64", 4)) &&
161 !memcmp(p->
buf + 12,
"ds64", 4))
163 }
164 return 0;
165 }
166
167 static void handle_stream_probing(
AVStream *st)
168 {
173 }
174 }
175
177 {
181
182 /* parse fmt header */
186 handle_stream_probing(st);
187
189
191
192 return 0;
193 }
194
196 {
199
202
206
211 if (
size != (32 + ((
version==3)?0:8) + 4*num_streams))
220
221 for (
i = 0;
i < num_streams;
i++) {
224 }
228
231
233
237
238 return 0;
239 }
240
242 int length)
243 {
246
250
252
255
256 return 0;
257 }
258
260 {
261 char temp[131], *coding_history;
263 uint64_t time_reference;
264 int64_t umid_parts[8], umid_mask = 0;
265
266 if ((
ret = wav_parse_bext_string(
s,
"description", 256)) < 0 ||
267 (
ret = wav_parse_bext_string(
s,
"originator", 32)) < 0 ||
268 (
ret = wav_parse_bext_string(
s,
"originator_reference", 32)) < 0 ||
269 (
ret = wav_parse_bext_string(
s,
"origination_date", 10)) < 0 ||
270 (
ret = wav_parse_bext_string(
s,
"origination_time", 8)) < 0)
272
277
278 /* check if version is >= 1, in which case an UMID may be present */
280 for (x = 0; x < 8; x++)
281 umid_mask |= umid_parts[x] =
avio_rb64(
s->pb);
282
283 if (umid_mask) {
284 /* the string formatting below is per SMPTE 330M-2004 Annex C */
285 if (umid_parts[4] == 0 && umid_parts[5] == 0 &&
286 umid_parts[6] == 0 && umid_parts[7] == 0) {
287 /* basic UMID */
289 "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
290 umid_parts[0], umid_parts[1],
291 umid_parts[2], umid_parts[3]);
292 } else {
293 /* extended UMID */
295 "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64
296 "%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64,
297 umid_parts[0], umid_parts[1],
298 umid_parts[2], umid_parts[3],
299 umid_parts[4], umid_parts[5],
300 umid_parts[6], umid_parts[7]);
301 }
302
305 }
306
308 } else
310
312 /* CodingHistory present */
314
317
321 }
322
323 coding_history[
size] = 0;
327 }
328
329 return 0;
330 }
331
333 { "description", "comment" },
334 { "originator", "encoded_by" },
335 { "origination_date", "date" },
336 { "origination_time", "creation_time" },
337 { 0 },
338 };
339
340 /* wav input */
342 {
344 int64_t sample_count = 0;
345 int rf64 = 0, bw64 = 0;
350 int ret, got_fmt = 0, got_xma2 = 0;
351 int64_t next_tag_ofs, data_ofs = -1;
352
354
356
357 /* read chunk ID */
360 case MKTAG(
'R',
'I',
'F',
'F'):
361 break;
362 case MKTAG(
'R',
'I',
'F',
'X'):
363 wav->rifx = 1;
364 break;
365 case MKTAG(
'R',
'F',
'6',
'4'):
366 rf64 = 1;
367 break;
368 case MKTAG(
'B',
'W',
'6',
'4'):
369 bw64 = 1;
370 break;
371 default:
375 }
376
377 /* read chunk size */
379
380 /* read format */
384 }
385
386 if (rf64 || bw64) {
393
396
397 if (data_size < 0 || sample_count < 0) {
399 "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n",
400 data_size, sample_count);
402 }
404
405 }
406
407 /* Create the audio stream now so that its index is always zero */
409 if (!st)
411
412 for (;;) {
416
418 break;
419
421 case MKTAG(
'f',
'm',
't',
' '):
422 /* only parse the first 'fmt ' tag found */
423 if (!got_xma2 && !got_fmt && (
ret = wav_parse_fmt_tag(
s,
size, st)) < 0) {
425 } else if (got_fmt)
427
428 got_fmt = 1;
429 break;
430 case MKTAG(
'X',
'M',
'A',
'2'):
431 /* only parse the first 'XMA2' tag found */
432 if (!got_fmt && !got_xma2 && (
ret = wav_parse_xma2_tag(
s,
size, st)) < 0) {
434 } else if (got_xma2)
436
437 got_xma2 = 1;
438 break;
439 case MKTAG(
'd',
'a',
't',
'a'):
442 "found no 'fmt ' tag before the 'data' tag\n");
444 }
445
446 if (rf64 || bw64) {
448 }
else if (
size != 0xFFFFFFFF) {
450 next_tag_ofs = wav->
data_end =
size ? next_tag_ofs : INT64_MAX;
451 } else {
453 "file may be invalid\n");
454 data_size = 0;
455 next_tag_ofs = wav->
data_end = INT64_MAX;
456 }
457
459
460 /* don't look for footer metadata if we can't seek or if we don't
461 * know where the data tag ends
462 */
464 goto break_loop;
465 break;
466 case MKTAG(
'f',
'a',
'c',
't'):
469 break;
470 case MKTAG(
'b',
'e',
'x',
't'):
473 break;
474 case MKTAG(
'S',
'M',
'V',
'0'):
478 }
479 // SMV file, a wav file with video appended.
482 goto break_loop;
483 }
487 if (!vst)
499 }
514 }
516 goto break_loop;
517 case MKTAG(
'L',
'I',
'S',
'T'):
518 case
MKTAG(
'l',
'i',
's',
't'):
522 }
524 case MKTAG(
'I',
'N',
'F',
'O'):
526 break;
527 case MKTAG(
'a',
'd',
't',
'l'):
528 if (
s->nb_chapters > 0) {
531 char cue_label[512];
532 unsigned id, sub_size;
533
535 break;
536
538 if (sub_size < 5)
539 break;
541 avio_get_str(pb, sub_size - 4, cue_label,
sizeof(cue_label));
543
544 for (
int i = 0;
i <
s->nb_chapters;
i++) {
545 if (
s->chapters[
i]->id ==
id) {
546 av_dict_set(&
s->chapters[
i]->metadata,
"title", cue_label, 0);
547 break;
548 }
549 }
550 }
551 }
552 break;
553 }
554 break;
555 case MKTAG(
'I',
'D',
'3',
' '):
556 case
MKTAG(
'i',
'd',
'3',
' '): {
559 if (id3v2_extra_meta) {
563 }
565 }
566 break;
567 case MKTAG(
'c',
'u',
'e',
' '):
571
572 if (
size >= nb_cues * 24LL + 4LL) {
573 for (
int i = 0;
i < nb_cues;
i++) {
575
578
581
584 }
585 }
586 }
587 break;
588 }
589
590 /* seek to next tag unless we know that we'll run into EOF */
592 wav_seek_tag(wav, pb, next_tag_ofs, SEEK_SET) < 0) {
593 break;
594 }
595 }
596
597 break_loop:
598 if (!got_fmt && !got_xma2) {
601 }
602
603 if (data_ofs < 0) {
606 }
607
609
610 if (data_size > (INT64_MAX>>3)) {
612 data_size = 0;
613 }
614
622 }
623
627 sample_count = 0;
628 }
629
630 /* G.729 hack (for Ticket4577)
631 * FIXME: Come up with cleaner, more general solution */
634 sample_count = 0;
635 }
636
639 && data_size
642 sample_count = (data_size << 3)
643 /
645
646 if (sample_count)
648
666 }
667
670
672
673 return 0;
674 }
675
676 /**
677 * Find chunk with w64 GUID by skipping over other chunks.
678 * @return the size of the found chunk
679 */
681 {
682 uint8_t guid[16];
684
688 if (size <= 24 || size > INT64_MAX - 8)
690 if (!memcmp(guid, guid1, 16))
693 }
695 }
696
698 {
703
704 if (CONFIG_SPDIF_DEMUXER && wav->
spdif == 1)
706
710 smv_retry:
713
715 /*We always return a video frame first to get the pixel format first*/
720 }
730 goto smv_out;
731 }
735 goto smv_out;
740
742 smv_out:
746 goto smv_retry;
747 }
749 }
750 }
751
756 if (CONFIG_W64_DEMUXER && wav->
w64)
758 else
759 left = find_tag(wav,
s->pb,
MKTAG(
'd',
'a',
't',
'a'));
763 goto smv_retry;
765 }
767 }
768
771 if (size < st->codecpar->block_align)
774 }
780
782 }
783
785 int stream_index, int64_t timestamp,
int flags)
786 {
791
792 if (stream_index != 0 && (!vst || stream_index != vst->
index))
795 int64_t smv_timestamp = timestamp;
796 if (stream_index == 0)
798 else
802 }
803 }
804
811 /* use generic seeking with dynamically generated indexes */
812 return -1;
813 default:
814 break;
815 }
817 }
818
819 static const AVClass wav_demuxer_class = {
824 };
835 .priv_class = &wav_demuxer_class,
836 };
837 #endif /* CONFIG_WAV_DEMUXER */
838
839 #if CONFIG_W64_DEMUXER
841 {
843 return 0;
847 else
848 return 0;
849 }
850
852 {
853 int64_t
size, data_ofs = 0;
857 uint8_t guid[16];
859
863
864 /* riff + wave + fmt + sizes */
865 if (
avio_rl64(pb) < 16 + 8 + 16 + 8 + 16 + 8)
867
872 }
873
875
877 if (!st)
879
882 break;
886
888 /* subtract chunk header size - normal wav file doesn't count it */
893
897
904
907 break;
908
911 int64_t start, end, cur;
912 uint32_t count, chunk_size,
i;
914
918
919 for (
i = 0;
i < count;
i++) {
920 char chunk_key[5], *
value;
921
923 break;
924
925 chunk_key[4] = 0;
928 if (chunk_size == UINT32_MAX || (filesize >= 0 && chunk_size > filesize))
930
934
939 }
941
943 }
944
946 } else {
949 }
950 }
951
952 if (!data_ofs)
954
957
958 handle_stream_probing(st);
960
962
964
965 return 0;
966 }
967
968 static const AVClass w64_demuxer_class = {
973 };
974
985 .priv_class = &w64_demuxer_class,
986 };
987 #endif /* CONFIG_W64_DEMUXER */
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... ..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left