1 /*
2 * Copyright (c) 2003 Fabrice Bellard
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * ID3v2 header parser
24 *
25 * Specifications available at:
26 * http://id3.org/Developer_Information
27 */
28
29 #include "config.h"
30
31 #if CONFIG_ZLIB
32 #include <zlib.h>
33 #endif
34
44
46 { "TALB", "album" },
47 { "TCOM", "composer" },
48 { "TCON", "genre" },
49 { "TCOP", "copyright" },
50 { "TENC", "encoded_by" },
51 { "TIT2", "title" },
52 { "TLAN", "language" },
53 { "TPE1", "artist" },
54 { "TPE2", "album_artist" },
55 { "TPE3", "performer" },
56 { "TPOS", "disc" },
57 { "TPUB", "publisher" },
58 { "TRCK", "track" },
59 { "TSSE", "encoder" },
60 { "USLT", "lyrics" },
61 { 0 }
62 };
63
65 { "TCMP", "compilation" },
66 { "TDRC", "date" },
67 { "TDRL", "date" },
68 { "TDEN", "creation_time" },
69 { "TSOA", "album-sort" },
70 { "TSOP", "artist-sort" },
71 { "TSOT", "title-sort" },
72 { "TIT1", "grouping" },
73 { 0 }
74 };
75
77 { "TAL", "album" },
78 { "TCO", "genre" },
79 { "TCP", "compilation" },
80 { "TT2", "title" },
81 { "TEN", "encoded_by" },
82 { "TP1", "artist" },
83 { "TP2", "album_artist" },
84 { "TP3", "performer" },
85 { "TRK", "track" },
86 { 0 }
87 };
88
90 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT",
91 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED",
92 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3",
93 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE",
94 { 0 },
95 };
96
98 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
99 "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
100 { 0 },
101 };
102
104 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",
105 { 0 },
106 };
107
109 "Other",
110 "32x32 pixels 'file icon'",
111 "Other file icon",
112 "Cover (front)",
113 "Cover (back)",
114 "Leaflet page",
115 "Media (e.g. label side of CD)",
116 "Lead artist/lead performer/soloist",
117 "Artist/performer",
118 "Conductor",
119 "Band/Orchestra",
120 "Composer",
121 "Lyricist/text writer",
122 "Recording Location",
123 "During recording",
124 "During performance",
125 "Movie/video screen capture",
126 "A bright coloured fish",
127 "Illustration",
128 "Band/artist logotype",
129 "Publisher/Studio logotype",
130 };
131
142 };
143
145 {
/* Header probe: the result is nonzero only when the first three bytes of buf
 * equal magic[0..2], bytes 3 and 4 are both different from 0xff, and bytes
 * 6..9 all have their top bit clear. buf[5] is not examined here.
 * The expression reads buf[0..9] and magic[0..2]. */
146 return buf[0] == magic[0] &&
147 buf[1] == magic[1] &&
148 buf[2] == magic[2] &&
/* presumably the two version bytes, neither may be 0xff -- TODO confirm */
149 buf[3] != 0xff &&
150 buf[4] != 0xff &&
/* elsewhere in this file bytes 6..9 are combined as four 7-bit groups
 * (mask 0x7f), so a set bit 7 in any of them is rejected */
151 (buf[6] & 0x80) == 0 &&
152 (buf[7] & 0x80) == 0 &&
153 (buf[8] & 0x80) == 0 &&
154 (buf[9] & 0x80) == 0;
155 }
156
158 {
159 int len = ((buf[6] & 0x7f) << 21) +
160 ((buf[7] & 0x7f) << 14) +
161 ((buf[8] & 0x7f) << 7) +
162 (buf[9] & 0x7f) +
164 if (buf[5] & 0x10)
167 }
168
170 {
171 int v = 0;
174 return v;
175 }
176
178 {
/* Keep the low 7 bits of each 8-bit group of size and pack the four groups
 * next to each other: bits 0-6 stay in place, bits 8-14 move down by 1,
 * bits 16-22 by 2 and bits 24-30 by 3, giving a 28-bit result.
 * Bits 7, 15, 23 and 31 of size are discarded. */
179 return (((
size) & (0x7f << 0)) >> 0) +
180 (((
size) & (0x7f << 8)) >> 1) +
181 (((
size) & (0x7f << 16)) >> 2) +
182 (((
size) & (0x7f << 24)) >> 3);
183 }
184
185 /* No real verification, only check that the tag consists of
186 * a combination of capital alpha-numerical characters */
188 {
190 return 0;
191
193 if ((buf[
len] <
'A' ||
197 return 0;
198
199 return 1;
200 }
201
202 /**
203 * Return 1 if the tag of length len at the given offset is valid, 0 if not, -1 on error
204 */
206 {
208
212 return -1;
214 return 1;
215
216 return 0;
217 }
218
219 /**
220 * Free GEOB type extra metadata.
221 */
223 {
229 }
230
231 /**
232 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
233 * always null terminated. Stop reading when either *maxread bytes are read from
234 * pb or U+0000 character is found.
235 *
236 * @param dst Pointer where the address of the buffer with the decoded bytes is
237 * stored. Buffer must be freed by caller.
238 * @param maxread Pointer to maximum number of bytes to read from the
239 * AVIOContext. After execution the value is decremented by the number of bytes
240 * actually read.
241 * @returns 0 if no error occurred, dst is uninitialized on error
242 */
244 uint8_t **dst, int *maxread)
245 {
248 uint32_t ch = 1;
249 int left = *maxread, dynsize;
252
256 }
257
258 switch (encoding) {
264 }
265 break;
266
268 if ((
left -= 2) < 0) {
273 }
275 case 0xfffe:
277 case 0xfeff:
278 break;
279 default:
285 }
286 // fall-through
287
289 while ((
left > 1) && ch) {
292 }
294 left += 2;
/* did not read last char from pb */
295 break;
296
302 }
303 break;
304 default:
306 }
307
308 if (ch)
310
312 if (dynsize <= 0) {
315 }
317
318 return 0;
319 }
320
321 /**
322 * Parse a text tag.
323 */
326 {
327 uint8_t *dst;
329 unsigned genre;
330
331 if (taglen < 1)
332 return;
333
335 taglen--; /* account for encoding type byte */
336
337 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
339 return;
340 }
341
342 if (!(strcmp(
key,
"TCON") && strcmp(
key,
"TCO")) &&
343 (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
347 }
else if (!(strcmp(
key,
"TXXX") && strcmp(
key,
"TXX"))) {
348 /* dst now contains the key, need to get value */
350 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
353 return;
354 }
356 } else if (!*dst)
358
359 if (dst)
361 }
362
365 {
/* Reads a lyrics frame: a 3-byte language code followed by two encoded
 * strings (content descriptor, then lyrics text), and builds a metadata key
 * of the form "lyrics-<descriptor>-<lang>"; the "<descriptor>-" part is
 * left out when the descriptor is empty.
 * NOTE(review): several statements of this function (the bodies of the
 * guards below, the reads that fill encoding and lang, and the cleanup
 * path) are not visible in this view and are left untouched. */
366 uint8_t lang[4];
367 uint8_t *descriptor =
NULL;
// 'Content descriptor'
368 uint8_t *text;
370 int encoding;
371 int ok = 0;
372
/* presumably 1 encoding byte + 3 language bytes are required -- confirm */
373 if (taglen < 4)
375
377 taglen--;
378
/* lang is formatted with %s below, so it has to be NUL-terminated */
381 lang[3] = '\0';
382 taglen -= 3;
383
/* both strings are decoded with the same encoding; a negative remaining
 * length after either call is treated like a decode failure */
384 if (
decode_str(
s, pb, encoding, &descriptor, &taglen) < 0 || taglen < 0)
386
387 if (
decode_str(
s, pb, encoding, &text, &taglen) < 0 || taglen < 0)
389
390 // FFmpeg does not support hierarchical metadata, so concatenate the keys.
391 key =
av_asprintf(
"lyrics-%s%s%s", descriptor[0] ? (
char *)descriptor :
"",
392 descriptor[0] ? "-" : "",
393 lang);
397 }
398
401
402 ok = 1;
404 if (!ok)
407 }
408
409 /**
410 * Parse a comment tag.
411 */
414 {
415 const char *
key =
"comment";
416 uint8_t *dst;
419
420 if (taglen < 4)
421 return;
422
425 taglen -= 4;
426
427 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
429 return;
430 }
431
432 if (dst && !*dst)
434
435 if (dst) {
436 key = (
const char *) dst;
438 }
439
440 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
444 return;
445 }
446
447 if (dst)
449 }
450
454
456 {
458 list->tail->next = new_elem;
459 else
460 list->head = new_elem;
461 list->tail = new_elem;
462 }
463
464 /**
465 * Parse GEOB tag into an ID3v2ExtraMetaGEOB struct.
466 */
469 {
472 char encoding;
474
475 if (taglen < 1)
476 return;
477
479 if (!new_extra) {
482 return;
483 }
484
486
487 /* read encoding type byte */
489 taglen--;
490
491 /* read MIME type (always ISO-8859) */
493 &taglen) < 0 ||
494 taglen <= 0)
496
497 /* read file name */
499 taglen <= 0)
501
502 /* read content description */
504 taglen < 0)
506
507 if (taglen) {
508 /* save encapsulated binary data */
510 if (!geob_data->
data) {
513 }
516 "Error reading GEOB frame, data truncated.\n");
518 } else {
521 }
522
523 /* add data to the list */
524 new_extra->
tag =
"GEOB";
526
527 return;
528
533 return;
534 }
535
537 {
/* Advance past every leading ASCII digit ('0'..'9'); the result is nonzero
 * only if that walk stops on the terminating NUL, i.e. the string holds
 * nothing but digits. An empty string therefore also yields 1. */
538 while (*str >= '0' && *str <= '9')
539 str++;
540 return !*str;
541 }
542
544 {
548 return t;
550 }
551
553 {
555 char date[17] = { 0 }; // YYYY-MM-DD hh:mm
556
559 return;
563
570
574 snprintf(date + 10,
sizeof(date) - 10,
578
580 if (date[0])
582 }
583
585 {
589 }
590
592 {
593 size_t len = strlen(buf);
594 while (
len > 0 && buf[
len - 1] ==
' ')
596 }
597
600 {
601 int enc, pic_type;
602 char mimetype[64] = {0};
608
609 if (taglen <= 4 || (!isv34 && taglen <= 6))
611
613 if (!new_extra)
615
617
619 taglen--;
620
621 /* mimetype */
622 if (isv34) {
624 if (ret < 0 || ret >= taglen)
627 } else {
630
631 mimetype[3] = 0;
632 taglen -= 3;
633 }
634
638 break;
639 }
640 mime++;
641 }
644 "Unknown attached picture mimetype: %s, skipping.\n", mimetype);
646 }
648
649 /* picture type */
651 taglen--;
654 pic_type);
655 pic_type = 0;
656 }
658
659 /* description and picture data */
662 "Error decoding attached picture description.\n");
664 }
665
670
671 new_extra->
tag =
"APIC";
672
673 // The description must be unique, and some ID3v2 tag writers add spaces
674 // to write several APIC entries with the same description.
677
678 return;
679
681 if (apic)
685 }
686
688 {
692 }
693
696 {
697 int taglen;
701
703 if (!new_extra)
704 return;
705
707
710
713
717
726 if (taglen < 0 || taglen >
len)
730 else
733 }
734
737
738 new_extra->
tag =
"CHAP";
740
741 return;
742
746 }
747
749 {
753 }
754
757 {
760
762 if (!meta)
763 return;
764
766
769
773
775
778
781
782 return;
783
787 }
788
794 int isv34);
797
804 };
805
806 /**
807 * Get the corresponding ID3v2EMFunc struct for a tag.
808 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
809 * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
810 */
812 {
818 (isv34 ? 4 : 3)))
821 }
823 }
824
828 {
829 int isv34, unsync;
830 unsigned tlen;
833 int taghdrlen;
834 const char *reason =
NULL;
838 int buffer_size = 0;
840 unsigned char *uncompressed_buffer =
NULL;
841 av_unused int uncompressed_buffer_size = 0;
842 const char *comm_frame;
843
844 if (end > INT64_MAX -
len - 10)
845 return;
847
849
851 case 2:
853 reason = "compression";
855 }
856 isv34 = 0;
857 taghdrlen = 6;
858 comm_frame = "COM";
859 break;
860
861 case 3:
862 case 4:
863 isv34 = 1;
864 taghdrlen = 10;
865 comm_frame = "COMM";
866 break;
867
868 default:
869 reason = "version";
871 }
872
873 unsync =
flags & 0x80;
874
875 if (isv34 &&
flags & 0x40) {
/* Extended header present, just skip over it */
878 /* In v2.4 the length includes the length field we just read. */
879 extlen -= 4;
880
881 if (extlen < 0) {
882 reason = "invalid extended header length";
884 }
888 reason = "extended header too long.";
890 }
891 }
892
893 while (
len >= taghdrlen) {
894 unsigned int tflags = 0;
895 int tunsync = 0;
896 int tcomp = 0;
897 int tencr = 0;
899
900 if (isv34) {
902 break;
906 } else {
907 /* some encoders incorrectly use v3 sizes instead of syncsafe ones
908 * so check the next tag to see which one to use */
910 if (tlen > 0x7f) {
913
915 break;
916
919 else if (
check_tag(pb, cur + 2 + tlen, 4) != 1)
920 break;
922 } else
924 }
925 }
928 } else {
930 break;
933 }
934 if (tlen > (1<<28))
935 break;
936 len -= taghdrlen + tlen;
937
939 break;
940
942
943 if (!tlen) {
947 continue;
948 }
949
951 if (tlen < 4)
952 break;
954 tlen -= 4;
955 } else
956 dlen = tlen;
957
960
961 /* skip encrypted tags and, if no zlib, compressed tags */
962 if (tencr || (!CONFIG_ZLIB && tcomp)) {
964 if (!tcomp)
966 else if (!tencr)
968 else
969 type =
"encrypted and compressed";
970
973 /* check for text tag or supported special meta tag */
974 }
else if (
tag[0] ==
'T' ||
975 !memcmp(
tag,
"USLT", 4) ||
976 !strcmp(
tag, comm_frame) ||
977 (extra_meta &&
979 pbx = pb;
980
981 if (unsync || tunsync || tcomp) {
986 }
987 }
988 if (unsync || tunsync) {
991 uint8_t *end = t + tlen;
992
996 }
997
998 while (t != end) {
1000 if (t != end && t[-1] == 0xff && !t[0])
1001 t++;
1002 }
1003
1006 pbx = &pb_local.
pub;
// read from sync buffer
1007 }
1008
1009 #if CONFIG_ZLIB
1010 if (tcomp) {
1011 int err;
1012
1014
1015 if (tlen <= 0)
1017 if (dlen / 32768 > tlen)
1019
1020 av_fast_malloc(&uncompressed_buffer, &uncompressed_buffer_size, dlen);
1021 if (!uncompressed_buffer) {
1024 }
1025
1026 if (!(unsync || tunsync)) {
1028 if (err < 0) {
1031 }
1032 tlen = err;
1033 }
1034
1035 err = uncompress(uncompressed_buffer, &dlen,
buffer, tlen);
1036 if (err != Z_OK) {
1039 }
1041 tlen = dlen;
1042 pbx = &pb_local.
pub;
// read from sync buffer
1043 }
1044 #endif
1046 /* parse text tag */
1048 else if (!memcmp(
tag,
"USLT", 4))
1050 else if (!strcmp(
tag, comm_frame))
1052 else
1053 /* parse special meta tag */
1054 extra_func->
read(
s, pbx, tlen,
tag, extra_meta, isv34);
1055 }
else if (!
tag[0]) {
1059 break;
1060 }
1061 /* Skip to end of tag */
1064 }
1065
1066 /* Footer present, always 10 bytes, skip over it */
1068 end += 10;
1069
1071 if (reason)
1077 return;
1078 }
1079
1083 {
1087 int found_header;
1089
1090 if (extra_metap)
1091 *extra_metap =
NULL;
1092
1094 return;
1095
1097 do {
1098 /* save the current offset in case there's nothing to read/skip */
1102 break;
1103 }
1104
1110 break;
1111 }
1113 if (found_header) {
1114 /* parse ID3v2 header */
1115 len = ((buf[6] & 0x7f) << 21) |
1116 ((buf[7] & 0x7f) << 14) |
1117 ((buf[8] & 0x7f) << 7) |
1118 (buf[9] & 0x7f);
1120 extra_metap ? &extra_meta :
NULL);
1121 } else {
1123 }
1124 } while (found_header);
1129 if (extra_metap)
1130 *extra_metap = extra_meta.
head;
1131 }
1132
1135 {
1137 }
1138
1141 {
1143 }
1144
1146 {
1149
1150 while (current) {
1153 next = current->
next;
1155 current = next;
1156 }
1157
1159 }
1160
1162 {
1164
1165 for (cur = extra_meta; cur; cur = cur->
next) {
1169
1170 if (strcmp(cur->
tag,
"APIC"))
1171 continue;
1173
1177 st =
s->streams[
s->nb_streams - 1];
1179
1182
1185
1187 }
1188
1189 return 0;
1190 }
1191
1193 {
1196
1197 for (
unsigned i = 0; cur; cur = cur->
next) {
1200
1201 if (strcmp(cur->
tag,
"CHAP"))
1202 continue;
1203
1207 if (!chapter)
1208 continue;
1209
1212 }
1213
1214 return 0;
1215 }
1216
1218 {
1221
1222 for (cur = extra_meta; cur; cur = cur->
next) {
1223 if (!strcmp(cur->
tag,
"PRIV")) {
1225 AVBPrint bprint;
1226 char *escaped, *
key;
1228
1231 }
1232
1234
1236 if (priv->
data[
i] < 32 || priv->
data[
i] > 126 || priv->
data[
i] ==
'\\') {
1238 } else {
1240 }
1241 }
1242
1246 }
1247
1250 }
1251 }
1252 }
1253
1254 return 0;
1255 }
1256
1258 {
1260 }