1 /*
2 * Copyright (c) 2003 Fabrice Bellard
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * ID3v2 header parser
24 *
25 * Specifications available at:
26 * http://id3.org/Developer_Information
27 */
28
29 #include "config.h"
30
31 #if CONFIG_ZLIB
32 #include <zlib.h>
33 #endif
34
44
46 { "TALB", "album" },
47 { "TCOM", "composer" },
48 { "TCON", "genre" },
49 { "TCOP", "copyright" },
50 { "TENC", "encoded_by" },
51 { "TIT2", "title" },
52 { "TLAN", "language" },
53 { "TPE1", "artist" },
54 { "TPE2", "album_artist" },
55 { "TPE3", "performer" },
56 { "TPOS", "disc" },
57 { "TPUB", "publisher" },
58 { "TRCK", "track" },
59 { "TSSE", "encoder" },
60 { "USLT", "lyrics" },
61 { 0 }
62 };
63
65 { "TCMP", "compilation" },
66 { "TDRC", "date" },
67 { "TDRL", "date" },
68 { "TDEN", "creation_time" },
69 { "TSOA", "album-sort" },
70 { "TSOP", "artist-sort" },
71 { "TSOT", "title-sort" },
72 { "TIT1", "grouping" },
73 { 0 }
74 };
75
77 { "TAL", "album" },
78 { "TCO", "genre" },
79 { "TCP", "compilation" },
80 { "TT2", "title" },
81 { "TEN", "encoded_by" },
82 { "TP1", "artist" },
83 { "TP2", "album_artist" },
84 { "TP3", "performer" },
85 { "TRK", "track" },
86 { 0 }
87 };
88
90 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT",
91 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED",
92 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3",
93 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE",
94 { 0 },
95 };
96
98 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
99 "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
100 { 0 },
101 };
102
104 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",
105 { 0 },
106 };
107
109 "Other",
110 "32x32 pixels 'file icon'",
111 "Other file icon",
112 "Cover (front)",
113 "Cover (back)",
114 "Leaflet page",
115 "Media (e.g. label side of CD)",
116 "Lead artist/lead performer/soloist",
117 "Artist/performer",
118 "Conductor",
119 "Band/Orchestra",
120 "Composer",
121 "Lyricist/text writer",
122 "Recording Location",
123 "During recording",
124 "During performance",
125 "Movie/video screen capture",
126 "A bright coloured fish",
127 "Illustration",
128 "Band/artist logotype",
129 "Publisher/Studio logotype",
130 };
131
142 };
143
145 {
146 return buf[0] == magic[0] &&
147 buf[1] == magic[1] &&
148 buf[2] == magic[2] &&
149 buf[3] != 0xff &&
150 buf[4] != 0xff &&
151 (buf[6] & 0x80) == 0 &&
152 (buf[7] & 0x80) == 0 &&
153 (buf[8] & 0x80) == 0 &&
154 (buf[9] & 0x80) == 0;
155 }
156
158 {
159 int len = ((buf[6] & 0x7f) << 21) +
160 ((buf[7] & 0x7f) << 14) +
161 ((buf[8] & 0x7f) << 7) +
162 (buf[9] & 0x7f) +
164 if (buf[5] & 0x10)
167 }
168
170 {
171 int v = 0;
174 return v;
175 }
176
178 {
179 return (((
size) & (0x7f << 0)) >> 0) +
180 (((
size) & (0x7f << 8)) >> 1) +
181 (((
size) & (0x7f << 16)) >> 2) +
182 (((
size) & (0x7f << 24)) >> 3);
183 }
184
185 /* No real verification, only check that the tag consists of
186 * a combination of capital alpha-numerical characters */
188 {
190 return 0;
191
193 if ((buf[
len] <
'A' ||
197 return 0;
198
199 return 1;
200 }
201
202 /**
203 * Return 1 if the tag of length len at the given offset is valid, 0 if not, -1 on error
204 */
206 {
208
212 return -1;
214 return 1;
215
216 return 0;
217 }
218
219 /**
220 * Free GEOB type extra metadata.
221 */
223 {
229 }
230
231 /**
232 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
233 * always null terminated. Stop reading when either *maxread bytes have been read
234 * from pb or a U+0000 character is found.
235 *
236 * @param dst Pointer where the address of the buffer with the decoded bytes is
237 * stored. Buffer must be freed by caller.
238 * @param maxread Pointer to maximum number of bytes to read from the
239 * AVIOContext. After execution the value is decremented by the number of bytes
240 * actually read.
241 * @returns 0 if no error occurred, dst is uninitialized on error
242 */
244 uint8_t **dst, int *maxread)
245 {
248 uint32_t ch = 1;
252
256 }
257
258 switch (encoding) {
264 }
265 break;
266
268 if ((
left -= 2) < 0) {
273 }
275 case 0xfffe:
277 case 0xfeff:
278 break;
279 default:
285 }
286 // fall-through
287
289 while ((
left > 1) && ch) {
292 }
294 left += 2;
/* did not read last char from pb */
295 break;
296
302 }
303 break;
304 default:
306 }
307
308 if (ch)
310
313
314 return 0;
315 }
316
317 /**
318 * Parse a text tag.
319 */
322 {
323 uint8_t *dst;
325 unsigned genre;
326
327 if (taglen < 1)
328 return;
329
331 taglen--; /* account for encoding type byte */
332
333 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
335 return;
336 }
337
338 if (!(strcmp(
key,
"TCON") && strcmp(
key,
"TCO")) &&
339 (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
343 }
else if (!(strcmp(
key,
"TXXX") && strcmp(
key,
"TXX"))) {
344 /* dst now contains the key, need to get value */
346 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
349 return;
350 }
352 } else if (!*dst)
354
355 if (dst)
357 }
358
361 {
362 uint8_t lang[4];
363 uint8_t *descriptor =
NULL;
// 'Content descriptor'
364 uint8_t *text;
366 int encoding;
367 int ok = 0;
368
369 if (taglen < 1)
371
373 taglen--;
374
377 lang[3] = '0円';
378 taglen -= 3;
379
380 if (
decode_str(
s, pb, encoding, &descriptor, &taglen) < 0)
382
383 if (
decode_str(
s, pb, encoding, &text, &taglen) < 0)
385
386 // FFmpeg does not support hierarchical metadata, so concatenate the keys.
387 key =
av_asprintf(
"lyrics-%s%s%s", descriptor[0] ? (
char *)descriptor :
"",
388 descriptor[0] ? "-" : "",
389 lang);
393 }
394
397
398 ok = 1;
400 if (!ok)
403 }
404
405 /**
406 * Parse a comment tag.
407 */
410 {
411 const char *
key =
"comment";
412 uint8_t *dst;
415
416 if (taglen < 4)
417 return;
418
421 taglen -= 4;
422
423 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
425 return;
426 }
427
428 if (dst && !*dst)
430
431 if (dst) {
432 key = (
const char *) dst;
434 }
435
436 if (
decode_str(
s, pb, encoding, &dst, &taglen) < 0) {
440 return;
441 }
442
443 if (dst)
445 }
446
450
452 {
454 list->tail->next = new_elem;
455 else
456 list->head = new_elem;
457 list->tail = new_elem;
458 }
459
460 /**
461 * Parse GEOB tag into an ID3v2ExtraMetaGEOB struct.
462 */
465 {
468 char encoding;
470
471 if (taglen < 1)
472 return;
473
475 if (!new_extra) {
478 return;
479 }
480
482
483 /* read encoding type byte */
485 taglen--;
486
487 /* read MIME type (always ISO-8859) */
489 &taglen) < 0 ||
490 taglen <= 0)
492
493 /* read file name */
495 taglen <= 0)
497
498 /* read content description */
500 taglen < 0)
502
503 if (taglen) {
504 /* save encapsulated binary data */
506 if (!geob_data->
data) {
509 }
512 "Error reading GEOB frame, data truncated.\n");
514 } else {
517 }
518
519 /* add data to the list */
520 new_extra->
tag =
"GEOB";
522
523 return;
524
529 return;
530 }
531
533 {
534 while (*
str >=
'0' && *
str <=
'9')
537 }
538
540 {
544 return t;
546 }
547
549 {
551 char date[17] = { 0 }; // YYYY-MM-DD hh:mm
552
555 return;
559
566
570 snprintf(date + 10,
sizeof(date) - 10,
574
576 if (date[0])
578 }
579
581 {
585 }
586
588 {
589 size_t len = strlen(buf);
590 while (
len > 0 && buf[
len - 1] ==
' ')
592 }
593
596 {
597 int enc, pic_type;
598 char mimetype[64] = {0};
604
605 if (taglen <= 4 || (!isv34 && taglen <= 6))
607
609 if (!new_extra)
611
613
615 taglen--;
616
617 /* mimetype */
618 if (isv34) {
620 if (ret < 0 || ret >= taglen)
623 } else {
626
627 mimetype[3] = 0;
628 taglen -= 3;
629 }
630
634 break;
635 }
636 mime++;
637 }
640 "Unknown attached picture mimetype: %s, skipping.\n", mimetype);
642 }
644
645 /* picture type */
647 taglen--;
650 pic_type);
651 pic_type = 0;
652 }
654
655 /* description and picture data */
658 "Error decoding attached picture description.\n");
660 }
661
666
667 new_extra->
tag =
"APIC";
668
669 // The description must be unique, and some ID3v2 tag writers add spaces
670 // to write several APIC entries with the same description.
673
674 return;
675
677 if (apic)
681 }
682
684 {
688 }
689
692 {
693 int taglen;
697
699 if (!new_extra)
700 return;
701
703
706
709
713
722 if (taglen < 0 || taglen >
len)
726 else
729 }
730
733
734 new_extra->
tag =
"CHAP";
736
737 return;
738
742 }
743
745 {
749 }
750
753 {
756
758 if (!meta)
759 return;
760
762
765
769
771
774
777
778 return;
779
783 }
784
790 int isv34);
793
800 };
801
802 /**
803 * Get the corresponding ID3v2EMFunc struct for a tag.
804 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
805 * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
806 */
808 {
814 (isv34 ? 4 : 3)))
817 }
819 }
820
824 {
825 int isv34, unsync;
826 unsigned tlen;
829 int taghdrlen;
830 const char *reason =
NULL;
834 int buffer_size = 0;
836 unsigned char *uncompressed_buffer =
NULL;
837 av_unused int uncompressed_buffer_size = 0;
838 const char *comm_frame;
839
840 if (end > INT64_MAX -
len - 10)
841 return;
843
845
847 case 2:
849 reason = "compression";
851 }
852 isv34 = 0;
853 taghdrlen = 6;
854 comm_frame = "COM";
855 break;
856
857 case 3:
858 case 4:
859 isv34 = 1;
860 taghdrlen = 10;
861 comm_frame = "COMM";
862 break;
863
864 default:
865 reason = "version";
867 }
868
869 unsync =
flags & 0x80;
870
871 if (isv34 &&
flags & 0x40) {
/* Extended header present, just skip over it */
874 /* In v2.4 the length includes the length field we just read. */
875 extlen -= 4;
876
877 if (extlen < 0) {
878 reason = "invalid extended header length";
880 }
884 reason = "extended header too long.";
886 }
887 }
888
889 while (
len >= taghdrlen) {
890 unsigned int tflags = 0;
891 int tunsync = 0;
892 int tcomp = 0;
893 int tencr = 0;
895
896 if (isv34) {
898 break;
902 } else {
903 /* some encoders incorrectly use v3 sizes instead of syncsafe ones
904 * so check the next tag to see which one to use */
906 if (tlen > 0x7f) {
909
911 break;
912
915 else if (
check_tag(pb, cur + 2 + tlen, 4) != 1)
916 break;
918 } else
920 }
921 }
924 } else {
926 break;
929 }
930 if (tlen > (1<<28))
931 break;
932 len -= taghdrlen + tlen;
933
935 break;
936
938
939 if (!tlen) {
943 continue;
944 }
945
947 if (tlen < 4)
948 break;
950 tlen -= 4;
951 } else
952 dlen = tlen;
953
956
957 /* skip encrypted tags and, if no zlib, compressed tags */
958 if (tencr || (!CONFIG_ZLIB && tcomp)) {
960 if (!tcomp)
962 else if (!tencr)
964 else
965 type =
"encrypted and compressed";
966
969 /* check for text tag or supported special meta tag */
970 }
else if (
tag[0] ==
'T' ||
971 !memcmp(
tag,
"USLT", 4) ||
972 !strcmp(
tag, comm_frame) ||
973 (extra_meta &&
975 pbx = pb;
976
977 if (unsync || tunsync || tcomp) {
981 goto seek;
982 }
983 }
984 if (unsync || tunsync) {
987 uint8_t *end = t + tlen;
988
991 goto seek;
992 }
993
994 while (t != end) {
996 if (t != end && t[-1] == 0xff && !t[0])
997 t++;
998 }
999
1003 pbx = &pb_local.
pub;
// read from sync buffer
1004 }
1005
1006 #if CONFIG_ZLIB
1007 if (tcomp) {
1008 int err;
1009
1011
1012 if (tlen <= 0)
1013 goto seek;
1014 if (dlen / 32768 > tlen)
1015 goto seek;
1016
1017 av_fast_malloc(&uncompressed_buffer, &uncompressed_buffer_size, dlen);
1018 if (!uncompressed_buffer) {
1020 goto seek;
1021 }
1022
1023 if (!(unsync || tunsync)) {
1025 if (err < 0) {
1027 goto seek;
1028 }
1029 tlen = err;
1030 }
1031
1032 err = uncompress(uncompressed_buffer, &dlen,
buffer, tlen);
1033 if (err != Z_OK) {
1035 goto seek;
1036 }
1038 tlen = dlen;
1039 pbx = &pb_local.
pub;
// read from sync buffer
1040 }
1041 #endif
1043 /* parse text tag */
1045 else if (!memcmp(
tag,
"USLT", 4))
1047 else if (!strcmp(
tag, comm_frame))
1049 else
1050 /* parse special meta tag */
1051 extra_func->
read(
s, pbx, tlen,
tag, extra_meta, isv34);
1052 }
else if (!
tag[0]) {
1056 break;
1057 }
1058 /* Skip to end of tag */
1059 seek:
1061 }
1062
1063 /* Footer present, always 10 bytes, skip over it */
1065 end += 10;
1066
1068 if (reason)
1074 return;
1075 }
1076
1080 {
1084 int found_header;
1085 int64_t start, off;
1086
1087 if (extra_metap)
1088 *extra_metap =
NULL;
1089
1091 return;
1092
1094 do {
1095 /* save the current offset in case there's nothing to read/skip */
1099 break;
1100 }
1101
1107 break;
1108 }
1110 if (found_header) {
1111 /* parse ID3v2 header */
1112 len = ((buf[6] & 0x7f) << 21) |
1113 ((buf[7] & 0x7f) << 14) |
1114 ((buf[8] & 0x7f) << 7) |
1115 (buf[9] & 0x7f);
1117 extra_metap ? &extra_meta :
NULL);
1118 } else {
1120 }
1121 } while (found_header);
1126 if (extra_metap)
1127 *extra_metap = extra_meta.
head;
1128 }
1129
1132 {
1134 }
1135
1138 {
1140 }
1141
1143 {
1146
1147 while (current) {
1150 next = current->
next;
1152 current = next;
1153 }
1154
1156 }
1157
1159 {
1161
1162 for (cur = extra_meta; cur; cur = cur->
next) {
1166
1167 if (strcmp(cur->
tag,
"APIC"))
1168 continue;
1170
1174 st =
s->streams[
s->nb_streams - 1];
1176
1179
1182
1184 }
1185
1186 return 0;
1187 }
1188
1190 {
1193
1194 for (
unsigned i = 0; cur; cur = cur->
next) {
1197
1198 if (strcmp(cur->
tag,
"CHAP"))
1199 continue;
1200
1204 if (!chapter)
1205 continue;
1206
1209 }
1210
1211 return 0;
1212 }
1213
1215 {
1218
1219 for (cur = extra_meta; cur; cur = cur->
next) {
1220 if (!strcmp(cur->
tag,
"PRIV")) {
1222 AVBPrint bprint;
1223 char *escaped, *
key;
1225
1228 }
1229
1231
1233 if (priv->
data[
i] < 32 || priv->
data[
i] > 126 || priv->
data[
i] ==
'\\') {
1235 } else {
1237 }
1238 }
1239
1243 }
1244
1247 }
1248 }
1249 }
1250
1251 return 0;
1252 }
1253
1255 {
1257 }