1 /*
2 * Copyright (c) 2003 Fabrice Bellard
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * ID3v2 header parser
24 *
25 * Specifications available at:
26 * http://id3.org/Developer_Information
27 */
28
29 #include "config.h"
30
31 #if CONFIG_ZLIB
32 #include <zlib.h>
33 #endif
34
45
47 { "TALB", "album" },
48 { "TCOM", "composer" },
49 { "TCON", "genre" },
50 { "TCOP", "copyright" },
51 { "TENC", "encoded_by" },
52 { "TIT2", "title" },
53 { "TLAN", "language" },
54 { "TPE1", "artist" },
55 { "TPE2", "album_artist" },
56 { "TPE3", "performer" },
57 { "TPOS", "disc" },
58 { "TPUB", "publisher" },
59 { "TRCK", "track" },
60 { "TSSE", "encoder" },
61 { "USLT", "lyrics" },
62 { 0 }
63 };
64
66 { "TCMP", "compilation" },
67 { "TDRC", "date" },
68 { "TDRL", "date" },
69 { "TDEN", "creation_time" },
70 { "TSOA", "album-sort" },
71 { "TSOP", "artist-sort" },
72 { "TSOT", "title-sort" },
73 { "TIT1", "grouping" },
74 { 0 }
75 };
76
78 { "TAL", "album" },
79 { "TCO", "genre" },
80 { "TCP", "compilation" },
81 { "TT2", "title" },
82 { "TEN", "encoded_by" },
83 { "TP1", "artist" },
84 { "TP2", "album_artist" },
85 { "TP3", "performer" },
86 { "TRK", "track" },
87 { 0 }
88 };
89
91 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT",
92 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED",
93 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3",
94 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE",
95 { 0 },
96 };
97
99 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO",
100 "TPRO", "TSOA", "TSOP", "TSOT", "TSST",
101 { 0 },
102 };
103
105 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER",
106 { 0 },
107 };
108
110 "Other",
111 "32x32 pixels 'file icon'",
112 "Other file icon",
113 "Cover (front)",
114 "Cover (back)",
115 "Leaflet page",
116 "Media (e.g. label side of CD)",
117 "Lead artist/lead performer/soloist",
118 "Artist/performer",
119 "Conductor",
120 "Band/Orchestra",
121 "Composer",
122 "Lyricist/text writer",
123 "Recording Location",
124 "During recording",
125 "During performance",
126 "Movie/video screen capture",
127 "A bright coloured fish",
128 "Illustration",
129 "Band/artist logotype",
130 "Publisher/Studio logotype",
131 };
132
143 };
144
146 {
147 return buf[0] == magic[0] &&
148 buf[1] == magic[1] &&
149 buf[2] == magic[2] &&
150 buf[3] != 0xff &&
151 buf[4] != 0xff &&
152 (buf[6] & 0x80) == 0 &&
153 (buf[7] & 0x80) == 0 &&
154 (buf[8] & 0x80) == 0 &&
155 (buf[9] & 0x80) == 0;
156 }
157
159 {
160 int len = ((buf[6] & 0x7f) << 21) +
161 ((buf[7] & 0x7f) << 14) +
162 ((buf[8] & 0x7f) << 7) +
163 (buf[9] & 0x7f) +
165 if (buf[5] & 0x10)
168 }
169
171 {
172 int v = 0;
175 return v;
176 }
177
179 {
180 return (((
size) & (0x7f << 0)) >> 0) +
181 (((
size) & (0x7f << 8)) >> 1) +
182 (((
size) & (0x7f << 16)) >> 2) +
183 (((
size) & (0x7f << 24)) >> 3);
184 }
185
186 /* No real verification, only check that the tag consists of
187 * a combination of capital alpha-numerical characters */
189 {
191 return 0;
192
194 if ((buf[
len] <
'A' ||
198 return 0;
199
200 return 1;
201 }
202
203 /**
204 * Return 1 if the tag of length len at the given offset is valid, 0 if not, -1 on error
205 */
207 {
209
213 return -1;
215 return 1;
216
217 return 0;
218 }
219
220 /**
221 * Free GEOB type extra metadata.
222 */
224 {
230 }
231
232 /**
233 * Decode characters to UTF-8 according to encoding type. The decoded buffer is
234 * always null terminated. Stop reading when either *maxread bytes are read from
235 * pb or U+0000 character is found.
236 *
237 * @param dst Pointer where the address of the buffer with the decoded bytes is
238 * stored. Buffer must be freed by caller.
239 * @param maxread Pointer to maximum number of bytes to read from the
240 * AVIOContext. After execution the value is decremented by the number of bytes
241 * actually read.
242 * @returns 0 if no error occurred, dst is uninitialized on error
243 */
245 uint8_t **
dst,
int *maxread)
246 {
249 uint32_t ch = 1;
250 int left = *maxread, dynsize;
253
257 }
258
259 switch (encoding) {
265 }
266 break;
267
269 if ((
left -= 2) < 0) {
274 }
276 case 0xfffe:
278 case 0xfeff:
279 break;
280 default:
286 }
287 // fall-through
288
290 while ((
left > 1) && ch) {
293 }
295 left += 2;
/* did not read last char from pb */
296 break;
297
303 }
304 break;
305 default:
307 }
308
309 if (ch)
311
313 if (dynsize <= 0) {
316 }
318
319 return 0;
320 }
321
322 /**
323 * Parse a text tag.
324 */
327 {
330 unsigned genre;
331
332 if (taglen < 1)
333 return;
334
336 taglen--; /* account for encoding type byte */
337
340 return;
341 }
342
343 if (!(strcmp(
key,
"TCON") && strcmp(
key,
"TCO")) &&
344 (sscanf(
dst,
"(%d)", &genre) == 1 || sscanf(
dst,
"%d", &genre) == 1) &&
348 }
else if (!(strcmp(
key,
"TXXX") && strcmp(
key,
"TXX"))) {
349 /* dst now contains the key, need to get value */
354 return;
355 }
359
362 }
363
366 {
367 uint8_t lang[4];
368 uint8_t *descriptor =
NULL;
// 'Content descriptor'
369 uint8_t *text;
371 int encoding;
372 int ok = 0;
373
374 if (taglen < 4)
376
378 taglen--;
379
382 lang[3] = '0円';
383 taglen -= 3;
384
385 if (
decode_str(
s, pb, encoding, &descriptor, &taglen) < 0 || taglen < 0)
387
388 if (
decode_str(
s, pb, encoding, &text, &taglen) < 0 || taglen < 0)
390
391 // FFmpeg does not support hierarchical metadata, so concatenate the keys.
392 key =
av_asprintf(
"lyrics-%s%s%s", descriptor[0] ? (
char *)descriptor :
"",
393 descriptor[0] ? "-" : "",
394 lang);
398 }
399
402
403 ok = 1;
405 if (!ok)
408 }
409
410 /**
411 * Parse a comment tag.
412 */
415 {
416 const char *
key =
"comment";
420
421 if (taglen < 4)
422 return;
423
426 taglen -= 4;
427
430 return;
431 }
432
435
439 }
440
445 return;
446 }
447
450 }
451
455
457 {
459 list->tail->next = new_elem;
460 else
461 list->head = new_elem;
462 list->tail = new_elem;
463 }
464
465 /**
466 * Parse GEOB tag into an ID3v2ExtraMetaGEOB struct.
467 */
470 {
473 char encoding;
475
476 if (taglen < 1)
477 return;
478
480 if (!new_extra) {
483 return;
484 }
485
487
488 /* read encoding type byte */
490 taglen--;
491
492 /* read MIME type (always ISO-8859) */
494 &taglen) < 0 ||
495 taglen <= 0)
497
498 /* read file name */
500 taglen <= 0)
502
503 /* read content description */
505 taglen < 0)
507
508 if (taglen) {
509 /* save encapsulated binary data */
511 if (!geob_data->
data) {
514 }
517 "Error reading GEOB frame, data truncated.\n");
519 } else {
522 }
523
524 /* add data to the list */
525 new_extra->
tag =
"GEOB";
527
528 return;
529
534 return;
535 }
536
538 {
539 while (*str >= '0' && *str <= '9')
540 str++;
541 return !*str;
542 }
543
545 {
549 return t;
551 }
552
554 {
556 char date[17] = { 0 }; // YYYY-MM-DD hh:mm
557
560 return;
564
571
575 snprintf(date + 10,
sizeof(date) - 10,
579
581 if (date[0])
583 }
584
586 {
590 }
591
593 {
594 size_t len = strlen(buf);
595 while (
len > 0 && buf[
len - 1] ==
' ')
597 }
598
601 {
602 int enc, pic_type;
603 char mimetype[64] = {0};
609
610 if (taglen <= 4 || (!isv34 && taglen <= 6))
612
614 if (!new_extra)
616
618
620 taglen--;
621
622 /* mimetype */
623 if (isv34) {
625 if (ret < 0 || ret >= taglen)
628 } else {
631
632 mimetype[3] = 0;
633 taglen -= 3;
634 }
635
639 break;
640 }
641 mime++;
642 }
645 "Unknown attached picture mimetype: %s, skipping.\n", mimetype);
647 }
649
650 /* picture type */
652 taglen--;
655 pic_type);
656 pic_type = 0;
657 }
659
660 /* description and picture data */
663 "Error decoding attached picture description.\n");
665 }
666
671
672 new_extra->
tag =
"APIC";
673
674 // The description must be unique, and some ID3v2 tag writers add spaces
675 // to write several APIC entries with the same description.
678
679 return;
680
682 if (apic)
686 }
687
689 {
693 }
694
697 {
698 int taglen;
702
704 if (!new_extra)
705 return;
706
708
711
714
718
727 if (taglen < 0 || taglen >
len)
731 else
734 }
735
738
739 new_extra->
tag =
"CHAP";
741
742 return;
743
747 }
748
750 {
754 }
755
758 {
761
763 if (!meta)
764 return;
765
767
770
774
776
779
782
783 return;
784
788 }
789
795 int isv34);
798
805 };
806
807 /**
808 * Get the corresponding ID3v2EMFunc struct for a tag.
809 * @param isv34 Determines if v2.2 or v2.3/4 strings are used
810 * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
811 */
813 {
819 (isv34 ? 4 : 3)))
822 }
824 }
825
829 {
830 int isv34, unsync;
831 unsigned tlen;
834 int taghdrlen;
835 const char *reason =
NULL;
839 int buffer_size = 0;
841 unsigned char *uncompressed_buffer =
NULL;
842 av_unused int uncompressed_buffer_size = 0;
843 const char *comm_frame;
844
845 if (end > INT64_MAX -
len - 10)
846 return;
848
850
852 case 2:
854 reason = "compression";
856 }
857 isv34 = 0;
858 taghdrlen = 6;
859 comm_frame = "COM";
860 break;
861
862 case 3:
863 case 4:
864 isv34 = 1;
865 taghdrlen = 10;
866 comm_frame = "COMM";
867 break;
868
869 default:
870 reason = "version";
872 }
873
874 unsync =
flags & 0x80;
875
876 if (isv34 &&
flags & 0x40) {
/* Extended header present, just skip over it */
879 /* In v2.4 the length includes the length field we just read. */
880 extlen -= 4;
881
882 if (extlen < 0) {
883 reason = "invalid extended header length";
885 }
889 reason = "extended header too long.";
891 }
892 }
893
894 while (
len >= taghdrlen) {
895 unsigned int tflags = 0;
896 int tunsync = 0;
897 int tcomp = 0;
898 int tencr = 0;
900
901 if (isv34) {
903 break;
907 } else {
908 /* some encoders incorrectly use v3 sizes instead of syncsafe ones
909 * so check the next tag to see which one to use */
911 if (tlen > 0x7f) {
914
916 break;
917
920 else if (
check_tag(pb, cur + 2 + tlen, 4) != 1)
921 break;
923 } else
925 }
926 }
929 } else {
931 break;
934 }
935 if (tlen > (1<<28))
936 break;
937 len -= taghdrlen + tlen;
938
940 break;
941
943
944 if (!tlen) {
948 continue;
949 }
950
952 if (tlen < 4)
953 break;
955 tlen -= 4;
956 } else
957 dlen = tlen;
958
961
962 /* skip encrypted tags and, if no zlib, compressed tags */
963 if (tencr || (!CONFIG_ZLIB && tcomp)) {
965 if (!tcomp)
967 else if (!tencr)
969 else
970 type =
"encrypted and compressed";
971
974 /* check for text tag or supported special meta tag */
975 }
else if (
tag[0] ==
'T' ||
976 !memcmp(
tag,
"USLT", 4) ||
977 !strcmp(
tag, comm_frame) ||
978 (extra_meta &&
980 pbx = pb;
981
982 if (unsync || tunsync || tcomp) {
987 }
988 }
989 if (unsync || tunsync) {
992 uint8_t *end = t + tlen;
993
997 }
998
999 while (t != end) {
1001 if (t != end && t[-1] == 0xff && !t[0])
1002 t++;
1003 }
1004
1007 pbx = &pb_local.
pub;
// read from sync buffer
1008 }
1009
1010 #if CONFIG_ZLIB
1011 if (tcomp) {
1012 int err;
1013
1015
1016 if (tlen <= 0)
1018 if (dlen / 32768 > tlen)
1020
1021 av_fast_malloc(&uncompressed_buffer, &uncompressed_buffer_size, dlen);
1022 if (!uncompressed_buffer) {
1025 }
1026
1027 if (!(unsync || tunsync)) {
1029 if (err < 0) {
1032 }
1033 tlen = err;
1034 }
1035
1036 err = uncompress(uncompressed_buffer, &dlen,
buffer, tlen);
1037 if (err != Z_OK) {
1040 }
1042 tlen = dlen;
1043 pbx = &pb_local.
pub;
// read from sync buffer
1044 }
1045 #endif
1047 /* parse text tag */
1049 else if (!memcmp(
tag,
"USLT", 4))
1051 else if (!strcmp(
tag, comm_frame))
1053 else
1054 /* parse special meta tag */
1055 extra_func->
read(
s, pbx, tlen,
tag, extra_meta, isv34);
1056 }
else if (!
tag[0]) {
1060 break;
1061 }
1062 /* Skip to end of tag */
1065 }
1066
1067 /* Footer present, always 10 bytes, skip over it */
1069 end += 10;
1070
1072 if (reason)
1078 return;
1079 }
1080
1084 {
1088 int found_header;
1090
1091 if (extra_metap)
1092 *extra_metap =
NULL;
1093
1095 return;
1096
1098 do {
1099 /* save the current offset in case there's nothing to read/skip */
1103 break;
1104 }
1105
1111 break;
1112 }
1114 if (found_header) {
1115 /* parse ID3v2 header */
1116 len = ((buf[6] & 0x7f) << 21) |
1117 ((buf[7] & 0x7f) << 14) |
1118 ((buf[8] & 0x7f) << 7) |
1119 (buf[9] & 0x7f);
1121 extra_metap ? &extra_meta :
NULL);
1122 } else {
1124 }
1125 } while (found_header);
1130 if (extra_metap)
1131 *extra_metap = extra_meta.
head;
1132 }
1133
1136 {
1138 }
1139
1142 {
1144 }
1145
1147 {
1150
1151 while (current) {
1154 next = current->
next;
1156 current = next;
1157 }
1158
1160 }
1161
1163 {
1165
1166 for (cur = extra_meta; cur; cur = cur->
next) {
1170
1171 if (strcmp(cur->
tag,
"APIC"))
1172 continue;
1174
1178 st =
s->streams[
s->nb_streams - 1];
1180
1183
1186
1188 }
1189
1190 return 0;
1191 }
1192
1194 {
1197
1198 for (
unsigned i = 0; cur; cur = cur->
next) {
1201
1202 if (strcmp(cur->
tag,
"CHAP"))
1203 continue;
1204
1208 if (!chapter)
1209 continue;
1210
1213 }
1214
1215 return 0;
1216 }
1217
1219 {
1222
1223 for (cur = extra_meta; cur; cur = cur->
next) {
1224 if (!strcmp(cur->
tag,
"PRIV")) {
1226 AVBPrint bprint;
1227 char *escaped, *
key;
1229
1232 }
1233
1235
1237 if (priv->
data[
i] < 32 || priv->
data[
i] > 126 || priv->
data[
i] ==
'\\') {
1239 } else {
1241 }
1242 }
1243
1247 }
1248
1251 }
1252 }
1253 }
1254
1255 return 0;
1256 }
1257
1259 {
1261 }