00001 /* 00002 * Copyright (c) 2003 Fabrice Bellard 00003 * 00004 * This file is part of FFmpeg. 00005 * 00006 * FFmpeg is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * FFmpeg is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with FFmpeg; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00019 */ 00020 00029 #include "config.h" 00030 00031 #if CONFIG_ZLIB 00032 #include <zlib.h> 00033 #endif 00034 00035 #include "id3v2.h" 00036 #include "id3v1.h" 00037 #include "libavutil/avstring.h" 00038 #include "libavutil/intreadwrite.h" 00039 #include "libavutil/dict.h" 00040 #include "avio_internal.h" 00041 00042 const AVMetadataConv ff_id3v2_34_metadata_conv[] = { 00043 { "TALB", "album"}, 00044 { "TCOM", "composer"}, 00045 { "TCON", "genre"}, 00046 { "TCOP", "copyright"}, 00047 { "TENC", "encoded_by"}, 00048 { "TIT2", "title"}, 00049 { "TLAN", "language"}, 00050 { "TPE1", "artist"}, 00051 { "TPE2", "album_artist"}, 00052 { "TPE3", "performer"}, 00053 { "TPOS", "disc"}, 00054 { "TPUB", "publisher"}, 00055 { "TRCK", "track"}, 00056 { "TSSE", "encoder"}, 00057 { 0 } 00058 }; 00059 00060 const AVMetadataConv ff_id3v2_4_metadata_conv[] = { 00061 { "TDRL", "date"}, 00062 { "TDRC", "date"}, 00063 { "TDEN", "creation_time"}, 00064 { "TSOA", "album-sort"}, 00065 { "TSOP", "artist-sort"}, 00066 { "TSOT", "title-sort"}, 00067 { 0 } 00068 }; 00069 00070 static const AVMetadataConv id3v2_2_metadata_conv[] = { 00071 { "TAL", "album"}, 00072 { "TCO", "genre"}, 00073 { "TT2", "title"}, 00074 { "TEN", "encoded_by"}, 00075 { "TP1", "artist"}, 00076 { "TP2", "album_artist"}, 00077 { "TP3", "performer"}, 00078 { "TRK", "track"}, 00079 { 0 } 00080 }; 00081 00082 00083 const char ff_id3v2_tags[][4] = { 00084 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT", 00085 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED", 00086 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3", 00087 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE", 00088 { 0 }, 00089 }; 00090 00091 const char ff_id3v2_4_tags[][4] = { 00092 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO", 00093 "TPRO", "TSOA", "TSOP", "TSOT", "TSST", 00094 { 0 }, 00095 }; 00096 00097 const char ff_id3v2_3_tags[][4] = { 00098 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER", 00099 { 0 }, 00100 }; 00101 00102 int ff_id3v2_match(const uint8_t *buf, const char * magic) 00103 { 00104 return buf[0] == magic[0] && 00105 buf[1] == magic[1] && 00106 buf[2] == magic[2] && 00107 buf[3] != 0xff && 00108 buf[4] != 0xff && 00109 (buf[6] & 0x80) == 0 && 00110 (buf[7] & 0x80) == 0 && 00111 (buf[8] & 0x80) == 0 && 00112 (buf[9] & 0x80) == 0; 00113 } 00114 00115 int ff_id3v2_tag_len(const uint8_t * buf) 00116 { 00117 int len = ((buf[6] & 0x7f) << 21) + 00118 ((buf[7] & 0x7f) << 14) + 00119 ((buf[8] & 0x7f) << 7) + 00120 (buf[9] & 0x7f) + 00121 ID3v2_HEADER_SIZE; 00122 if (buf[5] & 0x10) 00123 len += ID3v2_HEADER_SIZE; 00124 return len; 00125 } 00126 00127 static unsigned int get_size(AVIOContext *s, int len) 00128 { 00129 int v = 0; 00130 while (len--) 00131 v = (v << 7) + (avio_r8(s) & 0x7F); 00132 return v; 00133 } 00134 00138 static void free_geobtag(void *obj) 00139 { 00140 ID3v2ExtraMetaGEOB *geob = obj; 00141 av_free(geob->mime_type); 00142 av_free(geob->file_name); 00143 av_free(geob->description); 00144 av_free(geob->data); 00145 av_free(geob); 00146 } 00147 00160 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding, 00161 uint8_t **dst, int *maxread) 00162 { 00163 int ret; 00164 uint8_t tmp; 00165 uint32_t ch = 1; 00166 int left = *maxread; 00167 unsigned int (*get)(AVIOContext*) = avio_rb16; 00168 AVIOContext *dynbuf; 00169 00170 if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) { 00171 av_log(s, AV_LOG_ERROR, "Error opening memory stream\n"); 00172 return ret; 00173 } 00174 00175 switch (encoding) { 00176 00177 case ID3v2_ENCODING_ISO8859: 00178 while (left && ch) { 00179 ch = avio_r8(pb); 00180 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) 00181 left--; 00182 } 00183 break; 00184 00185 case ID3v2_ENCODING_UTF16BOM: 00186 if ((left -= 2) < 0) { 00187 av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n"); 00188 avio_close_dyn_buf(dynbuf, dst); 00189 av_freep(dst); 00190 return AVERROR_INVALIDDATA; 00191 } 00192 switch (avio_rb16(pb)) { 00193 case 0xfffe: 00194 get = avio_rl16; 00195 case 0xfeff: 00196 break; 00197 default: 00198 av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n"); 00199 avio_close_dyn_buf(dynbuf, dst); 00200 av_freep(dst); 00201 *maxread = left; 00202 return AVERROR_INVALIDDATA; 00203 } 00204 // fall-through 00205 00206 case ID3v2_ENCODING_UTF16BE: 00207 while ((left > 1) && ch) { 00208 GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;) 00209 PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) 00210 } 00211 if (left < 0) 00212 left += 2; /* did not read last char from pb */ 00213 break; 00214 00215 case ID3v2_ENCODING_UTF8: 00216 while (left && ch) { 00217 ch = avio_r8(pb); 00218 avio_w8(dynbuf, ch); 00219 left--; 00220 } 00221 break; 00222 default: 00223 av_log(s, AV_LOG_WARNING, "Unknown encoding\n"); 00224 } 00225 00226 if (ch) 00227 avio_w8(dynbuf, 0); 00228 00229 avio_close_dyn_buf(dynbuf, dst); 00230 *maxread = left; 00231 00232 return 0; 00233 } 00234 00238 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) 00239 { 00240 uint8_t *dst; 00241 int encoding, dict_flags = AV_DICT_DONT_OVERWRITE; 00242 unsigned genre; 00243 00244 if (taglen < 1) 00245 return; 00246 00247 encoding = avio_r8(pb); 00248 taglen--; /* account for encoding type byte */ 00249 00250 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { 00251 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); 00252 return; 00253 } 00254 00255 if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) 00256 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) 00257 && genre <= ID3v1_GENRE_MAX) { 00258 av_freep(&dst); 00259 dst = ff_id3v1_genre_str[genre]; 00260 } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { 00261 /* dst now contains the key, need to get value */ 00262 key = dst; 00263 if (decode_str(s, pb, encoding, &dst, &taglen) < 0) { 00264 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); 00265 av_freep(&key); 00266 return; 00267 } 00268 dict_flags |= AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_STRDUP_KEY; 00269 } 00270 else if (*dst) 00271 dict_flags |= AV_DICT_DONT_STRDUP_VAL; 00272 00273 if (dst) 00274 av_dict_set(&s->metadata, key, dst, dict_flags); 00275 } 00276 00280 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta) 00281 { 00282 ID3v2ExtraMetaGEOB *geob_data = NULL; 00283 ID3v2ExtraMeta *new_extra = NULL; 00284 char encoding; 00285 unsigned int len; 00286 00287 if (taglen < 1) 00288 return; 00289 00290 geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB)); 00291 if (!geob_data) { 00292 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB)); 00293 return; 00294 } 00295 00296 new_extra = av_mallocz(sizeof(ID3v2ExtraMeta)); 00297 if (!new_extra) { 00298 av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta)); 00299 goto fail; 00300 } 00301 00302 /* read encoding type byte */ 00303 encoding = avio_r8(pb); 00304 taglen--; 00305 00306 /* read MIME type (always ISO-8859) */ 00307 if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, &taglen) < 0 00308 || taglen <= 0) 00309 goto fail; 00310 00311 /* read file name */ 00312 if (decode_str(s, pb, encoding, &geob_data->file_name, &taglen) < 0 00313 || taglen <= 0) 00314 goto fail; 00315 00316 /* read content description */ 00317 if (decode_str(s, pb, encoding, &geob_data->description, &taglen) < 0 00318 || taglen < 0) 00319 goto fail; 00320 00321 if (taglen) { 00322 /* save encapsulated binary data */ 00323 geob_data->data = av_malloc(taglen); 00324 if (!geob_data->data) { 00325 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen); 00326 goto fail; 00327 } 00328 if ((len = avio_read(pb, geob_data->data, taglen)) < taglen) 00329 av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n"); 00330 geob_data->datasize = len; 00331 } else { 00332 geob_data->data = NULL; 00333 geob_data->datasize = 0; 00334 } 00335 00336 /* add data to the list */ 00337 new_extra->tag = "GEOB"; 00338 new_extra->data = geob_data; 00339 new_extra->next = *extra_meta; 00340 *extra_meta = new_extra; 00341 00342 return; 00343 00344 fail: 00345 av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag); 00346 free_geobtag(geob_data); 00347 av_free(new_extra); 00348 return; 00349 } 00350 00351 static int is_number(const char *str) 00352 { 00353 while (*str >= '0' && *str <= '9') str++; 00354 return !*str; 00355 } 00356 00357 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag) 00358 { 00359 AVDictionaryEntry *t; 00360 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) && 00361 strlen(t->value) == 4 && is_number(t->value)) 00362 return t; 00363 return NULL; 00364 } 00365 00366 static void merge_date(AVDictionary **m) 00367 { 00368 AVDictionaryEntry *t; 00369 char date[17] = {0}; // YYYY-MM-DD hh:mm 00370 00371 if (!(t = get_date_tag(*m, "TYER")) && 00372 !(t = get_date_tag(*m, "TYE"))) 00373 return; 00374 av_strlcpy(date, t->value, 5); 00375 av_dict_set(m, "TYER", NULL, 0); 00376 av_dict_set(m, "TYE", NULL, 0); 00377 00378 if (!(t = get_date_tag(*m, "TDAT")) && 00379 !(t = get_date_tag(*m, "TDA"))) 00380 goto finish; 00381 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value); 00382 av_dict_set(m, "TDAT", NULL, 0); 00383 av_dict_set(m, "TDA", NULL, 0); 00384 00385 if (!(t = get_date_tag(*m, "TIME")) && 00386 !(t = get_date_tag(*m, "TIM"))) 00387 goto finish; 00388 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2); 00389 av_dict_set(m, "TIME", NULL, 0); 00390 av_dict_set(m, "TIM", NULL, 0); 00391 00392 finish: 00393 if (date[0]) 00394 av_dict_set(m, "date", date, 0); 00395 } 00396 00397 typedef struct ID3v2EMFunc { 00398 const char *tag3; 00399 const char *tag4; 00400 void (*read)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta **); 00401 void (*free)(void *obj); 00402 } ID3v2EMFunc; 00403 00404 static const ID3v2EMFunc id3v2_extra_meta_funcs[] = { 00405 { "GEO", "GEOB", read_geobtag, free_geobtag }, 00406 { NULL } 00407 }; 00408 00414 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34) 00415 { 00416 int i = 0; 00417 while (id3v2_extra_meta_funcs[i].tag3) { 00418 if (tag && !memcmp(tag, 00419 (isv34 ? id3v2_extra_meta_funcs[i].tag4 : 00420 id3v2_extra_meta_funcs[i].tag3), 00421 (isv34 ? 4 : 3))) 00422 return &id3v2_extra_meta_funcs[i]; 00423 i++; 00424 } 00425 return NULL; 00426 } 00427 00428 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta) 00429 { 00430 int isv34, unsync; 00431 unsigned tlen; 00432 char tag[5]; 00433 int64_t next, end = avio_tell(s->pb) + len; 00434 int taghdrlen; 00435 const char *reason = NULL; 00436 AVIOContext pb; 00437 AVIOContext *pbx; 00438 unsigned char *buffer = NULL; 00439 int buffer_size = 0; 00440 const ID3v2EMFunc *extra_func = NULL; 00441 unsigned char *compressed_buffer = NULL; 00442 int compressed_buffer_size = 0; 00443 00444 switch (version) { 00445 case 2: 00446 if (flags & 0x40) { 00447 reason = "compression"; 00448 goto error; 00449 } 00450 isv34 = 0; 00451 taghdrlen = 6; 00452 break; 00453 00454 case 3: 00455 case 4: 00456 isv34 = 1; 00457 taghdrlen = 10; 00458 break; 00459 00460 default: 00461 reason = "version"; 00462 goto error; 00463 } 00464 00465 unsync = flags & 0x80; 00466 00467 if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */ 00468 int extlen = get_size(s->pb, 4); 00469 if (version == 4) 00470 extlen -= 4; // in v2.4 the length includes the length field we just read 00471 00472 if (extlen < 0) { 00473 reason = "invalid extended header length"; 00474 goto error; 00475 } 00476 avio_skip(s->pb, extlen); 00477 len -= extlen + 4; 00478 if (len < 0) { 00479 reason = "extended header too long."; 00480 goto error; 00481 } 00482 } 00483 00484 while (len >= taghdrlen) { 00485 unsigned int tflags = 0; 00486 int tunsync = 0; 00487 int tcomp = 0; 00488 int tencr = 0; 00489 unsigned long dlen; 00490 00491 if (isv34) { 00492 avio_read(s->pb, tag, 4); 00493 tag[4] = 0; 00494 if(version==3){ 00495 tlen = avio_rb32(s->pb); 00496 }else 00497 tlen = get_size(s->pb, 4); 00498 tflags = avio_rb16(s->pb); 00499 tunsync = tflags & ID3v2_FLAG_UNSYNCH; 00500 } else { 00501 avio_read(s->pb, tag, 3); 00502 tag[3] = 0; 00503 tlen = avio_rb24(s->pb); 00504 } 00505 if (tlen > (1<<28)) 00506 break; 00507 len -= taghdrlen + tlen; 00508 00509 if (len < 0) 00510 break; 00511 00512 next = avio_tell(s->pb) + tlen; 00513 00514 if (!tlen) { 00515 if (tag[0]) 00516 av_log(s, AV_LOG_DEBUG, "Invalid empty frame %s, skipping.\n", tag); 00517 continue; 00518 } 00519 00520 if (tflags & ID3v2_FLAG_DATALEN) { 00521 if (tlen < 4) 00522 break; 00523 dlen = avio_rb32(s->pb); 00524 tlen -= 4; 00525 } else 00526 dlen = tlen; 00527 00528 tcomp = tflags & ID3v2_FLAG_COMPRESSION; 00529 tencr = tflags & ID3v2_FLAG_ENCRYPTION; 00530 00531 /* skip encrypted tags and, if no zlib, compressed tags */ 00532 if (tencr || (!CONFIG_ZLIB && tcomp)) { 00533 const char *type; 00534 if (!tcomp) 00535 type = "encrypted"; 00536 else if (!tencr) 00537 type = "compressed"; 00538 else 00539 type = "encrypted and compressed"; 00540 00541 av_log(s, AV_LOG_WARNING, "Skipping %s ID3v2 frame %s.\n", type, tag); 00542 avio_skip(s->pb, tlen); 00543 /* check for text tag or supported special meta tag */ 00544 } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) { 00545 if (unsync || tunsync || tcomp) { 00546 int i, j; 00547 00548 av_fast_malloc(&buffer, &buffer_size, dlen); 00549 if (!buffer) { 00550 av_log(s, AV_LOG_ERROR, "Failed to alloc %ld bytes\n", dlen); 00551 goto seek; 00552 } 00553 #if CONFIG_ZLIB 00554 if (tcomp) { 00555 int n, err; 00556 00557 av_log(s, AV_LOG_DEBUG, "Compresssed frame %s tlen=%d dlen=%ld\n", tag, tlen, dlen); 00558 00559 av_fast_malloc(&compressed_buffer, &compressed_buffer_size, tlen); 00560 if (!compressed_buffer) { 00561 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen); 00562 goto seek; 00563 } 00564 00565 n = avio_read(s->pb, compressed_buffer, tlen); 00566 if (n < 0) { 00567 av_log(s, AV_LOG_ERROR, "Failed to read compressed tag\n"); 00568 goto seek; 00569 } 00570 00571 err = uncompress(buffer, &dlen, compressed_buffer, n); 00572 if (err != Z_OK) { 00573 av_log(s, AV_LOG_ERROR, "Failed to uncompress tag: %d\n", err); 00574 goto seek; 00575 } 00576 } 00577 #endif 00578 00579 for (i = 0, j = 0; i < dlen; i++, j++) { 00580 if (!tcomp) 00581 buffer[j] = avio_r8(s->pb); 00582 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) { 00583 /* Unsynchronised byte, skip it */ 00584 j--; 00585 } 00586 } 00587 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL); 00588 tlen = j; 00589 pbx = &pb; // read from sync buffer 00590 } else { 00591 pbx = s->pb; // read straight from input 00592 } 00593 if (tag[0] == 'T') 00594 /* parse text tag */ 00595 read_ttag(s, pbx, tlen, tag); 00596 else 00597 /* parse special meta tag */ 00598 extra_func->read(s, pbx, tlen, tag, extra_meta); 00599 } 00600 else if (!tag[0]) { 00601 if (tag[1]) 00602 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding"); 00603 avio_skip(s->pb, tlen); 00604 break; 00605 } 00606 /* Skip to end of tag */ 00607 seek: 00608 avio_seek(s->pb, next, SEEK_SET); 00609 } 00610 00611 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */ 00612 end += 10; 00613 00614 error: 00615 if (reason) 00616 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason); 00617 avio_seek(s->pb, end, SEEK_SET); 00618 av_free(buffer); 00619 av_free(compressed_buffer); 00620 return; 00621 } 00622 00623 void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta) 00624 { 00625 int len, ret; 00626 uint8_t buf[ID3v2_HEADER_SIZE]; 00627 int found_header; 00628 int64_t off; 00629 00630 do { 00631 /* save the current offset in case there's nothing to read/skip */ 00632 off = avio_tell(s->pb); 00633 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE); 00634 if (ret != ID3v2_HEADER_SIZE) 00635 break; 00636 found_header = ff_id3v2_match(buf, magic); 00637 if (found_header) { 00638 /* parse ID3v2 header */ 00639 len = ((buf[6] & 0x7f) << 21) | 00640 ((buf[7] & 0x7f) << 14) | 00641 ((buf[8] & 0x7f) << 7) | 00642 (buf[9] & 0x7f); 00643 ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta); 00644 } else { 00645 avio_seek(s->pb, off, SEEK_SET); 00646 } 00647 } while (found_header); 00648 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv); 00649 ff_metadata_conv(&s->metadata, NULL, id3v2_2_metadata_conv); 00650 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv); 00651 merge_date(&s->metadata); 00652 } 00653 00654 void ff_id3v2_read(AVFormatContext *s, const char *magic) 00655 { 00656 ff_id3v2_read_all(s, magic, NULL); 00657 } 00658 00659 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) 00660 { 00661 ID3v2ExtraMeta *current = *extra_meta, *next; 00662 const ID3v2EMFunc *extra_func; 00663 00664 while (current) { 00665 if ((extra_func = get_extra_meta_func(current->tag, 1))) 00666 extra_func->free(current->data); 00667 next = current->next; 00668 av_freep(¤t); 00669 current = next; 00670 } 00671 }