84

In iOS 4.0 Apple has redesigned the backup process.

iTunes used to store a list of filenames associated with backup files in the Manifest.plist file, but in iOS 4.0 it has moved this information to a Manifest.mbdb file.

You can see an example of this file by making a backup with your iOS 4.0 devices and looking in your ~/Library/Application Support/MobileSync/Backup folder (Look inside the subfolders with the most recent date)

Here's a screenshot of what the file looks like in a text editor:

alt text
(source: supercrazyawesome.com)

How do I parse this into a Cocoa application so that I can update my (free) iPhone Backup Extractor app (http://supercrazyawesome.com) for iOS 4.0?

Glorfindel
22.8k13 gold badges97 silver badges124 bronze badges
asked Jun 21, 2010 at 13:44
2
  • I have the same question. I tried SQLite, Berkeley DB, serialized NSDictionary and a few other things. Please update this if you figure it out. There also is a dbx file which seems to be an encrypted format. Commented Jun 22, 2010 at 19:02
  • Just a note; once you have determined which file is the consolidated.db file, if you have a Verizon phone, your data will reside in the CdmaCellLocation table, and not the CellLocation table (which is empty). Just in case no one figured it out =) Commented Apr 21, 2011 at 1:15

7 Answers 7

87

Thank you, user374559 and reneD -- that code and description is very helpful.

Here is my stab at some Python to parse the file and print out the information in a Unix `ls -l`-like format:

#!/usr/bin/env python
import sys
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the raw byte string of the file, offset the index of the first
    byte to read and intsize the number of bytes making up the integer.

    Returns a (value, new_offset) tuple.
    Raises IndexError if fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    # bytearray yields integers on both Python 2 and Python 3, so no ord()
    # call is needed per byte.
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        raise IndexError("need %d byte(s) at offset %d, but only %d available"
                         % (intsize, offset, len(chunk)))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes. The length bytes 0xFF 0xFF mark a blank string with no payload.

    Returns a (value, new_offset) tuple; value is a slice of data.
    Raises IndexError if the length prefix or the payload runs past the end
    of data.
    """
    header = bytearray(data[offset:offset + 2])
    if len(header) != 2:
        raise IndexError("truncated string length at offset %d" % offset)
    offset = offset + 2
    if header[0] == 0xFF and header[1] == 0xFF:
        return data[:0], offset  # Blank string (empty value, same type as data)
    length = (header[0] << 8) + header[1]  # 2-byte length
    value = data[offset:offset + length]
    if len(value) != length:
        # A short slice would otherwise be returned silently.
        raise IndexError("string of %d byte(s) at offset %d runs past end of data"
                         % (length, offset))
    return value, (offset + length)
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict of that record's fields ('domain', 'filename', 'mode',
    'filelen', 'properties', ...).

    Raises Exception if the file does not start with the "mbdb" magic.
    """
    mbdb = {}  # Map offset of info in this file => file info
    # Binary mode: the file is not text, and text mode mangles or truncates
    # it on Windows. The with-block also closes the handle.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is

    # Fields are read strictly in this order.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4),
                  ('mtime', 4), ('atime', 4), ('ctime', 4),
                  ('filelen', 8), ('flag', 1), ('numprops', 1))

    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb
def process_mbdx_file(filename):
    """Parse a Manifest.mbdx index file.

    Returns a dict mapping the offset of a record in the MBDB file to the
    hex string of its 20-byte fileID.

    Raises Exception if the file does not start with the "mbdx" magic.
    """
    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    # Binary mode, and close the handle when done.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdx":
        raise Exception("This does not look like an MBDX file")
    offset = 4
    offset = offset + 2  # value 0x02 0x00, not sure what this is
    # 4-byte count of records; read only to advance past it.
    _filecount, offset = getint(data, offset, 4)
    while offset < len(data):
        # 26 byte record, made up of ...
        fileID = data[offset:offset + 20]  # 20 bytes of fileID
        # bytearray yields integers on both Python 2 and 3.
        fileID_string = ''.join('%02x' % b for b in bytearray(fileID))
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)  # 4-byte offset field
        mbdb_offset = mbdb_offset + 6  # Add 6 to get past prolog
        _mode, offset = getint(data, offset, 2)  # 2-byte mode field (unused)
        mbdx[mbdb_offset] = fileID_string
    return mbdx
def modestr(val):
    """Render the low nine bits of val as an ls-style string like 'rwxr-xr-x'.

    The three groups come from bits 8-6, 5-3 and 2-0 respectively. Higher
    bits are ignored.
    """
    def mode(bits):
        # One rwx triplet from the low three bits.
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)
def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a line of text.

    With verbose false the result is "(fileID)domain::filename". With
    verbose true it is an ls -l like line: type and permissions, user id,
    group id, length, mtime/atime/ctime, then the short form, followed by
    the link target for symlinks and any extra properties.

    An unrecognised file type is reported on stderr and shown as '?'.
    """
    summary = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return summary
    type_bits = f['mode'] & 0xE000
    if type_bits == 0xA000:
        kind = 'l'  # symlink
    elif type_bits == 0x8000:
        kind = '-'  # file
    elif type_bits == 0x4000:
        kind = 'd'  # dir
    else:
        # sys.stderr.write works unchanged on Python 2 and Python 3.
        sys.stderr.write("Unknown file type %04x for %s\n" % (f['mode'], summary))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
verbose = True
if __name__ == '__main__':
    # List every record of Manifest.mbdb in the current directory, paired
    # with its fileID from Manifest.mbdx.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    # Keys are record start offsets, so sorting gives file order rather
    # than arbitrary dict order.
    for offset, fileinfo in sorted(mbdb.items()):
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
answered Jun 28, 2010 at 8:05
Sign up to request clarification or add additional context in comments.

11 Comments

This post is about to go down in history.
Brilliant! I had some trouble with missing modules with code from user374559 - but, then, I am a novice with python.
The above text is the script referred to as "iphonels.py" in petewarden.github.com/iPhoneTracker, useful for finding the Apple location tracking database.
Note that this works fine on Windows, too (as long as you have Python to run it, of course), if you just add an "rb" parameter to the open() calls.
No worries. Considering I've never seen a line of Python before this week, it was a surprisingly straightforward port. Might learn Python now :-)
|
29

In iOS 5, the Manifest.mbdx file was eliminated. For the purpose of this article, it was redundant anyway, because the domain and path are in Manifest.mbdb and the ID hash can be generated with SHA1.

Here is my update of galloglass's code so it works with backups of iOS 5 devices. The only changes are elimination of process_mbdx_file() and addition of a few lines in process_mbdb_file().

Tested with backups of an iPhone 4S and an iPad 1, both with plenty of apps and files.

#!/usr/bin/env python
import sys
import hashlib
# Map offset of info in the MBDB file => fileID string; filled in by process_mbdb_file()
mbdx = {}
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the raw byte string of the file, offset the index of the first
    byte to read and intsize the number of bytes making up the integer.

    Returns a (value, new_offset) tuple.
    Raises IndexError if fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    # bytearray yields integers on both Python 2 and Python 3, so no ord()
    # call is needed per byte.
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        raise IndexError("need %d byte(s) at offset %d, but only %d available"
                         % (intsize, offset, len(chunk)))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes. The length bytes 0xFF 0xFF mark a blank string with no payload.

    Returns a (value, new_offset) tuple; value is a slice of data.
    Raises IndexError if the length prefix or the payload runs past the end
    of data.
    """
    header = bytearray(data[offset:offset + 2])
    if len(header) != 2:
        raise IndexError("truncated string length at offset %d" % offset)
    offset = offset + 2
    if header[0] == 0xFF and header[1] == 0xFF:
        return data[:0], offset  # Blank string (empty value, same type as data)
    length = (header[0] << 8) + header[1]  # 2-byte length
    value = data[offset:offset + length]
    if len(value) != length:
        # A short slice would otherwise be returned silently.
        raise IndexError("string of %d byte(s) at offset %d runs past end of data"
                         % (length, offset))
    return value, (offset + length)
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict of that record's fields. As a side effect, stores the
    SHA-1 hex digest of "<domain>-<filename>" for every record in the
    module-level mbdx dict, keyed by the same offset.

    Raises Exception if the file does not start with the "mbdb" magic.
    """
    mbdb = {}  # Map offset of info in this file => file info
    # Binary mode: the file is not text, and text mode mangles or truncates
    # it on Windows. The with-block also closes the handle.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is

    # Fields are read strictly in this order.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4),
                  ('mtime', 4), ('atime', 4), ('ctime', 4),
                  ('filelen', 8), ('flag', 1), ('numprops', 1))

    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        # The fileID is the SHA-1 of "<domain>-<filename>".
        fullpath = fileinfo['domain'] + b'-' + fileinfo['filename']
        digest = hashlib.sha1(fullpath)
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine bits of val as an ls-style string like 'rwxr-xr-x'.

    The three groups come from bits 8-6, 5-3 and 2-0 respectively. Higher
    bits are ignored.
    """
    def mode(bits):
        # One rwx triplet from the low three bits.
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)
def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a line of text.

    With verbose false the result is "(fileID)domain::filename". With
    verbose true it is an ls -l like line: type and permissions, user id,
    group id, length, mtime/atime/ctime, then the short form, followed by
    the link target for symlinks and any extra properties.

    An unrecognised file type is reported on stderr and shown as '?'.
    """
    summary = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return summary
    type_bits = f['mode'] & 0xE000
    if type_bits == 0xA000:
        kind = 'l'  # symlink
    elif type_bits == 0x8000:
        kind = '-'  # file
    elif type_bits == 0x4000:
        kind = 'd'  # dir
    else:
        # sys.stderr.write works unchanged on Python 2 and Python 3.
        sys.stderr.write("Unknown file type %04x for %s\n" % (f['mode'], summary))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
verbose = True
if __name__ == '__main__':
    # List every record of Manifest.mbdb in the current directory.
    # process_mbdb_file() also fills the module-level mbdx dict.
    mbdb = process_mbdb_file("Manifest.mbdb")
    # Keys are record start offsets, so sorting gives file order rather
    # than arbitrary dict order.
    for offset, fileinfo in sorted(mbdb.items()):
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
answered Nov 12, 2011 at 1:52

1 Comment

Jan 2016, still works with iTunes 12.3.2.35 and iOS 9.2.
11

You can find information and a little description of the MBDB/MBDX format here:

http://code.google.com/p/iphonebackupbrowser/

This is my application to browse the backup files. I have tried to document the format of the new files that come with iTunes 9.2.

answered Jun 30, 2010 at 21:03

Comments

8

This python script is awesome.

Here's my Ruby version of it (with minor improvement) and search capabilities. (for iOS 5)

# encoding: utf-8
require 'fileutils'
require 'digest/sha1'
# Parses an iOS backup Manifest.mbdb file and offers listing, searching and
# copying of the files it describes.
class ManifestParser
  # Reads and parses the given file immediately.
  # * mbdb_filename Path of the Manifest.mbdb file.
  # * verbose When true, records are formatted in the long "ls -l" like form.
  def initialize(mbdb_filename, verbose = false)
    @verbose = verbose
    process_mbdb_file(mbdb_filename)
  end

  # Returns the number of records in the Manifest file.
  def record_number
    @mbdb.size
  end

  # Returns a huge string containing the parsing of the Manifest file,
  # one record per line.
  def to_s
    @mbdb.map { |v| "#{fileinfo_str(v)}\n" }.join
  end

  # Writes one line per record to the given file.
  def to_file(filename)
    File.open(filename, 'w') do |f|
      @mbdb.each { |v| f.puts fileinfo_str(v) }
    end
  end

  # Copy the backup files to their real path/name.
  # * domain_match Can be a regexp to restrict the files to copy.
  # * filename_match Can be a regexp to restrict the files to copy.
  def rename_files(domain_match = nil, filename_match = nil)
    @mbdb.each do |v|
      next unless v[:type] == '-' # Only rename files.
      next unless domain_match.nil? || v[:domain] =~ domain_match
      next unless filename_match.nil? || v[:filename] =~ filename_match

      dst = "#{v[:domain]}/#{v[:filename]}"
      puts "Creating: #{dst}"
      FileUtils.mkdir_p(File.dirname(dst))
      FileUtils.cp(v[:fileID], dst)
    end
  end

  # Return the records whose "domain::filename" matches the given regexp.
  def search(regexp)
    @mbdb.select { |v| "#{v[:domain]}::#{v[:filename]}" =~ regexp }
  end

  private

  # Retrieve an integer (big-endian) and new offset from the current offset.
  # Raises IndexError if the data ends before intsize bytes were read.
  def getint(data, offset, intsize)
    value = 0
    intsize.times do
      byte = data.getbyte(offset)
      raise IndexError, "unexpected end of data at offset #{offset}" if byte.nil?
      value = (value << 8) + byte
      offset += 1
    end
    return value, offset
  end

  # Retrieve a string and new offset from the current offset into the data.
  # The bytes 0xFF 0xFF mark a blank string; otherwise a 2-byte length is
  # followed by that many bytes.
  def getstring(data, offset)
    return '', offset + 2 if data.getbyte(offset) == 0xFF && data.getbyte(offset + 1) == 0xFF # Blank string
    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset...(offset + length)]
    if value.nil? || value.bytesize != length
      raise IndexError, "string of #{length} byte(s) at offset #{offset} runs past end of data"
    end
    return value, (offset + length)
  end

  # Parses the whole file into @mbdb, an array with one hash per record.
  def process_mbdb_file(filename)
    @mbdb = Array.new
    data = File.open(filename, 'rb') { |f| f.read }
    puts "MBDB file read. Size: #{data.size}"
    raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'
    offset = 4
    offset += 2 # value x05 x00, not sure what this is
    while offset < data.size
      fileinfo = Hash.new
      fileinfo[:start_offset] = offset
      fileinfo[:domain], offset = getstring(data, offset)
      fileinfo[:filename], offset = getstring(data, offset)
      fileinfo[:linktarget], offset = getstring(data, offset)
      fileinfo[:datahash], offset = getstring(data, offset)
      fileinfo[:unknown1], offset = getstring(data, offset)
      fileinfo[:mode], offset = getint(data, offset, 2)
      fileinfo[:type] =
        case fileinfo[:mode] & 0xE000
        when 0xA000 then 'l' # Symlink
        when 0x8000 then '-' # File
        when 0x4000 then 'd' # Dir
        else '?'             # Unknown
        end
      fileinfo[:unknown2], offset = getint(data, offset, 4)
      fileinfo[:unknown3], offset = getint(data, offset, 4)
      fileinfo[:userid], offset = getint(data, offset, 4)
      fileinfo[:groupid], offset = getint(data, offset, 4)
      fileinfo[:mtime], offset = getint(data, offset, 4)
      fileinfo[:atime], offset = getint(data, offset, 4)
      fileinfo[:ctime], offset = getint(data, offset, 4)
      fileinfo[:filelen], offset = getint(data, offset, 8)
      fileinfo[:flag], offset = getint(data, offset, 1)
      fileinfo[:numprops], offset = getint(data, offset, 1)
      fileinfo[:properties] = Hash.new
      fileinfo[:numprops].times do
        propname, offset = getstring(data, offset)
        propval, offset = getstring(data, offset)
        fileinfo[:properties][propname] = propval
      end
      # Compute the ID of the file.
      fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
      fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
      # We add the file to the list of files.
      @mbdb << fileinfo
    end
    @mbdb
  end

  # Renders the low nine bits of val as a string such as "rwxr-xr-x".
  def modestr(val)
    [6, 3, 0].map do |shift|
      bits = val >> shift
      # 0 is truthy in Ruby, so each bit must be compared against zero.
      ((bits & 0x4).zero? ? '-' : 'r') +
        ((bits & 0x2).zero? ? '-' : 'w') +
        ((bits & 0x1).zero? ? '-' : 'x')
    end.join
  end

  # Formats one record: "(fileID)domain::filename", or the long "ls -l"
  # like line when the parser was created with verbose set.
  def fileinfo_str(f)
    return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose
    data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]]
    info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data
    info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
    f[:properties].each do |k, v|
      info += " #{k}=#{v.inspect}"
    end
    info
  end
end
# Run only when this file is executed directly: parse Manifest.mbdb from the
# current directory and write the verbose listing to filenames.txt.
if __FILE__ == $0
  mp = ManifestParser.new 'Manifest.mbdb', true
  mp.to_file 'filenames.txt'
end
answered Feb 12, 2012 at 22:09

Comments

4

I liked galloglass's code, and I changed the main function so that it shows a sorted list of total size by application:

verbose = True
if __name__ == '__main__':
    # List every record, then print the total size of regular files per
    # domain, smallest first.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}  # Map domain => total length of its regular files
    # Keys are record start offsets, so sorting gives file order rather
    # than arbitrary dict order.
    for offset, fileinfo in sorted(mbdb.items()):
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
        if (fileinfo['mode'] & 0xE000) == 0x8000:  # regular files only
            domain = fileinfo['domain']
            sizes[domain] = sizes.get(domain, 0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        # Floor division keeps the MB figure an integer on Python 2 and 3.
        print("%-60s %11d (%dMB)" % (domain, sizes[domain], sizes[domain] // (1024 * 1024)))

That way you can figure out what application is eating all that space.

answered Dec 6, 2010 at 15:39

2 Comments

Great. Not surprisingly, at least for me it is magazine apps that eat most backup space. (I have purchased a bunch of New Yorker and Vanity Fair issues on my iPad.) But what is infuriating is that these magazine apps keep individual images as separate files on the device. Makes the backup go very slow.
Ha ha, in some cases the New Yorker app even has the SVN data for articles... I see files like AppDomain-com.condenet.newyorker::Documents/issues/The New Yorker/396efb8c-fcdd-4997-8122-2e2bdc3940e5/1700_talk_surowiecki_110328/images/.svn/prop-base/101011_r20103_p280.jpg.svn-base
2

For those looking for a Java implementation of a MBDB file reader, there are several out there:

answered Sep 17, 2014 at 0:13

Comments

2

Thanks to galloglass for the answer. The code works great with Python 2.7. There is only one thing I want to mention: when reading the Manifest.mbdb file, you should use binary mode. Otherwise, not all of the content is read.

I also made some minor changes to make the code work with Python 3.4. Here is the code.

#!/usr/bin/env python
import sys
import hashlib
# Map offset of info in the MBDB file => fileID string; filled in by process_mbdb_file()
mbdx = {}
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the bytes object holding the file, offset the index of the first
    byte to read and intsize the number of bytes making up the integer.

    Returns a (value, new_offset) tuple.
    Raises IndexError if fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    chunk = data[offset:end]
    if len(chunk) != intsize:
        raise IndexError("need %d byte(s) at offset %d, but only %d available"
                         % (intsize, offset, len(chunk)))
    return int.from_bytes(chunk, 'big'), end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes. The length bytes 0xFF 0xFF mark a blank string with no payload.

    Returns a (value, new_offset) tuple. The payload is decoded as latin-1,
    so every byte maps to exactly one character and value.encode('latin-1')
    gives back the original bytes.
    Raises IndexError if the length prefix or the payload runs past the end
    of data.
    """
    header = data[offset:offset + 2]
    if len(header) != 2:
        raise IndexError("truncated string length at offset %d" % offset)
    offset = offset + 2
    if header == b'\xff\xff':
        return '', offset  # Blank string
    length = int.from_bytes(header, 'big')  # 2-byte length
    value = data[offset:offset + length]
    if len(value) != length:
        # A short slice would otherwise be returned silently.
        raise IndexError("string of %d byte(s) at offset %d runs past end of data"
                         % (length, offset))
    return value.decode(encoding='latin-1'), (offset + length)
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict of that record's fields. As a side effect, stores the
    SHA-1 hex digest of "<domain>-<filename>" for every record in the
    module-level mbdx dict, keyed by the same offset.

    Raises Exception if the file does not start with the "mbdb" magic.
    """
    mbdb = {}  # Map offset of info in this file => file info
    # 'b' is needed to read all content at once; the with-block closes the file.
    with open(filename, 'rb') as f:
        data = f.read()
    # Compare bytes with bytes: decoding an arbitrary header could raise
    # UnicodeDecodeError instead of the intended error below.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is

    # Fields are read strictly in this order.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4),
                  ('mtime', 4), ('atime', 4), ('ctime', 4),
                  ('filelen', 8), ('flag', 1), ('numprops', 1))

    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # getstring() decodes with latin-1, so encoding with latin-1 restores
        # the exact bytes stored in the file. The default UTF-8 encode would
        # change the hash input for any byte >= 0x80.
        digest = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine bits of val as an ls-style string like 'rwxr-xr-x'.

    The three groups come from bits 8-6, 5-3 and 2-0 respectively. Higher
    bits are ignored.
    """
    def mode(bits):
        # One rwx triplet from the low three bits.
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)
def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a line of text.

    With verbose false the result is "(fileID)domain::filename". With
    verbose true it is an ls -l like line: type and permissions, user id,
    group id, length, mtime/atime/ctime, then the short form, followed by
    the link target for symlinks and any extra properties.

    An unrecognised file type is reported on stderr and shown as '?'.
    """
    summary = "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if not verbose:
        return summary
    type_bits = f['mode'] & 0xE000
    if type_bits == 0xA000:
        kind = 'l'  # symlink
    elif type_bits == 0x8000:
        kind = '-'  # file
    elif type_bits == 0x4000:
        kind = 'd'  # dir
    else:
        # The Python 2 "print >> sys.stderr, ..." form raises TypeError here.
        print("Unknown file type %04x for %s" % (f['mode'], summary), file=sys.stderr)
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info
verbose = True
if __name__ == '__main__':
    # List every record of Manifest.mbdb in the current directory.
    # process_mbdb_file() also fills the module-level mbdx dict.
    mbdb = process_mbdb_file(
        r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            # The Python 2 "print >> sys.stderr, ..." form raises TypeError here.
            print("No fileID found for %s" % fileinfo_str(fileinfo), file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))
answered Aug 21, 2015 at 14:17

1 Comment

I've tried your code, I get IndexError; String out of range, due to value = (value<<8) + ord(data[offset])

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.