[Python-checkins] cpython (merge default -> default): merge heads

benjamin.peterson python-checkins at python.org
Sat Dec 10 23:55:37 CET 2011


http://hg.python.org/cpython/rev/2bbe7dc920de
changeset: 73931:2bbe7dc920de
parent: 73930:8d670cb0d64a
parent: 73929:6cd736239b8a
user: Benjamin Peterson <benjamin at python.org>
date: Sat Dec 10 17:55:31 2011 -0500
summary:
 merge heads
files:
 Doc/library/tarfile.rst | 22 +++++--
 Lib/ftplib.py | 25 +-------
 Lib/tarfile.py | 66 +++++++++++++++++++++--
 Lib/test/test_minidom.py | 2 +-
 Lib/test/test_tarfile.py | 78 ++++++++++++++++++++++++---
 Lib/xml/dom/pulldom.py | 6 +-
 Misc/NEWS | 2 +
 7 files changed, 153 insertions(+), 48 deletions(-)
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -13,12 +13,12 @@
 --------------
 
 The :mod:`tarfile` module makes it possible to read and write tar
-archives, including those using gzip or bz2 compression.
+archives, including those using gzip, bz2 and lzma compression.
 (:file:`.zip` files can be read and written using the :mod:`zipfile` module.)
 
 Some facts and figures:
 
-* reads and writes :mod:`gzip` and :mod:`bz2` compressed archives.
+* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives.
 
 * read/write support for the POSIX.1-1988 (ustar) format.
 
@@ -55,6 +55,8 @@
 +------------------+---------------------------------------------+
 | ``'r:bz2'`` | Open for reading with bzip2 compression. |
 +------------------+---------------------------------------------+
+ | ``'r:xz'`` | Open for reading with lzma compression. |
+ +------------------+---------------------------------------------+
 | ``'a' or 'a:'`` | Open for appending with no compression. The |
 | | file is created if it does not exist. |
 +------------------+---------------------------------------------+
@@ -64,11 +66,13 @@
 +------------------+---------------------------------------------+
 | ``'w:bz2'`` | Open for bzip2 compressed writing. |
 +------------------+---------------------------------------------+
+ | ``'w:xz'`` | Open for lzma compressed writing. |
+ +------------------+---------------------------------------------+
 
- Note that ``'a:gz'`` or ``'a:bz2'`` is not possible. If *mode* is not suitable
- to open a certain (compressed) file for reading, :exc:`ReadError` is raised. Use
- *mode* ``'r'`` to avoid this. If a compression method is not supported,
- :exc:`CompressionError` is raised.
+ Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode*
+ is not suitable to open a certain (compressed) file for reading,
+ :exc:`ReadError` is raised. Use *mode* ``'r'`` to avoid this. If a
+ compression method is not supported, :exc:`CompressionError` is raised.
 
 If *fileobj* is specified, it is used as an alternative to a :term:`file object`
 opened in binary mode for *name*. It is supposed to be at position 0.
@@ -99,6 +103,9 @@
 | ``'r|bz2'`` | Open a bzip2 compressed *stream* for |
 | | reading. |
 +-------------+--------------------------------------------+
+ | ``'r|xz'`` | Open a lzma compressed *stream* for |
+ | | reading. |
+ +-------------+--------------------------------------------+
 | ``'w|'`` | Open an uncompressed *stream* for writing. |
 +-------------+--------------------------------------------+
 | ``'w|gz'`` | Open a gzip compressed *stream* for |
@@ -107,6 +114,9 @@
 | ``'w|bz2'`` | Open a bzip2 compressed *stream* for |
 | | writing. |
 +-------------+--------------------------------------------+
+ | ``'w|xz'`` | Open an lzma compressed *stream* for |
+ | | writing. |
+ +-------------+--------------------------------------------+
 
 
 .. class:: TarFile
diff --git a/Lib/ftplib.py b/Lib/ftplib.py
--- a/Lib/ftplib.py
+++ b/Lib/ftplib.py
@@ -359,8 +359,7 @@
 conn.close()
 raise
 else:
- sock = self.makeport()
- try:
+ with self.makeport() as sock:
 if rest is not None:
 self.sendcmd("REST %s" % rest)
 resp = self.sendcmd(cmd)
@@ -372,8 +371,6 @@
 conn, sockaddr = sock.accept()
 if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT:
 conn.settimeout(self.timeout)
- finally:
- sock.close()
 if resp[:3] == '150':
 # this is conditional in case we received a 125
 size = parse150(resp)
@@ -753,8 +750,7 @@
 
 def retrbinary(self, cmd, callback, blocksize=8192, rest=None):
 self.voidcmd('TYPE I')
- conn = self.transfercmd(cmd, rest)
- try:
+ with self.transfercmd(cmd, rest) as conn:
 while 1:
 data = conn.recv(blocksize)
 if not data:
@@ -763,8 +759,6 @@
 # shutdown ssl layer
 if isinstance(conn, ssl.SSLSocket):
 conn.unwrap()
- finally:
- conn.close()
 return self.voidresp()
 
 def retrlines(self, cmd, callback = None):
@@ -772,7 +766,7 @@
 resp = self.sendcmd('TYPE A')
 conn = self.transfercmd(cmd)
 fp = conn.makefile('r', encoding=self.encoding)
- try:
+ with fp, conn:
 while 1:
 line = fp.readline()
 if self.debugging > 2: print('*retr*', repr(line))
@@ -786,15 +780,11 @@
 # shutdown ssl layer
 if isinstance(conn, ssl.SSLSocket):
 conn.unwrap()
- finally:
- fp.close()
- conn.close()
 return self.voidresp()
 
 def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None):
 self.voidcmd('TYPE I')
- conn = self.transfercmd(cmd, rest)
- try:
+ with self.transfercmd(cmd, rest) as conn:
 while 1:
 buf = fp.read(blocksize)
 if not buf: break
@@ -803,14 +793,11 @@
 # shutdown ssl layer
 if isinstance(conn, ssl.SSLSocket):
 conn.unwrap()
- finally:
- conn.close()
 return self.voidresp()
 
 def storlines(self, cmd, fp, callback=None):
 self.voidcmd('TYPE A')
- conn = self.transfercmd(cmd)
- try:
+ with self.transfercmd(cmd) as conn:
 while 1:
 buf = fp.readline()
 if not buf: break
@@ -822,8 +809,6 @@
 # shutdown ssl layer
 if isinstance(conn, ssl.SSLSocket):
 conn.unwrap()
- finally:
- conn.close()
 return self.voidresp()
 
 def abort(self):
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -420,10 +420,11 @@
 self.crc = zlib.crc32(b"")
 if mode == "r":
 self._init_read_gz()
+ self.exception = zlib.error
 else:
 self._init_write_gz()
 
- if comptype == "bz2":
+ elif comptype == "bz2":
 try:
 import bz2
 except ImportError:
@@ -431,8 +432,25 @@
 if mode == "r":
 self.dbuf = b""
 self.cmp = bz2.BZ2Decompressor()
+ self.exception = IOError
 else:
 self.cmp = bz2.BZ2Compressor()
+
+ elif comptype == "xz":
+ try:
+ import lzma
+ except ImportError:
+ raise CompressionError("lzma module is not available")
+ if mode == "r":
+ self.dbuf = b""
+ self.cmp = lzma.LZMADecompressor()
+ self.exception = lzma.LZMAError
+ else:
+ self.cmp = lzma.LZMACompressor()
+
+ elif comptype != "tar":
+ raise CompressionError("unknown compression type %r" % comptype)
+
 except:
 if not self._extfileobj:
 self.fileobj.close()
@@ -584,7 +602,7 @@
 break
 try:
 buf = self.cmp.decompress(buf)
- except IOError:
+ except self.exception:
 raise ReadError("invalid compressed data")
 self.dbuf += buf
 c += len(buf)
@@ -622,11 +640,14 @@
 return self.buf
 
 def getcomptype(self):
- if self.buf.startswith(b"\037\213\010"):
+ if self.buf.startswith(b"\x1f\x8b\x08"):
 return "gz"
- if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
+ elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
 return "bz2"
- return "tar"
+ elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
+ return "xz"
+ else:
+ return "tar"
 
 def close(self):
 self.fileobj.close()
@@ -1651,18 +1672,22 @@
 'r:' open for reading exclusively uncompressed
 'r:gz' open for reading with gzip compression
 'r:bz2' open for reading with bzip2 compression
+ 'r:xz' open for reading with lzma compression
 'a' or 'a:' open for appending, creating the file if necessary
 'w' or 'w:' open for writing without compression
 'w:gz' open for writing with gzip compression
 'w:bz2' open for writing with bzip2 compression
+ 'w:xz' open for writing with lzma compression
 
 'r|*' open a stream of tar blocks with transparent compression
 'r|' open an uncompressed stream of tar blocks for reading
 'r|gz' open a gzip compressed stream of tar blocks
 'r|bz2' open a bzip2 compressed stream of tar blocks
+ 'r|xz' open an lzma compressed stream of tar blocks
 'w|' open an uncompressed stream for writing
 'w|gz' open a gzip compressed stream for writing
 'w|bz2' open a bzip2 compressed stream for writing
+ 'w|xz' open an lzma compressed stream for writing
 """
 
 if not name and not fileobj:
@@ -1780,11 +1805,40 @@
 t._extfileobj = False
 return t
 
+ @classmethod
+ def xzopen(cls, name, mode="r", fileobj=None, preset=9, **kwargs):
+ """Open lzma compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if mode not in ("r", "w"):
+ raise ValueError("mode must be 'r' or 'w'")
+
+ try:
+ import lzma
+ except ImportError:
+ raise CompressionError("lzma module is not available")
+
+ if mode == "r":
+ # LZMAFile complains about a preset argument in read mode.
+ preset = None
+
+ fileobj = lzma.LZMAFile(filename=name if fileobj is None else None,
+ mode=mode, fileobj=fileobj, preset=preset)
+
+ try:
+ t = cls.taropen(name, mode, fileobj, **kwargs)
+ except (lzma.LZMAError, EOFError):
+ fileobj.close()
+ raise ReadError("not an lzma file")
+ t._extfileobj = False
+ return t
+
 # All *open() methods are registered here.
 OPEN_METH = {
 "tar": "taropen", # uncompressed tar
 "gz": "gzopen", # gzip compressed tar
- "bz2": "bz2open" # bzip2 compressed tar
+ "bz2": "bz2open", # bzip2 compressed tar
+ "xz": "xzopen" # lzma compressed tar
 }
 
 #--------------------------------------------------------------------------
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -47,7 +47,7 @@
 
 def checkWholeText(self, node, s):
 t = node.wholeText
- self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t)))
+ self.confirm(t == s, "looking for %r, found %r" % (s, t))
 
 def testParseFromFile(self):
 with open(tstfile) as file:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -21,6 +21,10 @@
 import bz2
 except ImportError:
 bz2 = None
+try:
+ import lzma
+except ImportError:
+ lzma = None
 
 def md5sum(data):
 return md5(data).hexdigest()
@@ -29,6 +33,7 @@
 tarname = support.findfile("testtar.tar")
 gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
 bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
+xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
 tmpname = os.path.join(TEMPDIR, "tmp.tar")
 
 md5_regtype = "65f477c818ad9e15f7feab0c6d37742f"
@@ -201,13 +206,15 @@
 _open = gzip.GzipFile
 elif self.mode.endswith(":bz2"):
 _open = bz2.BZ2File
+ elif self.mode.endswith(":xz"):
+ _open = lzma.LZMAFile
 else:
- _open = open
+ _open = io.FileIO
 
 for char in (b'\0', b'a'):
 # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
 # are ignored correctly.
- with _open(tmpname, "wb") as fobj:
+ with _open(tmpname, "w") as fobj:
 fobj.write(char * 1024)
 fobj.write(tarfile.TarInfo("foo").tobuf())
 
@@ -222,9 +229,10 @@
 class MiscReadTest(CommonReadTest):
 
 def test_no_name_argument(self):
- if self.mode.endswith("bz2"):
- # BZ2File has no name attribute.
- return
+ if self.mode.endswith(("bz2", "xz")):
+ # BZ2File and LZMAFile have no name attribute.
+ self.skipTest("no name attribute")
+
 with open(self.tarname, "rb") as fobj:
 tar = tarfile.open(fileobj=fobj, mode=self.mode)
 self.assertEqual(tar.name, os.path.abspath(fobj.name))
@@ -265,10 +273,12 @@
 _open = gzip.GzipFile
 elif self.mode.endswith(":bz2"):
 _open = bz2.BZ2File
+ elif self.mode.endswith(":xz"):
+ _open = lzma.LZMAFile
 else:
- _open = open
- fobj = _open(self.tarname, "rb")
- try:
+ _open = io.FileIO
+
+ with _open(self.tarname) as fobj:
 fobj.seek(offset)
 
 # Test if the tarfile starts with the second member.
@@ -281,8 +291,6 @@
 self.assertEqual(tar.extractfile(t).read(), data,
 "seek back did not work")
 tar.close()
- finally:
- fobj.close()
 
 def test_fail_comp(self):
 # For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
@@ -526,6 +534,18 @@
 testfunc(bz2name, "r|*")
 testfunc(bz2name, "r|bz2")
 
+ if lzma:
+ self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r:xz")
+ self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r|xz")
+ self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r:")
+ self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r|")
+
+ testfunc(xzname, "r")
+ testfunc(xzname, "r:*")
+ testfunc(xzname, "r:xz")
+ testfunc(xzname, "r|*")
+ testfunc(xzname, "r|xz")
+
 def test_detect_file(self):
 self._test_modes(self._testfunc_file)
 
@@ -1096,6 +1116,9 @@
 data = dec.decompress(data)
 self.assertTrue(len(dec.unused_data) == 0,
 "found trailing data")
+ elif self.mode.endswith("xz"):
+ with lzma.LZMAFile(tmpname) as fobj:
+ data = fobj.read()
 else:
 with open(tmpname, "rb") as fobj:
 data = fobj.read()
@@ -1510,6 +1533,12 @@
 self._create_testtar("w:bz2")
 self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
 
+ def test_append_lzma(self):
+ if lzma is None:
+ self.skipTest("lzma module not available")
+ self._create_testtar("w:xz")
+ self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
+
 # Append mode is supposed to fail if the tarfile to append to
 # does not end with a zero block.
 def _test_error(self, data):
@@ -1788,6 +1817,21 @@
 self._test_partial_input("r:bz2")
 
 
+class LzmaMiscReadTest(MiscReadTest):
+ tarname = xzname
+ mode = "r:xz"
+class LzmaUstarReadTest(UstarReadTest):
+ tarname = xzname
+ mode = "r:xz"
+class LzmaStreamReadTest(StreamReadTest):
+ tarname = xzname
+ mode = "r|xz"
+class LzmaWriteTest(WriteTest):
+ mode = "w:xz"
+class LzmaStreamWriteTest(StreamWriteTest):
+ mode = "w|xz"
+
+
 def test_main():
 support.unlink(TEMPDIR)
 os.makedirs(TEMPDIR)
@@ -1850,6 +1894,20 @@
 Bz2PartialReadTest,
 ]
 
+ if lzma:
+ # Create testtar.tar.xz and add lzma-specific tests.
+ support.unlink(xzname)
+ with lzma.LZMAFile(xzname, "w") as tar:
+ tar.write(data)
+
+ tests += [
+ LzmaMiscReadTest,
+ LzmaUstarReadTest,
+ LzmaStreamReadTest,
+ LzmaWriteTest,
+ LzmaStreamWriteTest,
+ ]
+
 try:
 support.run_unittest(*tests)
 finally:
diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py
--- a/Lib/xml/dom/pulldom.py
+++ b/Lib/xml/dom/pulldom.py
@@ -1,6 +1,5 @@
 import xml.sax
 import xml.sax.handler
-import types
 
 START_ELEMENT = "START_ELEMENT"
 END_ELEMENT = "END_ELEMENT"
@@ -334,10 +333,7 @@
 return DOMEventStream(stream, parser, bufsize)
 
 def parseString(string, parser=None):
- try:
- from io import StringIO
- except ImportError:
- from io import StringIO
+ from io import StringIO
 
 bufsize = len(string)
 buf = StringIO(string)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -408,6 +408,8 @@
 
 - Alias resource.error to OSError ala PEP 3151.
 
+- Issue #5689: Add support for lzma compression to the tarfile module.
+
 - Issue #13248: Turn 3.2's PendingDeprecationWarning into 3.3's
 DeprecationWarning. It covers 'cgi.escape', 'importlib.abc.PyLoader',
 'importlib.abc.PyPycLoader', 'nntplib.NNTP.xgtitle', 'nntplib.NNTP.xpath',
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /