[Python-checkins] r84976 - in python/branches/py3k: Doc/library/gzip.rst Lib/gzip.py Lib/test/test_gzip.py Misc/ACKS Misc/NEWS

antoine.pitrou python-checkins at python.org
Thu Sep 23 18:22:57 CEST 2010


Author: antoine.pitrou
Date: Thu Sep 23 18:22:51 2010
New Revision: 84976
Log:
Issue #1675951: Allow GzipFile to work with unseekable file objects.
Patch by Florian Festi.
Modified:
 python/branches/py3k/Doc/library/gzip.rst
 python/branches/py3k/Lib/gzip.py
 python/branches/py3k/Lib/test/test_gzip.py
 python/branches/py3k/Misc/ACKS
 python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Doc/library/gzip.rst
==============================================================================
--- python/branches/py3k/Doc/library/gzip.rst	(original)
+++ python/branches/py3k/Doc/library/gzip.rst	Thu Sep 23 18:22:51 2010
@@ -74,6 +74,9 @@
 .. versionchanged:: 3.2
 Support for zero-padded files was added.
 
+ .. versionchanged:: 3.2
+ Support for unseekable files was added.
+
 
 .. function:: open(filename, mode='rb', compresslevel=9)
 
Modified: python/branches/py3k/Lib/gzip.py
==============================================================================
--- python/branches/py3k/Lib/gzip.py	(original)
+++ python/branches/py3k/Lib/gzip.py	Thu Sep 23 18:22:51 2010
@@ -45,6 +45,62 @@
 """
 return GzipFile(filename, mode, compresslevel)
 
+class _PaddedFile:
+ """Minimal read-only file object that prepends a string to the contents
+ of an actual file. Shouldn't be used outside of gzip.py, as it lacks
+ essential functionality."""
+
+ def __init__(self, f, prepend=b''):
+ self._buffer = prepend
+ self._length = len(prepend)
+ self.file = f
+ self._read = 0
+
+ def read(self, size):
+ if self._read is None:
+ return self.file.read(size)
+ if self._read + size <= self._length:
+ read = self._read
+ self._read += size
+ return self._buffer[read:self._read]
+ else:
+ read = self._read
+ self._read = None
+ return self._buffer[read:] + \
+ self.file.read(size-self._length+read)
+
+ def prepend(self, prepend=b'', readprevious=False):
+ if self._read is None:
+ self._buffer = prepend
+ elif readprevious and len(prepend) <= self._read:
+ self._read -= len(prepend)
+ return
+ else:
+ self._buffer = self._buffer[read:] + prepend
+ self._length = len(self._buffer)
+ self._read = 0
+
+ def unused(self):
+ if self._read is None:
+ return b''
+ return self._buffer[self._read:]
+
+ def seek(self, offset, whence=0):
+ # This is only ever called with offset=whence=0
+ if whence == 1 and self._read is not None:
+ if 0 <= offset + self._read <= self._length:
+ self._read += offset
+ return
+ else:
+ offset += self._length - self._read
+ self._read = None
+ self._buffer = None
+ return self.file.seek(offset, whence)
+
+ def __getattr__(self, name):
+ return getattr(name, self.file)
+
+
 class GzipFile(io.BufferedIOBase):
 """The GzipFile class simulates most of the methods of a file object with
 the exception of the readinto() and truncate() methods.
@@ -119,6 +175,7 @@
 self.name = filename
 # Starts small, scales exponentially
 self.min_readsize = 100
+ fileobj = _PaddedFile(fileobj)
 
 elif mode[0:1] == 'w' or mode[0:1] == 'a':
 self.mode = WRITE
@@ -188,6 +245,9 @@
 
 def _read_gzip_header(self):
 magic = self.fileobj.read(2)
+ if magic == b'':
+ raise EOFError("Reached EOF")
+
 if magic != b'037円213円':
 raise IOError('Not a gzipped file')
 method = ord( self.fileobj.read(1) )
@@ -219,6 +279,11 @@
 if flag & FHCRC:
 self.fileobj.read(2) # Read & discard the 16-bit header CRC
 
+ unused = self.fileobj.unused()
+ if unused:
+ uncompress = self.decompress.decompress(unused)
+ self._add_read_data(uncompress)
+
 def write(self,data):
 if self.mode != WRITE:
 import errno
@@ -282,16 +347,6 @@
 if self._new_member:
 # If the _new_member flag is set, we have to
 # jump to the next member, if there is one.
- #
- # First, check if we're at the end of the file;
- # if so, it's time to stop; no more members to read.
- pos = self.fileobj.tell() # Save current position
- self.fileobj.seek(0, 2) # Seek to end of file
- if pos == self.fileobj.tell():
- raise EOFError("Reached EOF")
- else:
- self.fileobj.seek( pos ) # Return to original position
-
 self._init_read()
 self._read_gzip_header()
 self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
@@ -305,6 +360,9 @@
 
 if buf == b"":
 uncompress = self.decompress.flush()
+ # Prepend the already read bytes to the fileobj to they can be
+ # seen by _read_eof()
+ self.fileobj.prepend(self.decompress.unused_data, True)
 self._read_eof()
 self._add_read_data( uncompress )
 raise EOFError('Reached EOF')
@@ -316,10 +374,9 @@
 # Ending case: we've come to the end of a member in the file,
 # so seek back to the start of the unused data, finish up
 # this member, and read a new gzip header.
- # (The number of bytes to seek back is the length of the unused
- # data, minus 8 because _read_eof() will rewind a further 8 bytes)
- self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
-
+ # Prepend the already read bytes to the fileobj to they can be
+ # seen by _read_eof() and _read_gzip_header()
+ self.fileobj.prepend(self.decompress.unused_data, True)
 # Check the CRC and file size, and set the flag so we read
 # a new member on the next call
 self._read_eof()
@@ -334,12 +391,10 @@
 self.size = self.size + len(data)
 
 def _read_eof(self):
- # We've read to the end of the file, so we have to rewind in order
- # to reread the 8 bytes containing the CRC and the file size.
+ # We've read to the end of the file
 # We check the that the computed CRC and size of the
 # uncompressed data matches the stored values. Note that the size
 # stored is the true file size mod 2**32.
- self.fileobj.seek(-8, 1)
 crc32 = read32(self.fileobj)
 isize = read32(self.fileobj) # may exceed 2GB
 if crc32 != self.crc:
@@ -355,7 +410,7 @@
 while c == b"\x00":
 c = self.fileobj.read(1)
 if c:
- self.fileobj.seek(-1, 1)
+ self.fileobj.prepend(c, True)
 
 @property
 def closed(self):
Modified: python/branches/py3k/Lib/test/test_gzip.py
==============================================================================
--- python/branches/py3k/Lib/test/test_gzip.py	(original)
+++ python/branches/py3k/Lib/test/test_gzip.py	Thu Sep 23 18:22:51 2010
@@ -22,6 +22,17 @@
 """
 
 
+class UnseekableIO(io.BytesIO):
+ def seekable(self):
+ return False
+
+ def tell(self):
+ raise io.UnsupportedOperation
+
+ def seek(self, *args):
+ raise io.UnsupportedOperation
+
+
 class TestGzip(unittest.TestCase):
 filename = support.TESTFN
 
@@ -265,6 +276,16 @@
 d = f.read()
 self.assertEqual(d, data1 * 50, "Incorrect data in file")
 
+ def test_non_seekable_file(self):
+ uncompressed = data1 * 50
+ buf = UnseekableIO()
+ with gzip.GzipFile(fileobj=buf, mode="wb") as f:
+ f.write(uncompressed)
+ compressed = buf.getvalue()
+ buf = UnseekableIO(compressed)
+ with gzip.GzipFile(fileobj=buf, mode="rb") as f:
+ self.assertEqual(f.read(), uncompressed)
+
 # Testing compress/decompress shortcut functions
 
 def test_compress(self):
Modified: python/branches/py3k/Misc/ACKS
==============================================================================
--- python/branches/py3k/Misc/ACKS	(original)
+++ python/branches/py3k/Misc/ACKS	Thu Sep 23 18:22:51 2010
@@ -260,6 +260,7 @@
 Mark Favas
 Niels Ferguson
 Sebastian Fernandez
+Florian Festi
 Vincent Fiack
 Tomer Filiba
 Jeffrey Finkelstein
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Thu Sep 23 18:22:51 2010
@@ -62,6 +62,9 @@
 Library
 -------
 
+- Issue #1675951: Allow GzipFile to work with unseekable file objects.
+ Patch by Florian Festi.
+
 - Logging: Added QueueListener class to facilitate logging usage for
 performance-critical threads.
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /