[Python-checkins] cpython: Issue #27199: TarFile expose copyfileobj bufsize to improve throughput

lukasz.langa python-checkins at python.org
Fri Sep 9 22:50:48 EDT 2016


https://hg.python.org/cpython/rev/0bac85e355b5
changeset: 103534:0bac85e355b5
user: Łukasz Langa <lukasz at langa.pl>
date: Fri Sep 09 19:48:14 2016 -0700
summary:
 Issue #27199: TarFile expose copyfileobj bufsize to improve throughput
Patch by Jason Fried.
files:
 Lib/tarfile.py | 33 ++++++++++++++++++---------------
 Misc/NEWS | 3 +++
 2 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -228,21 +228,21 @@
 signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
 return unsigned_chksum, signed_chksum
 
-def copyfileobj(src, dst, length=None, exception=OSError):
+def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
 """Copy length bytes from fileobj src to fileobj dst.
 If length is None, copy the entire content.
 """
+ bufsize = bufsize or 16 * 1024
 if length == 0:
 return
 if length is None:
- shutil.copyfileobj(src, dst)
+ shutil.copyfileobj(src, dst, bufsize)
 return
 
- BUFSIZE = 16 * 1024
- blocks, remainder = divmod(length, BUFSIZE)
+ blocks, remainder = divmod(length, bufsize)
 for b in range(blocks):
- buf = src.read(BUFSIZE)
- if len(buf) < BUFSIZE:
+ buf = src.read(bufsize)
+ if len(buf) < bufsize:
 raise exception("unexpected end of data")
 dst.write(buf)
 
@@ -1403,7 +1403,8 @@
 
 def __init__(self, name=None, mode="r", fileobj=None, format=None,
 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
- errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
+ errors="surrogateescape", pax_headers=None, debug=None,
+ errorlevel=None, copybufsize=None):
 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
 read from an existing archive, 'a' to append data to an existing
 file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1459,6 +1460,7 @@
 self.errorlevel = errorlevel
 
 # Init datastructures.
+ self.copybufsize = copybufsize
 self.closed = False
 self.members = [] # list of members as TarInfo objects
 self._loaded = False # flag if all members have been read
@@ -1558,7 +1560,7 @@
 saved_pos = fileobj.tell()
 try:
 return func(name, "r", fileobj, **kwargs)
- except (ReadError, CompressionError) as e:
+ except (ReadError, CompressionError):
 if fileobj is not None:
 fileobj.seek(saved_pos)
 continue
@@ -1963,10 +1965,10 @@
 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
 self.fileobj.write(buf)
 self.offset += len(buf)
-
+ bufsize=self.copybufsize
 # If there's data to follow, append it.
 if fileobj is not None:
- copyfileobj(fileobj, self.fileobj, tarinfo.size)
+ copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
 if remainder > 0:
 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
@@ -2148,15 +2150,16 @@
 """
 source = self.fileobj
 source.seek(tarinfo.offset_data)
+ bufsize = self.copybufsize
 with bltn_open(targetpath, "wb") as target:
 if tarinfo.sparse is not None:
 for offset, size in tarinfo.sparse:
 target.seek(offset)
- copyfileobj(source, target, size, ReadError)
+ copyfileobj(source, target, size, ReadError, bufsize)
 target.seek(tarinfo.size)
 target.truncate()
 else:
- copyfileobj(source, target, tarinfo.size, ReadError)
+ copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
 
 def makeunknown(self, tarinfo, targetpath):
 """Make a file from a TarInfo object with an unknown type
@@ -2235,7 +2238,7 @@
 os.lchown(targetpath, u, g)
 else:
 os.chown(targetpath, u, g)
- except OSError as e:
+ except OSError:
 raise ExtractError("could not change owner")
 
 def chmod(self, tarinfo, targetpath):
@@ -2244,7 +2247,7 @@
 if hasattr(os, 'chmod'):
 try:
 os.chmod(targetpath, tarinfo.mode)
- except OSError as e:
+ except OSError:
 raise ExtractError("could not change mode")
 
 def utime(self, tarinfo, targetpath):
@@ -2254,7 +2257,7 @@
 return
 try:
 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
- except OSError as e:
+ except OSError:
 raise ExtractError("could not change modification time")
 
 #--------------------------------------------------------------------------
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
+ Patch by Jason Fried.
+
 - Issue #27948: In f-strings, only allow backslashes inside the braces
 (where the expressions are). This is a breaking change from the 3.6
 alpha releases, where backslashes are allowed anywhere in an
-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list

Page converted by AltStyle (-> original) /