[Python-3000] Plea for help: python/branches/py3k-struni/Lib/tarfile.py

Paul Jimenez pj at place.org
Tue Aug 7 06:37:49 CEST 2007


 This evening I had a couple hours to spare and happened to read Guido's
plea for help near the beginning of it. I picked up a failing testcase
that no one had claimed and did what I could: it's not finished, but it
fixes approximately 75% of the errors in test_tarfile. I concentrated
on fixing problems that the testcase turned up; a pure inspection of
the source would turn up lots of things I missed, I'm sure. I hope it's
useful; it probably needs minor attention from me on what the Right Thing
to do is in the case of encoding and decoding: ascii? I had to do a
.decode('latin-1') to pass the umlaut-in-a-filename test, but I'm not at
all sure that that's the true Right Thing. Anyway, here's a start; I'm
explicitly *not* claiming that I'll ever touch this source code again; I
don't want to block anyone else from working on it. Enjoy.
 --pj
Index: tarfile.py
===================================================================
--- tarfile.py	(revision 56785)
+++ tarfile.py	(working copy)
@@ -72,33 +72,33 @@
 #---------------------------------------------------------
 # tar constants
 #---------------------------------------------------------
-NUL = "\0" # the null character
+NUL = b"\0" # the null character
 BLOCKSIZE = 512 # length of processing blocks
 RECORDSIZE = BLOCKSIZE * 20 # length of records
-GNU_MAGIC = "ustar \0" # magic gnu tar string
-POSIX_MAGIC = "ustar\x0000" # magic posix tar string
+GNU_MAGIC = b"ustar \0" # magic gnu tar string
+POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
 
 LENGTH_NAME = 100 # maximum length of a filename
 LENGTH_LINK = 100 # maximum length of a linkname
 LENGTH_PREFIX = 155 # maximum length of the prefix field
 
-REGTYPE = "0" # regular file
-AREGTYPE = "\0" # regular file
-LNKTYPE = "1" # link (inside tarfile)
-SYMTYPE = "2" # symbolic link
-CHRTYPE = "3" # character special device
-BLKTYPE = "4" # block special device
-DIRTYPE = "5" # directory
-FIFOTYPE = "6" # fifo special device
-CONTTYPE = "7" # contiguous file
+REGTYPE = b"0" # regular file
+AREGTYPE = b"\0" # regular file
+LNKTYPE = b"1" # link (inside tarfile)
+SYMTYPE = b"2" # symbolic link
+CHRTYPE = b"3" # character special device
+BLKTYPE = b"4" # block special device
+DIRTYPE = b"5" # directory
+FIFOTYPE = b"6" # fifo special device
+CONTTYPE = b"7" # contiguous file
 
-GNUTYPE_LONGNAME = "L" # GNU tar longname
-GNUTYPE_LONGLINK = "K" # GNU tar longlink
-GNUTYPE_SPARSE = "S" # GNU tar sparse file
+GNUTYPE_LONGNAME = b"L" # GNU tar longname
+GNUTYPE_LONGLINK = b"K" # GNU tar longlink
+GNUTYPE_SPARSE = b"S" # GNU tar sparse file
 
-XHDTYPE = "x" # POSIX.1-2001 extended header
-XGLTYPE = "g" # POSIX.1-2001 global header
-SOLARIS_XHDTYPE = "X" # Solaris extended header
+XHDTYPE = b"x" # POSIX.1-2001 extended header
+XGLTYPE = b"g" # POSIX.1-2001 global header
+SOLARIS_XHDTYPE = b"X" # Solaris extended header
 
 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
 GNU_FORMAT = 1 # GNU tar format
@@ -176,6 +176,9 @@
 def stn(s, length):
 """Convert a python string to a null-terminated string buffer.
 """
+ #return s[:length].encode('ascii') + (length - len(s)) * NUL
+ if type(s) != type(b''):
+ s = s.encode('ascii')
 return s[:length] + (length - len(s)) * NUL
 
 def nts(s):
@@ -184,8 +187,8 @@
 # Use the string up to the first null char.
 p = s.find("\0")
 if p == -1:
- return s
- return s[:p]
+ return s.decode('latin-1')
+ return s[:p].decode('latin-1')
 
 def nti(s):
 """Convert a number field to a python number.
@@ -214,7 +217,7 @@
 # encoding, the following digits-1 bytes are a big-endian
 # representation. This allows values up to (256**(digits-1))-1.
 if 0 <= n < 8 ** (digits - 1):
- s = "%0*o" % (digits - 1, n) + NUL
+ s = ("%0*o" % (digits - 1, n)).encode('ascii') + NUL
 else:
 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 raise ValueError("overflow in number field")
@@ -412,7 +415,7 @@
 self.comptype = comptype
 self.fileobj = fileobj
 self.bufsize = bufsize
- self.buf = ""
+ self.buf = b""
 self.pos = 0
 self.closed = False
 
@@ -434,7 +437,7 @@
 except ImportError:
 raise CompressionError("bz2 module is not available")
 if mode == "r":
- self.dbuf = ""
+ self.dbuf = b""
 self.cmp = bz2.BZ2Decompressor()
 else:
 self.cmp = bz2.BZ2Compressor()
@@ -451,10 +454,10 @@
 self.zlib.DEF_MEM_LEVEL,
 0)
 timestamp = struct.pack("<L", int(time.time()))
- self.__write("\037\213\010\010%s\002\377" % timestamp)
+ self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
 if self.name.endswith(".gz"):
 self.name = self.name[:-3]
- self.__write(self.name + NUL)
+ self.__write(self.name.encode('ascii') + NUL)
 
 def write(self, s):
 """Write string s to the stream.
@@ -487,7 +490,7 @@
 
 if self.mode == "w" and self.buf:
 self.fileobj.write(self.buf)
- self.buf = ""
+ self.buf = b""
 if self.comptype == "gz":
 # The native zlib crc is an unsigned 32-bit integer, but
 # the Python wrapper implicitly casts that to a signed C
@@ -507,12 +510,12 @@
 """Initialize for reading a gzip compressed fileobj.
 """
 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
- self.dbuf = ""
+ self.dbuf = b""
 
 # taken from gzip.GzipFile with some alterations
- if self.__read(2) != "\037\213":
+ if self.__read(2) != b"\037\213":
 raise ReadError("not a gzip file")
- if self.__read(1) != "\010":
+ if self.__read(1) != b"\010":
 raise CompressionError("unsupported compression method")
 
 flag = ord(self.__read(1))
@@ -564,7 +567,7 @@
 if not buf:
 break
 t.append(buf)
- buf = "".join(t)
+ buf = b"".join(t)
 else:
 buf = self._read(size)
 self.pos += len(buf)
@@ -588,7 +591,7 @@
 raise ReadError("invalid compressed data")
 t.append(buf)
 c += len(buf)
- t = "".join(t)
+ t = b"".join(t)
 self.dbuf = t[size:]
 return t[:size]
 
@@ -604,7 +607,7 @@
 break
 t.append(buf)
 c += len(buf)
- t = "".join(t)
+ t = b"".join(t)
 self.buf = t[size:]
 return t[:size]
 # class _Stream
@@ -655,7 +658,7 @@
 if self.mode == "r":
 self.bz2obj = bz2.BZ2Decompressor()
 self.fileobj.seek(0)
- self.buf = ""
+ self.buf = b""
 else:
 self.bz2obj = bz2.BZ2Compressor()
 
@@ -670,7 +673,7 @@
 except EOFError:
 break
 x += len(data)
- self.buf = "".join(b)
+ self.buf = b"".join(b)
 
 buf = self.buf[:size]
 self.buf = self.buf[size:]
@@ -753,7 +756,7 @@
 break
 size -= len(buf)
 data.append(buf)
- return "".join(data)
+ return b"".join(data)
 
 def readsparsesection(self, size):
 """Read a single section of a sparse file.
@@ -761,7 +764,7 @@
 section = self.sparse.find(self.position)
 
 if section is None:
- return ""
+ return b""
 
 size = min(size, section.offset + section.size - self.position)
 
@@ -793,7 +796,7 @@
 self.size = tarinfo.size
 
 self.position = 0
- self.buffer = ""
+ self.buffer = b""
 
 def read(self, size=None):
 """Read at most size bytes from the file. If size is not
@@ -802,11 +805,11 @@
 if self.closed:
 raise ValueError("I/O operation on closed file")
 
- buf = ""
+ buf = b""
 if self.buffer:
 if size is None:
 buf = self.buffer
- self.buffer = ""
+ self.buffer = b""
 else:
 buf = self.buffer[:size]
 self.buffer = self.buffer[size:]
@@ -827,16 +830,16 @@
 if self.closed:
 raise ValueError("I/O operation on closed file")
 
- if "\n" in self.buffer:
- pos = self.buffer.find("\n") + 1
+ if b"\n" in self.buffer:
+ pos = self.buffer.find(b"\n") + 1
 else:
 buffers = [self.buffer]
 while True:
 buf = self.fileobj.read(self.blocksize)
 buffers.append(buf)
- if not buf or "\n" in buf:
- self.buffer = "".join(buffers)
- pos = self.buffer.find("\n") + 1
+ if not buf or b"\n" in buf:
+ self.buffer = b"".join(buffers)
+ pos = self.buffer.find(b"\n") + 1
 if pos == 0:
 # no newline found.
 pos = len(self.buffer)
@@ -848,7 +851,7 @@
 buf = self.buffer[:pos]
 self.buffer = self.buffer[pos:]
 self.position += len(buf)
- return buf
+ return buf.decode()
 
 def readlines(self):
 """Return a list with all remaining lines.
@@ -886,7 +889,7 @@
 else:
 raise ValueError("Invalid argument")
 
- self.buffer = ""
+ self.buffer = b""
 self.fileobj.seek(self.position)
 
 def close(self):
@@ -1015,7 +1018,7 @@
 """
 info["magic"] = GNU_MAGIC
 
- buf = ""
+ buf = b""
 if len(info["linkname"]) > LENGTH_LINK:
 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
 
@@ -1071,7 +1074,7 @@
 if pax_headers:
 buf = self._create_pax_generic_header(pax_headers)
 else:
- buf = ""
+ buf = b""
 
 return buf + self._create_header(info, USTAR_FORMAT)
 
@@ -1108,7 +1111,7 @@
 itn(info.get("gid", 0), 8, format),
 itn(info.get("size", 0), 12, format),
 itn(info.get("mtime", 0), 12, format),
- " ", # checksum field
+ b" ", # checksum field
 info.get("type", REGTYPE),
 stn(info.get("linkname", ""), 100),
 stn(info.get("magic", POSIX_MAGIC), 8),
@@ -1119,9 +1122,9 @@
 stn(info.get("prefix", ""), 155)
 ]
 
- buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
+ buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
- buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
+ buf = buf[:-364] + ("%06o\0" % chksum).encode('ascii') + buf[-357:]
 return buf
 
 @staticmethod
@@ -1139,10 +1142,10 @@
 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
 for name.
 """
- name += NUL
+ name = name.encode('ascii') + NUL
 
 info = {}
- info["name"] = "././@LongLink"
+ info["name"] = b"././@LongLink"
 info["type"] = type
 info["size"] = len(name)
 info["magic"] = GNU_MAGIC
@@ -1324,7 +1327,7 @@
 lastpos = offset + numbytes
 pos += 24
 
- isextended = ord(buf[482])
+ isextended = buf[482]
 origsize = nti(buf[483:495])
 
 # If the isextended flag is given,
@@ -1344,7 +1347,7 @@
 realpos += numbytes
 lastpos = offset + numbytes
 pos += 24
- isextended = ord(buf[504])
+ isextended = buf[504]
 
 if lastpos < origsize:
 sp.append(_hole(lastpos, origsize - lastpos))
Index: test/test_tarfile.py
===================================================================
--- test/test_tarfile.py	(revision 56784)
+++ test/test_tarfile.py	(working copy)
@@ -115,7 +115,7 @@
 fobj.seek(0, 2)
 self.assertEqual(tarinfo.size, fobj.tell(),
 "seek() to file's end failed")
- self.assert_(fobj.read() == "",
+ self.assert_(fobj.read() == b"",
 "read() at file's end did not return empty string")
 fobj.seek(-tarinfo.size, 2)
 self.assertEqual(0, fobj.tell(),


More information about the Python-3000 mailing list

AltStyle によって変換されたページ (->オリジナル) /