Tue May 29 20:35:15 CEST 2007

Author: brett.cannon
Date: Tue May 29 20:35:08 2007
New Revision: 55647
Modified:
 python/branches/bcannon-objcap/ (props changed)
 python/branches/bcannon-objcap/Doc/howto/functional.rst
 python/branches/bcannon-objcap/Doc/lib/libcodecs.tex
 python/branches/bcannon-objcap/Doc/lib/libitertools.tex
 python/branches/bcannon-objcap/Doc/lib/liblogging.tex
 python/branches/bcannon-objcap/Doc/lib/libsubprocess.tex
 python/branches/bcannon-objcap/Doc/lib/libtarfile.tex
 python/branches/bcannon-objcap/Lib/logging/handlers.py
 python/branches/bcannon-objcap/Lib/subprocess.py
 python/branches/bcannon-objcap/Lib/tarfile.py
 python/branches/bcannon-objcap/Lib/test/test_subprocess.py
 python/branches/bcannon-objcap/Lib/test/test_tarfile.py
 python/branches/bcannon-objcap/Lib/test/test_urllib.py
 python/branches/bcannon-objcap/Lib/test/testtar.tar
 python/branches/bcannon-objcap/Misc/NEWS
 python/branches/bcannon-objcap/Misc/cheatsheet
 python/branches/bcannon-objcap/Objects/funcobject.c
 python/branches/bcannon-objcap/PC/WinMain.c
 python/branches/bcannon-objcap/PC/_winreg.c
 python/branches/bcannon-objcap/PC/dl_nt.c
 python/branches/bcannon-objcap/PC/winsound.c
 python/branches/bcannon-objcap/PCbuild8/pythoncore/pythoncore.vcproj
 python/branches/bcannon-objcap/Python/dynload_win.c
Log:
Merged revisions 55564-55646 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk
Modified: python/branches/bcannon-objcap/Doc/howto/functional.rst
==============================================================================

--- python/branches/bcannon-objcap/Doc/howto/functional.rst	(original)
+++ python/branches/bcannon-objcap/Doc/howto/functional.rst	Tue May 29 20:35:08 2007
@@ -978,7 +978,7 @@
 that's a slice of the iterator. With a single ``stop`` argument, 
 it will return the first ``stop``
 elements. If you supply a starting index, you'll get ``stop-start``
-elements, and if you supply a value for ``step`, elements will be
+elements, and if you supply a value for ``step``, elements will be
 skipped accordingly. Unlike Python's string and list slicing, you
 can't use negative values for ``start``, ``stop``, or ``step``.
 
Modified: python/branches/bcannon-objcap/Doc/lib/libcodecs.tex
==============================================================================
--- python/branches/bcannon-objcap/Doc/lib/libcodecs.tex	(original)
+++ python/branches/bcannon-objcap/Doc/lib/libcodecs.tex	Tue May 29 20:35:08 2007
@@ -237,7 +237,7 @@
 \begin{funcdesc}{iterdecode}{iterable, encoding\optional{, errors}}
 Uses an incremental decoder to iteratively decode the input provided by
 \var{iterable}. This function is a generator. \var{errors} (as well as
-any other keyword argument) is passed through to the incremental encoder.
+any other keyword argument) is passed through to the incremental decoder.
 \versionadded{2.5}
 \end{funcdesc}
 
Modified: python/branches/bcannon-objcap/Doc/lib/libitertools.tex
==============================================================================
--- python/branches/bcannon-objcap/Doc/lib/libitertools.tex	(original)
+++ python/branches/bcannon-objcap/Doc/lib/libitertools.tex	Tue May 29 20:35:08 2007
@@ -138,6 +138,13 @@
 identity function and returns the element unchanged. Generally, the
 iterable needs to already be sorted on the same key function.
 
+ The operation of \function{groupby()} is similar to the \code{uniq} filter
+ in \UNIX{}. It generates a break or new group every time the value
+ of the key function changes (which is why it is usually necessary
+ to have sorted the data using the same key function). That behavior
+ differs from SQL's GROUP BY, which aggregates common elements regardless
+ of their input order.
+
 The returned group is itself an iterator that shares the underlying
 iterable with \function{groupby()}. Because the source is shared, when
 the \function{groupby} object is advanced, the previous group is no
@@ -147,6 +154,7 @@
 \begin{verbatim}
 groups = []
 uniquekeys = []
+ data = sorted(data, key=keyfunc)
 for k, g in groupby(data, keyfunc):
 groups.append(list(g)) # Store group iterator as a list
 uniquekeys.append(k)
Modified: python/branches/bcannon-objcap/Doc/lib/liblogging.tex
==============================================================================
--- python/branches/bcannon-objcap/Doc/lib/liblogging.tex	(original)
+++ python/branches/bcannon-objcap/Doc/lib/liblogging.tex	Tue May 29 20:35:08 2007
@@ -1208,8 +1208,11 @@
 communicate with a remote \UNIX{} machine whose address is given by
 \var{address} in the form of a \code{(\var{host}, \var{port})}
 tuple. If \var{address} is not specified, \code{('localhost', 514)} is
-used. The address is used to open a UDP socket. If \var{facility} is
-not specified, \constant{LOG_USER} is used.
+used. The address is used to open a UDP socket. An alternative to providing
+a \code{(\var{host}, \var{port})} tuple is providing an address as a string,
+for example "/dev/log". In this case, a Unix domain socket is used to send
+the message to the syslog. If \var{facility} is not specified,
+\constant{LOG_USER} is used.
 \end{classdesc}
 
 \begin{methoddesc}{close}{}
Modified: python/branches/bcannon-objcap/Doc/lib/libsubprocess.tex
==============================================================================
--- python/branches/bcannon-objcap/Doc/lib/libsubprocess.tex	(original)
+++ python/branches/bcannon-objcap/Doc/lib/libsubprocess.tex	Tue May 29 20:35:08 2007
@@ -89,7 +89,10 @@
 
 If \var{close_fds} is true, all file descriptors except \constant{0},
 \constant{1} and \constant{2} will be closed before the child process is
-executed. (\UNIX{} only)
+executed (\UNIX{} only). On Windows, if \var{close_fds} is true,
+then no handles will be inherited by the child process. Note that on
+Windows, you cannot set \var{close_fds} to true and also redirect the
+standard handles by setting \var{stdin}, \var{stdout} or \var{stderr}.
 
 If \var{shell} is \constant{True}, the specified command will be
 executed through the shell.
Modified: python/branches/bcannon-objcap/Doc/lib/libtarfile.tex
==============================================================================
--- python/branches/bcannon-objcap/Doc/lib/libtarfile.tex	(original)
+++ python/branches/bcannon-objcap/Doc/lib/libtarfile.tex	Tue May 29 20:35:08 2007
@@ -133,24 +133,20 @@
 \versionadded{2.6}
 \end{excdesc}
 
+Each of the following constants defines a tar archive format that the
+\module{tarfile} module is able to create. See section \ref{tar-formats} for
+details.
+
 \begin{datadesc}{USTAR_FORMAT}
- \POSIX{}.1-1988 (ustar) format. It supports filenames up to a length of
- at best 256 characters and linknames up to 100 characters. The maximum
- file size is 8 gigabytes. This is an old and limited but widely
- supported format.
+ \POSIX{}.1-1988 (ustar) format.
 \end{datadesc}
 
 \begin{datadesc}{GNU_FORMAT}
- GNU tar format. It supports arbitrarily long filenames and linknames and
- files bigger than 8 gigabytes. It is the defacto standard on GNU/Linux
- systems.
+ GNU tar format.
 \end{datadesc}
 
 \begin{datadesc}{PAX_FORMAT}
- \POSIX{}.1-2001 (pax) format. It is the most flexible format with
- virtually no limits. It supports long filenames and linknames, large files
- and stores pathnames in a portable way. However, not all tar
- implementations today are able to handle pax archives properly.
+ \POSIX{}.1-2001 (pax) format.
 \end{datadesc}
 
 \begin{datadesc}{DEFAULT_FORMAT}
@@ -175,15 +171,15 @@
 
 The \class{TarFile} object provides an interface to a tar archive. A tar
 archive is a sequence of blocks. An archive member (a stored file) is made up
-of a header block followed by data blocks. It is possible, to store a file in a
+of a header block followed by data blocks. It is possible to store a file in a
 tar archive several times. Each archive member is represented by a
 \class{TarInfo} object, see \citetitle{TarInfo Objects} (section
 \ref{tarinfo-objects}) for details.
 
 \begin{classdesc}{TarFile}{name=None, mode='r', fileobj=None,
 format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False,
- ignore_zeros=False, encoding=None, pax_headers=None, debug=0,
- errorlevel=0}
+ ignore_zeros=False, encoding=None, errors=None, pax_headers=None,
+ debug=0, errorlevel=0}
 
 All following arguments are optional and can be accessed as instance
 attributes as well.
@@ -231,18 +227,14 @@
 If \code{2}, all \emph{non-fatal} errors are raised as \exception{TarError}
 exceptions as well.
 
- The \var{encoding} argument defines the local character encoding. It
- defaults to the value from \function{sys.getfilesystemencoding()} or if
- that is \code{None} to \code{"ascii"}. \var{encoding} is used only in
- connection with the pax format which stores text data in \emph{UTF-8}. If
- it is not set correctly, character conversion will fail with a
- \exception{UnicodeError}.
+ The \var{encoding} and \var{errors} arguments control the way strings are
+ converted to unicode objects and vice versa. The default settings will work
+ for most users. See section \ref{tar-unicode} for in-depth information.
 \versionadded{2.6}
 
- The \var{pax_headers} argument must be a dictionary whose elements are
- either unicode objects, numbers or strings that can be decoded to unicode
- using \var{encoding}. This information will be added to the archive as a
- pax global header.
+ The \var{pax_headers} argument is an optional dictionary of unicode strings
+ which will be added as a pax global header if \var{format} is
+ \constant{PAX_FORMAT}.
 \versionadded{2.6}
 \end{classdesc}
 
@@ -287,7 +279,7 @@
 Extract all members from the archive to the current working directory
 or directory \var{path}. If optional \var{members} is given, it must be
 a subset of the list returned by \method{getmembers()}.
- Directory informations like owner, modification time and permissions are
+ Directory information like owner, modification time and permissions are
 set after all members have been extracted. This is done to work around two
 problems: A directory's modification time is reset each time a file is
 created in it. And, if a directory's permissions do not allow writing,
@@ -365,6 +357,11 @@
 \deprecated{2.6}{Use the \member{format} attribute instead.}
 \end{memberdesc}
 
+\begin{memberdesc}{pax_headers}
+ A dictionary containing key-value pairs of pax global headers.
+ \versionadded{2.6}
+\end{memberdesc}
+
 %-----------------
 % TarInfo Objects
 %-----------------
@@ -384,8 +381,8 @@
 Create a \class{TarInfo} object.
 \end{classdesc}
 
-\begin{methoddesc}{frombuf}{}
- Create and return a \class{TarInfo} object from a string buffer.
+\begin{methoddesc}{frombuf}{buf}
+ Create and return a \class{TarInfo} object from string buffer \var{buf}.
 \versionadded[Raises \exception{HeaderError} if the buffer is
 invalid.]{2.6}
 \end{methoddesc}
@@ -396,10 +393,11 @@
 \versionadded{2.6}
 \end{methoddesc}
 
-\begin{methoddesc}{tobuf}{\optional{format}}
- Create a string buffer from a \class{TarInfo} object. See
- \class{TarFile}'s \member{format} argument for information.
- \versionchanged[The \var{format} parameter]{2.6}
+\begin{methoddesc}{tobuf}{\optional{format\optional{, encoding
+ \optional{, errors}}}}
+ Create a string buffer from a \class{TarInfo} object. For information
+ on the arguments see the constructor of the \class{TarFile} class.
+ \versionchanged[The arguments were added]{2.6}
 \end{methoddesc}
 
 A \code{TarInfo} object has the following public data attributes:
@@ -452,6 +450,12 @@
 Group name.
 \end{memberdesc}
 
+\begin{memberdesc}{pax_headers}
+ A dictionary containing key-value pairs of an associated pax
+ extended header.
+ \versionadded{2.6}
+\end{memberdesc}
+
 A \class{TarInfo} object also provides some convenient query methods:
 
 \begin{methoddesc}{isfile}{}
@@ -554,3 +558,103 @@
 tar.extract(tarinfo)
 tar.close()
 \end{verbatim}
+
+%------------
+% Tar format
+%------------
+
+\subsection{Supported tar formats \label{tar-formats}}
+
+There are three tar formats that can be created with the \module{tarfile}
+module:
+
+\begin{itemize}
+
+\item
+The \POSIX{}.1-1988 ustar format (\constant{USTAR_FORMAT}). It supports
+filenames up to a length of at best 256 characters and linknames up to 100
+characters. The maximum file size is 8 gigabytes. This is an old and limited
+but widely supported format.
+
+\item
+The GNU tar format (\constant{GNU_FORMAT}). It supports long filenames and
+linknames, files bigger than 8 gigabytes and sparse files. It is the de facto
+standard on GNU/Linux systems. \module{tarfile} fully supports the GNU tar
+extensions for long names; sparse file support is read-only.
+
+\item
+The \POSIX{}.1-2001 pax format (\constant{PAX_FORMAT}). It is the most
+flexible format with virtually no limits. It supports long filenames and
+linknames, large files and stores pathnames in a portable way. However, not
+all tar implementations today are able to handle pax archives properly.
+
+The \emph{pax} format is an extension to the existing \emph{ustar} format. It
+uses extra headers for information that cannot be stored otherwise. There are
+two flavours of pax headers: Extended headers only affect the subsequent file
+header; global headers are valid for the complete archive and affect all
+following files. All the data in a pax header is encoded in \emph{UTF-8} for
+portability reasons.
+
+\end{itemize}
+
+There are some more variants of the tar format which can be read, but not
+created:
+
+\begin{itemize}
+
+\item
+The ancient V7 format. This is the first tar format from \UNIX{} Seventh
+Edition, storing only regular files and directories. Names must not be longer
+than 100 characters, and there is no user/group name information. Some archives
+have miscalculated header checksums in case of fields with non-\ASCII{}
+characters.
+
+\item
+The SunOS tar extended format. This format is a variant of the \POSIX{}.1-2001
+pax format, but is not compatible.
+
+\end{itemize}
+
+%----------------
+% Unicode issues
+%----------------
+
+\subsection{Unicode issues \label{tar-unicode}}
+
+The tar format was originally conceived to make backups on tape drives with the
+main focus on preserving file system information. Nowadays tar archives are
+commonly used for file distribution and exchanging archives over networks. One
+problem of the original format (which all other formats are merely variants of)
+is that there is no concept of supporting different character encodings.
+For example, an ordinary tar archive created on a \emph{UTF-8} system cannot be
+read correctly on a \emph{Latin-1} system if it contains non-\ASCII{}
+characters. Names (i.e. filenames, linknames, user/group names) containing
+these characters will appear damaged. Unfortunately, there is no way to
+autodetect the encoding of an archive.
+
+The pax format was designed to solve this problem. It stores non-\ASCII{} names
+using the universal character encoding \emph{UTF-8}. When a pax archive is
+read, these \emph{UTF-8} names are converted to the encoding of the local
+file system.
+
+The details of unicode conversion are controlled by the \var{encoding} and
+\var{errors} keyword arguments of the \class{TarFile} class.
+
+The default value for \var{encoding} is the local character encoding. It is
+deduced from \function{sys.getfilesystemencoding()} and
+\function{sys.getdefaultencoding()}. In read mode, \var{encoding} is used
+exclusively to convert unicode names from a pax archive to strings in the local
+character encoding. In write mode, the use of \var{encoding} depends on the
+chosen archive format. In case of \constant{PAX_FORMAT}, input names that
+contain non-\ASCII{} characters need to be decoded before being stored as
+\emph{UTF-8} strings. The other formats do not make use of \var{encoding}
+unless unicode objects are used as input names. These are converted to
+8-bit character strings before they are added to the archive.
+
+The \var{errors} argument defines how characters that cannot be converted to
+or from \var{encoding} are treated. Possible values are listed in section
+\ref{codec-base-classes}. In read mode, there is an additional scheme
+\code{'utf-8'} which means that bad characters are replaced by their
+\emph{UTF-8} representation. This is the default scheme. In write mode the
+default value for \var{errors} is \code{'strict'} to ensure that name
+information is not altered unnoticed.
Modified: python/branches/bcannon-objcap/Lib/logging/handlers.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/logging/handlers.py	(original)
+++ python/branches/bcannon-objcap/Lib/logging/handlers.py	Tue May 29 20:35:08 2007
@@ -625,7 +625,8 @@
 """
 Initialize a handler.
 
- If address is specified as a string, UNIX socket is used.
+ If address is specified as a string, a UNIX socket is used. To log to a
+ local syslogd, "SysLogHandler(address="/dev/log")" can be used.
 If facility is not specified, LOG_USER is used.
 """
 logging.Handler.__init__(self)
Modified: python/branches/bcannon-objcap/Lib/subprocess.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/subprocess.py	(original)
+++ python/branches/bcannon-objcap/Lib/subprocess.py	Tue May 29 20:35:08 2007
@@ -545,9 +545,10 @@
 if preexec_fn is not None:
 raise ValueError("preexec_fn is not supported on Windows "
 "platforms")
- if close_fds:
+ if close_fds and (stdin is not None or stdout is not None or
+ stderr is not None):
 raise ValueError("close_fds is not supported on Windows "
- "platforms")
+ "platforms if you redirect stdin/stdout/stderr")
 else:
 # POSIX
 if startupinfo is not None:
@@ -804,9 +805,7 @@
 hp, ht, pid, tid = CreateProcess(executable, args,
 # no special security
 None, None,
- # must inherit handles to pass std
- # handles
- 1,
+ int(not close_fds),
 creationflags,
 env,
 cwd,
Modified: python/branches/bcannon-objcap/Lib/tarfile.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/tarfile.py	(original)
+++ python/branches/bcannon-objcap/Lib/tarfile.py	Tue May 29 20:35:08 2007
@@ -52,7 +52,6 @@
 import copy
 import re
 
-builtin_open = open
 
 if sys.platform == 'mac':
 # This module needs work for MacOS9, especially in the area of pathname
@@ -127,6 +126,17 @@
 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
 "uid", "gid", "uname", "gname")
 
+# Fields in a pax header that are numbers, all other fields
+# are treated as strings.
+PAX_NUMBER_FIELDS = {
+ "atime": float,
+ "ctime": float,
+ "mtime": float,
+ "uid": int,
+ "gid": int,
+ "size": int
+}
+
 #---------------------------------------------------------
 # Bits used in the mode field, values in octal.
 #---------------------------------------------------------
@@ -156,7 +166,7 @@
 #---------------------------------------------------------
 ENCODING = sys.getfilesystemencoding()
 if ENCODING is None:
- ENCODING = "ascii"
+ ENCODING = sys.getdefaultencoding()
 
 #---------------------------------------------------------
 # Some useful functions
@@ -220,6 +230,26 @@
 s = chr(0200) + s
 return s
 
+def uts(s, encoding, errors):
+ """Convert a unicode object to a string.
+ """
+ if errors == "utf-8":
+ # An extra error handler similar to the -o invalid=UTF-8 option
+ # in POSIX.1-2001. Replace untranslatable characters with their
+ # UTF-8 representation.
+ try:
+ return s.encode(encoding, "strict")
+ except UnicodeEncodeError:
+ x = []
+ for c in s:
+ try:
+ x.append(c.encode(encoding, "strict"))
+ except UnicodeEncodeError:
+ x.append(c.encode("utf8"))
+ return "".join(x)
+ else:
+ return s.encode(encoding, errors)
+
 def calc_chksums(buf):
 """Calculate the checksum for a member's header by summing up all
 characters except for the chksum field which is treated as if
@@ -924,7 +954,7 @@
 def __repr__(self):
 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 
- def get_info(self):
+ def get_info(self, encoding, errors):
 """Return the TarInfo's attributes as a dictionary.
 """
 info = {
@@ -946,24 +976,29 @@
 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 info["name"] += "/"
 
+ for key in ("name", "linkname", "uname", "gname"):
+ if type(info[key]) is unicode:
+ info[key] = info[key].encode(encoding, errors)
+
 return info
 
- def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING):
+ def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 """Return a tar header as a string of 512 byte blocks.
 """
+ info = self.get_info(encoding, errors)
+
 if format == USTAR_FORMAT:
- return self.create_ustar_header()
+ return self.create_ustar_header(info)
 elif format == GNU_FORMAT:
- return self.create_gnu_header()
+ return self.create_gnu_header(info)
 elif format == PAX_FORMAT:
- return self.create_pax_header(encoding)
+ return self.create_pax_header(info, encoding, errors)
 else:
 raise ValueError("invalid format")
 
- def create_ustar_header(self):
+ def create_ustar_header(self, info):
 """Return the object as a ustar header block.
 """
- info = self.get_info()
 info["magic"] = POSIX_MAGIC
 
 if len(info["linkname"]) > LENGTH_LINK:
@@ -974,10 +1009,9 @@
 
 return self._create_header(info, USTAR_FORMAT)
 
- def create_gnu_header(self):
+ def create_gnu_header(self, info):
 """Return the object as a GNU header block sequence.
 """
- info = self.get_info()
 info["magic"] = GNU_MAGIC
 
 buf = ""
@@ -989,12 +1023,11 @@
 
 return buf + self._create_header(info, GNU_FORMAT)
 
- def create_pax_header(self, encoding):
+ def create_pax_header(self, info, encoding, errors):
 """Return the object as a ustar header block. If it cannot be
 represented this way, prepend a pax extended header sequence
 with supplement information.
 """
- info = self.get_info()
 info["magic"] = POSIX_MAGIC
 pax_headers = self.pax_headers.copy()
 
@@ -1004,7 +1037,11 @@
 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
 ("uname", "uname", 32), ("gname", "gname", 32)):
 
- val = info[name].decode(encoding)
+ if hname in pax_headers:
+ # The pax header has priority.
+ continue
+
+ val = info[name].decode(encoding, errors)
 
 # Try to encode the string as ASCII.
 try:
@@ -1013,27 +1050,23 @@
 pax_headers[hname] = val
 continue
 
- if len(val) > length:
- if name == "name":
- # Try to squeeze a longname in the prefix and name fields as in
- # ustar format.
- try:
- info["prefix"], info["name"] = self._posix_split_name(info["name"])
- except ValueError:
- pax_headers[hname] = val
- else:
- continue
- else:
- pax_headers[hname] = val
+ if len(info[name]) > length:
+ pax_headers[hname] = val
 
 # Test number fields for values that exceed the field limit or values
 # that like to be stored as float.
 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
+ if name in pax_headers:
+ # The pax header has priority. Avoid overflow.
+ info[name] = 0
+ continue
+
 val = info[name]
 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
 pax_headers[name] = unicode(val)
 info[name] = 0
 
+ # Create a pax extended header if necessary.
 if pax_headers:
 buf = self._create_pax_generic_header(pax_headers)
 else:
@@ -1042,26 +1075,10 @@
 return buf + self._create_header(info, USTAR_FORMAT)
 
 @classmethod
- def create_pax_global_header(cls, pax_headers, encoding):
+ def create_pax_global_header(cls, pax_headers):
 """Return the object as a pax global header block sequence.
 """
- new_headers = {}
- for key, val in pax_headers.iteritems():
- key = cls._to_unicode(key, encoding)
- val = cls._to_unicode(val, encoding)
- new_headers[key] = val
- return cls._create_pax_generic_header(new_headers, type=XGLTYPE)
-
- @staticmethod
- def _to_unicode(value, encoding):
- if isinstance(value, unicode):
- return value
- elif isinstance(value, (int, long, float)):
- return unicode(value)
- elif isinstance(value, str):
- return unicode(value, encoding)
- else:
- raise ValueError("unable to convert to unicode: %r" % value)
+ return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
 
 def _posix_split_name(self, name):
 """Split a name longer than 100 chars into a prefix
@@ -1093,9 +1110,9 @@
 " ", # checksum field
 info.get("type", REGTYPE),
 stn(info.get("linkname", ""), 100),
- stn(info.get("magic", ""), 8),
- stn(info.get("uname", ""), 32),
- stn(info.get("gname", ""), 32),
+ stn(info.get("magic", POSIX_MAGIC), 8),
+ stn(info.get("uname", "root"), 32),
+ stn(info.get("gname", "root"), 32),
 itn(info.get("devmajor", 0), 8, format),
 itn(info.get("devminor", 0), 8, format),
 stn(info.get("prefix", ""), 155)
@@ -1256,12 +1273,9 @@
 offset += self._block(self.size)
 tarfile.offset = offset
 
- # Patch the TarInfo object with saved extended
+ # Patch the TarInfo object with saved global
 # header information.
- for keyword, value in tarfile.pax_headers.iteritems():
- if keyword in PAX_FIELDS:
- setattr(self, keyword, value)
- self.pax_headers[keyword] = value
+ self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
 
 return self
 
@@ -1272,18 +1286,17 @@
 buf = tarfile.fileobj.read(self._block(self.size))
 
 # Fetch the next header and process it.
- b = tarfile.fileobj.read(BLOCKSIZE)
- t = self.frombuf(b)
- t.offset = self.offset
- next = t._proc_member(tarfile)
+ next = self.fromtarfile(tarfile)
+ if next is None:
+ raise HeaderError("missing subsequent header")
 
 # Patch the TarInfo object from the next header with
 # the longname information.
 next.offset = self.offset
 if self.type == GNUTYPE_LONGNAME:
- next.name = buf.rstrip(NUL)
+ next.name = nts(buf)
 elif self.type == GNUTYPE_LONGLINK:
- next.linkname = buf.rstrip(NUL)
+ next.linkname = nts(buf)
 
 return next
 
@@ -1358,21 +1371,10 @@
 else:
 pax_headers = tarfile.pax_headers.copy()
 
- # Fields in POSIX.1-2001 that are numbers, all other fields
- # are treated as UTF-8 strings.
- type_mapping = {
- "atime": float,
- "ctime": float,
- "mtime": float,
- "uid": int,
- "gid": int,
- "size": int
- }
-
 # Parse pax header information. A record looks like that:
 # "%d %s=%s\n" % (length, keyword, value). length is the size
 # of the complete record including the length field itself and
- # the newline.
+ # the newline. keyword and value are both UTF-8 encoded strings.
 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
 pos = 0
 while True:
@@ -1385,35 +1387,55 @@
 value = buf[match.end(2) + 1:match.start(1) + length - 1]
 
 keyword = keyword.decode("utf8")
- keyword = keyword.encode(tarfile.encoding)
-
 value = value.decode("utf8")
- if keyword in type_mapping:
+
+ pax_headers[keyword] = value
+ pos += length
+
+ # Fetch the next header.
+ next = self.fromtarfile(tarfile)
+
+ if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
+ if next is None:
+ raise HeaderError("missing subsequent header")
+
+ # Patch the TarInfo object with the extended header info.
+ next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
+ next.offset = self.offset
+
+ if pax_headers.has_key("size"):
+ # If the extended header replaces the size field,
+ # we need to recalculate the offset where the next
+ # header starts.
+ offset = next.offset_data
+ if next.isreg() or next.type not in SUPPORTED_TYPES:
+ offset += next._block(next.size)
+ tarfile.offset = offset
+
+ return next
+
+ def _apply_pax_info(self, pax_headers, encoding, errors):
+ """Replace fields with supplemental information from a previous
+ pax extended or global header.
+ """
+ for keyword, value in pax_headers.iteritems():
+ if keyword not in PAX_FIELDS:
+ continue
+
+ if keyword == "path":
+ value = value.rstrip("/")
+
+ if keyword in PAX_NUMBER_FIELDS:
 try:
- value = type_mapping[keyword](value)
+ value = PAX_NUMBER_FIELDS[keyword](value)
 except ValueError:
 value = 0
 else:
- value = value.encode(tarfile.encoding)
-
- pax_headers[keyword] = value
- pos += length
+ value = uts(value, encoding, errors)
 
- # Fetch the next header that will be patched with the
- # supplement information from the pax header (extended
- # only).
- t = self.fromtarfile(tarfile)
-
- if self.type != XGLTYPE and t is not None:
- # Patch the TarInfo object from the next header with
- # the pax header's information.
- for keyword, value in pax_headers.items():
- if keyword in PAX_FIELDS:
- setattr(t, keyword, value)
- pax_headers[keyword] = value
- t.pax_headers = pax_headers.copy()
+ setattr(self, keyword, value)
 
- return t
+ self.pax_headers = pax_headers.copy()
 
 def _block(self, count):
 """Round up a byte count by BLOCKSIZE and return it,
@@ -1464,8 +1486,9 @@
 
 format = DEFAULT_FORMAT # The format to use when creating an archive.
 
- encoding = ENCODING # Transfer UTF-8 strings from POSIX.1-2001
- # headers to this encoding.
+ encoding = ENCODING # Encoding for 8-bit character strings.
+
+ errors = None # Error handler for unicode conversion.
 
 tarinfo = TarInfo # The default TarInfo class to use.
 
@@ -1473,7 +1496,7 @@
 
 def __init__(self, name=None, mode="r", fileobj=None, format=None,
 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
- pax_headers=None, debug=None, errorlevel=None):
+ errors=None, pax_headers=None, debug=None, errorlevel=None):
 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
 read from an existing archive, 'a' to append data to an existing
 file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1492,7 +1515,7 @@
 # Create nonexistent files in append mode.
 self.mode = "w"
 self._mode = "wb"
- fileobj = builtin_open(name, self._mode)
+ fileobj = bltn_open(name, self._mode)
 self._extfileobj = False
 else:
 if name is None and hasattr(fileobj, "name"):
@@ -1514,6 +1537,19 @@
 self.ignore_zeros = ignore_zeros
 if encoding is not None:
 self.encoding = encoding
+
+ if errors is not None:
+ self.errors = errors
+ elif mode == "r":
+ self.errors = "utf-8"
+ else:
+ self.errors = "strict"
+
+ if pax_headers is not None and self.format == PAX_FORMAT:
+ self.pax_headers = pax_headers
+ else:
+ self.pax_headers = {}
+
 if debug is not None:
 self.debug = debug
 if errorlevel is not None:
@@ -1526,7 +1562,6 @@
 self.offset = 0L # current position in the archive file
 self.inodes = {} # dictionary caching the inodes of
 # archive members already added
- self.pax_headers = {} # save contents of global pax headers
 
 if self.mode == "r":
 self.firstmember = None
@@ -1545,9 +1580,8 @@
 if self.mode in "aw":
 self._loaded = True
 
- if pax_headers:
- buf = self.tarinfo.create_pax_global_header(
- pax_headers.copy(), self.encoding)
+ if self.pax_headers:
+ buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
 self.fileobj.write(buf)
 self.offset += len(buf)
 
@@ -1669,7 +1703,7 @@
 raise CompressionError("gzip module is not available")
 
 if fileobj is None:
- fileobj = builtin_open(name, mode + "b")
+ fileobj = bltn_open(name, mode + "b")
 
 try:
 t = cls.taropen(name, mode,
@@ -1819,8 +1853,6 @@
 self.inodes[inode] = arcname
 elif stat.S_ISDIR(stmd):
 type = DIRTYPE
- if arcname[-1:] != "/":
- arcname += "/"
 elif stat.S_ISFIFO(stmd):
 type = FIFOTYPE
 elif stat.S_ISLNK(stmd):
@@ -1930,7 +1962,7 @@
 
 # Append the tar header and data to the archive.
 if tarinfo.isreg():
- f = builtin_open(name, "rb")
+ f = bltn_open(name, "rb")
 self.addfile(tarinfo, f)
 f.close()
 
@@ -1954,7 +1986,7 @@
 
 tarinfo = copy.copy(tarinfo)
 
- buf = tarinfo.tobuf(self.format, self.encoding)
+ buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
 self.fileobj.write(buf)
 self.offset += len(buf)
 
@@ -2141,7 +2173,7 @@
 """Make a file called targetpath.
 """
 source = self.extractfile(tarinfo)
- target = builtin_open(targetpath, "wb")
+ target = bltn_open(targetpath, "wb")
 copyfileobj(source, target)
 source.close()
 target.close()
@@ -2486,4 +2518,5 @@
 except TarError:
 return False
 
+bltn_open = open
 open = TarFile.open
Modified: python/branches/bcannon-objcap/Lib/test/test_subprocess.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/test/test_subprocess.py	(original)
+++ python/branches/bcannon-objcap/Lib/test/test_subprocess.py	Tue May 29 20:35:08 2007
@@ -617,8 +617,16 @@
 self.assertRaises(ValueError, subprocess.call,
 [sys.executable,
 "-c", "import sys; sys.exit(47)"],
+ stdout=subprocess.PIPE,
 close_fds=True)
 
+ def test_close_fds(self):
+ # close file descriptors
+ rc = subprocess.call([sys.executable, "-c",
+ "import sys; sys.exit(47)"],
+ close_fds=True)
+ self.assertEqual(rc, 47)
+
 def test_shell_sequence(self):
 # Run command through the shell (sequence)
 newenv = os.environ.copy()
Modified: python/branches/bcannon-objcap/Lib/test/test_tarfile.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/test/test_tarfile.py	(original)
+++ python/branches/bcannon-objcap/Lib/test/test_tarfile.py	Tue May 29 20:35:08 2007
@@ -1,4 +1,4 @@
-# encoding: iso8859-1
+# -*- coding: iso-8859-15 -*-
 
 import sys
 import os
@@ -372,9 +372,9 @@
 
 def test_read_longname(self):
 # Test reading of longname (bug #1471427).
- name = self.subdir + "/" + "123/" * 125 + "longname"
+ longname = self.subdir + "/" + "123/" * 125 + "longname"
 try:
- tarinfo = self.tar.getmember(name)
+ tarinfo = self.tar.getmember(longname)
 except KeyError:
 self.fail("longname not found")
 self.assert_(tarinfo.type != tarfile.DIRTYPE, "read longname as dirtype")
@@ -393,13 +393,24 @@
 tarinfo = self.tar.getmember(longname)
 offset = tarinfo.offset
 self.tar.fileobj.seek(offset)
- fobj = StringIO.StringIO(self.tar.fileobj.read(1536))
+ fobj = StringIO.StringIO(self.tar.fileobj.read(3 * 512))
 self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj)
 
+ def test_header_offset(self):
+ # Test if the start offset of the TarInfo object includes
+ # the preceding extended header.
+ longname = self.subdir + "/" + "123/" * 125 + "longname"
+ offset = self.tar.getmember(longname).offset
+ fobj = open(tarname)
+ fobj.seek(offset)
+ tarinfo = tarfile.TarInfo.frombuf(fobj.read(512))
+ self.assertEqual(tarinfo.type, self.longnametype)
+
 
 class GNUReadTest(LongnameTest):
 
 subdir = "gnu"
+ longnametype = tarfile.GNUTYPE_LONGNAME
 
 def test_sparse_file(self):
 tarinfo1 = self.tar.getmember("ustar/sparse")
@@ -410,26 +421,40 @@
 "sparse file extraction failed")
 
 
-class PaxReadTest(ReadTest):
+class PaxReadTest(LongnameTest):
 
 subdir = "pax"
+ longnametype = tarfile.XHDTYPE
 
- def test_pax_globheaders(self):
+ def test_pax_global_headers(self):
 tar = tarfile.open(tarname, encoding="iso8859-1")
+
 tarinfo = tar.getmember("pax/regtype1")
 self.assertEqual(tarinfo.uname, "foo")
 self.assertEqual(tarinfo.gname, "bar")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
 
 tarinfo = tar.getmember("pax/regtype2")
 self.assertEqual(tarinfo.uname, "")
 self.assertEqual(tarinfo.gname, "bar")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
 
 tarinfo = tar.getmember("pax/regtype3")
 self.assertEqual(tarinfo.uname, "tarfile")
 self.assertEqual(tarinfo.gname, "tarfile")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+
+ def test_pax_number_fields(self):
+ # All following number fields are read from the pax header.
+ tar = tarfile.open(tarname, encoding="iso8859-1")
+ tarinfo = tar.getmember("pax/regtype4")
+ self.assertEqual(tarinfo.size, 7011)
+ self.assertEqual(tarinfo.uid, 123)
+ self.assertEqual(tarinfo.gid, 123)
+ self.assertEqual(tarinfo.mtime, 1041808783.0)
+ self.assertEqual(type(tarinfo.mtime), float)
+ self.assertEqual(float(tarinfo.pax_headers["atime"]), 1041808783.0)
+ self.assertEqual(float(tarinfo.pax_headers["ctime"]), 1041808783.0)
 
 
 class WriteTest(unittest.TestCase):
@@ -700,68 +725,161 @@
 n = tar.getmembers()[0].name
 self.assert_(name == n, "PAX longname creation failed")
 
- def test_iso8859_15_filename(self):
- self._test_unicode_filename("iso8859-15")
+ def test_pax_global_header(self):
+ pax_headers = {
+ u"foo": u"bar",
+ u"uid": u"0",
+ u"mtime": u"1.23",
+ u"test": u"äöü",
+ u"äöü": u"test"}
+
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
+ pax_headers=pax_headers)
+ tar.addfile(tarfile.TarInfo("test"))
+ tar.close()
+
+ # Test if the global header was written correctly.
+ tar = tarfile.open(tmpname, encoding="iso8859-1")
+ self.assertEqual(tar.pax_headers, pax_headers)
+ self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)
+
+ # Test if all the fields are unicode.
+ for key, val in tar.pax_headers.iteritems():
+ self.assert_(type(key) is unicode)
+ self.assert_(type(val) is unicode)
+ if key in tarfile.PAX_NUMBER_FIELDS:
+ try:
+ tarfile.PAX_NUMBER_FIELDS[key](val)
+ except (TypeError, ValueError):
+ self.fail("unable to convert pax header field")
+
+ def test_pax_extended_header(self):
+ # The fields from the pax header have priority over the
+ # TarInfo.
+ pax_headers = {u"path": u"foo", u"uid": u"123"}
+
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
+ t = tarfile.TarInfo()
+ t.name = u"äöü" # non-ASCII
+ t.uid = 8**8 # too large
+ t.pax_headers = pax_headers
+ tar.addfile(t)
+ tar.close()
+
+ tar = tarfile.open(tmpname, encoding="iso8859-1")
+ t = tar.getmembers()[0]
+ self.assertEqual(t.pax_headers, pax_headers)
+ self.assertEqual(t.name, "foo")
+ self.assertEqual(t.uid, 123)
+
+
+class UstarUnicodeTest(unittest.TestCase):
+ # All *UnicodeTests FIXME
+
+ format = tarfile.USTAR_FORMAT
+
+ def test_iso8859_1_filename(self):
+ self._test_unicode_filename("iso8859-1")
+
+ def test_utf7_filename(self):
+ self._test_unicode_filename("utf7")
 
 def test_utf8_filename(self):
 self._test_unicode_filename("utf8")
 
- def test_utf16_filename(self):
- self._test_unicode_filename("utf16")
-
 def _test_unicode_filename(self, encoding):
- tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
- name = u"\u20ac".encode(encoding) # Euro sign
- tar.encoding = encoding
+ tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
+ name = u"äöü"
 tar.addfile(tarfile.TarInfo(name))
 tar.close()
 
 tar = tarfile.open(tmpname, encoding=encoding)
- self.assertEqual(tar.getmembers()[0].name, name)
+ self.assert_(type(tar.getnames()[0]) is not unicode)
+ self.assertEqual(tar.getmembers()[0].name, name.encode(encoding))
 tar.close()
 
 def test_unicode_filename_error(self):
- # The euro sign filename cannot be translated to iso8859-1 encoding.
- tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="utf8")
- name = u"\u20ac".encode("utf8") # Euro sign
- tar.addfile(tarfile.TarInfo(name))
+ tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
+ tarinfo = tarfile.TarInfo()
+
+ tarinfo.name = "äöü"
+ if self.format == tarfile.PAX_FORMAT:
+ self.assertRaises(UnicodeError, tar.addfile, tarinfo)
+ else:
+ tar.addfile(tarinfo)
+
+ tarinfo.name = u"äöü"
+ self.assertRaises(UnicodeError, tar.addfile, tarinfo)
+
+ tarinfo.name = "foo"
+ tarinfo.uname = u"äöü"
+ self.assertRaises(UnicodeError, tar.addfile, tarinfo)
+
+ def test_unicode_argument(self):
+ tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
+ for t in tar:
+ self.assert_(type(t.name) is str)
+ self.assert_(type(t.linkname) is str)
+ self.assert_(type(t.uname) is str)
+ self.assert_(type(t.gname) is str)
 tar.close()
 
- self.assertRaises(UnicodeError, tarfile.open, tmpname, encoding="iso8859-1")
+ def test_uname_unicode(self):
+ for name in (u"äöü", "äöü"):
+ t = tarfile.TarInfo("foo")
+ t.uname = name
+ t.gname = name
 
- def test_pax_headers(self):
- self._test_pax_headers({"foo": "bar", "uid": 0, "mtime": 1.23})
+ fobj = StringIO.StringIO()
+ tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1")
+ tar.addfile(t)
+ tar.close()
+ fobj.seek(0)
 
- self._test_pax_headers({"euro": u"\u20ac".encode("utf8")})
+ tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1")
+ t = tar.getmember("foo")
+ self.assertEqual(t.uname, "äöü")
+ self.assertEqual(t.gname, "äöü")
 
- self._test_pax_headers({"euro": u"\u20ac"},
- {"euro": u"\u20ac".encode("utf8")})
 
- self._test_pax_headers({u"\u20ac": "euro"},
- {u"\u20ac".encode("utf8"): "euro"})
+class GNUUnicodeTest(UstarUnicodeTest):
 
- def _test_pax_headers(self, pax_headers, cmp_headers=None):
- if cmp_headers is None:
- cmp_headers = pax_headers
+ format = tarfile.GNU_FORMAT
 
- tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
- pax_headers=pax_headers, encoding="utf8")
- tar.addfile(tarfile.TarInfo("test"))
- tar.close()
 
- tar = tarfile.open(tmpname, encoding="utf8")
- self.assertEqual(tar.pax_headers, cmp_headers)
+class PaxUnicodeTest(UstarUnicodeTest):
 
- def test_truncated_header(self):
- tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
- tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
- tar.addfile(tarinfo)
+ format = tarfile.PAX_FORMAT
+
+ def _create_unicode_name(self, name):
+ tar = tarfile.open(tmpname, "w", format=self.format)
+ t = tarfile.TarInfo()
+ t.pax_headers["path"] = name
+ tar.addfile(t)
 tar.close()
 
- # Simulate a premature EOF.
- open(tmpname, "rb+").truncate(1536)
- tar = tarfile.open(tmpname)
- self.assertEqual(tar.getmembers(), [])
+ def test_error_handlers(self):
+ # Test if the unicode error handlers work correctly for characters
+ # that cannot be expressed in a given encoding.
+ self._create_unicode_name(u"äöü")
+
+ for handler, name in (("utf-8", u"äöü".encode("utf8")),
+ ("replace", "???"), ("ignore", "")):
+ tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
+ errors=handler)
+ self.assertEqual(tar.getnames()[0], name)
+
+ self.assertRaises(UnicodeError, tarfile.open, tmpname,
+ encoding="ascii", errors="strict")
+
+ def test_error_handler_utf8(self):
+ # Create a pathname that has one component representable using
+ # iso8859-1 and the other only in iso8859-15.
+ self._create_unicode_name(u"äöü/¤")
+
+ tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
+ errors="utf-8")
+ self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8"))
 
 
 class AppendTest(unittest.TestCase):
@@ -836,63 +954,58 @@
 def test_ustar_limits(self):
 # 100 char name
 tarinfo = tarfile.TarInfo("0123456789" * 10)
- tarinfo.create_ustar_header()
+ tarinfo.tobuf(tarfile.USTAR_FORMAT)
 
 # 101 char name that cannot be stored
 tarinfo = tarfile.TarInfo("0123456789" * 10 + "0")
- self.assertRaises(ValueError, tarinfo.create_ustar_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
 
 # 256 char name with a slash at pos 156
 tarinfo = tarfile.TarInfo("123/" * 62 + "longname")
- tarinfo.create_ustar_header()
+ tarinfo.tobuf(tarfile.USTAR_FORMAT)
 
 # 256 char name that cannot be stored
 tarinfo = tarfile.TarInfo("1234567/" * 31 + "longname")
- self.assertRaises(ValueError, tarinfo.create_ustar_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
 
 # 512 char name
 tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
- self.assertRaises(ValueError, tarinfo.create_ustar_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
 
 # 512 char linkname
 tarinfo = tarfile.TarInfo("longlink")
 tarinfo.linkname = "123/" * 126 + "longname"
- self.assertRaises(ValueError, tarinfo.create_ustar_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
 
 # uid > 8 digits
 tarinfo = tarfile.TarInfo("name")
 tarinfo.uid = 010000000
- self.assertRaises(ValueError, tarinfo.create_ustar_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)
 
 def test_gnu_limits(self):
 tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
- tarinfo.create_gnu_header()
+ tarinfo.tobuf(tarfile.GNU_FORMAT)
 
 tarinfo = tarfile.TarInfo("longlink")
 tarinfo.linkname = "123/" * 126 + "longname"
- tarinfo.create_gnu_header()
+ tarinfo.tobuf(tarfile.GNU_FORMAT)
 
 # uid >= 256 ** 7
 tarinfo = tarfile.TarInfo("name")
 tarinfo.uid = 04000000000000000000L
- self.assertRaises(ValueError, tarinfo.create_gnu_header)
+ self.assertRaises(ValueError, tarinfo.tobuf, tarfile.GNU_FORMAT)
 
 def test_pax_limits(self):
- # A 256 char name that can be stored without an extended header.
- tarinfo = tarfile.TarInfo("123/" * 62 + "longname")
- self.assert_(len(tarinfo.create_pax_header("utf8")) == 512,
- "create_pax_header attached superfluous extended header")
-
 tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
- tarinfo.create_pax_header("utf8")
+ tarinfo.tobuf(tarfile.PAX_FORMAT)
 
 tarinfo = tarfile.TarInfo("longlink")
 tarinfo.linkname = "123/" * 126 + "longname"
- tarinfo.create_pax_header("utf8")
+ tarinfo.tobuf(tarfile.PAX_FORMAT)
 
 tarinfo = tarfile.TarInfo("name")
 tarinfo.uid = 04000000000000000000L
- tarinfo.create_pax_header("utf8")
+ tarinfo.tobuf(tarfile.PAX_FORMAT)
 
 
 class GzipMiscReadTest(MiscReadTest):
@@ -940,6 +1053,9 @@
 StreamWriteTest,
 GNUWriteTest,
 PaxWriteTest,
+ UstarUnicodeTest,
+ GNUUnicodeTest,
+ PaxUnicodeTest,
 AppendTest,
 LimitsTest,
 ]
Modified: python/branches/bcannon-objcap/Lib/test/test_urllib.py
==============================================================================
--- python/branches/bcannon-objcap/Lib/test/test_urllib.py	(original)
+++ python/branches/bcannon-objcap/Lib/test/test_urllib.py	Tue May 29 20:35:08 2007
@@ -545,64 +545,75 @@
 "url2pathname() failed; %s != %s" %
 (expect, result))
 
-def server(evt):
- serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- serv.settimeout(3)
- serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- serv.bind(("", 9091))
- serv.listen(5)
- try:
- conn, addr = serv.accept()
- except socket.timeout:
- pass
- else:
- conn.send("1 Hola mundo\n")
- conn.send("2 No more lines\n")
- conn.close()
- finally:
- serv.close()
- evt.set()
-
-class FTPWrapperTests(unittest.TestCase):
-
- def setUp(self):
- ftplib.FTP.port = 9091
- self.evt = threading.Event()
- threading.Thread(target=server, args=(self.evt,)).start()
- time.sleep(.1)
-
- def tearDown(self):
- self.evt.wait()
-
- def testBasic(self):
- # connects
- ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9091, [])
- ftp.ftp.sock.close()
-
- def testTimeoutDefault(self):
- # default
- ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9091, [])
- self.assertTrue(ftp.ftp.sock.gettimeout() is None)
- ftp.ftp.sock.close()
-
- def testTimeoutValue(self):
- # a value
- ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9091, [], timeout=30)
- self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
- ftp.ftp.sock.close()
-
-
- def testTimeoutNone(self):
- # None, having other default
- previous = socket.getdefaulttimeout()
- socket.setdefaulttimeout(30)
- try:
- ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9091, [], timeout=30)
- finally:
- socket.setdefaulttimeout(previous)
- self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
- ftp.ftp.close()
-
+# Just commented them out.
+# Can't really tell why they keep failing on Windows and SPARC.
+# Everywhere else they work OK, but on those machines they sometimes
+# fail in one of the tests, sometimes in another. I have a Linux box, and
+# the tests pass there.
+# If anybody has one of the problematic environments, please help!
+# . Facundo
+#
+# def server(evt):
+# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+# serv.settimeout(3)
+# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+# serv.bind(("", 9093))
+# serv.listen(5)
+# try:
+# conn, addr = serv.accept()
+# conn.send("1 Hola mundo\n")
+# cantdata = 0
+# while cantdata < 13:
+# data = conn.recv(13-cantdata)
+# cantdata += len(data)
+# time.sleep(.3)
+# conn.send("2 No more lines\n")
+# conn.close()
+# except socket.timeout:
+# pass
+# finally:
+# serv.close()
+# evt.set()
+#
+# class FTPWrapperTests(unittest.TestCase):
+#
+# def setUp(self):
+# ftplib.FTP.port = 9093
+# self.evt = threading.Event()
+# threading.Thread(target=server, args=(self.evt,)).start()
+# time.sleep(.1)
+#
+# def tearDown(self):
+# self.evt.wait()
+#
+# def testBasic(self):
+# # connects
+# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
+# ftp.ftp.sock.close()
+#
+# def testTimeoutDefault(self):
+# # default
+# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
+# self.assertTrue(ftp.ftp.sock.gettimeout() is None)
+# ftp.ftp.sock.close()
+#
+# def testTimeoutValue(self):
+# # a value
+# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [], timeout=30)
+# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
+# ftp.ftp.sock.close()
+#
+# def testTimeoutNone(self):
+# # None, having other default
+# previous = socket.getdefaulttimeout()
+# socket.setdefaulttimeout(30)
+# try:
+# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
+# finally:
+# socket.setdefaulttimeout(previous)
+# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
+# ftp.ftp.close()
+#
 
 
 
@@ -615,7 +626,7 @@
 UnquotingTests,
 urlencode_Tests,
 Pathname_Tests,
- FTPWrapperTests,
+ #FTPWrapperTests,
 )
 
 
Modified: python/branches/bcannon-objcap/Lib/test/testtar.tar
==============================================================================
Binary files. No diff available.
Modified: python/branches/bcannon-objcap/Misc/NEWS
==============================================================================
--- python/branches/bcannon-objcap/Misc/NEWS	(original)
+++ python/branches/bcannon-objcap/Misc/NEWS	Tue May 29 20:35:08 2007
@@ -12,6 +12,9 @@
 Core and builtins
 -----------------
 
+- Add new attribute names for function objects. All the func_* become
+ __*__ attributes. (Some already existed, e.g., __doc__ and __name__.)
+
 - Add -3 option to the interpreter to warn about features that are
 deprecated and will be changed/removed in Python 3.0.
 
@@ -217,6 +220,9 @@
 Library
 -------
 
+- tarfile.py: Improved unicode support. Unicode input names are now
+ officially supported. Added "errors" argument to the TarFile class.
+
 - urllib.ftpwrapper class now accepts an optional timeout.
 
 - shlex.split() now has an optional "posix" parameter.
Modified: python/branches/bcannon-objcap/Misc/cheatsheet
==============================================================================
--- python/branches/bcannon-objcap/Misc/cheatsheet	(original)
+++ python/branches/bcannon-objcap/Misc/cheatsheet	Tue May 29 20:35:08 2007
@@ -1370,7 +1370,7 @@
 setprofile(func) Sets a profile function for performance profiling.
 Info on exception currently being handled; this is atuple
 (exc_type, exc_value, exc_traceback).Warning: assigning the
-exc_info() traceback return value to a loca variable in a
+exc_info() traceback return value to a local variable in a
 function handling an exception will cause a circular
 reference.
 setdefaultencoding Change default Unicode encoding - defaults to 7-bit ASCII.
Modified: python/branches/bcannon-objcap/Objects/funcobject.c
==============================================================================
--- python/branches/bcannon-objcap/Objects/funcobject.c	(original)
+++ python/branches/bcannon-objcap/Objects/funcobject.c	Tue May 29 20:35:08 2007
@@ -161,10 +161,14 @@
 static PyMemberDef func_memberlist[] = {
 {"func_closure", T_OBJECT, OFF(func_closure),
 	 RESTRICTED|READONLY},
+ {"__closure__", T_OBJECT, OFF(func_closure),
+	 RESTRICTED|READONLY},
 {"func_doc", T_OBJECT, OFF(func_doc), WRITE_RESTRICTED},
 {"__doc__", T_OBJECT, OFF(func_doc), WRITE_RESTRICTED},
 {"func_globals", T_OBJECT, OFF(func_globals),
 	 RESTRICTED|READONLY},
+ {"__globals__", T_OBJECT, OFF(func_globals),
+	 RESTRICTED|READONLY},
 {"__module__", T_OBJECT, OFF(func_module), WRITE_RESTRICTED},
 {NULL} /* Sentinel */
 };
@@ -240,7 +244,7 @@
 	 * other than a code object. */
 	if (value == NULL || !PyCode_Check(value)) {
 		PyErr_SetString(PyExc_TypeError,
-				"func_code must be set to a code object");
+				"__code__ must be set to a code object");
 		return -1;
 	}
 	nfree = PyCode_GetNumFree((PyCodeObject *)value);
@@ -279,7 +283,7 @@
 	 * other than a string object. */
 	if (value == NULL || !PyString_Check(value)) {
 		PyErr_SetString(PyExc_TypeError,
-				"func_name must be set to a string object");
+				"__name__ must be set to a string object");
 		return -1;
 	}
 	tmp = op->func_name;
@@ -315,7 +319,7 @@
 		value = NULL;
 	if (value != NULL && !PyTuple_Check(value)) {
 		PyErr_SetString(PyExc_TypeError,
-				"func_defaults must be set to a tuple object");
+				"__defaults__ must be set to a tuple object");
 		return -1;
 	}
 	tmp = op->func_defaults;
@@ -327,8 +331,11 @@
 
 static PyGetSetDef func_getsetlist[] = {
 {"func_code", (getter)func_get_code, (setter)func_set_code},
+ {"__code__", (getter)func_get_code, (setter)func_set_code},
 {"func_defaults", (getter)func_get_defaults,
 	 (setter)func_set_defaults},
+ {"__defaults__", (getter)func_get_defaults,
+	 (setter)func_set_defaults},
 	{"func_dict", (getter)func_get_dict, (setter)func_set_dict},
 	{"__dict__", (getter)func_get_dict, (setter)func_set_dict},
 	{"func_name", (getter)func_get_name, (setter)func_set_name},
Modified: python/branches/bcannon-objcap/PC/WinMain.c
==============================================================================
--- python/branches/bcannon-objcap/PC/WinMain.c	(original)
+++ python/branches/bcannon-objcap/PC/WinMain.c	Tue May 29 20:35:08 2007
@@ -1,10 +1,10 @@
 /* Minimal main program -- everything is loaded from the library. */
 
+#include "Python.h"
+
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-#include "Python.h"
-
 int WINAPI WinMain(
 HINSTANCE hInstance, /* handle to current instance */
 HINSTANCE hPrevInstance, /* handle to previous instance */
Modified: python/branches/bcannon-objcap/PC/_winreg.c
==============================================================================
--- python/branches/bcannon-objcap/PC/_winreg.c	(original)
+++ python/branches/bcannon-objcap/PC/_winreg.c	Tue May 29 20:35:08 2007
@@ -12,10 +12,10 @@
 
 */
 
-#include "windows.h"
 #include "Python.h"
 #include "structmember.h"
 #include "malloc.h" /* for alloca */
+#include "windows.h"
 
 static BOOL PyHKEY_AsHKEY(PyObject *ob, HKEY *pRes, BOOL bNoneOK);
 static PyObject *PyHKEY_FromHKEY(HKEY h);
Modified: python/branches/bcannon-objcap/PC/dl_nt.c
==============================================================================
--- python/branches/bcannon-objcap/PC/dl_nt.c	(original)
+++ python/branches/bcannon-objcap/PC/dl_nt.c	Tue May 29 20:35:08 2007
@@ -7,11 +7,9 @@
 forgotten) from the programmer.
 
 */
-#include "windows.h"
 
-/* NT and Python share these */
-#include "pyconfig.h"
 #include "Python.h"
+#include "windows.h"
 
 char dllVersionBuffer[16] = ""; // a private buffer
 
Modified: python/branches/bcannon-objcap/PC/winsound.c
==============================================================================
--- python/branches/bcannon-objcap/PC/winsound.c	(original)
+++ python/branches/bcannon-objcap/PC/winsound.c	Tue May 29 20:35:08 2007
@@ -35,9 +35,9 @@
 winsound.PlaySound(None, 0)
 */
 
+#include <Python.h>
 #include <windows.h>
 #include <mmsystem.h>
-#include <Python.h>
 #ifdef HAVE_CONIO_H
 #include <conio.h>	/* port functions on Win9x */
 #endif
Modified: python/branches/bcannon-objcap/PCbuild8/pythoncore/pythoncore.vcproj
==============================================================================
--- python/branches/bcannon-objcap/PCbuild8/pythoncore/pythoncore.vcproj	(original)
+++ python/branches/bcannon-objcap/PCbuild8/pythoncore/pythoncore.vcproj	Tue May 29 20:35:08 2007
@@ -1518,10 +1518,6 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\Modules\rgbimgmodule.c"
-				>
-			</File>
-			<File
 				RelativePath="..\..\Modules\rotatingtree.c"
 				>
 			</File>
Modified: python/branches/bcannon-objcap/Python/dynload_win.c
==============================================================================
--- python/branches/bcannon-objcap/Python/dynload_win.c	(original)
+++ python/branches/bcannon-objcap/Python/dynload_win.c	Tue May 29 20:35:08 2007
@@ -1,7 +1,6 @@
 
 /* Support for dynamic loading of extension modules */
 
-#include <windows.h>
 #ifdef HAVE_DIRECT_H
 #include <direct.h>
 #endif
@@ -9,6 +8,7 @@
 
 #include "Python.h"
 #include "importdl.h"
+#include <windows.h>
 
 const struct filedescr _PyImport_DynLoadFiletab[] = {
 #ifdef _DEBUG