[Python-checkins] python/dist/src/Lib httplib.py,1.34.2.3,1.34.2.4 urlparse.py,1.29,1.29.4.1

Tue, 2 Jul 2002 13:42:52 -0700

Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv14132/Lib
Modified Files:
 Tag: release21-maint
	httplib.py urlparse.py 
Log Message:
Backport various bug fixes from trunk.
The 2.1 maintenance branch is now identical to the trunk through rev
1.54 of httplib.py.
Index: httplib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/httplib.py,v
retrieving revision 1.34.2.3
retrieving revision 1.34.2.4
diff -C2 -d -r1.34.2.3 -r1.34.2.4
*** httplib.py	9 Apr 2002 00:39:10 -0000	1.34.2.3
--- httplib.py	2 Jul 2002 20:42:50 -0000	1.34.2.4
***************
*** 67,72 ****
 """

! import socket
 import mimetools

 try:
--- 67,74 ----
 """

! import errno
 import mimetools
+ import socket
+ from urlparse import urlsplit

 try:
***************
*** 77,84 ****
 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
 "HTTPException", "NotConnected", "UnknownProtocol",
! "UnknownTransferEncoding", "IllegalKeywordArgument",
! "UnimplementedFileMode", "IncompleteRead",
! "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader",
! "ResponseNotReady", "BadStatusLine", "error"]

 HTTP_PORT = 80
--- 79,86 ----
 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
 "HTTPException", "NotConnected", "UnknownProtocol",
! "UnknownTransferEncoding", "UnimplementedFileMode",
! "IncompleteRead", "InvalidURL", "ImproperConnectionState",
! "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
! "BadStatusLine", "error"]

 HTTP_PORT = 80
***************
*** 110,118 ****
 self.will_close = _UNKNOWN # conn will close at end of response

! def begin(self):
! if self.msg is not None:
! # we've already started reading the response
! return
! 
 line = self.fp.readline()
 if self.debuglevel > 0:
--- 112,116 ----
 self.will_close = _UNKNOWN # conn will close at end of response

! def _read_status(self):
 line = self.fp.readline()
 if self.debuglevel > 0:
***************
*** 134,144 ****
 # The status code is a three-digit number
 try:
! self.status = status = int(status)
 if status < 100 or status > 999:
 raise BadStatusLine(line)
 except ValueError:
 raise BadStatusLine(line)
! self.reason = reason.strip()

 if version == 'HTTP/1.0':
 self.version = 10
--- 132,162 ----
 # The status code is a three-digit number
 try:
! status = int(status)
 if status < 100 or status > 999:
 raise BadStatusLine(line)
 except ValueError:
 raise BadStatusLine(line)
! return version, status, reason
! 
! def _begin(self):
! if self.msg is not None:
! # we've already started reading the response
! return

+ # read until we get a non-100 response
+ while 1:
+ version, status, reason = self._read_status()
+ if status != 100:
+ break
+ # skip the header from the 100 response
+ while 1:
+ skip = self.fp.readline().strip()
+ if not skip:
+ break
+ if self.debuglevel > 0:
+ print "header:", skip
+ 
+ self.status = status
+ self.reason = reason.strip()
 if version == 'HTTP/1.0':
 self.version = 10
***************
*** 151,154 ****
--- 169,173 ----

 if self.version == 9:
+ self.chunked = 0
 self.msg = mimetools.Message(StringIO())
 return
***************
*** 232,235 ****
--- 251,255 ----

 if self.chunked:
+ assert self.chunked != _UNKNOWN
 chunk_left = self.chunk_left
 value = ''
***************
*** 346,350 ****
 i = host.find(':')
 if i >= 0:
! port = int(host[i+1:])
 host = host[:i]
 else:
--- 366,373 ----
 i = host.find(':')
 if i >= 0:
! try:
! port = int(host[i+1:])
! except ValueError:
! raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 host = host[:i]
 else:
***************
*** 395,399 ****
 raise

! def putrequest(self, method, url):
 """Send a request to the server.

--- 418,422 ----
 raise

! def putrequest(self, method, url, skip_host=0):
 """Send a request to the server.

***************
*** 446,461 ****
 # Issue some standard headers for better HTTP/1.1 compliance

! # this header is issued *only* for HTTP/1.1 connections. more
! # specifically, this means it is only issued when the client uses
! # the new HTTPConnection() class. backwards-compat clients will
! # be using HTTP/1.0 and those clients may be issuing this header
! # themselves. we should NOT issue it twice; some web servers (such
! # as Apache) barf when they see two Host: headers

! # if we need a non-standard port,include it in the header
! if self.port == HTTP_PORT:
! self.putheader('Host', self.host)
! else:
! self.putheader('Host', "%s:%s" % (self.host, self.port))

 # note: we are assuming that clients will not attempt to set these
--- 469,497 ----
 # Issue some standard headers for better HTTP/1.1 compliance

! if not skip_host:
! # this header is issued *only* for HTTP/1.1
! # connections. more specifically, this means it is
! # only issued when the client uses the new
! # HTTPConnection() class. backwards-compat clients
! # will be using HTTP/1.0 and those clients may be
! # issuing this header themselves. we should NOT issue
! # it twice; some web servers (such as Apache) barf
! # when they see two Host: headers

! # If we need a non-standard port, include it in the
! # header. If the request is going through a proxy,
! # use the host of the actual URL, not the host of the
! # proxy.
! 
! netloc = ''
! if url.startswith('http'):
! nil, netloc, nil, nil, nil = urlsplit(url)
! 
! if netloc:
! self.putheader('Host', netloc)
! elif self.port == HTTP_PORT:
! self.putheader('Host', self.host)
! else:
! self.putheader('Host', "%s:%s" % (self.host, self.port))

 # note: we are assuming that clients will not attempt to set these
***************
*** 515,519 ****

 def _send_request(self, method, url, body, headers):
! self.putrequest(method, url)

 if body:
--- 551,562 ----

 def _send_request(self, method, url, body, headers):
! # If headers already contains a host header, then define the
! # optional skip_host argument to putrequest(). The check is
! # harder because field names are case insensitive.
! if 'Host' in (headers
! or [k for k in headers.iterkeys() if k.lower() == "host"]):
! self.putrequest(method, url, skip_host=1)
! else:
! self.putrequest(method, url)

 if body:
***************
*** 557,561 ****
 response = self.response_class(self.sock)

! response.begin()
 self.__state = _CS_IDLE

--- 600,605 ----
 response = self.response_class(self.sock)

! response._begin()
! assert response.will_close != _UNKNOWN
 self.__state = _CS_IDLE

***************
*** 569,572 ****
--- 613,693 ----
 return response

+ class SSLFile:
+ """File-like object wrapping an SSL socket."""
+ 
+ BUFSIZE = 8192
+ 
+ def __init__(self, sock, ssl, bufsize=None):
+ self._sock = sock
+ self._ssl = ssl
+ self._buf = ''
+ self._bufsize = bufsize or self.__class__.BUFSIZE
+ 
+ def _read(self):
+ buf = ''
+ # put in a loop so that we retry on transient errors
+ while 1:
+ try:
+ buf = self._ssl.read(self._bufsize)
+ except socket.sslerror, err:
+ if (err[0] == socket.SSL_ERROR_WANT_READ
+ or err[0] == socket.SSL_ERROR_WANT_WRITE):
+ continue
+ if (err[0] == socket.SSL_ERROR_ZERO_RETURN
+ or err[0] == socket.SSL_ERROR_EOF):
+ break
+ raise
+ except socket.error, err:
+ if err[0] == errno.EINTR:
+ continue
+ if err[0] == errno.EBADF:
+ # XXX socket was closed?
+ break
+ raise
+ else:
+ break
+ return buf
+ 
+ def read(self, size=None):
+ L = [self._buf]
+ avail = len(self._buf)
+ while size is None or avail < size:
+ s = self._read()
+ if s == '':
+ break
+ L.append(s)
+ avail += len(s)
+ all = "".join(L)
+ if size is None:
+ self._buf = ''
+ return all
+ else:
+ self._buf = all[size:]
+ return all[:size]
+ 
+ def readline(self):
+ L = [self._buf]
+ self._buf = ''
+ while 1:
+ i = L[-1].find("\n")
+ if i >= 0:
+ break
+ s = self._read()
+ if s == '':
+ break
+ L.append(s)
+ if i == -1:
+ # loop exited because there is no more data
+ return "".join(L)
+ else:
+ all = "".join(L)
+ # XXX could do enough bookkeeping not to do a 2nd search
+ i = all.find("\n") + 1
+ line = all[:i]
+ self._buf = all[i:]
+ return line
+ 
+ def close(self):
+ self._sock.close()

 class FakeSocket:
***************
*** 576,600 ****

 def makefile(self, mode, bufsize=None):
- """Return a readable file-like object with data from socket.
- 
- This method offers only partial support for the makefile
- interface of a real socket. It only supports modes 'r' and
- 'rb' and the bufsize argument is ignored.
- 
- The returned object contains *all* of the file data
- """
 if mode != 'r' and mode != 'rb':
 raise UnimplementedFileMode()
! 
! msgbuf = []
! while 1:
! try:
! buf = self.__ssl.read()
! except socket.sslerror, msg:
! break
! if buf == '':
! break
! msgbuf.append(buf)
! return StringIO("".join(msgbuf))

 def send(self, stuff, flags = 0):
--- 697,703 ----

 def makefile(self, mode, bufsize=None):
 if mode != 'r' and mode != 'rb':
 raise UnimplementedFileMode()
! return SSLFile(self.__sock, self.__ssl, bufsize)

 def send(self, stuff, flags = 0):
***************
*** 616,634 ****
 default_port = HTTPS_PORT

! def __init__(self, host, port=None, **x509):
! keys = x509.keys()
! try:
! keys.remove('key_file')
! except ValueError:
! pass
! try:
! keys.remove('cert_file')
! except ValueError:
! pass
! if keys:
! raise IllegalKeywordArgument()
 HTTPConnection.__init__(self, host, port)
! self.key_file = x509.get('key_file')
! self.cert_file = x509.get('cert_file')

 def connect(self):
--- 719,726 ----
 default_port = HTTPS_PORT

! def __init__(self, host, port=None, key_file=None, cert_file=None):
 HTTPConnection.__init__(self, host, port)
! self.key_file = key_file
! self.cert_file = cert_file

 def connect(self):
***************
*** 654,658 ****
 _connection_class = HTTPConnection

! def __init__(self, host='', port=None, **x509):
 "Provide a default host, since the superclass requires one."

--- 746,750 ----
 _connection_class = HTTPConnection

! def __init__(self, host='', port=None):
 "Provide a default host, since the superclass requires one."

***************
*** 664,679 ****
 # an error when we attempt to connect. Presumably, the client code
 # will call connect before then, with a proper host.
! self._conn = self._connection_class(host, port)
 # set up delegation to flesh out interface
! self.send = self._conn.send
! self.putrequest = self._conn.putrequest
! self.endheaders = self._conn.endheaders
! self._conn._http_vsn = self._http_vsn
! self._conn._http_vsn_str = self._http_vsn_str

! # we never actually use these for anything, but we keep them here for
! # compatibility with post-1.5.2 CVS.
! self.key_file = x509.get('key_file')
! self.cert_file = x509.get('cert_file')

 self.file = None
--- 756,772 ----
 # an error when we attempt to connect. Presumably, the client code
 # will call connect before then, with a proper host.
! self._setup(self._connection_class(host, port))
! 
! def _setup(self, conn):
! self._conn = conn
! 
 # set up delegation to flesh out interface
! self.send = conn.send
! self.putrequest = conn.putrequest
! self.endheaders = conn.endheaders
! self.set_debuglevel = conn.set_debuglevel

! conn._http_vsn = self._http_vsn
! conn._http_vsn_str = self._http_vsn_str

 self.file = None
***************
*** 686,692 ****
 self._conn.connect()

- def set_debuglevel(self, debuglevel):
- self._conn.set_debuglevel(debuglevel)
- 
 def getfile(self):
 "Provide a getfile, since the superclass' does not use this concept."
--- 779,782 ----
***************
*** 746,749 ****
--- 836,852 ----
 _connection_class = HTTPSConnection

+ def __init__(self, host='', port=None, **x509):
+ # provide a default host, pass the X509 cert info
+ 
+ # urf. compensate for bad input.
+ if port == 0:
+ port = None
+ self._setup(self._connection_class(host, port, **x509))
+ 
+ # we never actually use these for anything, but we keep them
+ # here for compatibility with post-1.5.2 CVS.
+ self.key_file = x509.get('key_file')
+ self.cert_file = x509.get('cert_file')
+ 

 class HTTPException(Exception):
***************
*** 753,756 ****
--- 856,862 ----
 pass

+ class InvalidURL(HTTPException):
+ pass
+ 
 class UnknownProtocol(HTTPException):
 def __init__(self, version):
***************
*** 760,766 ****
 pass

- class IllegalKeywordArgument(HTTPException):
- pass
- 
 class UnimplementedFileMode(HTTPException):
 pass
--- 866,869 ----
***************
*** 823,827 ****
 for header in headers.headers: print header.strip()
 print
! print h.getfile().read()

 if hasattr(socket, 'ssl'):
--- 926,941 ----
 for header in headers.headers: print header.strip()
 print
! print "read", len(h.getfile().read())
! 
! # minimal test that code to extract host from url works
! class HTTP11(HTTP):
! _http_vsn = 11
! _http_vsn_str = 'HTTP/1.1'
! 
! h = HTTP11('www.python.org')
! h.putrequest('GET', 'http://www.python.org/~jeremy/')
! h.endheaders()
! h.getreply()
! h.close()

 if hasattr(socket, 'ssl'):
***************
*** 833,836 ****
--- 947,951 ----
 hs.endheaders()
 status, reason, headers = hs.getreply()
+ # XXX why does this give a 302 response?
 print 'status =', status
 print 'reason =', reason
***************
*** 839,843 ****
 for header in headers.headers: print header.strip()
 print
! print hs.getfile().read()

--- 954,958 ----
 for header in headers.headers: print header.strip()
 print
! print "read", len(hs.getfile().read())

Index: urlparse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urlparse.py,v
retrieving revision 1.29
retrieving revision 1.29.4.1
diff -C2 -d -r1.29 -r1.29.4.1
*** urlparse.py	1 Mar 2001 04:27:19 -0000	1.29
--- urlparse.py	2 Jul 2002 20:42:50 -0000	1.29.4.1
***************
*** 44,48 ****

! def urlparse(url, scheme = '', allow_fragments = 1):
 """Parse a URL into 6 components:
 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
--- 44,48 ----

! def urlparse(url, scheme='', allow_fragments=1):
 """Parse a URL into 6 components:
 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
***************
*** 50,53 ****
--- 50,76 ----
 Note that we don't break the components up in smaller bits
 (e.g. netloc is a single string) and we don't expand % escapes."""
+ tuple = urlsplit(url, scheme, allow_fragments)
+ scheme, netloc, url, query, fragment = tuple
+ if scheme in uses_params and ';' in url:
+ url, params = _splitparams(url)
+ else:
+ params = ''
+ return scheme, netloc, url, params, query, fragment
+ 
+ def _splitparams(url):
+ if '/' in url:
+ i = url.find(';', url.rfind('/'))
+ if i < 0:
+ return url, ''
+ else:
+ i = url.find(';')
+ return url[:i], url[i+1:]
+ 
+ def urlsplit(url, scheme='', allow_fragments=1):
+ """Parse a URL into 5 components:
+ <scheme>://<netloc>/<path>?<query>#<fragment>
+ Return a 5-tuple: (scheme, netloc, path, query, fragment).
+ Note that we don't break the components up in smaller bits
+ (e.g. netloc is a single string) and we don't expand % escapes."""
 key = url, scheme, allow_fragments
 cached = _parse_cache.get(key, None)
***************
*** 56,60 ****
 if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
 clear_cache()
! netloc = path = params = query = fragment = ''
 i = url.find(':')
 if i > 0:
--- 79,83 ----
 if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
 clear_cache()
! netloc = query = fragment = ''
 i = url.find(':')
 if i > 0:
***************
*** 65,85 ****
 i = url.find('/', 2)
 if i < 0:
! i = len(url)
 netloc = url[2:i]
 url = url[i:]
! if allow_fragments:
! i = url.rfind('#')
! if i >= 0:
! fragment = url[i+1:]
! url = url[:i]
! i = url.find('?')
! if i >= 0:
! query = url[i+1:]
! url = url[:i]
! i = url.find(';')
! if i >= 0:
! params = url[i+1:]
! url = url[:i]
! tuple = scheme, netloc, url, params, query, fragment
 _parse_cache[key] = tuple
 return tuple
--- 88,101 ----
 i = url.find('/', 2)
 if i < 0:
! i = url.find('#')
! if i < 0:
! i = len(url)
 netloc = url[2:i]
 url = url[i:]
! if allow_fragments and '#' in url:
! url, fragment = url.split('#', 1)
! if '?' in url:
! url, query = url.split('?', 1)
! tuple = scheme, netloc, url, query, fragment
 _parse_cache[key] = tuple
 return tuple
***************
*** 95,111 ****
 i = len(url)
 netloc, url = url[2:i], url[i:]
! if allow_fragments and scheme in uses_fragment:
! i = url.rfind('#')
! if i >= 0:
! url, fragment = url[:i], url[i+1:]
! if scheme in uses_query:
! i = url.find('?')
! if i >= 0:
! url, query = url[:i], url[i+1:]
! if scheme in uses_params:
! i = url.find(';')
! if i >= 0:
! url, params = url[:i], url[i+1:]
! tuple = scheme, netloc, url, params, query, fragment
 _parse_cache[key] = tuple
 return tuple
--- 111,119 ----
 i = len(url)
 netloc, url = url[2:i], url[i:]
! if allow_fragments and scheme in uses_fragment and '#' in url:
! url, fragment = url.split('#', 1)
! if scheme in uses_query and '?' in url:
! url, query = url.split('?', 1)
! tuple = scheme, netloc, url, query, fragment
 _parse_cache[key] = tuple
 return tuple
***************
*** 116,119 ****
--- 124,132 ----
 originally had redundant delimiters, e.g. a ? with an empty query
 (the draft states that these are equivalent)."""
+ if params:
+ url = "%s;%s" % (url, params)
+ return urlunsplit((scheme, netloc, url, query, fragment))
+ 
+ def urlunsplit((scheme, netloc, url, query, fragment)):
 if netloc or (scheme in uses_netloc and url[:2] == '//'):
 if url and url[:1] != '/': url = '/' + url
***************
*** 121,126 ****
 if scheme:
 url = scheme + ':' + url
- if params:
- url = url + ';' + params
 if query:
 url = url + '?' + query
--- 134,137 ----
***************
*** 188,194 ****
 empty string.
 """
! s, n, p, a, q, frag = urlparse(url)
! defrag = urlunparse((s, n, p, a, q, ''))
! return defrag, frag

--- 199,208 ----
 empty string.
 """
! if '#' in url:
! s, n, p, a, q, frag = urlparse(url)
! defrag = urlunparse((s, n, p, a, q, ''))
! return defrag, frag
! else:
! return url, ''