[Python-checkins] python/dist/src/Lib _LWPCookieJar.py, NONE, 1.1 _MozillaCookieJar.py, NONE, 1.1 cookielib.py, NONE, 1.1 urllib2.py, 1.66, 1.67

Mon May 31 14:22:42 EDT 2004

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27715/Lib
Modified Files:
	urllib2.py 
Added Files:
	_LWPCookieJar.py _MozillaCookieJar.py cookielib.py 
Log Message:
Patch #963318: Add support for client-side cookie management.
--- NEW FILE: _LWPCookieJar.py ---
"""Load / save to libwww-perl (LWP) format files.
Actually, the format is slightly extended from that used by LWP's
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
not recorded by LWP.
It uses the version string "2.0", though really there isn't an LWP Cookies
2.0 format. This indicates that there is extra information in here
(domain_dot and port_spec) while still being compatible with
libwww-perl, I hope.
"""
import time, re, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
 MISSING_FILENAME_TEXT, join_header_words, split_header_words, \
 iso2time, time2isoz
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    Actually, the format is extended a bit -- see module docstring.

    The cookie's name/value pair comes first, followed by "path" and
    "domain", then whichever optional cookie-attributes are set on the
    cookie, then the non-standard attributes held in cookie._rest (in sorted
    key order, so output is stable), and finally "version".  Flag-style
    attributes such as "secure" are passed on with a value of None.

    The assembled list is handed to join_header_words(), whose result is
    returned.
    """
    h = [(cookie.name, cookie.value),
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None: h.append(("port", cookie.port))
    if cookie.path_specified: h.append(("path_spec", None))
    if cookie.port_specified: h.append(("port_spec", None))
    if cookie.domain_initial_dot: h.append(("domain_dot", None))
    if cookie.secure: h.append(("secure", None))
    if cookie.expires: h.append(("expires",
                                 time2isoz(float(cookie.expires))))
    if cookie.discard: h.append(("discard", None))
    if cookie.comment: h.append(("comment", cookie.comment))
    if cookie.comment_url: h.append(("commenturl", cookie.comment_url))

    # sorted() gives the same ordering as keys() followed by an in-place
    # sort, but does not rely on keys() returning a list.
    for k in sorted(cookie._rest.keys()):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        Every emitted header, including the last one, is terminated by a
        newline; an empty jar yields the empty string.

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # the trailing "" makes join() emit a final newline
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar's cookies to filename in Set-Cookie3 format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  ignore_discard and ignore_expires are passed through to
        as_lwp_str().  An existing file is overwritten.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            f.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read Set-Cookie3 lines from the open file f into this jar.

        f: file object positioned at the start of the cookie file
        filename: used only in error messages
        ignore_discard: if false, cookies marked discard are not loaded
        ignore_expires: if false, cookies that have expired are not loaded

        Raises IOError if the first line does not match self.magic_re, or if
        anything goes wrong while parsing the remaining lines.

        """
        magic = f.readline()
        if not re.search(self.magic_re, magic):
            msg = "%s does not seem to contain cookies" % filename
            raise IOError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        # attributes that are flags: present means true
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        # attributes that carry a value
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        try:
            while 1:
                line = f.readline()
                if line == "": break
                # lines that are not Set-Cookie3 headers are ignored
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    # name and value are an exception here, since a plain "foo"
                    # (with no "=", unlike "bar=foo") means a cookie with no
                    # name and value "foo".  With all other cookie-attributes,
                    # the situation is reversed: "foo" means an attribute named
                    # "foo" with no value!
                    if value is None:
                        name, value = value, name
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            if v is None: v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # a cookie without a usable expiry time is marked discard
                    if expires is None:
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except:
            # NOTE(review): reraise_unmasked_exceptions() is defined in
            # cookielib and not visible here; presumably it re-raises the
            # listed exception types (and a few fatal ones) unchanged, so that
            # only unexpected parse failures reach the raise below -- confirm.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Set-Cookie3 format file %s" % filename)
--- NEW FILE: _MozillaCookieJar.py ---
"""Mozilla / Netscape cookie loading / saving."""
import re, time, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
 MISSING_FILENAME_TEXT
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # pattern the first line of a file must match to be accepted by load
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Written verbatim at the top of every saved file.  The lines must start
    # in column 0 so that the file begins with the magic "# Netscape ..."
    # comment rather than with whitespace.
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.
"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read cookies.txt-format lines from the open file f into this jar.

        f: file object positioned at the start of the cookie file; it is
           closed here if its first line does not match magic_re
        filename: used only in error messages
        ignore_discard: if false, cookies marked discard are not loaded
        ignore_expires: if false, cookies that have expired are not loaded

        Raises IOError if the first line does not match magic_re, or if a
        data line cannot be parsed.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        # keeps the error message below well-defined even if the very first
        # read inside the try block fails
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith("#") or
                    line.strip().startswith("$") or
                    line.strip() == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt stores a valueless cookie as an empty name
                    # plus a value; cookielib wants it the other way round
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check rather than assert: this validates file
                # input, so it must not disappear under python -O.  It is
                # handled by the except clause below like any parse error.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain flag does not match leading dot of domain")

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            # NOTE(review): reraise_unmasked_exceptions() is defined in
            # cookielib and not visible here; presumably it re-raises the
            # listed exception types (and a few fatal ones) unchanged, so that
            # only unexpected parse failures reach the raise below -- confirm.
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar's cookies to filename in cookies.txt format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  Cookies marked discard are skipped unless ignore_discard is
        true, and expired cookies are skipped unless ignore_expires is true.
        An existing file is overwritten.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")
        finally:
            f.close()
--- NEW FILE: cookielib.py ---
"""HTTP cookie handling for web clients.
This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.
Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.
Class diagram (note that the classes which do not derive from
FileCookieJar are not distributed with the Python standard library, but
are available from http://wwwsearch.sf.net/):
 CookieJar____
 / \ \
 FileCookieJar \ \
 / | \ \ \
 MozillaCookieJar | LWPCookieJar \ \
 | | \
[...1717 lines suppressed...]
 """
 if filename is None:
 if self.filename is not None: filename = self.filename
 else: raise ValueError(MISSING_FILENAME_TEXT)
 self._cookies_lock.acquire()
 old_state = copy.deepcopy(self._cookies)
 self._cookies = {}
 try:
 self.load(filename, ignore_discard, ignore_expires)
 except (LoadError, IOError):
 self._cookies = old_state
 raise
 self._cookies_lock.release()
from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
from _MozillaCookieJar import MozillaCookieJar
Index: urllib2.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v
retrieving revision 1.66
retrieving revision 1.67
diff -C2 -d -r1.66 -r1.67
*** urllib2.py	10 May 2004 07:35:33 -0000	1.66
--- urllib2.py	31 May 2004 18:22:40 -0000	1.67
***************
*** 107,110 ****
--- 107,111 ----
 import urlparse
 import bisect
+ import cookielib

 try:
***************
*** 177,181 ****
 class Request:

! def __init__(self, url, data=None, headers={}):
 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 self.__original = unwrap(url)
--- 178,183 ----
 class Request:

! def __init__(self, url, data=None, headers={},
! origin_req_host=None, unverifiable=False):
 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 self.__original = unwrap(url)
***************
*** 189,192 ****
--- 191,198 ----
 self.add_header(key, value)
 self.unredirected_hdrs = {}
+ if origin_req_host is None:
+ origin_req_host = cookielib.request_host(self)
+ self.origin_req_host = origin_req_host
+ self.unverifiable = unverifiable

 def __getattr__(self, attr):
***************
*** 243,246 ****
--- 249,258 ----
 self.__r_host = self.__original

+ def get_origin_req_host(self):
+ return self.origin_req_host
+ 
+ def is_unverifiable(self):
+ return self.unverifiable
+ 
 def add_header(self, key, val):
 # useful for something like authentication
***************
*** 255,258 ****
--- 267,279 ----
 header_name in self.unredirected_hdrs)

+ def get_header(self, header_name, default=None):
+ return self.headers.get(
+ header_name,
+ self.unredirected_hdrs.get(header_name, default))
+ 
+ def header_items(self):
+ hdrs = self.unredirected_hdrs.copy()
+ hdrs.update(self.headers)
+ return hdrs.items()

 class OpenerDirector:
***************
*** 461,465 ****

 class HTTPRedirectHandler(BaseHandler):
! # maximum number of redirections before assuming we're in a loop
 max_redirections = 10

--- 482,490 ----

 class HTTPRedirectHandler(BaseHandler):
! # maximum number of redirections to any single URL
! # this is needed because of the state that cookies introduce
! max_repeats = 4
! # maximum total number of redirections (regardless of URL) before
! # assuming we're in a loop
 max_redirections = 10

***************
*** 482,486 ****
 # essentially all clients do redirect in this case, so we
 # do the same.
! return Request(newurl, headers=req.headers)
 else:
 raise HTTPError(req.get_full_url(), code, msg, headers, fp)
--- 507,514 ----
 # essentially all clients do redirect in this case, so we
 # do the same.
! return Request(newurl,
! headers=req.headers,
! origin_req_host=req.get_origin_req_host(),
! unverifiable=True)
 else:
 raise HTTPError(req.get_full_url(), code, msg, headers, fp)
***************
*** 491,498 ****
 # attribute to the Request object.
 def http_error_302(self, req, fp, code, msg, headers):
 if 'location' in headers:
! newurl = headers['location']
 elif 'uri' in headers:
! newurl = headers['uri']
 else:
 return
--- 519,528 ----
 # attribute to the Request object.
 def http_error_302(self, req, fp, code, msg, headers):
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably same goes for URI). Use first header.
 if 'location' in headers:
! newurl = headers.getheaders('location')[0]
 elif 'uri' in headers:
! newurl = headers.getheaders('uri')[0]
 else:
 return
***************
*** 507,524 ****

 # loop detection
! # .redirect_dict has a key (url, code) if url was previously
! # visited as a result of a redirection with that code. The
! # code is needed in addition to the URL because visiting a URL
! # twice isn't necessarily a loop: there is more than one way
! # to redirect (301, 302, 303, 307, refresh).
! key = (newurl, code)
 if hasattr(req, 'redirect_dict'):
 visited = new.redirect_dict = req.redirect_dict
! if key in visited or len(visited) >= self.max_redirections:
 raise HTTPError(req.get_full_url(), code,
 self.inf_msg + msg, headers, fp)
 else:
 visited = new.redirect_dict = req.redirect_dict = {}
! visited[key] = None

 # Don't close the fp until we are sure that we won't use it
--- 537,550 ----

 # loop detection
! # .redirect_dict has a key url if url was previously visited.
 if hasattr(req, 'redirect_dict'):
 visited = new.redirect_dict = req.redirect_dict
! if (visited.get(newurl, 0) >= self.max_repeats or
! len(visited) >= self.max_redirections):
 raise HTTPError(req.get_full_url(), code,
 self.inf_msg + msg, headers, fp)
 else:
 visited = new.redirect_dict = req.redirect_dict = {}
! visited[newurl] = visited.get(newurl, 0) + 1

 # Don't close the fp until we are sure that we won't use it
***************
*** 913,917 ****
 self._debuglevel = level

! def do_request(self, request):
 host = request.get_host()
 if not host:
--- 939,943 ----
 self._debuglevel = level

! def do_request_(self, request):
 host = request.get_host()
 if not host:
***************
*** 988,992 ****
 return self.do_open(httplib.HTTPConnection, req)

! http_request = AbstractHTTPHandler.do_request

 if hasattr(httplib, 'HTTPS'):
--- 1014,1018 ----
 return self.do_open(httplib.HTTPConnection, req)

! http_request = AbstractHTTPHandler.do_request_

 if hasattr(httplib, 'HTTPS'):
***************
*** 996,1000 ****
 return self.do_open(httplib.HTTPSConnection, req)

! https_request = AbstractHTTPHandler.do_request

 class UnknownHandler(BaseHandler):
--- 1022,1043 ----
 return self.do_open(httplib.HTTPSConnection, req)

! https_request = AbstractHTTPHandler.do_request_
! 
! class HTTPCookieProcessor(BaseHandler):
! def __init__(self, cookiejar=None):
! if cookiejar is None:
! cookiejar = CookieJar()
! self.cookiejar = cookiejar
! 
! def http_request(self, request):
! self.cookiejar.add_cookie_header(request)
! return request
! 
! def http_response(self, request, response):
! self.cookiejar.extract_cookies(response, request)
! return response
! 
! https_request = http_request
! https_response = http_response

 class UnknownHandler(BaseHandler):