[Python-checkins] python/dist/src/Lib _LWPCookieJar.py, NONE,
1.1 _MozillaCookieJar.py, NONE, 1.1 cookielib.py, NONE,
1.1 urllib2.py, 1.66, 1.67
loewis at users.sourceforge.net
loewis at users.sourceforge.net
Mon May 31 14:22:42 EDT 2004
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27715/Lib
Modified Files:
urllib2.py
Added Files:
_LWPCookieJar.py _MozillaCookieJar.py cookielib.py
Log Message:
Patch #963318: Add support for client-side cookie management.
--- NEW FILE: _LWPCookieJar.py ---
"""Load / save to libwww-perl (LWP) format files.
Actually, the format is slightly extended from that used by LWP's
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
not recorded by LWP.
It uses the version string "2.0", though really there isn't an LWP Cookies
2.0 format. This indicates that there is extra information in here
(domain_dot and port_spec) while still being compatible with
libwww-perl, I hope.
"""
import time, re, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT, join_header_words, split_header_words, \
iso2time, time2isoz
def lwp_cookie_str(cookie):
    """Serialise one Cookie as the value part of a "Set-Cookie3" line.

    The first pair is the cookie's own name and value; the pairs that follow
    are its cookie-attributes, with valueless flags stored as (name, None).
    The layout is the LWP cookie file format, extended a bit -- see the
    module docstring.

    """
    pairs = [
        (cookie.name, cookie.value),
        ("path", cookie.path),
        ("domain", cookie.domain),
    ]

    if cookie.port is not None:
        pairs.append(("port", cookie.port))

    # Valueless flags: written only when set.
    for flag, is_set in (("path_spec", cookie.path_specified),
                         ("port_spec", cookie.port_specified),
                         ("domain_dot", cookie.domain_initial_dot),
                         ("secure", cookie.secure)):
        if is_set:
            pairs.append((flag, None))

    if cookie.expires:
        pairs.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        pairs.append(("discard", None))

    # Free-text attributes: written only when non-empty.
    for attr, text in (("comment", cookie.comment),
                       ("commenturl", cookie.comment_url)):
        if text:
            pairs.append((attr, text))

    # Remaining (non-standard) cookie-attributes, in sorted key order.
    extras = cookie._rest
    for key in sorted(extras.keys()):
        pairs.append((key, str(extras[key])))

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])
class LWPCookieJar(FileCookieJar):
    """
    FileCookieJar that saves its cookies as a sequence of "Set-Cookie3" lines.

    "Set-Cookie3" is the format used by the libwww-perl library.  It is not
    known to be compatible with any browser, but it is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return the jar's cookies as newline-terminated "Set-Cookie3" headers.

        A cookie is left out when ignore_discard is false and the cookie is
        marked discard, or when ignore_expires is false and the cookie has
        expired.  An empty selection yields the empty string.

        """
        now = time.time()
        lines = []
        for cookie in self:
            skip = ((not ignore_discard and cookie.discard) or
                    (not ignore_expires and cookie.is_expired(now)))
            if not skip:
                lines.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # Trailing empty item so the last header also ends with a newline.
        lines.append("")
        return "\n".join(lines)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename, falling back to self.filename.

        Raises ValueError when neither a filename argument nor self.filename
        is available.  ignore_discard and ignore_expires are passed straight
        through to as_lwp_str.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        out = open(filename, "w")
        try:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            out.write("#LWP-Cookies-2.0\n")
            out.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            out.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read "Set-Cookie3" lines from the open file f into this jar.

        The first line must match self.magic_re (not defined in this class;
        presumably inherited), otherwise IOError is raised.  Lines that do
        not start with "Set-Cookie3:" are skipped.  Any failure while parsing
        goes through reraise_unmasked_exceptions and is then reported as
        IOError; filename is used only in the error messages.

        """
        first_line = f.readline()
        if not re.search(self.magic_re, first_line):
            raise IOError("%s does not seem to contain cookies" % filename)

        now = time.time()

        prefix = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        try:
            # readline() returns "" at end of file, which ends the loop.
            for raw in iter(f.readline, ""):
                if not raw.startswith(prefix):
                    continue
                payload = raw[len(prefix):].strip()

                for words in split_header_words([payload]):
                    name, value = words[0]
                    # The leading pair is special: a bare "foo" (no "=")
                    # is a cookie with no name and value "foo", whereas for
                    # every other cookie-attribute a bare "foo" is an
                    # attribute named "foo" with no value.
                    if value is None:
                        name, value = value, name

                    standard = dict.fromkeys(boolean_attrs, False)
                    rest = {}
                    for key, val in words[1:]:
                        folded = None
                        if key is not None:
                            folded = key.lower()
                        # Known attributes are stored under their lower-case
                        # name; unknown ones keep their original case.
                        if folded in boolean_attrs:
                            if val is None:
                                val = True
                            standard[folded] = val
                        elif folded in value_attrs:
                            standard[folded] = val
                        else:
                            rest[key] = val

                    expires = standard.get("expires")
                    discard = standard.get("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # No usable expiry time: mark the cookie as discard.
                    if expires is None:
                        discard = True

                    domain = standard.get("domain")
                    domain_specified = domain.startswith(".")
                    cookie = Cookie(
                        standard.get("version"), name, value,
                        standard.get("port"), standard.get("port_spec"),
                        domain, domain_specified, standard.get("domain_dot"),
                        standard.get("path"), standard.get("path_spec"),
                        standard.get("secure"),
                        expires,
                        discard,
                        standard.get("comment"),
                        standard.get("commenturl"),
                        rest)

                    dropped = ((not ignore_discard and cookie.discard) or
                               (not ignore_expires and cookie.is_expired(now)))
                    if not dropped:
                        self.set_cookie(cookie)
        except:
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Set-Cookie3 format file %s" % filename)
--- NEW FILE: _MozillaCookieJar.py ---
"""Mozilla / Netscape cookie loading / saving."""
import re, time, logging
from cookielib import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT
class MozillaCookieJar(FileCookieJar):
    """
    FileCookieJar that uses the Mozilla/Netscape `cookies.txt' file format.

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Pattern the first line of a file must match to be loaded.
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Text written at the top of every saved file.
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.
"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read tab-separated cookie rows from the open file f into this jar.

        The first line must match magic_re; if it does not, f is closed and
        IOError is raised.  Comment lines (starting with "#" or "$") and
        blank lines are skipped.  Every other line must hold exactly seven
        tab-separated fields: domain, domain-specified flag, path, secure
        flag, expires, name, value.  Any failure while parsing goes through
        reraise_unmasked_exceptions and is then reported as IOError.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise IOError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        # Bound up front so the error message below can always be formatted,
        # even if the very first readline() fails.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "":
                    break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (stripped.startswith("#") or
                    stripped.startswith("$") or
                    stripped == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt stores a valueless cookie as an empty name
                    # plus a value; this module keeps it the other way round.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check rather than assert: asserts are stripped
                # under -O, which would let inconsistent rows through.  The
                # error is caught by the except clause below, presumably
                # just as the AssertionError it replaces was.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain-specified flag does not match domain %r" %
                        domain)

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            reraise_unmasked_exceptions((IOError,))
            raise IOError("invalid Netscape format file %s: %s" %
                          (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to filename, falling back to self.filename.

        Raises ValueError when neither a filename argument nor self.filename
        is available.  A cookie is left out when ignore_discard is false and
        the cookie is marked discard, or when ignore_expires is false and the
        cookie has expired.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue

                if cookie.secure:
                    secure = "TRUE"
                else:
                    secure = "FALSE"
                if cookie.domain.startswith("."):
                    initial_dot = "TRUE"
                else:
                    initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value

                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")
        finally:
            f.close()
--- NEW FILE: cookielib.py ---
"""HTTP cookie handling for web clients.
This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.
Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.
Class diagram (note that the classes which do not derive from
FileCookieJar are not distributed with the Python standard library, but
are available from http://wwwsearch.sf.net/):
CookieJar____
/ \ \
FileCookieJar \ \
/ | \ \ \
MozillaCookieJar | LWPCookieJar \ \
| | \
[...1717 lines suppressed...]
"""
if filename is None:
if self.filename is not None: filename = self.filename
else: raise ValueError(MISSING_FILENAME_TEXT)
self._cookies_lock.acquire()
old_state = copy.deepcopy(self._cookies)
self._cookies = {}
try:
self.load(filename, ignore_discard, ignore_expires)
except (LoadError, IOError):
self._cookies = old_state
raise
self._cookies_lock.release()
from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
from _MozillaCookieJar import MozillaCookieJar
Index: urllib2.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib2.py,v
retrieving revision 1.66
retrieving revision 1.67
diff -C2 -d -r1.66 -r1.67
*** urllib2.py 10 May 2004 07:35:33 -0000 1.66
--- urllib2.py 31 May 2004 18:22:40 -0000 1.67
***************
*** 107,110 ****
--- 107,111 ----
import urlparse
import bisect
+ import cookielib
try:
***************
*** 177,181 ****
class Request:
! def __init__(self, url, data=None, headers={}):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
self.__original = unwrap(url)
--- 178,183 ----
class Request:
! def __init__(self, url, data=None, headers={},
! origin_req_host=None, unverifiable=False):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
self.__original = unwrap(url)
***************
*** 189,192 ****
--- 191,198 ----
self.add_header(key, value)
self.unredirected_hdrs = {}
+ if origin_req_host is None:
+ origin_req_host = cookielib.request_host(self)
+ self.origin_req_host = origin_req_host
+ self.unverifiable = unverifiable
def __getattr__(self, attr):
***************
*** 243,246 ****
--- 249,258 ----
self.__r_host = self.__original
+ def get_origin_req_host(self):
+ return self.origin_req_host
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
def add_header(self, key, val):
# useful for something like authentication
***************
*** 255,258 ****
--- 267,279 ----
header_name in self.unredirected_hdrs)
+ def get_header(self, header_name, default=None):
+ return self.headers.get(
+ header_name,
+ self.unredirected_hdrs.get(header_name, default))
+
+ def header_items(self):
+ hdrs = self.unredirected_hdrs.copy()
+ hdrs.update(self.headers)
+ return hdrs.items()
class OpenerDirector:
***************
*** 461,465 ****
class HTTPRedirectHandler(BaseHandler):
! # maximum number of redirections before assuming we're in a loop
max_redirections = 10
--- 482,490 ----
class HTTPRedirectHandler(BaseHandler):
! # maximum number of redirections to any single URL
! # this is needed because of the state that cookies introduce
! max_repeats = 4
! # maximum total number of redirections (regardless of URL) before
! # assuming we're in a loop
max_redirections = 10
***************
*** 482,486 ****
# essentially all clients do redirect in this case, so we
# do the same.
! return Request(newurl, headers=req.headers)
else:
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
--- 507,514 ----
# essentially all clients do redirect in this case, so we
# do the same.
! return Request(newurl,
! headers=req.headers,
! origin_req_host=req.get_origin_req_host(),
! unverifiable=True)
else:
raise HTTPError(req.get_full_url(), code, msg, headers, fp)
***************
*** 491,498 ****
# attribute to the Request object.
def http_error_302(self, req, fp, code, msg, headers):
if 'location' in headers:
! newurl = headers['location']
elif 'uri' in headers:
! newurl = headers['uri']
else:
return
--- 519,528 ----
# attribute to the Request object.
def http_error_302(self, req, fp, code, msg, headers):
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably same goes for URI). Use first header.
if 'location' in headers:
! newurl = headers.getheaders('location')[0]
elif 'uri' in headers:
! newurl = headers.getheaders('uri')[0]
else:
return
***************
*** 507,524 ****
# loop detection
! # .redirect_dict has a key (url, code) if url was previously
! # visited as a result of a redirection with that code. The
! # code is needed in addition to the URL because visiting a URL
! # twice isn't necessarily a loop: there is more than one way
! # to redirect (301, 302, 303, 307, refresh).
! key = (newurl, code)
if hasattr(req, 'redirect_dict'):
visited = new.redirect_dict = req.redirect_dict
! if key in visited or len(visited) >= self.max_redirections:
raise HTTPError(req.get_full_url(), code,
self.inf_msg + msg, headers, fp)
else:
visited = new.redirect_dict = req.redirect_dict = {}
! visited[key] = None
# Don't close the fp until we are sure that we won't use it
--- 537,550 ----
# loop detection
! # .redirect_dict has a key url if url was previously visited.
if hasattr(req, 'redirect_dict'):
visited = new.redirect_dict = req.redirect_dict
! if (visited.get(newurl, 0) >= self.max_repeats or
! len(visited) >= self.max_redirections):
raise HTTPError(req.get_full_url(), code,
self.inf_msg + msg, headers, fp)
else:
visited = new.redirect_dict = req.redirect_dict = {}
! visited[newurl] = visited.get(newurl, 0) + 1
# Don't close the fp until we are sure that we won't use it
***************
*** 913,917 ****
self._debuglevel = level
! def do_request(self, request):
host = request.get_host()
if not host:
--- 939,943 ----
self._debuglevel = level
! def do_request_(self, request):
host = request.get_host()
if not host:
***************
*** 988,992 ****
return self.do_open(httplib.HTTPConnection, req)
! http_request = AbstractHTTPHandler.do_request
if hasattr(httplib, 'HTTPS'):
--- 1014,1018 ----
return self.do_open(httplib.HTTPConnection, req)
! http_request = AbstractHTTPHandler.do_request_
if hasattr(httplib, 'HTTPS'):
***************
*** 996,1000 ****
return self.do_open(httplib.HTTPSConnection, req)
! https_request = AbstractHTTPHandler.do_request
class UnknownHandler(BaseHandler):
--- 1022,1043 ----
return self.do_open(httplib.HTTPSConnection, req)
! https_request = AbstractHTTPHandler.do_request_
!
! class HTTPCookieProcessor(BaseHandler):
!     # Handler that calls the jar's add_cookie_header() on each outgoing
!     # HTTP(S) request and its extract_cookies() on each response.
!     def __init__(self, cookiejar=None):
!         if cookiejar is None:
!             # Only the cookielib module is imported here, so CookieJar
!             # must be reached through it (a bare CookieJar() is a NameError).
!             cookiejar = cookielib.CookieJar()
!         self.cookiejar = cookiejar
!
!     def http_request(self, request):
!         self.cookiejar.add_cookie_header(request)
!         return request
!
!     def http_response(self, request, response):
!         self.cookiejar.extract_cookies(response, request)
!         return response
!
!     https_request = http_request
!     https_response = http_response
class UnknownHandler(BaseHandler):
More information about the Python-checkins
mailing list