Index: Misc/NEWS =================================================================== --- Misc/NEWS (revision 64812) +++ Misc/NEWS (working copy) @@ -40,6 +40,11 @@ Library ------- +- Issue #2275: urllib2 header capitalization. Included a case-insensitive dict + lookup for headers interface. Headers sent to httplib will be .title()-ed + instead of capitalize()'d. Headers dictionary exposed and documented using + .headers and Request.get_header() + - Issue #839496: SimpleHTTPServer used to open text files in text mode. This is both unnecessary (HTTP allows text content to be sent in several forms) and wrong because the actual transmitted size could differ with the Index: Doc/library/urllib2.rst =================================================================== --- Doc/library/urllib2.rst (revision 64812) +++ Doc/library/urllib2.rst (working copy) @@ -126,12 +126,14 @@ returns a string in this format. *headers* should be a dictionary, and will be treated as if :meth:`add_header` - was called with each key and value as arguments. This is often used to "spoof" - the ``User-Agent`` header, which is used by a browser to identify itself -- - some HTTP servers only allow requests coming from common browsers as opposed - to scripts. For example, Mozilla Firefox may identify itself as ``"Mozilla/5.0 - (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"``, while :mod:`urllib2`'s - default user agent string is ``"Python-urllib/2.6"`` (on Python 2.6). + was called with each key and value as arguments. *headers* are internally + stored as a special form of ``dict`` which provides case-insensitive key lookup + Headers are often used to "spoof" the ``User-Agent`` header, which is used + by a browser to identify itself -- some HTTP servers only allow requests + coming from common browsers as opposed to scripts. 
For example, Mozilla + Firefox may identify itself as ``"Mozilla/5.0 (X11; U; Linux i686) + Gecko/20071127 Firefox/2.0.0.11"``, while :mod:`urllib2`'s default user + agent string is ``"Python-urllib/2.6"`` (on Python 2.6). The final two arguments are only of interest for correct handling of third-party HTTP cookies: @@ -319,7 +321,11 @@ meaning when used more than once have a (header-specific) way of gaining the same functionality using only one header. +.. method:: Request.get_header(header_name, default=None) + Retrieve the value for the key ``header_name`` stored in *headers* dict. + *headers* dict provides a case-insensitive key lookup. + .. method:: Request.add_unredirected_header(key, header) Add a header that will not be added to a redirected request. @@ -941,10 +947,24 @@ import urllib2 opener = urllib2.build_opener() - opener.addheaders = [('User-agent', 'Mozilla/5.0')] + opener.addheaders = [('User-Agent', 'Mozilla/5.0')] opener.open('http://www.example.com/') Also, remember that a few standard headers (:mailheader:`Content-Length`, :mailheader:`Content-Type` and :mailheader:`Host`) are added when the :class:`Request` is passed to :func:`urlopen` (or :meth:`OpenerDirector.open`). +Retrieving HTTP headers: + +To retrieve an HTTP header, either use the .headers attribute directly or use the +:meth:`Request.get_header` method. 
:: + +>>> import urllib2 +>>> url = "http://example.com" +>>> req = urllib2.Request(url, headers={"Spam-eggs": "blah"}) +>>> req.headers["Spam-eggs"] + 'blah' +>>> req.headers["Spam-Eggs"] + 'blah' +>>> req.get_header("Spam-Eggs") + 'blah' Index: Lib/urllib2.py =================================================================== --- Lib/urllib2.py (revision 64812) +++ Lib/urllib2.py (working copy) @@ -182,6 +182,37 @@ host = _cut_port_re.sub("", host, 1) return host.lower() +class CaseInsensitiveDict(dict): + def __init__(self, *args, **kwargs): + self.keystore = {} + d = dict(*args, **kwargs) + for k in d.keys(): + self.keystore[self._get_lower(k)] = k + return super(CaseInsensitiveDict, self).__init__(*args, **kwargs) + def __setitem__(self, k, v): + if hasattr(self,'keystore'): + self.keystore[self._get_lower(k)] = k + return super(CaseInsensitiveDict, self).__setitem__(k, v) + def __getitem__(self, k): + if hasattr(self,'keystore') and self._get_lower(k) in self.keystore: + k = self.keystore[self._get_lower(k)] + return super(CaseInsensitiveDict, self).__getitem__(k) + def __contains__(self, k): + if hasattr(self,'keystore') and self._get_lower(k) in self.keystore: + k = self.keystore[self._get_lower(k)] + return super(CaseInsensitiveDict, self).__contains__(k) + def get(self, k, failobj=None): + if hasattr(self,'keystore') and self._get_lower(k) in self.keystore: + k = self.keystore[self._get_lower(k)] + return super(CaseInsensitiveDict, self).get(k, failobj) + + @staticmethod + def _get_lower(k): + if isinstance(k, str): + return k.lower() + else: + return k + class Request: def __init__(self, url, data=None, headers={}, @@ -193,10 +224,10 @@ self.host = None self.port = None self.data = data - self.headers = {} + self.headers = CaseInsensitiveDict() for key, value in headers.items(): self.add_header(key, value) - self.unredirected_hdrs = {} + self.unredirected_hdrs = CaseInsensitiveDict() if origin_req_host is None: origin_req_host = request_host(self) 
self.origin_req_host = origin_req_host @@ -263,11 +294,11 @@ def add_header(self, key, val): # useful for something like authentication - self.headers[key.capitalize()] = val + self.headers[key.title()] = val def add_unredirected_header(self, key, val): # will not be added to a redirected request - self.unredirected_hdrs[key.capitalize()] = val + self.unredirected_hdrs[key.title()] = val def has_header(self, header_name): return (header_name in self.headers or @@ -1037,13 +1068,13 @@ if request.has_data(): # POST data = request.get_data() - if not request.has_header('Content-type'): + if not request.has_header('Content-Type'): request.add_unredirected_header( - 'Content-type', + 'Content-Type', 'application/x-www-form-urlencoded') - if not request.has_header('Content-length'): + if not request.has_header('Content-Length'): request.add_unredirected_header( - 'Content-length', '%d' % len(data)) + 'Content-Length', '%d' % len(data)) scheme, sel = splittype(request.get_selector()) sel_host, sel_path = splithost(sel) Index: Lib/test/test_urllib2.py =================================================================== --- Lib/test/test_urllib2.py (revision 64812) +++ Lib/test/test_urllib2.py (working copy) @@ -49,57 +49,56 @@ def test_request_headers_dict(): """ - The Request.headers dictionary is not a documented interface. It should - stay that way, because the complete set of headers are only accessible - through the .get_header(), .has_header(), .header_items() interface. - However, .headers pre-dates those methods, and so real code will be using - the dictionary. + Check CaseInsensitive Dict lookup, so that any form of key (.capitalized() + or .title()) can lookup the headers dict. - The introduction in 2.4 of those methods was a mistake for the same reason: - code that previously saw all (urllib2 user)-provided headers in .headers - now sees only a subset (and the function interface is ugly and incomplete). 
- A better change would have been to replace .headers dict with a dict - subclass (or UserDict.DictMixin instance?) that preserved the .headers - interface and also provided access to the "unredirected" headers. It's - probably too late to fix that, though. - - - Check .capitalize() case normalization: - >>> url = "http://example.com" >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"] 'blah' >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"] 'blah' - - Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError, - but that could be changed in future. - +>>> Request(url, headers={"Spam-eggs":"blah"}).headers["Spam-Eggs"] + 'blah' +>>> Request(url, headers={"SpaM-EggS":"blah"}).headers["spam-eggs"] + 'blah' """ def test_request_headers_methods(): """ - Note the case normalization of header names here, to .capitalize()-case. - This should be preserved for backwards-compatibility. (In the HTTP case, - normalization to .title()-case is done by urllib2 before sending headers to - httplib). + Note the case normalization of header names here, to .title()-case + (Issue #2275). With the addition of case-insensitive dict lookup, backward + compatibility (retrieval using .capitalize() case) is maintained. + >>> url = "http://example.com" >>> r = Request(url, headers={"Spam-eggs": "blah"}) ->>> r.has_header("Spam-eggs") +>>> r.has_header("Spam-Eggs") True >>> r.header_items() - [('Spam-eggs', 'blah')] + [('Spam-Eggs', 'blah')] >>> r.add_header("Foo-Bar", "baz") >>> items = r.header_items() >>> items.sort() >>> items - [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')] + [('Foo-Bar', 'baz'), ('Spam-Eggs', 'blah')] - Note that e.g. r.has_header("spam-EggS") is currently False, and - r.get_header("spam-EggS") returns None, but that could be changed in - future. + Examples below demonstrate Case Insensitive Dict lookup. 
+>>> r.has_header("Spam-eggs") # .capitalize() case + True +>>> r.has_header("spam-EggS") + True +>>> r.has_header("sPaM-EggS") + True +>>> r.get_header("Spam-eggs") # .capitalize() case + 'blah' +>>> r.get_header("spam-EGGS") + 'blah' +>>> r.get_header("sPaM-EggS") + 'blah' + + Invalid and Default value scenarios + >>> r.has_header("Not-there") False >>> print r.get_header("Not-there") @@ -751,24 +750,24 @@ r = MockResponse(200, "OK", {}, "") newreq = h.do_request_(req) if data is None: # GET - self.assert_("Content-length" not in req.unredirected_hdrs) - self.assert_("Content-type" not in req.unredirected_hdrs) + self.assert_("Content-Length" not in req.unredirected_hdrs) + self.assert_("Content-Type" not in req.unredirected_hdrs) else: # POST - self.assertEqual(req.unredirected_hdrs["Content-length"], "0") - self.assertEqual(req.unredirected_hdrs["Content-type"], + self.assertEqual(req.unredirected_hdrs["Content-Length"], "0") + self.assertEqual(req.unredirected_hdrs["Content-Type"], "application/x-www-form-urlencoded") # XXX the details of Host could be better tested self.assertEqual(req.unredirected_hdrs["Host"], "example.com") self.assertEqual(req.unredirected_hdrs["Spam"], "eggs") # don't clobber existing headers - req.add_unredirected_header("Content-length", "foo") - req.add_unredirected_header("Content-type", "bar") + req.add_unredirected_header("Content-Length", "foo") + req.add_unredirected_header("Content-Type", "bar") req.add_unredirected_header("Host", "baz") req.add_unredirected_header("Spam", "foo") newreq = h.do_request_(req) - self.assertEqual(req.unredirected_hdrs["Content-length"], "foo") - self.assertEqual(req.unredirected_hdrs["Content-type"], "bar") + self.assertEqual(req.unredirected_hdrs["Content-Length"], "foo") + self.assertEqual(req.unredirected_hdrs["Content-Type"], "bar") self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Spam"], "foo") @@ -943,7 +942,7 @@ 407, 'Proxy-Authenticate: Basic 
realm="%s"\r\n\r\n' % realm) opener.add_handler(auth_handler) opener.add_handler(http_handler) - self._test_basic_auth(opener, auth_handler, "Proxy-authorization", + self._test_basic_auth(opener, auth_handler, "Proxy-Authorization", realm, http_handler, password_manager, "http://acme.example.com:3128/protected", "proxy.example.com:3128",

AltStyle によって変換されたページ (->オリジナル) /