Index: Misc/NEWS
===================================================================
--- Misc/NEWS	(revision 64812)
+++ Misc/NEWS	(working copy)
@@ -25,6 +25,11 @@
 Library
 -------
 
+- Issue #2275: urllib2 header capitalization. Added a case-insensitive dict
+ lookup for the headers interface. Headers sent to httplib are now
+ .title()-cased instead of .capitalize()-cased. The headers dictionary is
+ exposed and documented via .headers and Request.get_header().
+
 - Issue #2683: Fix inconsistency in subprocess.Popen.communicate(): the
 argument now must be a bytes object in any case.
 
Index: Doc/library/urllib.request.rst
===================================================================
--- Doc/library/urllib.request.rst	(revision 64812)
+++ Doc/library/urllib.request.rst	(working copy)
@@ -175,13 +175,16 @@
 :func:`urllib.urlencode` function takes a mapping or sequence of 2-tuples and
 returns a string in this format.
 
- *headers* should be a dictionary, and will be treated as if :meth:`add_header`
- was called with each key and value as arguments. This is often used to "spoof"
- the ``User-Agent`` header, which is used by a browser to identify itself --
- some HTTP servers only allow requests coming from common browsers as opposed
- to scripts. For example, Mozilla Firefox may identify itself as ``"Mozilla/5.0
- (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"``, while :mod:`urllib`'s
- default user agent string is ``"Python-urllib/2.6"`` (on Python 2.6).
+ *headers* should be a dictionary, and will be treated as if
+ :meth:`add_header` was called with each key and value as arguments.
+ *headers* is stored internally as a special form of ``dict`` which provides
+ case-insensitive key lookup. This argument is often used to "spoof" the
+ ``User-Agent`` header, which is used by a browser to identify itself -- some
+ HTTP servers only allow requests coming from common browsers as opposed to
+ scripts. For example, Mozilla Firefox may identify itself as ``"Mozilla/5.0
+ (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11"``, while
+ :mod:`urllib`'s default user agent string is ``"Python-urllib/2.6"`` (on
+ Python 2.6).
 
 The final two arguments are only of interest for correct handling of third-party
 HTTP cookies:
@@ -1094,6 +1097,20 @@
 :mailheader:`Content-Type` and :mailheader:`Host`) are added when the
 :class:`Request` is passed to :func:`urlopen` (or :meth:`OpenerDirector.open`).
 
+Retrieving HTTP headers:
+
+To retrieve an HTTP header, either use the ``.headers`` attribute directly or
+use the :meth:`Request.get_header` method. ::
+ 
+>>> from urllib.request import Request
+>>> url = "http://example.com"
+>>> req = Request(url, headers={"Spam-eggs": "blah"})
+>>> req.headers["Spam-eggs"]
+ 'blah'
+>>> req.headers["Spam-Eggs"]
+ 'blah'
+>>> req.get_header("Spam-Eggs")
+ 'blah'
 .. _urllib-examples:
 
 Here is an example session that uses the ``GET`` method to retrieve a URL
Index: Lib/urllib/request.py
===================================================================
--- Lib/urllib/request.py	(revision 64812)
+++ Lib/urllib/request.py	(working copy)
@@ -158,6 +158,37 @@
 host = _cut_port_re.sub("", host, 1)
 return host.lower()
 
+class CaseInsensitiveDict(dict):
+ def __init__(self, *args, **kwargs):
+ self.keystore = {}
+ d = dict(*args, **kwargs)
+ for k in d.keys():
+ self.keystore[self._get_lower(k)] = k
+ return super(CaseInsensitiveDict, self).__init__(*args, **kwargs)
+ def __setitem__(self, k, v):
+ if hasattr(self,'keystore'):
+ self.keystore[self._get_lower(k)] = k
+ return super(CaseInsensitiveDict, self).__setitem__(k, v)
+ def __getitem__(self, k):
+ if hasattr(self,'keystore') and self._get_lower(k) in self.keystore:
+ k = self.keystore[self._get_lower(k)]
+ return super(CaseInsensitiveDict, self).__getitem__(k)
+ def __contains__(self, k):
+ if hasattr(self,'keystore') and self._get_lower(k) in self.keystore:
+ k = self.keystore[self._get_lower(k)]
+ return super(CaseInsensitiveDict, self).__contains__(k)
+ def get(self, k, failobj=None):
+ if hasattr(self,'keystore') and self._get_lower(k) in self.keystore:
+ k = self.keystore[self._get_lower(k)]
+ return super(CaseInsensitiveDict, self).get(k, failobj)
+
+ @staticmethod
+ def _get_lower(k):
+ if isinstance(k, str):
+ return k.lower()
+ else:
+ return k
+
 class Request:
 
 def __init__(self, url, data=None, headers={},
@@ -169,10 +200,10 @@
 self.host = None
 self.port = None
 self.data = data
- self.headers = {}
+ self.headers = CaseInsensitiveDict()
 for key, value in headers.items():
 self.add_header(key, value)
- self.unredirected_hdrs = {}
+ self.unredirected_hdrs = CaseInsensitiveDict()
 if origin_req_host is None:
 origin_req_host = request_host(self)
 self.origin_req_host = origin_req_host
@@ -239,11 +270,11 @@
 
 def add_header(self, key, val):
 # useful for something like authentication
- self.headers[key.capitalize()] = val
+ self.headers[key.title()] = val
 
 def add_unredirected_header(self, key, val):
 # will not be added to a redirected request
- self.unredirected_hdrs[key.capitalize()] = val
+ self.unredirected_hdrs[key.title()] = val
 
 def has_header(self, header_name):
 return (header_name in self.headers or
@@ -1002,13 +1033,13 @@
 
 if request.has_data(): # POST
 data = request.get_data()
- if not request.has_header('Content-type'):
+ if not request.has_header('Content-Type'):
 request.add_unredirected_header(
- 'Content-type',
+ 'Content-Type',
 'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
+ if not request.has_header('Content-Length'):
 request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
+ 'Content-Length', '%d' % len(data))
 
 scheme, sel = splittype(request.get_selector())
 sel_host, sel_path = splithost(sel)
Index: Lib/test/test_urllib2.py
===================================================================
--- Lib/test/test_urllib2.py	(revision 64812)
+++ Lib/test/test_urllib2.py	(working copy)
@@ -48,56 +48,55 @@
 
 def test_request_headers_dict():
 """
- The Request.headers dictionary is not a documented interface. It should
- stay that way, because the complete set of headers are only accessible
- through the .get_header(), .has_header(), .header_items() interface.
- However, .headers pre-dates those methods, and so real code will be using
- the dictionary.
+ Check CaseInsensitiveDict lookup, so that any form of key (.capitalize()
+ or .title()) can look up the headers dict.
 
- The introduction in 2.4 of those methods was a mistake for the same reason:
- code that previously saw all (urllib2 user)-provided headers in .headers
- now sees only a subset (and the function interface is ugly and incomplete).
- A better change would have been to replace .headers dict with a dict
- subclass (or UserDict.DictMixin instance?) that preserved the .headers
- interface and also provided access to the "unredirected" headers. It's
- probably too late to fix that, though.
-
-
- Check .capitalize() case normalization:
-
>>> url = "http://example.com"
>>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
 'blah'
>>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
 'blah'
-
- Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
- but that could be changed in future.
-
+>>> Request(url, headers={"Spam-eggs":"blah"}).headers["Spam-Eggs"]
+ 'blah'
+>>> Request(url, headers={"SpaM-EggS":"blah"}).headers["spam-eggs"]
+ 'blah'
 """
 
 def test_request_headers_methods():
 """
- Note the case normalization of header names here, to .capitalize()-case.
- This should be preserved for backwards-compatibility. (In the HTTP case,
- normalization to .title()-case is done by urllib2 before sending headers to
- http.client).
+ Note the case normalization of header names here, to .title()-case
+ (Issue #2275). With the addition of case-insensitive dict lookup, backward
+ compatibility, as in retrieval using .capitalize()-case, is maintained.
 
>>> url = "http://example.com"
>>> r = Request(url, headers={"Spam-eggs": "blah"})
->>> r.has_header("Spam-eggs")
+>>> r.has_header("Spam-Eggs")
 True
>>> r.header_items()
- [('Spam-eggs', 'blah')]
+ [('Spam-Eggs', 'blah')]
>>> r.add_header("Foo-Bar", "baz")
>>> items = sorted(r.header_items())
>>> items
- [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
+ [('Foo-Bar', 'baz'), ('Spam-Eggs', 'blah')]
 
- Note that e.g. r.has_header("spam-EggS") is currently False, and
- r.get_header("spam-EggS") returns None, but that could be changed in
- future.
+ Examples below demonstrate Case Insensitive Dict lookup.
 
+>>> r.has_header("Spam-eggs") # .capitalize() case
+ True
+>>> r.has_header("spam-EggS")
+ True
+>>> r.has_header("sPaM-EggS")
+ True
+>>> r.get_header("Spam-eggs") # .capitalize() case
+ 'blah'
+>>> r.get_header("spam-EGGS")
+ 'blah'
+>>> r.get_header("sPaM-EggS")
+ 'blah'
+
+ Invalid and Default value scenarios
+
+
>>> r.has_header("Not-there")
 False
>>> print(r.get_header("Not-there"))
@@ -749,24 +748,24 @@
 r = MockResponse(200, "OK", {}, "")
 newreq = h.do_request_(req)
 if data is None: # GET
- self.assert_("Content-length" not in req.unredirected_hdrs)
- self.assert_("Content-type" not in req.unredirected_hdrs)
+ self.assert_("Content-Length" not in req.unredirected_hdrs)
+ self.assert_("Content-Type" not in req.unredirected_hdrs)
 else: # POST
- self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
- self.assertEqual(req.unredirected_hdrs["Content-type"],
+ self.assertEqual(req.unredirected_hdrs["Content-Length"], "0")
+ self.assertEqual(req.unredirected_hdrs["Content-Type"],
 "application/x-www-form-urlencoded")
 # XXX the details of Host could be better tested
 self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
 self.assertEqual(req.unredirected_hdrs["Spam"], "eggs")
 
 # don't clobber existing headers
- req.add_unredirected_header("Content-length", "foo")
- req.add_unredirected_header("Content-type", "bar")
+ req.add_unredirected_header("Content-Length", "foo")
+ req.add_unredirected_header("Content-Type", "bar")
 req.add_unredirected_header("Host", "baz")
 req.add_unredirected_header("Spam", "foo")
 newreq = h.do_request_(req)
- self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
- self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
+ self.assertEqual(req.unredirected_hdrs["Content-Length"], "foo")
+ self.assertEqual(req.unredirected_hdrs["Content-Type"], "bar")
 self.assertEqual(req.unredirected_hdrs["Host"], "baz")
 self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
 
@@ -940,7 +939,7 @@
 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
 opener.add_handler(auth_handler)
 opener.add_handler(http_handler)
- self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
+ self._test_basic_auth(opener, auth_handler, "Proxy-Authorization",
 realm, http_handler, password_manager,
 "http://acme.example.com:3128/protected",
 "proxy.example.com:3128",
</div><div class="naked_ctrl">
<form action="/index.cgi/contrast" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://bugs.python.org/file10863/issue2275-py3k.diff">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://bugs.python.org/file10863/issue2275-py3k.diff" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast" selected="selected">配色反転</option>
<option value="larger-text">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>