[Python-checkins] cpython (merge 3.3 -> default): Merge: #16983: Apply postel's law to encoded words inside quoted strings.
r.david.murray
python-checkins at python.org
Sat Feb 8 19:13:52 CET 2014
http://hg.python.org/cpython/rev/5f7e626730df
changeset: 89056:5f7e626730df
parent: 89054:2e97d3500970
parent: 89055:1dcb9d0d53a6
user: R David Murray <rdmurray at bitdance.com>
date: Sat Feb 08 13:13:01 2014 -0500
summary:
Merge: #16983: Apply postel's law to encoded words inside quoted strings.
files:
Lib/email/_header_value_parser.py | 7 +++++++
Lib/test/test_email/test__header_value_parser.py | 9 +++++++++
Lib/test/test_email/test_headerregistry.py | 10 ++++++++++
Misc/NEWS | 3 +++
4 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1556,6 +1556,13 @@
while value and value[0] != '"':
if value[0] in WSP:
token, value = get_fws(value)
+ elif value[:2] == '=?':
+ try:
+ token, value = get_encoded_word(value)
+ bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+ "encoded word inside quoted string"))
+ except errors.HeaderParseError:
+ token, value = get_qcontent(value)
else:
token, value = get_qcontent(value)
bare_quoted_string.append(token)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -540,6 +540,15 @@
self._test_get_x(parser.get_bare_quoted_string,
'""', '""', '', [], '')
+ # Issue 16983: apply postel's law to some bad encoding.
+ def test_encoded_word_inside_quotes(self):
+ self._test_get_x(parser.get_bare_quoted_string,
+ '"=?utf-8?Q?not_really_valid?="',
+ '"not really valid"',
+ 'not really valid',
+ [errors.InvalidHeaderDefect],
+ '')
+
# get_comment
def test_get_comment_only(self):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1155,6 +1155,16 @@
'example.com',
None),
+ 'rfc2047_atom_in_quoted_string_is_decoded':
+ ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
+ [errors.InvalidHeaderDefect],
+ 'Éric <foo@example.com>',
+ 'Éric',
+ 'foo@example.com',
+ 'foo',
+ 'example.com',
+ None),
+
}
# XXX: Need many more examples, and in particular some with names in
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -27,6 +27,9 @@
Library
-------
+- Issue #16983: the new email header parsing code will now decode encoded words
+ that are (incorrectly) surrounded by quotes, and register a defect.
+
- Issue #19772: email.generator no longer mutates the message object when
doing a down-transform from 8bit to 7bit CTEs.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list