changeset: 77191:0869f5f47608 user: R David Murray date: Sun May 27 20:45:01 2012 -0400 files: Doc/library/email.errors.rst Lib/email/errors.py Lib/email/feedparser.py Lib/test/test_email/test_email.py Lib/test/test_email/test_parser.py Misc/NEWS description: #14925: email now registers a defect for missing header/body separator. This patch also deprecates the MalformedHeaderDefect. My best guess is that this defect was rendered obsolete by a refactoring of the parser, and the corresponding defect for the new parser (which this patch introduces) was overlooked. diff -r e10f71c8b684 -r 0869f5f47608 Doc/library/email.errors.rst --- a/Doc/library/email.errors.rst Sun May 27 17:18:28 2012 -0400 +++ b/Doc/library/email.errors.rst Sun May 27 20:45:01 2012 -0400 @@ -79,9 +79,18 @@ * :class:`MisplacedEnvelopeHeaderDefect` - A "Unix From" header was found in the middle of a header block. +* :class:`MissingHeaderBodySeparatorDefect` - A line was found while parsing + headers that had no leading white space but contained no ':'. Parsing + continues assuming that the line represents the first line of the body. + + .. versionadded:: 3.3 + * :class:`MalformedHeaderDefect` -- A header was found that was missing a colon, or was otherwise malformed. + .. deprecated:: 3.3 + This defect has not been used for several Python versions. + * :class:`MultipartInvariantViolationDefect` -- A message claimed to be a :mimetype:`multipart`, but no subparts were found. 
Note that when a message has this defect, its :meth:`is_multipart` method may return false even though its diff -r e10f71c8b684 -r 0869f5f47608 Lib/email/errors.py --- a/Lib/email/errors.py Sun May 27 17:18:28 2012 -0400 +++ b/Lib/email/errors.py Sun May 27 20:45:01 2012 -0400 @@ -48,8 +48,10 @@ class MisplacedEnvelopeHeaderDefect(MessageDefect): """A 'Unix-from' header was found in the middle of a header block.""" -class MalformedHeaderDefect(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed.""" +class MissingHeaderBodySeparatorDefect(MessageDefect): + """Found line with no leading whitespace and no colon before blank line.""" +# XXX: backward compatibility, just in case (it was never emitted). +MalformedHeaderDefect = MissingHeaderBodySeparatorDefect class MultipartInvariantViolationDefect(MessageDefect): """A message claimed to be a multipart but no subparts were found.""" diff -r e10f71c8b684 -r 0869f5f47608 Lib/email/feedparser.py --- a/Lib/email/feedparser.py Sun May 27 17:18:28 2012 -0400 +++ b/Lib/email/feedparser.py Sun May 27 20:45:01 2012 -0400 @@ -219,6 +219,8 @@ # (i.e. newline), just throw it away. Otherwise the line is # part of the body so push it back. if not NLCRE.match(line): + defect = errors.MissingHeaderBodySeparatorDefect() + self.policy.handle_defect(self._cur, defect) self._input.unreadline(line) break headers.append(line) @@ -488,12 +490,10 @@ self._cur.defects.append(defect) continue # Split the line on the colon separating field name from value. + # There will always be a colon, because if there wasn't the part of + # the parser that calls us would have started parsing the body. 
i = line.find(':') - if i < 0: - defect = errors.MalformedHeaderDefect(line) - # XXX: fixme (defect not going through policy) - self._cur.defects.append(defect) - continue + assert i>0, "_parse_headers fed line with no : and no leading WS" lastheader = line[:i] lastvalue = [line] # Done with all the lines, so handle the last header. diff -r e10f71c8b684 -r 0869f5f47608 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Sun May 27 17:18:28 2012 -0400 +++ b/Lib/test/test_email/test_email.py Sun May 27 20:45:01 2012 -0400 @@ -1960,15 +1960,27 @@ # test_parser.TestMessageDefectDetectionBase def test_first_line_is_continuation_header(self): eq = self.assertEqual - m = ' Line 1\nLine 2\nLine 3' + m = ' Line 1\nSubject: test\n\nbody' msg = email.message_from_string(m) - eq(msg.keys(), []) - eq(msg.get_payload(), 'Line 2\nLine 3') + eq(msg.keys(), ['Subject']) + eq(msg.get_payload(), 'body') eq(len(msg.defects), 1) - self.assertTrue(isinstance(msg.defects[0], - errors.FirstHeaderLineIsContinuationDefect)) + self.assertDefectsEqual(msg.defects, + [errors.FirstHeaderLineIsContinuationDefect]) eq(msg.defects[0].line, ' Line 1\n') + # test_parser.TestMessageDefectDetectionBase + def test_missing_header_body_separator(self): + # Our heuristic if we see a line that doesn't look like a header (no + # leading whitespace but no ':') is to assume that the blank line that + # separates the header from the body is missing, and to stop parsing + # headers and start parsing the body. 
+ msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') + self.assertEqual(msg.keys(), ['Subject']) + self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') + self.assertDefectsEqual(msg.defects, + [errors.MissingHeaderBodySeparatorDefect]) + # Test RFC 2047 header encoding and decoding class TestRFC2047(TestEmailBase): diff -r e10f71c8b684 -r 0869f5f47608 Lib/test/test_email/test_parser.py --- a/Lib/test/test_email/test_parser.py Sun May 27 17:18:28 2012 -0400 +++ b/Lib/test/test_email/test_parser.py Sun May 27 20:45:01 2012 -0400 @@ -237,17 +237,33 @@ policy=self.policy.clone(raise_on_defect=True)) def test_first_line_is_continuation_header(self): - msg = self._str_msg(' Line 1\nLine 2\nLine 3') - self.assertEqual(msg.keys(), []) - self.assertEqual(msg.get_payload(), 'Line 2\nLine 3') + msg = self._str_msg(' Line 1\nSubject: test\n\nbody') + self.assertEqual(msg.keys(), ['Subject']) + self.assertEqual(msg.get_payload(), 'body') self.assertEqual(len(self.get_defects(msg)), 1) - self.assertTrue(isinstance(self.get_defects(msg)[0], - errors.FirstHeaderLineIsContinuationDefect)) + self.assertDefectsEqual(self.get_defects(msg), + [errors.FirstHeaderLineIsContinuationDefect]) self.assertEqual(self.get_defects(msg)[0].line, ' Line 1\n') def test_first_line_is_continuation_header_raise_on_defect(self): with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect): - self._str_msg(' Line 1\nLine 2\nLine 3', + self._str_msg(' Line 1\nSubject: test\n\nbody\n', + policy=self.policy.clone(raise_on_defect=True)) + + def test_missing_header_body_separator(self): + # Our heuristic if we see a line that doesn't look like a header (no + # leading whitespace but no ':') is to assume that the blank line that + # separates the header from the body is missing, and to stop parsing + # headers and start parsing the body. 
+ msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') + self.assertEqual(msg.keys(), ['Subject']) + self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') + self.assertDefectsEqual(self.get_defects(msg), + [errors.MissingHeaderBodySeparatorDefect]) + + def test_missing_header_body_separator_raise_on_defect(self): + with self.assertRaises(errors.MissingHeaderBodySeparatorDefect): + self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n', policy=self.policy.clone(raise_on_defect=True)) diff -r e10f71c8b684 -r 0869f5f47608 Misc/NEWS --- a/Misc/NEWS Sun May 27 17:18:28 2012 -0400 +++ b/Misc/NEWS Sun May 27 20:45:01 2012 -0400 @@ -49,6 +49,10 @@ Library ------- +- Issue #14925: email now registers a defect when the parser decides that there + is a missing header/body separator line. MalformedHeaderDefect, which the + existing code would never actually generate, is deprecated. + - Issue #10365: File open dialog now works instead of crashing even when parent window is closed. Patch by Roger Serwy.

AltStyle によって変換されたページ (->オリジナル) /