changeset: 88888:92b3928bfde1 parent: 88885:b1f214165471 parent: 88887:32097f193892 user: Ezio Melotti date: Sat Feb 01 21:22:26 2014 +0200 files: Lib/html/parser.py Lib/test/test_htmlparser.py Misc/NEWS description: #20288: merge with 3.3. diff -r b1f214165471 -r 92b3928bfde1 Lib/html/parser.py --- a/Lib/html/parser.py Sat Feb 01 13:49:29 2014 -0500 +++ b/Lib/html/parser.py Sat Feb 01 21:22:26 2014 +0200 @@ -264,9 +264,9 @@ i = self.updatepos(i, k) continue else: - if ";" in rawdata[i:]: #bail by consuming &# - self.handle_data(rawdata[0:2]) - i = self.updatepos(i, 2) + if ";" in rawdata[i:]: # bail by consuming &# + self.handle_data(rawdata[i:i+2]) + i = self.updatepos(i, i+2) break elif startswith('&', i): match = entityref.match(rawdata, i) diff -r b1f214165471 -r 92b3928bfde1 Lib/test/test_htmlparser.py --- a/Lib/test/test_htmlparser.py Sat Feb 01 13:49:29 2014 -0500 +++ b/Lib/test/test_htmlparser.py Sat Feb 01 21:22:26 2014 +0200 @@ -167,6 +167,12 @@ ("data", "&#bad;"), ("endtag", "p"), ]) + # add the [] as a workaround to avoid buffering (see #20288) + self._run_check(["
<div>&#bad;</div>
"], [ + ("starttag", "div", []), + ("data", "&#bad;"), + ("endtag", "div"), + ]) def test_unclosed_entityref(self): self._run_check("&entityref foo", [ diff -r b1f214165471 -r 92b3928bfde1 Misc/NEWS --- a/Misc/NEWS Sat Feb 01 13:49:29 2014 -0500 +++ b/Misc/NEWS Sat Feb 01 21:22:26 2014 +0200 @@ -41,6 +41,8 @@ ValueError instead of assert for forbidden subprocess_{shell,exec} arguments. (More to follow -- a convenience API for subprocesses.) +- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser. + - Issue #20424: Python implementation of io.StringIO now supports lone surrogates. - Issue #20308: inspect.signature now works on classes without user-defined

AltStyle によって変換されたページ (->オリジナル) /