[Python-checkins] cpython (2.7): #20288: fix handling of invalid numeric charrefs in HTMLParser.
ezio.melotti
python-checkins at python.org
Sat Feb 1 20:23:12 CET 2014
http://hg.python.org/cpython/rev/0d50b5851f38
changeset: 88886:0d50b5851f38
branch: 2.7
parent: 88883:fed468670866
user: Ezio Melotti <ezio.melotti at gmail.com>
date: Sat Feb 01 21:20:22 2014 +0200
summary:
#20288: fix handling of invalid numeric charrefs in HTMLParser.
files:
Lib/HTMLParser.py | 6 +++---
Lib/test/test_htmlparser.py | 6 ++++++
Misc/NEWS | 2 ++
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -195,9 +195,9 @@
i = self.updatepos(i, k)
continue
else:
- if ";" in rawdata[i:]: #bail by consuming &#
- self.handle_data(rawdata[0:2])
- i = self.updatepos(i, 2)
+ if ";" in rawdata[i:]: # bail by consuming '&#'
+ self.handle_data(rawdata[i:i+2])
+ i = self.updatepos(i, i+2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -394,6 +394,12 @@
("data", "&#bad;"),
("endtag", "p"),
])
+ # add the [] as a workaround to avoid buffering (see #20288)
+ self._run_check(["<div>&#bad;</div>"], [
+ ("starttag", "div", []),
+ ("data", "&#bad;"),
+ ("endtag", "div"),
+ ])
def test_unescape_function(self):
parser = HTMLParser.HTMLParser()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,8 @@
Library
-------
+- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser.
+
- Issue #19456: ntpath.join() now joins relative paths correctly when a drive
is present.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list