diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -270,8 +270,8 @@ - self.__starttag_text.rfind("\n") else: offset = offset + len(self.__starttag_text) - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos if end.endswith('/>'): # XHTML-style empty tag: self.handle_startendtag(tag, attrs) @@ -308,8 +308,10 @@ # end of input in or before attribute value, or we have the # '/' from a '/>' ending return -1 - self.updatepos(i, j) - self.error("malformed start tag") + if j> i: + return j + else: + return i + 1 raise AssertionError("we should not get here!") # Internal -- parse endtag, return end or -1 if incomplete @@ -325,7 +327,13 @@ if self.cdata_elem is not None: self.handle_data(rawdata[i:j]) return j - self.error("bad end tag: %r" % (rawdata[i:j],)) + k = rawdata.find('<', i + 1, j) + if k> i: + j = k + if j <= i: + j = i + 1 + self.handle_data(rawdata[i:j]) + return j elem = match.group(1).lower() # script or style if self.cdata_elem is not None: diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -219,12 +219,13 @@ self._run_check(["", ""], output) def test_starttag_junk_chars(self): - self._parse_error("") - self._parse_error("") + self._run_check('', + [('data', '") - self._parse_error("") self._parse_error("

AltStyle によって変換されたページ (->オリジナル) /