Commit 5354daf

facelessuser authored and waylan committed

Fix an HTML comment parsing case that can cause an infinite loop

1 parent f39cf84 commit 5354daf (Copy full SHA for 5354daf)

File tree

3 files changed

+42

-1

lines changed

docs
- changelog.md
markdown
- htmlparser.py
tests/test_syntax/blocks
- test_html_blocks.py

3 files changed

+42

-1

lines changed

`‎docs/changelog.md‎`

Lines changed: 6 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,12 @@ and this project adheres to the`
`10`	`10`	`[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).`
`11`	`11`	`See the [Contributing Guide](contributing.md) for details.`
`12`	`12`
	`13`	`+## [Unreleased]`
	`14`	`+`
	`15`	`+### Fixed`
	`16`	`+`
	`17`	`+* Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554).`
	`18`	`+`
`13`	`19`	`## [3.9.0] - 2025-09-04`
`14`	`20`
`15`	`21`	`### Changed`

`‎markdown/htmlparser.py‎`

Lines changed: 19 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,9 @@`
`33`	`33`	`if TYPE_CHECKING: # pragma: no cover`
`34`	`34`	`from markdown import Markdown`
`35`	`35`
	`36`	`+# Included for versions which do not have current comment fix`
	`37`	`+commentclose = re.compile(r'--!?>')`
	`38`	`+commentabruptclose = re.compile(r'-?>')`
`36`	`39`
`37`	`40`	# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
`38`	`41`	# Users can still do `from html import parser` and get the default behavior.
`@@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:`
`302`	`305`	`self.handle_data('<?')`
`303`	`306`	`return i + 2`
`304`	`307`
	`308`	`+ if not hasattr(htmlparser, 'commentabruptclose'):`
	`309`	`+ # Internal -- parse comment, return length or -1 if not terminated`
	`310`	`+ # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state`
	`311`	`+ def parse_comment(self, i, report=True):`
	`312`	`+ rawdata = self.rawdata`
	`313`	`+ assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'`
	`314`	`+ match = commentclose.search(rawdata, i+4)`
	`315`	`+ if not match:`
	`316`	`+ match = commentabruptclose.match(rawdata, i+4)`
	`317`	`+ if not match:`
	`318`	`+ return -1`
	`319`	`+ if report:`
	`320`	`+ j = match.start()`
	`321`	`+ self.handle_comment(rawdata[i+4: j])`
	`322`	`+ return match.end()`
	`323`	`+`
`305`	`324`	`def parse_html_declaration(self, i: int) -> int:`
`306`	`325`	`if self.at_line_start() or self.intail:`
`307`	`326`	`if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':`

`‎tests/test_syntax/blocks/test_html_blocks.py‎`

Lines changed: 17 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):`
`1018`	`1018`	`# Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner`
`1019`	`1019`	`# as browsers and all text after the opening comment tag was considered to be in a comment. However,`
`1020`	`1020`	`# that did not match the reference implementation. The new behavior does.`
`1021`		`- def test_unclosed_comment_(self):`
	`1021`	`+ def test_unclosed_comment(self):`
`1022`	`1022`	`self.assertMarkdownRenders(`
`1023`	`1023`	`self.dedent(`
`1024`	`1024`	`"""`
`@@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):`
`1035`	`1035`	`)`
`1036`	`1036`	`)`
`1037`	`1037`
	`1038`	`+ def test_invalid_comment_end(self):`
	`1039`	`+ self.assertMarkdownRenders(`
	`1040`	`+ self.dedent(`
	`1041`	`+ """`
	`1042`	`+ <!-- This comment is malformed and never closes -- >`
	`1043`	`+ Some content after the bad comment.`
	`1044`	`+ """`
	`1045`	`+ ),`
	`1046`	`+ self.dedent(`
	`1047`	`+ """`
	`1048`	`+ <p><!-- This comment is malformed and never closes -- >`
	`1049`	`+ Some content after the bad comment.</p>`
	`1050`	`+ """`
	`1051`	`+ )`
	`1052`	`+ )`
	`1053`	`+`
`1038`	`1054`	`def test_raw_processing_instruction_one_line(self):`
`1039`	`1055`	`self.assertMarkdownRenders(`
`1040`	`1056`	`"<?php echo '>'; ?>",`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 5354daf

File tree

3 files changed

3 files changed

`‎docs/changelog.md‎`

`‎markdown/htmlparser.py‎`

`‎tests/test_syntax/blocks/test_html_blocks.py‎`

0 commit comments