Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5354daf

Browse files
facelessuser authored and waylan committed
Fix an HTML comment parsing case that can cause an infinite loop
Fixes #1554
1 parent f39cf84 commit 5354daf

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

‎docs/changelog.md‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ and this project adheres to the
1010
[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).
1111
See the [Contributing Guide](contributing.md) for details.
1212

13+
## [Unreleased]
14+
15+
### Fixed
16+
17+
* Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554).
18+
1319
## [3.9.0] - 2025-09-04
1420

1521
### Changed

‎markdown/htmlparser.py‎

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
if TYPE_CHECKING: # pragma: no cover
3434
from markdown import Markdown
3535

36+
# Included for versions which do not have current comment fix
37+
commentclose = re.compile(r'--!?>')
38+
commentabruptclose = re.compile(r'-?>')
3639

3740
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
3841
# Users can still do `from html import parser` and get the default behavior.
@@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:
302305
self.handle_data('<?')
303306
return i + 2
304307

308+
if not hasattr(htmlparser, 'commentabruptclose'):
309+
# Internal -- parse comment, return length or -1 if not terminated
310+
# see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
311+
def parse_comment(self, i, report=True):
312+
rawdata = self.rawdata
313+
assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
314+
match = commentclose.search(rawdata, i+4)
315+
if not match:
316+
match = commentabruptclose.match(rawdata, i+4)
317+
if not match:
318+
return -1
319+
if report:
320+
j = match.start()
321+
self.handle_comment(rawdata[i+4: j])
322+
return match.end()
323+
305324
def parse_html_declaration(self, i: int) -> int:
306325
if self.at_line_start() or self.intail:
307326
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':

‎tests/test_syntax/blocks/test_html_blocks.py‎

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):
10181018
# Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner
10191019
# as browsers and all text after the opening comment tag was considered to be in a comment. However,
10201020
# that did not match the reference implementation. The new behavior does.
1021-
def test_unclosed_comment_(self):
1021+
def test_unclosed_comment(self):
10221022
self.assertMarkdownRenders(
10231023
self.dedent(
10241024
"""
@@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):
10351035
)
10361036
)
10371037

1038+
def test_invalid_comment_end(self):
1039+
self.assertMarkdownRenders(
1040+
self.dedent(
1041+
"""
1042+
<!-- This comment is malformed and never closes -- >
1043+
Some content after the bad comment.
1044+
"""
1045+
),
1046+
self.dedent(
1047+
"""
1048+
<p>&lt;!-- This comment is malformed and never closes -- &gt;
1049+
Some content after the bad comment.</p>
1050+
"""
1051+
)
1052+
)
1053+
10381054
def test_raw_processing_instruction_one_line(self):
10391055
self.assertMarkdownRenders(
10401056
"<?php echo '>'; ?>",

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /