Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bc09813

Browse files
committed
Initial negative lookahead support
Removes some false positives.
1 parent d48af06 commit bc09813

File tree

5 files changed

+110
-17
lines changed

5 files changed

+110
-17
lines changed

‎regexploit/ast/sre.py‎

Lines changed: 77 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
class SreOpParser:
1919
def __init__(self):
2020
self._groups = {}
21+
self.negative_lookahead: Optional[Character] = None
2122

2223
def parse_sre(self, pattern: str, flags: int = 0):
2324
return self.sequence_or_singleton(sre_parse.parse(pattern, flags))
@@ -56,6 +57,8 @@ def from_MAX_REPEAT(
5657
) -> Union[FiniteRepeat, InfiniteRepeat, Branch, None]:
5758
minimum, maximum, elements = data
5859
infinite = maximum is sre_constants.MAXREPEAT
60+
# TODO support negative lookahead before repeat with minimum = 0
61+
negative_lookahead = self.use_negative_lookahead()
5962
repeatable = self.sequence_or_singleton(elements)
6063
if repeatable is None:
6164
return None
@@ -68,7 +71,30 @@ def from_MAX_REPEAT(
6871
# Interesting (starry) optional sequences as branches (ab*)? -> (ab*|)
6972
return make_branch([repeatable, None])
7073
if infinite:
74+
if (
75+
negative_lookahead is not None
76+
and minimum > 0
77+
and isinstance(repeatable, Character)
78+
):
79+
return Sequence(
80+
[
81+
negative_lookahead & repeatable,
82+
InfiniteRepeat(repeatable, minimum - 1),
83+
]
84+
)
7185
return InfiniteRepeat(repeatable, minimum)
86+
if (
87+
negative_lookahead is not None
88+
and minimum > 0
89+
and maximum > 1
90+
and isinstance(repeatable, Character)
91+
):
92+
return Sequence(
93+
[
94+
negative_lookahead & repeatable,
95+
FiniteRepeat(repeatable, minimum - 1, maximum - 1),
96+
]
97+
)
7298
return FiniteRepeat(repeatable, minimum, maximum)
7399

74100
def from_MIN_REPEAT(self, data):
@@ -79,30 +105,40 @@ def from_BRANCH(
79105
) -> Union[Branch, FiniteRepeat, Character, None]:
80106
# sre already transforms (a|b|c) -> [abc]
81107
branches = data[1]
82-
return make_branch([self.sequence_or_singleton(branch) for branch in branches])
83-
84-
@staticmethod
85-
def from_AT(at: SreConstant):
108+
negative_lookahead = self.use_negative_lookahead()
109+
processed_branches = []
110+
for branch in branches:
111+
self.negative_lookahead = negative_lookahead
112+
processed_branches.append(self.sequence_or_singleton(branch))
113+
self.negative_lookahead = None
114+
return make_branch(processed_branches)
115+
116+
def from_AT(self, at: SreConstant):
    """Translate a positional anchor op.

    Any pending negative lookahead is consumed and discarded first, so it
    cannot leak onto whatever element is parsed after the anchor.

    Returns an ``EndOfString`` marker for ``AT_END``; every other anchor
    yields ``None``.
    """
    # TODO: handling for multiline
    # TODO: handling for \\b
    self.use_negative_lookahead()  # result intentionally ignored
    return EndOfString() if at is sre_constants.AT_END else None
91123

92-
@staticmethod
93-
def from_ANY(_: None) -> Character:
124+
def from_ANY(self, _: None) -> Character:
    """Translate the ``.`` wildcard.

    When a (truthy) negative lookahead is pending it is consumed and
    returned as-is; otherwise the unrestricted ``Character.ANY()`` is
    returned.
    """
    return self.use_negative_lookahead() or Character.ANY()
95128

96-
@staticmethod
97-
def from_LITERAL(literal: int) -> Character:
129+
def from_LITERAL(self, literal: int) -> Character:
    """Translate a single literal code point into a ``Character``.

    A pending negative lookahead, if any, is consumed and combined with
    the literal via ``&``.
    """
    pending = self.use_negative_lookahead()
    plain = Character.LITERAL(literal)
    return plain & pending if pending else plain
99133

100-
@staticmethod
101-
def from_NOT_LITERAL(not_literal: int) -> Character:
134+
def from_NOT_LITERAL(self, not_literal: int) -> Character:
    """Translate a negated literal (e.g. ``[^a]``) into a ``Character``.

    The result is a non-positive ``Character`` over the single code point.
    A pending negative lookahead, if any, is consumed and combined with it
    via ``&``.
    """
    pending = self.use_negative_lookahead()
    excluded = Character(literals={not_literal}, positive=False)
    return excluded & pending if pending else excluded
103140

104-
@staticmethod
105-
def from_IN(data: List[SreOp]) -> Character:
141+
def from_IN(self, data: List[SreOp]) -> Character:
106142
literals: Optional[Set[int]] = None
107143
categories: Optional[Set] = None
108144
positive = True
@@ -125,7 +161,9 @@ def from_IN(data: List[SreOp]) -> Character:
125161
categories.add(Category[in_data.name[9:]])
126162

127163
if categories and covers_any(categories):
128-
return Character.ANY() if positive else None
164+
return self.from_ANY(None) if positive else None
165+
if negative_lookahead := self.use_negative_lookahead():
166+
return Character(literals, categories, positive) & negative_lookahead
129167
return Character(literals, categories, positive)
130168

131169
def from_GROUPREF(self, ref: int):
@@ -139,6 +177,29 @@ def from_GROUPREF_EXISTS(_) -> None:
139177
def from_ASSERT(_) -> None:
140178
return None # No intention to implement this properly
141179

142-
@staticmethod
143-
def from_ASSERT_NOT(_) -> None:
144-
return None # No intention to implement this properly
180+
def from_ASSERT_NOT(self, data) -> None:
    """Record a simple negative assertion as a pending character restriction.

    ``data`` is the ``(typ, ops)`` pair of the ASSERT_NOT op.  Only the
    case ``typ == 1`` (presumably the lookahead direction in sre_parse's
    encoding -- confirm) with exactly one LITERAL / NOT_LITERAL / IN op is
    handled.  Anything else leaves the parser state untouched.

    In the handled case the asserted character is parsed and negated, then
    stored in ``self.negative_lookahead``, combined via ``&=`` with any
    restriction that was already pending.  If the character cannot be
    parsed or negated, the previously pending restriction is put back.

    Always returns ``None``: the assertion itself contributes no element.
    """
    typ, ops = data
    if typ != 1 or len(ops) != 1:
        return None

    character_op = ops[0]
    simple_character_ops = (
        sre_constants.LITERAL,
        sre_constants.NOT_LITERAL,
        sre_constants.IN,
    )
    if character_op[0] not in simple_character_ops:
        return None

    # Take the pending restriction out first so that parsing the asserted
    # character below is not itself narrowed by it.
    pending = self.use_negative_lookahead()
    not_assertion = self.parse_op(*character_op)
    assertion = not_assertion.negate() if not_assertion else None

    if not assertion:
        # Nothing usable came out of this assertion: restore what we had.
        self.negative_lookahead = pending
        return None

    self.negative_lookahead = assertion
    if pending is not None:
        self.negative_lookahead &= pending
    return None  # No intention to implement this fully
200+
201+
def use_negative_lookahead(self) -> Optional[Character]:
    """Take the pending negative lookahead, leaving none behind.

    Returns the ``Character`` stored in ``self.negative_lookahead`` (or
    ``None`` when nothing is pending) and resets the attribute to ``None``
    so the restriction is applied at most once.
    """
    pending, self.negative_lookahead = self.negative_lookahead, None
    return pending

‎regexploit/bin/regexploit_js.py‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os.path
44
import io
55
import json
6+
import re
67
import subprocess
78
import sys
89
import traceback
@@ -27,10 +28,17 @@ def handle_line_from_node(line: str, output: TextOutput):
2728
parsed = SreOpParser().parse_sre(pattern)
2829
except:
2930
try:
30-
parsed = SreOpParser().parse_sre(fix_js_regex(pattern))
31+
fixed = fix_js_regex(pattern)
32+
re.compile(fixed)
3133
except:
3234
print(f"Error parsing: {pattern} from {filename}\n")
3335
return
36+
try:
37+
parsed = SreOpParser().parse_sre(fixed)
38+
except:
39+
print(f"Error in regexploit parsing: {pattern} from {filename}")
40+
print(traceback.format_exc())
41+
return
3442
output.next()
3543
try:
3644
for redos in find(parsed):

‎tests/test_character.py‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,5 +95,11 @@ def test_category_category_covers_all():
9595
assert from_regex(r"[\Dd\d]").is_any is True
9696

9797

98+
def test_negative_lookahead():
    """Stacked single-character negative lookaheads narrow ``\\d`` to odd digits.

    The multi-character lookahead ``(?!xyz123)`` is expected to have no
    effect on the parsed result.
    """
    parsed = SreOpParser().parse_sre(r"(?![0248])(?!6)(?!a)(?!xyz123)\d")
    odd_digits = from_regex(r"[13579]")
    assert parsed == odd_digits
102+
103+
98104
def test_category_category_covers_none():
99105
assert SreOpParser().parse_sre(r"[^x0-9\w\W]") is None

‎tests/test_redos.py‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,11 @@ def test_optional_starry():
202202
r = rs[0]
203203
assert r.starriness == 4
204204
assert r.repeated_character == from_regex(r"\d")
205+
206+
207+
def test_negative_lookahead():
    """Negative lookaheads between repeats restrict the reported ReDoS character."""
    # The final (?!c) isn't actually doing anything yet
    pattern = r"[abc]+(?!c)[abc]+(?!b)([abc]+[abc])(?!c)[abc]*x"
    first = find_redos(pattern)[0]
    assert first.starriness == 4
    assert first.repeated_character == from_regex(r"a")

‎tests/test_repeat.py‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ def test_character_class():
4141
assert r.exact_character_class() == from_regex(r"a")
4242

4343

44+
def test_negative_lookahead_infinite():
    """``(?!b)[a-d]+`` parses the same as ``[acd][a-d]*``."""
    with_lookahead = SreOpParser().parse_sre(r"(?!b)[a-d]+")
    unrolled = SreOpParser().parse_sre(r"[acd][a-d]*")
    assert with_lookahead == unrolled
47+
48+
49+
def test_negative_lookahead_finite():
    """``(?!b)[a-d]{1,3}`` parses the same as ``[acd][a-d]{0,2}``."""
    with_lookahead = SreOpParser().parse_sre(r"(?!b)[a-d]{1,3}")
    unrolled = SreOpParser().parse_sre(r"[acd][a-d]{0,2}")
    assert with_lookahead == unrolled
52+
53+
4454
def test_exponential_starriness():
4555
r = from_regex(r"(?:(?:a{4,})*)+")
4656
assert r.starriness == 111 # ((1 * 10) * 10) + 1

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /