1818class SreOpParser :
def __init__(self):
    """Create a parser with no recorded groups and no pending lookahead."""
    # Group bookkeeping; starts empty (presumably filled in by the group
    # handlers, which are outside this view -- TODO confirm).
    self._groups = {}
    # Constraint left behind by a single-character negative lookahead such
    # as ``(?!x)``. It is consumed (and reset to None) by
    # ``use_negative_lookahead`` when the next op is parsed.
    self.negative_lookahead: Optional[Character] = None
2122
def parse_sre(self, pattern: str, flags: int = 0):
    """Parse *pattern* with ``sre_parse`` and convert the resulting ops.

    Args:
        pattern: Regular-expression source text.
        flags: Flag bits passed straight through to ``sre_parse.parse``.

    Returns:
        Whatever ``self.sequence_or_singleton`` builds from the parsed ops.
    """
    parsed_ops = sre_parse.parse(pattern, flags)
    return self.sequence_or_singleton(parsed_ops)
@@ -56,6 +57,8 @@ def from_MAX_REPEAT(
5657 ) -> Union [FiniteRepeat , InfiniteRepeat , Branch , None ]:
5758 minimum , maximum , elements = data
5859 infinite = maximum is sre_constants .MAXREPEAT
60+ # TODO support negative lookahead before repeat with minimum = 0
61+ negative_lookahead = self .use_negative_lookahead ()
5962 repeatable = self .sequence_or_singleton (elements )
6063 if repeatable is None :
6164 return None
@@ -68,7 +71,30 @@ def from_MAX_REPEAT(
6871 # Interesting (starry) optional sequences as branches (ab*)? -> (ab*|)
6972 return make_branch ([repeatable , None ])
7073 if infinite :
74+ if (
75+ negative_lookahead is not None
76+ and minimum > 0
77+ and isinstance (repeatable , Character )
78+ ):
79+ return Sequence (
80+ [
81+ negative_lookahead & repeatable ,
82+ InfiniteRepeat (repeatable , minimum - 1 ),
83+ ]
84+ )
7185 return InfiniteRepeat (repeatable , minimum )
86+ if (
87+ negative_lookahead is not None
88+ and minimum > 0
89+ and maximum > 1
90+ and isinstance (repeatable , Character )
91+ ):
92+ return Sequence (
93+ [
94+ negative_lookahead & repeatable ,
95+ FiniteRepeat (repeatable , minimum - 1 , maximum - 1 ),
96+ ]
97+ )
7298 return FiniteRepeat (repeatable , minimum , maximum )
7399
74100 def from_MIN_REPEAT (self , data ):
@@ -79,30 +105,40 @@ def from_BRANCH(
79105 ) -> Union [Branch , FiniteRepeat , Character , None ]:
80106 # sre already transforms (a|b|c) -> [abc]
81107 branches = data [1 ]
82- return make_branch ([self .sequence_or_singleton (branch ) for branch in branches ])
83- 84- @staticmethod
85- def from_AT (at : SreConstant ):
108+ negative_lookahead = self .use_negative_lookahead ()
109+ processed_branches = []
110+ for branch in branches :
111+ self .negative_lookahead = negative_lookahead
112+ processed_branches .append (self .sequence_or_singleton (branch ))
113+ self .negative_lookahead = None
114+ return make_branch (processed_branches )
def from_AT(self, at: SreConstant):
    """Translate a positional anchor op.

    Args:
        at: The anchor constant carried by the ``AT`` op.

    Returns:
        ``EndOfString()`` for ``AT_END``; ``None`` for every other anchor.
    """
    # TODO: handling for multiline
    # TODO: handling for \\b
    # Consume and discard any pending negative lookahead: an anchor yields
    # no character it could be merged into.
    self.use_negative_lookahead()
    if at is sre_constants.AT_END:
        return EndOfString()
    return None
91123
def from_ANY(self, _: None) -> Character:
    """Translate the ``ANY`` op (``.``).

    Args:
        _: Unused payload of the op.

    Returns:
        The pending negative-lookahead constraint when one is set (and
        truthy), otherwise ``Character.ANY()``.
    """
    pending = self.use_negative_lookahead()
    if pending:
        # The pending constraint is returned as-is in place of ANY.
        return pending
    return Character.ANY()
95128
def from_LITERAL(self, literal: int) -> Character:
    """Translate a ``LITERAL`` op into a ``Character``.

    Args:
        literal: Code point carried by the op.

    Returns:
        ``Character.LITERAL(literal)``, combined via ``&`` with the pending
        negative-lookahead constraint when one is set (and truthy).
    """
    # Take the pending constraint first so it is cleared even when unused.
    pending = self.use_negative_lookahead()
    character = Character.LITERAL(literal)
    if pending:
        return character & pending
    return character
99133
def from_NOT_LITERAL(self, not_literal: int) -> Character:
    """Translate a ``NOT_LITERAL`` op (e.g. ``[^a]``) into a ``Character``.

    Args:
        not_literal: Code point that must not match.

    Returns:
        A negated single-literal ``Character``, combined via ``&`` with the
        pending negative-lookahead constraint when one is set (and truthy).
    """
    # Take the pending constraint first so it is cleared even when unused.
    pending = self.use_negative_lookahead()
    character = Character(literals={not_literal}, positive=False)
    if pending:
        return character & pending
    return character
103140
104- @staticmethod
105- def from_IN (data : List [SreOp ]) -> Character :
141+ def from_IN (self , data : List [SreOp ]) -> Character :
106142 literals : Optional [Set [int ]] = None
107143 categories : Optional [Set ] = None
108144 positive = True
@@ -125,7 +161,9 @@ def from_IN(data: List[SreOp]) -> Character:
125161 categories .add (Category [in_data .name [9 :]])
126162
127163 if categories and covers_any (categories ):
128- return Character .ANY () if positive else None
164+ return self .from_ANY (None ) if positive else None
165+ if negative_lookahead := self .use_negative_lookahead ():
166+ return Character (literals , categories , positive ) & negative_lookahead
129167 return Character (literals , categories , positive )
130168
131169 def from_GROUPREF (self , ref : int ):
@@ -139,6 +177,29 @@ def from_GROUPREF_EXISTS(_) -> None:
139177 def from_ASSERT (_ ) -> None :
140178 return None # No intention to implement this properly
141179
def from_ASSERT_NOT(self, data) -> None:
    """Record a single-character negative lookahead for the next op.

    Only the narrow case of a lookahead over exactly one character-like op
    (``LITERAL``, ``NOT_LITERAL`` or ``IN``) is handled. Every other
    assertion is ignored and leaves ``self.negative_lookahead`` untouched.

    Args:
        data: ``(direction, ops)`` payload of the ``ASSERT_NOT`` op.

    Returns:
        Always ``None``; the effect is the update of
        ``self.negative_lookahead``.
    """
    direction, ops = data
    # sre_parse uses direction 1 for lookahead ``(?!...)`` and -1 for
    # lookbehind ``(?<!...)``; only single-op lookaheads are supported.
    if direction != 1 or len(ops) != 1:
        return None

    character_op = ops[0]
    if character_op[0] not in (
        sre_constants.LITERAL,
        sre_constants.NOT_LITERAL,
        sre_constants.IN,
    ):
        return None

    # Take any constraint already pending so that parsing the asserted
    # character below does not merge it into that character.
    previous = self.use_negative_lookahead()
    not_assertion = self.parse_op(*character_op)
    if not_assertion and (assertion := not_assertion.negate()):
        self.negative_lookahead = assertion
        if previous is not None:
            # Stack consecutive lookaheads: (?!a)(?!b) keeps both.
            self.negative_lookahead &= previous
    else:
        # Nothing usable came out of this assertion; restore what was pending.
        self.negative_lookahead = previous

    return None  # No intention to implement this fully
def use_negative_lookahead(self) -> Optional[Character]:
    """Take and clear the pending negative-lookahead constraint.

    Returns:
        The value of ``self.negative_lookahead`` before the call, or
        ``None`` when nothing was pending. The attribute is ``None``
        afterwards in both cases.
    """
    # Swap in one step so every path has an explicit return value.
    pending, self.negative_lookahead = self.negative_lookahead, None
    return pending
0 commit comments