[Python-checkins] bpo-47080: Use atomic groups to simplify fnmatch (GH-32029)

tim-one webhook-mailer at python.org
Mon Mar 21 13:49:53 EDT 2022


https://github.com/python/cpython/commit/5c3201e146b251017cd77202015f47912ddcb980
commit: 5c3201e146b251017cd77202015f47912ddcb980
branch: main
author: Tim Peters <tim.peters at gmail.com>
committer: tim-one <tim.peters at gmail.com>
date: 2022年03月21日T12:49:43-05:00
summary:
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029)
Use re's new atomic groups to greatly simplify the construction of worst-case linear-time patterns.
files:
M Lib/fnmatch.py
M Lib/test/test_fnmatch.py
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 239c7490d49ee..0f5a41ac06f3e 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -16,12 +16,6 @@
 
 __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
 
-# Build a thread-safe incrementing counter to help create unique regexp group
-# names across calls.
-from itertools import count
-_nextgroupnum = count().__next__
-del count
-
 def fnmatch(name, pat):
 """Test whether FILENAME matches PATTERN.
 
@@ -149,17 +143,10 @@ def translate(pat):
 # Now deal with STAR fixed STAR fixed ...
 # For an interior `STAR fixed` pairing, we want to do a minimal
 # .*? match followed by `fixed`, with no possibility of backtracking.
- # We can't spell that directly, but can trick it into working by matching
- # .*?fixed
- # in a lookahead assertion, save the matched part in a group, then
- # consume that group via a backreference. If the overall match fails,
- # the lookahead assertion won't try alternatives. So the translation is:
- # (?=(?P<name>.*?fixed))(?P=name)
- # Group names are created as needed: g0, g1, g2, ...
- # The numbers are obtained from _nextgroupnum() to ensure they're unique
- # across calls and across threads. This is because people rely on the
- # undocumented ability to join multiple translate() results together via
- # "|" to build large regexps matching "one of many" shell patterns.
+ # Atomic groups ("(?>...)") allow us to spell that directly.
+ # Note: people rely on the undocumented ability to join multiple
+ # translate() results together via "|" to build large regexps matching
+ # "one of many" shell patterns.
 while i < n:
 assert inp[i] is STAR
 i += 1
@@ -176,8 +163,7 @@ def translate(pat):
 add(".*")
 add(fixed)
 else:
- groupnum = _nextgroupnum()
- add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
+ add(f"(?>.*?{fixed})")
 assert i == n
 res = "".join(res)
 return fr'(?s:{res})\Z'
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10668e4f6103a..ca695d6f3f019 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -124,17 +124,9 @@ def test_translate(self):
 self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
 # fancy translation to prevent exponential-time match failure
 t = translate('**a*a****a')
- digits = re.findall(r'\d+', t)
- self.assertEqual(len(digits), 4)
- self.assertEqual(digits[0], digits[1])
- self.assertEqual(digits[2], digits[3])
- g1 = f"g{digits[0]}" # e.g., group name "g4"
- g2 = f"g{digits[2]}" # e.g., group name "g5"
- self.assertEqual(t,
- fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
+ self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')
 # and try pasting multiple translate results - it's an undocumented
- # feature that this works; all the pain of generating unique group
- # names across calls exists to support this
+ # feature that this works
 r1 = translate('**a**a**a*')
 r2 = translate('**b**b**b*')
 r3 = translate('*c*c*c*')


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /