[Python-checkins] cpython (merge 3.4 -> default): Issue #22410: Module level functions in the re module now cache compiled locale-dependent regular expressions taking into account the locale.

serhiy.storchaka python-checkins at python.org
Fri Oct 31 00:04:04 CET 2014


https://hg.python.org/cpython/rev/df9c1caf3654
changeset: 93293:df9c1caf3654
parent: 93290:ff5f5fd230d3
parent: 93292:cbdc658b7797
user: Serhiy Storchaka <storchaka at gmail.com>
date: Fri Oct 31 00:56:45 2014 +0200
summary:
 Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
files:
 Lib/re.py | 11 ++++++++-
 Lib/test/test_re.py | 37 +++++++++++++++++++++++++++++++++
 Misc/NEWS | 3 ++
 3 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/Lib/re.py b/Lib/re.py
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -122,6 +122,7 @@
 import sys
 import sre_compile
 import sre_parse
+import _locale
 
 # public symbols
 __all__ = [
@@ -274,7 +275,9 @@
 def _compile(pattern, flags):
 # internal: compile pattern
 try:
- return _cache[type(pattern), pattern, flags]
+ p, loc = _cache[type(pattern), pattern, flags]
+ if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
+ return p
 except KeyError:
 pass
 if isinstance(pattern, _pattern_type):
@@ -288,7 +291,11 @@
 if not (flags & DEBUG):
 if len(_cache) >= _MAXCACHE:
 _cache.clear()
- _cache[type(pattern), pattern, flags] = p
+ if p.flags & LOCALE:
+ loc = _locale.setlocale(_locale.LC_CTYPE)
+ else:
+ loc = None
+ _cache[type(pattern), pattern, flags] = p, loc
 return p
 
 def _compile_repl(repl, pattern):
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,7 @@
 from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
 cpython_only, captured_stdout
 import io
+import locale
 import re
 from re import Scanner
 import sre_compile
@@ -1276,6 +1277,42 @@
 # with ignore case.
 self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
 
+ def test_locale_caching(self):
+ # Issue #22410
+ oldlocale = locale.setlocale(locale.LC_CTYPE)
+ self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+ for loc in 'en_US.iso88591', 'en_US.utf8':
+ try:
+ locale.setlocale(locale.LC_CTYPE, loc)
+ except locale.Error:
+ # Unsupported locale on this system
+ self.skipTest('test needs %s locale' % loc)
+
+ re.purge()
+ self.check_en_US_iso88591()
+ self.check_en_US_utf8()
+ re.purge()
+ self.check_en_US_utf8()
+ self.check_en_US_iso88591()
+
+ def check_en_US_iso88591(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
+
+ def check_en_US_utf8(self):
+ locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+ self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
+ self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
+ self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
+ self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+
 
 class PatternReprTests(unittest.TestCase):
 def check(self, pattern, expected):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -180,6 +180,9 @@
 Library
 -------
 
+- Issue #22410: Module level functions in the re module now cache compiled
+ locale-dependent regular expressions taking into account the locale.
+
 - Issue #22759: Query methods on pathlib.Path() (exists(), is_dir(), etc.)
 now return False when the underlying stat call raises NotADirectoryError.
 
-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /