[Python-checkins] cpython: Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or

Mon Dec 1 10:52:10 CET 2014

https://hg.python.org/cpython/rev/561d1d0de518
changeset: 93676:561d1d0de518
user: Serhiy Storchaka <storchaka at gmail.com>
date: Mon Dec 01 11:50:07 2014 +0200
summary:
 Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
re.ASCII. It never worked.
files:
 Doc/library/re.rst | 6 ++-
 Lib/sre_parse.py | 10 ++++
 Lib/test/test_re.py | 82 ++++++++++++++++++++++++++------
 Misc/NEWS | 3 +
 4 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/Doc/library/re.rst b/Doc/library/re.rst
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -521,7 +521,11 @@
 current locale. The use of this flag is discouraged as the locale mechanism
 is very unreliable, and it only handles one "culture" at a time anyway;
 you should use Unicode matching instead, which is the default in Python 3
- for Unicode (str) patterns.
+ for Unicode (str) patterns. This flag makes sense only with bytes patterns.
+
+ .. deprecated-removed:: 3.5 3.6
+ Deprecated the use of :const:`re.LOCALE` with string patterns or
+ :const:`re.ASCII`.
 
 
 .. data:: M
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -751,6 +751,11 @@
 def fix_flags(src, flags):
 # Check and fix flags according to the type of pattern (str or bytes)
 if isinstance(src, str):
+ if flags & SRE_FLAG_LOCALE:
+ import warnings
+ warnings.warn("LOCALE flag with a str pattern is deprecated. "
+ "Will be an error in 3.6",
+ DeprecationWarning, stacklevel=6)
 if not flags & SRE_FLAG_ASCII:
 flags |= SRE_FLAG_UNICODE
 elif flags & SRE_FLAG_UNICODE:
@@ -758,6 +763,11 @@
 else:
 if flags & SRE_FLAG_UNICODE:
 raise ValueError("can't use UNICODE flag with a bytes pattern")
+ if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
+ import warnings
+ warnings.warn("ASCII and LOCALE flags are incompatible. "
+ "Will be an error in 3.6",
+ DeprecationWarning, stacklevel=6)
 return flags
 
 def parse(str, flags=0, pattern=None):
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -502,10 +502,6 @@
 "abcd abc bcd bx", re.ASCII).group(1), "bx")
 self.assertEqual(re.search(r"\B(b.)\B",
 "abc bcd bc abxd", re.ASCII).group(1), "bx")
- self.assertEqual(re.search(r"\b(b.)\b",
- "abcd abc bcd bx", re.LOCALE).group(1), "bx")
- self.assertEqual(re.search(r"\B(b.)\B",
- "abc bcd bc abxd", re.LOCALE).group(1), "bx")
 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
 self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -526,8 +522,6 @@
 b"1aa! a").group(0), b"1aa! a")
 self.assertEqual(re.search(r"\d\D\w\W\s\S",
 "1aa! a", re.ASCII).group(0), "1aa! a")
- self.assertEqual(re.search(r"\d\D\w\W\s\S",
- "1aa! a", re.LOCALE).group(0), "1aa! a")
 self.assertEqual(re.search(br"\d\D\w\W\s\S",
 b"1aa! a", re.LOCALE).group(0), b"1aa! a")
 
@@ -693,9 +687,12 @@
 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+ self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
 
 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
 self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
+ self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
+ self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
 
 def test_not_literal(self):
 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
@@ -780,8 +777,10 @@
 self.assertEqual(re.X, re.VERBOSE)
 
 def test_flags(self):
- for flag in [re.I, re.M, re.X, re.S, re.L]:
+ for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
 self.assertTrue(re.compile('^pattern$', flag))
+ for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
+ self.assertTrue(re.compile(b'^pattern$', flag))
 
 def test_sre_character_literals(self):
 for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
@@ -1146,6 +1145,52 @@
 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
 self.assertRaises(ValueError, re.compile, '(?au)\w')
 
+ def test_locale_flag(self):
+ import locale
+ _, enc = locale.getlocale(locale.LC_CTYPE)
+ # Search non-ASCII letter
+ for i in range(128, 256):
+ try:
+ c = bytes([i]).decode(enc)
+ sletter = c.lower()
+ if sletter == c: continue
+ bletter = sletter.encode(enc)
+ if len(bletter) != 1: continue
+ if bletter.decode(enc) != sletter: continue
+ bpat = re.escape(bytes([i]))
+ break
+ except (UnicodeError, TypeError):
+ pass
+ else:
+ bletter = None
+ bpat = b'A'
+ # Bytes patterns
+ pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(bpat, re.IGNORECASE)
+ if bletter:
+ self.assertIsNone(pat.match(bletter))
+ pat = re.compile(b'\w', re.LOCALE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'(?L)\w')
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'\w')
+ if bletter:
+ self.assertIsNone(pat.match(bletter))
+ # Incompatibilities
+ self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
+ self.assertWarns(DeprecationWarning, re.compile, '(?L)')
+ self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+
 def test_bug_6509(self):
 # Replacement strings of both types must parse properly.
 # all strings
@@ -1477,6 +1522,10 @@
 self.check_flags(b'bytes pattern', re.A,
 "re.compile(b'bytes pattern', re.ASCII)")
 
+ def test_locale(self):
+ self.check_flags(b'bytes pattern', re.L,
+ "re.compile(b'bytes pattern', re.LOCALE)")
+
 def test_quotes(self):
 self.check('random "double quoted" pattern',
 '''re.compile('random "double quoted" pattern')''')
@@ -1590,8 +1639,16 @@
 pass
 else:
 with self.subTest('bytes pattern match'):
- bpat = re.compile(bpat)
- self.assertTrue(bpat.search(bs))
+ obj = re.compile(bpat)
+ self.assertTrue(obj.search(bs))
+
+ # Try the match with LOCALE enabled, and check that it
+ # still succeeds.
+ with self.subTest('locale-sensitive match'):
+ obj = re.compile(bpat, re.LOCALE)
+ result = obj.search(bs)
+ if result is None:
+ print('=== Fails on locale-sensitive match', t)
 
 # Try the match with the search area limited to the extent
 # of the match and see if it still succeeds. \B will
@@ -1609,13 +1666,6 @@
 obj = re.compile(pattern, re.IGNORECASE)
 self.assertTrue(obj.search(s))
 
- # Try the match with LOCALE enabled, and check that it
- # still succeeds.
- if '(?u)' not in pattern:
- with self.subTest('locale-sensitive match'):
- obj = re.compile(pattern, re.LOCALE)
- self.assertTrue(obj.search(s))
-
 # Try the match with UNICODE locale enabled, and check
 # that it still succeeds.
 with self.subTest('unicode-sensitive match'):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -191,6 +191,9 @@
 Library
 -------
 
+- Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
+ re.ASCII. It never worked.
+
 - Issue #22902: The "ip" command is now used on Linux to determine MAC address
 in uuid.getnode(). Pach by Bruno Cauet.
 
-- 
Repository URL: https://hg.python.org/cpython