[Python-checkins] cpython: Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in regular expressions.
serhiy.storchaka
python-checkins at python.org
Tue Sep 23 22:26:02 CEST 2014
https://hg.python.org/cpython/rev/3b32f495fb38
changeset: 92550:3b32f495fb38
user: Serhiy Storchaka <storchaka at gmail.com>
date: Tue Sep 23 23:22:41 2014 +0300
summary:
Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in
regular expressions.
files:
Lib/sre_parse.py | 20 ++++++++++++++++----
Lib/test/test_re.py | 12 +++++++-----
Misc/NEWS | 3 +++
3 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -295,7 +295,11 @@
elif c in OCTDIGITS:
# octal escape (up to three digits)
escape += source.getwhile(2, OCTDIGITS)
- return LITERAL, int(escape[1:], 8) & 0xff
+ c = int(escape[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % escape)
+ return LITERAL, c
elif c in DIGITS:
raise ValueError
if len(escape) == 2:
@@ -337,7 +341,7 @@
elif c == "0":
# octal escape
escape += source.getwhile(2, OCTDIGITS)
- return LITERAL, int(escape[1:], 8) & 0xff
+ return LITERAL, int(escape[1:], 8)
elif c in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
@@ -346,7 +350,11 @@
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
- return LITERAL, int(escape[1:], 8) & 0xff
+ c = int(escape[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % escape)
+ return LITERAL, c
# not an octal escape, so this is a group reference
group = int(escape[1:])
if group < state.groups:
@@ -837,7 +845,11 @@
s.next in OCTDIGITS):
this += sget()
isoctal = True
- lappend(chr(int(this[1:], 8) & 0xff))
+ c = int(this[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % this)
+ lappend(chr(c))
if not isoctal:
addgroup(int(this[1:]))
else:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -154,8 +154,8 @@
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
- self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
- self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+ self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
+ self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -700,7 +700,7 @@
self.assertTrue(re.match(r"\08", "\0008"))
self.assertTrue(re.match(r"\01", "\001"))
self.assertTrue(re.match(r"\018", "\0018"))
- self.assertTrue(re.match(r"\567", chr(0o167)))
+ self.assertRaises(re.error, re.match, r"\567", "")
self.assertRaises(re.error, re.match, r"\911", "")
self.assertRaises(re.error, re.match, r"\x1", "")
self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -728,12 +728,13 @@
self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
- self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
+ self.assertRaises(re.error, re.match, r"[\567]", "")
self.assertRaises(re.error, re.match, r"[\911]", "")
self.assertRaises(re.error, re.match, r"[\x1z]", "")
self.assertRaises(re.error, re.match, r"[\u123z]", "")
self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
self.assertRaises(re.error, re.match, r"[\U00110000]", "")
+ self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
def test_sre_byte_literals(self):
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
@@ -749,7 +750,7 @@
self.assertTrue(re.match(br"\08", b"\0008"))
self.assertTrue(re.match(br"\01", b"\001"))
self.assertTrue(re.match(br"\018", b"\0018"))
- self.assertTrue(re.match(br"\567", bytes([0o167])))
+ self.assertRaises(re.error, re.match, br"\567", b"")
self.assertRaises(re.error, re.match, br"\911", b"")
self.assertRaises(re.error, re.match, br"\x1", b"")
self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -766,6 +767,7 @@
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
self.assertTrue(re.match(br"[\u]", b'u'))
self.assertTrue(re.match(br"[\U]", b'U'))
+ self.assertRaises(re.error, re.match, br"[\567]", b"")
self.assertRaises(re.error, re.match, br"[\911]", b"")
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -137,6 +137,9 @@
Library
-------
+- Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in
+ regular expressions.
+
- Issue #20912: Now directories added to ZIP file have correct Unix and MS-DOS
directory attributes.
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins
mailing list