[Python-checkins] r78729 - in python/branches/py3k: Lib/sre_parse.py Lib/test/test_re.py Misc/NEWS

ezio.melotti python-checkins at python.org
Sat Mar 6 16:24:08 CET 2010


Author: ezio.melotti
Date: Sat Mar 6 16:24:08 2010
New Revision: 78729
Log:
#6509: fix re.sub to work properly when the pattern, the string, and the replacement were all bytes. Patch by Antoine Pitrou.
Modified:
 python/branches/py3k/Lib/sre_parse.py
 python/branches/py3k/Lib/test/test_re.py
 python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Lib/sre_parse.py
==============================================================================
--- python/branches/py3k/Lib/sre_parse.py	(original)
+++ python/branches/py3k/Lib/sre_parse.py	Sat Mar 6 16:24:08 2010
@@ -786,12 +786,18 @@
 groups = []
 groupsappend = groups.append
 literals = [None] * len(p)
+ if isinstance(source, str):
+ encode = lambda x: x
+ else:
+ # The tokenizer implicitly decodes bytes objects as latin-1, we must
+ # therefore re-encode the final representation.
+ encode = lambda x: x.encode('latin1')
 for c, s in p:
 if c is MARK:
 groupsappend((i, s))
 # literal[i] is already None
 else:
- literals[i] = s
+ literals[i] = encode(s)
 i = i + 1
 return groups, literals
 
Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py	(original)
+++ python/branches/py3k/Lib/test/test_re.py	Sat Mar 6 16:24:08 2010
@@ -717,6 +717,24 @@
 self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
 self.assertRaises(ValueError, re.compile, '(?au)\w')
 
+ def test_bug_6509(self):
+ # Replacement strings of both types must parse properly.
+ # all strings
+ pat = re.compile('a(\w)')
+ self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
+ pat = re.compile('a(.)')
+ self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
+ pat = re.compile('..')
+ self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
+
+ # all bytes
+ pat = re.compile(b'a(\w)')
+ self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
+ pat = re.compile(b'a(.)')
+ self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
+ pat = re.compile(b'..')
+ self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
+
 def test_dealloc(self):
 # issue 3299: check for segfault in debug build
 import _sre
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sat Mar 6 16:24:08 2010
@@ -268,6 +268,9 @@
 Library
 -------
 
+- Issue #6509: fix re.sub to work properly when the pattern, the string, and
+ the replacement were all bytes. Patch by Antoine Pitrou.
+
 - The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure
 bugs and allows loading SQLite extensions from shared libraries.
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /