[Python-checkins] r78392 - in python/trunk: Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c

victor.stinner python-checkins at python.org
Wed Feb 24 00:16:07 CET 2010


Author: victor.stinner
Date: Wed Feb 24 00:16:07 2010
New Revision: 78392
Log:
Issue #7649: Fix u'%c' % char for a byte-string character in range 0x80..0xFF:
it now raises a UnicodeDecodeError. Patch written by Ezio Melotti.
Modified:
 python/trunk/Lib/test/test_unicode.py
 python/trunk/Misc/NEWS
 python/trunk/Objects/unicodeobject.c
Modified: python/trunk/Lib/test/test_unicode.py
==============================================================================
--- python/trunk/Lib/test/test_unicode.py	(original)
+++ python/trunk/Lib/test/test_unicode.py	Wed Feb 24 00:16:07 2010
@@ -395,6 +395,19 @@
 self.assertEqual(u'%c' % 0x1234, u'\u1234')
 self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
 
+ for num in range(0x00,0x80):
+ char = chr(num)
+ self.assertEqual(u"%c" % char, char)
+ self.assertEqual(u"%c" % num, char)
+ # Issue 7649
+ for num in range(0x80,0x100):
+ uchar = unichr(num)
+ self.assertEqual(uchar, u"%c" % num) # works only with ints
+ self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
+ # the implicit decoding should fail for non-ascii chars
+ self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+ self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
 # formatting jobs delegated from the string implementation:
 self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
 self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Wed Feb 24 00:16:07 2010
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF, raise an
+ UnicodeDecodeError
+
 - Issue #6902: Fix problem with built-in types format incorrectly with
 0 padding.
 
@@ -249,7 +252,7 @@
 
 Documentation
 -------------
- 
+
 - Updating `Using Python` documentation to include description of CPython's
 -J, -U and -X options.
 
Modified: python/trunk/Objects/unicodeobject.c
==============================================================================
--- python/trunk/Objects/unicodeobject.c	(original)
+++ python/trunk/Objects/unicodeobject.c	Wed Feb 24 00:16:07 2010
@@ -8170,6 +8170,7 @@
 size_t buflen,
 PyObject *v)
 {
+ PyObject *s;
 /* presume that the buffer is at least 2 characters long */
 if (PyUnicode_Check(v)) {
 if (PyUnicode_GET_SIZE(v) != 1)
@@ -8180,7 +8181,14 @@
 else if (PyString_Check(v)) {
 if (PyString_GET_SIZE(v) != 1)
 goto onError;
- buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+ /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+ string, "u'%c' % char" should fail with a UnicodeDecodeError */
+ s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+ /* if the char is not decodable return -1 */
+ if (s == NULL)
+ return -1;
+ buf[0] = PyUnicode_AS_UNICODE(s)[0];
+ Py_DECREF(s);
 }
 
 else {


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /