diff -r 60c831305e73 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Tue Oct 02 05:34:38 2012 +0300 +++ b/Lib/test/test_codecs.py Tue Oct 02 19:10:22 2012 +0300 @@ -1519,6 +1519,14 @@ (u"abc", 3) ) + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", u"ab" + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", u"ab\ufffe" + ) + self.assertEqual( codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"), (u"ab\ufffd", 3) @@ -1545,6 +1553,139 @@ (u"", len(allbytes)) ) + def test_decode_with_int2str_map(self): + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "strict", + {0: u'a', 1: u'b', 2: u'c'}), + (u"abc", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "strict", + {0: u'Aa', 1: u'Bb', 2: u'Cc'}), + (u"AaBbCc", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "strict", + {0: u'\U0010FFFF', 1: u'b', 2: u'c'}), + (u"\U0010FFFFbc", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "strict", + {0: u'a', 1: u'b', 2: u''}), + (u"ab", 3) + ) + + # Issue #14850 + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", + {0: u'a', 1: u'b', 3: u'\ufffe'} + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "replace", + {0: u'a', 1: u'b'}), + (u"ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "replace", + {0: u'a', 1: u'b', 2: None}), + (u"ab\ufffd", 3) + ) + + # Issue #14850 + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "replace", + {0: u'a', 1: u'b', 2: u'\ufffe'}), + (u"ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "ignore", + {0: u'a', 1: u'b'}), + (u"ab", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "ignore", + {0: u'a', 1: u'b', 2: None}), + (u"ab", 3) + ) + + # Issue #14850 + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "ignore", + {0: u'a', 1: u'b', 2: 
u'\ufffe'}), + (u"ab", 3) + ) + + allbytes = "".join(chr(i) for i in xrange(256)) + self.assertEqual( + codecs.charmap_decode(allbytes, "ignore", {}), + (u"", len(allbytes)) + ) + + def test_decode_with_int2int_map(self): + a = ord(u'a') + b = ord(u'b') + c = ord(u'c') + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "strict", + {0: a, 1: b, 2: c}), + (u"abc", 3) + ) + + # Issue #15379 + #self.assertEqual( + # codecs.charmap_decode("\x00\x01\x02", "strict", + # {0: 0x10FFFF, 1: b, 2: c}), + # (u"\U0010FFFFbc", 3) + #) + + self.assertRaises(TypeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", + {0: 0x110000, 1: b, 2: c} + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", + {0: a, 1: b}, + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, "\x00\x01\x02", "strict", + {0: a, 1: b, 2: 0xFFFE}, + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "replace", + {0: a, 1: b}), + (u"ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "replace", + {0: a, 1: b, 2: 0xFFFE}), + (u"ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "ignore", + {0: a, 1: b}), + (u"ab", 3) + ) + + self.assertEqual( + codecs.charmap_decode("\x00\x01\x02", "ignore", + {0: a, 1: b, 2: 0xFFFE}), + (u"ab", 3) + ) + + class WithStmtTest(unittest.TestCase): def test_encodedfile(self): f = StringIO.StringIO("\xc3\xbc") diff -r 60c831305e73 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue Oct 02 05:34:38 2012 +0300 +++ b/Objects/unicodeobject.c Tue Oct 02 19:10:22 2012 +0300 @@ -4118,15 +4118,18 @@ if (PyErr_ExceptionMatches(PyExc_LookupError)) { /* No mapping found means: mapping is undefined. 
*/ PyErr_Clear(); - x = Py_None; - Py_INCREF(x); + goto Undefined; } else goto onError; } /* Apply mapping */ + if (x == Py_None) + goto Undefined; if (PyInt_Check(x)) { long value = PyInt_AS_LONG(x); + if (value == 0xFFFE) + goto Undefined; if (value < 0 || value > 65535) { PyErr_SetString(PyExc_TypeError, "character mapping must be in range(65536)"); @@ -4135,29 +4138,16 @@ } *p++ = (Py_UNICODE)value; } - else if (x == Py_None) { - /* undefined mapping */ - outpos = p-PyUnicode_AS_UNICODE(v); - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - starts, size, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) { - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - continue; - } else if (PyUnicode_Check(x)) { Py_ssize_t targetsize = PyUnicode_GET_SIZE(x); - if (targetsize == 1) + if (targetsize == 1) { /* 1-1 mapping */ - *p++ = *PyUnicode_AS_UNICODE(x); - + Py_UNICODE value = *PyUnicode_AS_UNICODE(x); + if (value == 0xFFFE) + goto Undefined; + *p++ = value; + } else if (targetsize > 1) { /* 1-n mapping */ if (targetsize > extrachars) { @@ -4191,6 +4181,20 @@ } Py_DECREF(x); ++s; + continue; +Undefined: + /* undefined mapping */ + Py_XDECREF(x); + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to <undefined>", + starts, size, &startinpos, &endinpos, &exc, &s, + &v, &outpos, &p)) { + goto onError; + } } } if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))

AltStyle によって変換されたページ (->オリジナル) /