[Python-checkins] python/dist/src/Lib/test test_codeccallbacks.py, 1.16.4.1, 1.16.4.2 test_codecs.py, 1.15.2.7, 1.15.2.8

doerwalter@users.sourceforge.net doerwalter at users.sourceforge.net
Tue Aug 30 12:46:16 CEST 2005


Update of /cvsroot/python/python/dist/src/Lib/test
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5498/Lib/test
Modified Files:
 Tag: release24-maint
	test_codeccallbacks.py test_codecs.py 
Log Message:
Backport checkin:
SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain
about illegal code points. The codec now supports PEP 293 style error handlers.
(This is a variant of the patch by Nik Haldimann that detects truncated data)
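
For readers who want to see what the backported behaviour looks like in practice, here is a minimal sketch (not part of this checkin; the handler name "example.unicodeinternal" and the replacement choice are made up for illustration). On a UCS-4 build with this patch applied, truncated or illegal "unicode-internal" input raises UnicodeDecodeError, and a PEP 293 error handler registered via codecs.register_error() can return a replacement string plus the position at which decoding should resume:

import codecs, sys

def handle_unicodeinternal(exc):
    # Only decoding errors from the unicode-internal codec are expected here.
    if not isinstance(exc, UnicodeDecodeError):
        raise TypeError("don't know how to handle %r" % exc)
    # Substitute U+FFFD and resume after the offending bytes.
    return (u"\ufffd", exc.end)

codecs.register_error("example.unicodeinternal", handle_unicodeinternal)

if sys.maxunicode > 0xffff:
    # Five zero bytes: one complete code point followed by a truncated one.
    print repr("\x00\x00\x00\x00\x00".decode("unicode-internal",
                                             "example.unicodeinternal"))
    # expected output: u'\x00\ufffd'
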
Index: test_codeccallbacks.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codeccallbacks.py,v
retrieving revision 1.16.4.1
retrieving revision 1.16.4.2
diff -u -d -r1.16.4.1 -r1.16.4.2
--- test_codeccallbacks.py	14 Dec 2004 21:33:13 -0000	1.16.4.1
+++ test_codeccallbacks.py	30 Aug 2005 10:46:06 -0000	1.16.4.2
@@ -111,7 +111,7 @@
 sout += "\\U%08x" % sys.maxunicode
 self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 
- def test_relaxedutf8(self):
+ def test_decoderelaxedutf8(self):
 # This is the test for a decoding callback handler,
 # that relaxes the UTF-8 minimal encoding restriction.
 # A null byte that is encoded as "\xc0\x80" will be
@@ -158,6 +158,35 @@
         charmap[ord("?")] = u"XYZ"
         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
 
+    def test_decodeunicodeinternal(self):
+        self.assertRaises(
+            UnicodeDecodeError,
+            "\x00\x00\x00\x00\x00".decode,
+            "unicode-internal",
+        )
+        if sys.maxunicode > 0xffff:
+            def handler_unicodeinternal(exc):
+                if not isinstance(exc, UnicodeDecodeError):
+                    raise TypeError("don't know how to handle %r" % exc)
+                return (u"\x01", 1)
+
+            self.assertEqual(
+                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
+                u"\u0000"
+            )
+
+            self.assertEqual(
+                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
+                u"\u0000\ufffd"
+            )
+
+            codecs.register_error("test.hui", handler_unicodeinternal)
+
+            self.assertEqual(
+                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
+                u"\u0000\u0001\u0000"
+            )
+
     def test_callbacks(self):
         def handler1(exc):
             if not isinstance(exc, UnicodeEncodeError) \
@@ -503,7 +532,8 @@
             for (enc, bytes) in (
                 ("ascii", "\xff"),
                 ("utf-8", "\xff"),
-                ("utf-7", "+x-")
+                ("utf-7", "+x-"),
+                ("unicode-internal", "\x00"),
             ):
                 self.assertRaises(
                     TypeError,
Index: test_codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v
retrieving revision 1.15.2.7
retrieving revision 1.15.2.8
diff -u -d -r1.15.2.7 -r1.15.2.8
--- test_codecs.py	25 Aug 2005 11:04:04 -0000	1.15.2.7
+++ test_codecs.py	30 Aug 2005 10:46:06 -0000	1.15.2.8
@@ -1,7 +1,7 @@
 from test import test_support
 import unittest
 import codecs
-import StringIO
+import sys, StringIO
 
 class Queue(object):
     """
@@ -455,6 +455,54 @@
         for uni, puny in punycode_testcases:
             self.assertEquals(uni, puny.decode("punycode"))
 
+class UnicodeInternalTest(unittest.TestCase):
+    def test_bug1251300(self):
+        # Decoding with unicode_internal used to not correctly handle "code
+        # points" above 0x10ffff on UCS-4 builds.
+        if sys.maxunicode > 0xffff:
+            ok = [
+                ("\x00\x10\xff\xff", u"\U0010ffff"),
+                ("\x00\x00\x01\x01", u"\U00000101"),
+                ("", u""),
+            ]
+            not_ok = [
+                "\x7f\xff\xff\xff",
+                "\x80\x00\x00\x00",
+                "\x81\x00\x00\x00",
+                "\x00",
+                "\x00\x00\x00\x00\x00",
+            ]
+            for internal, uni in ok:
+                if sys.byteorder == "little":
+                    internal = "".join(reversed(internal))
+                self.assertEquals(uni, internal.decode("unicode_internal"))
+            for internal in not_ok:
+                if sys.byteorder == "little":
+                    internal = "".join(reversed(internal))
+                self.assertRaises(UnicodeDecodeError, internal.decode,
+                                  "unicode_internal")
+
+    def test_decode_error_attributes(self):
+        if sys.maxunicode > 0xffff:
+            try:
+                "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
+            except UnicodeDecodeError, ex:
+                self.assertEquals("unicode_internal", ex.encoding)
+                self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
+                self.assertEquals(4, ex.start)
+                self.assertEquals(8, ex.end)
+            else:
+                self.fail()
+
+    def test_decode_callback(self):
+        if sys.maxunicode > 0xffff:
+            codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
+            decoder = codecs.getdecoder("unicode_internal")
+            ab = u"ab".encode("unicode_internal")
+            ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
+                              "UnicodeInternalTest")
+            self.assertEquals((u"ab", 12), ignored)
+
 # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
 nameprep_tests = [
     # 3.1 Map to nothing.
@@ -696,6 +744,7 @@
         EscapeDecodeTest,
         RecodingTest,
         PunycodeTest,
+        UnicodeInternalTest,
         NameprepTest,
         CodecTest,
         CodecsModuleTest,
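
A side note on the byte order handling in test_bug1251300 above: on a UCS-4 build, "unicode_internal" is simply the raw Py_UNICODE buffer, so its byte order follows the host. That is why the test builds big-endian byte strings and reverses them on little-endian machines. A quick sketch of the same idea (again just an illustration, not part of the checkin):

import sys

if sys.maxunicode > 0xffff:            # UCS-4 build
    raw = u"\U00000101".encode("unicode_internal")
    expected = "\x00\x00\x01\x01"      # big-endian layout used by the test data
    if sys.byteorder == "little":
        expected = "".join(reversed(expected))
    assert raw == expected, repr(raw)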


More information about the Python-checkins mailing list
