[Python-checkins] cpython (merge 3.1 -> 3.2): (Merge 3.1) Issue #12100: Don't reset incremental encoders of CJK codecs at each call to their encode() method

victor.stinner python-checkins at python.org
Tue May 24 22:24:36 CEST 2011


http://hg.python.org/cpython/rev/7f2ab2f95a04
changeset: 70348:7f2ab2f95a04
branch: 3.2
parent: 70344:169b57028bac
parent: 70347:bd17396895fb
user: Victor Stinner <victor.stinner at haypocalc.com>
date: Tue May 24 22:22:17 2011 +0200
summary:
 (Merge 3.1) Issue #12100: Don't reset incremental encoders of CJK codecs at
each call to their encode() method anymore, but continue to call the reset()
method if the final argument is True.
files:
 Lib/test/test_multibytecodec.py | 30 ++++++++++++++++++
 Misc/NEWS | 4 ++
 Modules/cjkcodecs/multibytecodec.c | 8 ++--
 3 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -256,6 +256,36 @@
 # Any ISO 2022 codec will cause the segfault
 myunichr(x).encode('iso_2022_jp', 'ignore')
 
+class TestStateful(unittest.TestCase):
+ text = '\u4E16\u4E16'
+ encoding = 'iso-2022-jp'
+ expected = b'\x1b$B@$@$'
+ expected_reset = b'\x1b$B@$@$\x1b(B'
+
+ def test_encode(self):
+ self.assertEqual(self.text.encode(self.encoding), self.expected_reset)
+
+ def test_incrementalencoder(self):
+ encoder = codecs.getincrementalencoder(self.encoding)()
+ output = b''.join(
+ encoder.encode(char)
+ for char in self.text)
+ self.assertEqual(output, self.expected)
+
+ def test_incrementalencoder_final(self):
+ encoder = codecs.getincrementalencoder(self.encoding)()
+ last_index = len(self.text) - 1
+ output = b''.join(
+ encoder.encode(char, index == last_index)
+ for index, char in enumerate(self.text))
+ self.assertEqual(output, self.expected_reset)
+
+class TestHZStateful(TestStateful):
+ text = '\u804a\u804a'
+ encoding = 'hz'
+ expected = b'~{ADAD'
+ expected_reset = b'~{ADAD~}'
+
 def test_main():
 support.run_unittest(__name__)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@
 Library
 -------
 
+- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
+ their encode() method anymore, but continue to call the reset() method if the
+ final argument is True.
+
 - Issue #5715: In socketserver, close the server socket in the child process.
 
 - Correct lookup of __dir__ on objects. Among other things, this causes errors
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -479,7 +479,7 @@
 MultibyteEncodeBuffer buf;
 Py_ssize_t finalsize, r = 0;
 
- if (datalen == 0)
+ if (datalen == 0 && !(flags & MBENC_RESET))
 return PyBytes_FromStringAndSize(NULL, 0);
 
 buf.excobj = NULL;
@@ -514,7 +514,7 @@
 break;
 }
 
- if (codec->encreset != NULL)
+ if (codec->encreset != NULL && (flags & MBENC_RESET))
 for (;;) {
 Py_ssize_t outleft;
 
@@ -784,8 +784,8 @@
 inbuf_end = inbuf + datalen;
 
 r = multibytecodec_encode(ctx->codec, &ctx->state,
- (const Py_UNICODE **)&inbuf,
- datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+ (const Py_UNICODE **)&inbuf, datalen,
+ ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
 if (r == NULL) {
 /* recover the original pending buffer */
 if (origpending > 0)
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /