[Python-checkins] r69817 - in python/branches/io-c: Lib/test/test_io.py Modules/_textio.c

antoine.pitrou python-checkins at python.org
Fri Feb 20 21:45:50 CET 2009


Author: antoine.pitrou
Date: Fri Feb 20 21:45:50 2009
New Revision: 69817
Log:
Allow IncrementalNewlineDecoder to take unicode objects as decoding input if the decoder parameter is None.
This will help with rewriting StringIO in C.
Modified:
 python/branches/io-c/Lib/test/test_io.py
 python/branches/io-c/Modules/_textio.c
Modified: python/branches/io-c/Lib/test/test_io.py
==============================================================================
--- python/branches/io-c/Lib/test/test_io.py	(original)
+++ python/branches/io-c/Lib/test/test_io.py	Fri Feb 20 21:45:50 2009
@@ -1745,7 +1745,10 @@
 
 self.assertEqual(buffer.seekable(), txt.seekable())
 
- def check_newline_decoder_utf8(self, decoder):
+
+class IncrementalNewlineDecoderTest(unittest.TestCase):
+
+ def check_newline_decoding_utf8(self, decoder):
 # UTF-8 specific tests for a newline decoder
 def _check_decode(b, s, **kwargs):
 # We exercise getstate() / setstate() as well as decode()
@@ -1787,12 +1790,20 @@
 _check_decode(b'\xe8\xa2\x88\r', "\u8888")
 _check_decode(b'\n', "\n")
 
- def check_newline_decoder(self, decoder, encoding):
+ def check_newline_decoding(self, decoder, encoding):
 result = []
- encoder = codecs.getincrementalencoder(encoding)()
- def _decode_bytewise(s):
- for b in encoder.encode(s):
- result.append(decoder.decode(bytes([b])))
+ if encoding is not None:
+ encoder = codecs.getincrementalencoder(encoding)()
+ def _decode_bytewise(s):
+ # Decode one byte at a time
+ for b in encoder.encode(s):
+ result.append(decoder.decode(bytes([b])))
+ else:
+ encoder = None
+ def _decode_bytewise(s):
+ # Decode one char at a time
+ for c in s:
+ result.append(decoder.decode(c))
 self.assertEquals(decoder.newlines, None)
 _decode_bytewise("abc\n\r")
 self.assertEquals(decoder.newlines, '\n')
@@ -1805,22 +1816,28 @@
 _decode_bytewise("abc\r")
 self.assertEquals("".join(result), "abc\n\nabcabc\nabcabc")
 decoder.reset()
- self.assertEquals(decoder.decode("abc".encode(encoding)), "abc")
+ input = "abc"
+ if encoder is not None:
+ encoder.reset()
+ input = encoder.encode(input)
+ self.assertEquals(decoder.decode(input), "abc")
 self.assertEquals(decoder.newlines, None)
 
 def test_newline_decoder(self):
 encodings = (
- 'utf-8', 'latin-1',
+ # None meaning the IncrementalNewlineDecoder takes unicode input
+ # rather than bytes input
+ None, 'utf-8', 'latin-1',
 'utf-16', 'utf-16-le', 'utf-16-be',
 'utf-32', 'utf-32-le', 'utf-32-be',
 )
 for enc in encodings:
- decoder = codecs.getincrementaldecoder(enc)()
+ decoder = enc and codecs.getincrementaldecoder(enc)()
 decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
- self.check_newline_decoder(decoder, enc)
+ self.check_newline_decoding(decoder, enc)
 decoder = codecs.getincrementaldecoder("utf-8")()
 decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
- self.check_newline_decoder_utf8(decoder)
+ self.check_newline_decoding_utf8(decoder)
 
 
 # XXX Tests for open()
@@ -1933,7 +1950,8 @@
 BufferedReaderTest, BufferedWriterTest,
 BufferedRWPairTest, BufferedRandomTest,
 StatefulIncrementalDecoderTest,
- TextIOWrapperTest, MiscIOTest
+ IncrementalNewlineDecoderTest,
+ TextIOWrapperTest, MiscIOTest,
 )
 
 if __name__ == "__main__":
Modified: python/branches/io-c/Modules/_textio.c
==============================================================================
--- python/branches/io-c/Modules/_textio.c	(original)
+++ python/branches/io-c/Modules/_textio.c	Fri Feb 20 21:45:50 2009
@@ -154,7 +154,9 @@
 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
 "records the types of newlines encountered. When used with\n"
 "translate=False, it ensures that the newline sequence is returned in\n"
- "one piece.\n"
+ "one piece. When used with decoder=None, it expects unicode strings as\n"
+ "decode input and translates newlines without first invoking an external\n"
+ "decoder.\n"
 );
 
 typedef struct {
@@ -226,8 +228,15 @@
 }
 
 /* decode input (with the eventual \r from a previous pass) */
- output = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
- input, final ? Py_True : Py_False, NULL);
+ if (self->decoder != Py_None) {
+ output = PyObject_CallMethodObjArgs(self->decoder,
+ _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
+ }
+ else {
+ output = input;
+ Py_INCREF(output);
+ }
+
 if (output == NULL)
 return NULL;
 
@@ -421,20 +430,25 @@
 static PyObject *
 IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
 {
- PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
- _PyIO_str_getstate, NULL);
 PyObject *buffer;
 unsigned PY_LONG_LONG flag;
 
- if (state == NULL)
- return NULL;
-
- if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+ if (self->decoder != Py_None) {
+ PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
+ _PyIO_str_getstate, NULL);
+ if (state == NULL)
+ return NULL;
+ if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
+ Py_DECREF(state);
+ return NULL;
+ }
+ Py_INCREF(buffer);
 Py_DECREF(state);
- return NULL;
 }
- Py_INCREF(buffer);
- Py_DECREF(state);
+ else {
+ buffer = PyBytes_FromString("");
+ flag = 0;
+ }
 flag <<= 1;
 if (self->pendingcr)
 flag |= 1;
@@ -453,7 +467,11 @@
 self->pendingcr = (int) flag & 1;
 flag >>= 1;
 
- return PyObject_CallMethod(self->decoder, "setstate", "((OK))", buffer, flag);
+ if (self->decoder != Py_None)
+ return PyObject_CallMethod(self->decoder,
+ "setstate", "((OK))", buffer, flag);
+ else
+ Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -461,7 +479,10 @@
 {
 self->seennl = 0;
 self->pendingcr = 0;
- return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+ if (self->decoder != Py_None)
+ return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
+ else
+ Py_RETURN_NONE;
 }
 
 static PyObject *


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /