[Python-checkins] r69839 - in python/branches/io-c: Lib/io.py Lib/test/test_memoryio.py Modules/_stringio.c Modules/io.c

antoine.pitrou python-checkins at python.org
Sat Feb 21 19:54:01 CET 2009


Author: antoine.pitrou
Date: Sat Feb 21 19:54:01 2009
New Revision: 69839
Log:
StringIO is now written entirely in C (and blazingly fast)
Modified:
 python/branches/io-c/Lib/io.py
 python/branches/io-c/Lib/test/test_memoryio.py
 python/branches/io-c/Modules/_stringio.c
 python/branches/io-c/Modules/io.c
Modified: python/branches/io-c/Lib/io.py
==============================================================================
--- python/branches/io-c/Lib/io.py	(original)
+++ python/branches/io-c/Lib/io.py	Sat Feb 21 19:54:01 2009
@@ -1847,6 +1847,7 @@
 def newlines(self):
 return self._decoder.newlines if self._decoder else None
 
+StringIO = _io.StringIO
 class unused_StringIO(unused_TextIOWrapper):
 """Text I/O implementation using an in-memory buffer.
 
@@ -1871,101 +1872,6 @@
 self.flush()
 return self.buffer.getvalue().decode(self._encoding, self._errors)
 
-try:
- class StringIO(_io._StringIO, TextIOBase):
- """Text I/O implementation using an in-memory buffer.
-
- The initial_value argument sets the value of object. The newline
- argument is like the one of TextIOWrapper's constructor.
- """
-
- _read = _io._StringIO.read
- _readline = _io._StringIO.readline
- _write = _io._StringIO.write
- _tell = _io._StringIO.tell
- _seek = _io._StringIO.seek
- _truncate = _io._StringIO.truncate
- _getvalue = _io._StringIO.getvalue
-
- def getvalue(self) -> str:
- """Retrieve the entire contents of the object."""
- if self.closed:
- raise ValueError("read on closed file")
- return self._getvalue()
-
- def write(self, s: str) -> int:
- """Write string s to file.
-
- Returns the number of characters written.
- """
- if self.closed:
- raise ValueError("write to closed file")
- return self._write(s)
-
- def read(self, n: int = None) -> str:
- """Read at most n characters, returned as a string.
-
- If the argument is negative or omitted, read until EOF
- is reached. Return an empty string at EOF.
- """
- if self.closed:
- raise ValueError("read to closed file")
- return self._read(n)
-
- def tell(self) -> int:
- """Tell the current file position."""
- if self.closed:
- raise ValueError("tell from closed file")
- return self._tell()
-
- def seek(self, pos: int = None, whence: int = 0) -> int:
- """Change stream position.
-
- Seek to character offset pos relative to position indicated by whence:
- 0 Start of stream (the default). pos should be >= 0;
- 1 Current position - pos must be 0;
- 2 End of stream - pos must be 0.
- Returns the new absolute position.
- """
- if self.closed:
- raise ValueError("seek from closed file")
- return self._seek(pos, whence)
-
- def truncate(self, pos: int = None) -> int:
- """Truncate size to pos.
-
- The pos argument defaults to the current file position, as
- returned by tell(). Imply an absolute seek to pos.
- Returns the new absolute position.
- """
- if self.closed:
- raise ValueError("truncate from closed file")
- return self._truncate(pos)
-
- def readline(self, limit: int = None) -> str:
- if self.closed:
- raise ValueError("read from closed file")
- return self._readline(limit)
-
- _LF = 1
- _CR = 2
- _CRLF = 4
-
- @property
- def newlines(self):
- return (None,
- "\n",
- "\r",
- ("\r", "\n"),
- "\r\n",
- ("\n", "\r\n"),
- ("\r", "\r\n"),
- ("\r", "\n", "\r\n")
- )[self._seennl]
-
-
-except ImportError:
- StringIO = _StringIO
 
 # make test_memoryio happy!
 _BytesIO = BytesIO
Modified: python/branches/io-c/Lib/test/test_memoryio.py
==============================================================================
--- python/branches/io-c/Lib/test/test_memoryio.py	(original)
+++ python/branches/io-c/Lib/test/test_memoryio.py	Sat Feb 21 19:54:01 2009
@@ -391,7 +391,7 @@
 self.assertEqual(memio.errors, "strict")
 self.assertEqual(memio.line_buffering, False)
 
- def test_newlines_none(self):
+ def test_newline_none(self):
 # newline=None
 memio = self.ioclass("a\nb\r\nc\rd", newline=None)
 self.assertEqual(list(memio), ["a\n", "b\n", "c\n", "d"])
@@ -407,7 +407,7 @@
 memio.seek(0)
 self.assertEqual(memio.read(), "a\nb\nc\nd")
 
- def test_newlines_empty(self):
+ def test_newline_empty(self):
 # newline=""
 memio = self.ioclass("a\nb\r\nc\rd", newline="")
 self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
@@ -418,16 +418,17 @@
 memio = self.ioclass(newline="")
 self.assertEqual(2, memio.write("a\n"))
 self.assertEqual(2, memio.write("b\r"))
- self.assertEqual(4, memio.write("\nc\rd"))
+ self.assertEqual(2, memio.write("\nc"))
+ self.assertEqual(2, memio.write("\rd"))
 memio.seek(0)
 self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
 
- def test_newlines_lf(self):
+ def test_newline_lf(self):
 # newline="\n"
 memio = self.ioclass("a\nb\r\nc\rd")
 self.assertEqual(list(memio), ["a\n", "b\r\n", "c\rd"])
 
- def test_newlines_cr(self):
+ def test_newline_cr(self):
 # newline="\r"
 memio = self.ioclass("a\nb\r\nc\rd", newline="\r")
 memio.seek(0)
@@ -435,7 +436,7 @@
 memio.seek(0)
 self.assertEqual(list(memio), ["a\r", "b\r", "\r", "c\r", "d"])
 
- def test_newlines_crlf(self):
+ def test_newline_crlf(self):
 # newline="\r\n"
 memio = self.ioclass("a\nb\r\nc\rd", newline="\r\n")
 memio.seek(0)
@@ -469,6 +470,17 @@
 self.assertEqual(memio.tell(), len(buf) * 2)
 self.assertEqual(memio.getvalue(), buf + buf)
 
+ # XXX This test fails with the Python version of io.StringIO
+ def test_newlines_property(self):
+ memio = self.ioclass(newline=None)
+ self.assertEqual(memio.newlines, None)
+ memio.write("a\n")
+ self.assertEqual(memio.newlines, "\n")
+ memio.write("b\r\n")
+ self.assertEqual(memio.newlines, ("\n", "\r\n"))
+ memio.write("c\rd")
+ self.assertEqual(memio.newlines, ("\r", "\n", "\r\n"))
+
 
 def test_main():
 tests = [PyBytesIOTest, PyStringIOTest, CBytesIOTest, CStringIOTest]
Modified: python/branches/io-c/Modules/_stringio.c
==============================================================================
--- python/branches/io-c/Modules/_stringio.c	(original)
+++ python/branches/io-c/Modules/_stringio.c	Sat Feb 21 19:54:01 2009
@@ -13,12 +13,13 @@
 Py_ssize_t string_size;
 size_t buf_size;
 
- int ok; /* initialized? */
+ char ok; /* initialized? */
+ char closed;
+ char readuniversal;
+ char readtranslate;
 PyObject *decoder;
 PyObject *readnl;
 PyObject *writenl;
- char readuniversal;
- char readtranslate;
 } StringIOObject;
 
 #define CHECK_INITIALIZED(self) \
@@ -28,6 +29,20 @@
 return NULL; \
 }
 
+#define CHECK_CLOSED(self) \
+ if (self->closed) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file"); \
+ return NULL; \
+ }
+
+PyDoc_STRVAR(stringio_doc,
+ "Text I/O implementation using an in-memory buffer.\n"
+ "\n"
+ "The initial_value argument sets the value of object. The newline\n"
+ "argument is like the one of TextIOWrapper's constructor.");
+
+
 /* Internal routine for changing the size, in terms of characters, of the
 buffer of StringIO objects. The caller should ensure that the 'size'
 argument is non-negative. Returns 0 on success, -1 otherwise. */
@@ -163,20 +178,34 @@
 return -1;
 }
 
+PyDoc_STRVAR(stringio_getvalue_doc,
+ "Retrieve the entire contents of the object.");
+
 static PyObject *
 stringio_getvalue(StringIOObject *self)
 {
 CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
 return PyUnicode_FromUnicode(self->buf, self->string_size);
 }
 
+PyDoc_STRVAR(stringio_tell_doc,
+ "Tell the current file position.");
+
 static PyObject *
 stringio_tell(StringIOObject *self)
 {
 CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
 return PyLong_FromSsize_t(self->pos);
 }
 
+PyDoc_STRVAR(stringio_read_doc,
+ "Read at most n characters, returned as a string.\n"
+ "\n"
+ "If the argument is negative or omitted, read until EOF\n"
+ "is reached. Return an empty string at EOF.\n");
+
 static PyObject *
 stringio_read(StringIOObject *self, PyObject *args)
 {
@@ -187,6 +216,7 @@
 CHECK_INITIALIZED(self);
 if (!PyArg_ParseTuple(args, "|O:read", &arg))
 return NULL;
+ CHECK_CLOSED(self);
 
 if (PyNumber_Check(arg)) {
 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -216,29 +246,13 @@
 return PyUnicode_FromUnicode(output, size);
 }
 
+/* Internal helper, used by stringio_readline and stringio_iternext */
 static PyObject *
-stringio_readline(StringIOObject *self, PyObject *args)
+_stringio_readline(StringIOObject *self, Py_ssize_t limit)
 {
- PyObject *arg = Py_None;
- Py_ssize_t limit = -1;
 Py_UNICODE *start, *end, old_char;
 Py_ssize_t len, consumed;
 
- CHECK_INITIALIZED(self);
- if (!PyArg_ParseTuple(args, "|O:readline", &arg))
- return NULL;
-
- if (PyNumber_Check(arg)) {
- limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
- if (limit == -1 && PyErr_Occurred())
- return NULL;
- }
- else if (arg != Py_None) {
- PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
- Py_TYPE(arg)->tp_name);
- return NULL;
- }
-
 /* In case of overseek, return the empty string */
 if (self->pos >= self->string_size)
 return PyUnicode_FromString("");
@@ -262,6 +276,79 @@
 return PyUnicode_FromUnicode(start, len);
 }
 
+PyDoc_STRVAR(stringio_readline_doc,
+ "Read until newline or EOF.\n"
+ "\n"
+ "Returns an empty string if EOF is hit immediately.\n");
+
+static PyObject *
+stringio_readline(StringIOObject *self, PyObject *args)
+{
+ PyObject *arg = Py_None;
+ Py_ssize_t limit = -1;
+
+ CHECK_INITIALIZED(self);
+ if (!PyArg_ParseTuple(args, "|O:readline", &arg))
+ return NULL;
+ CHECK_CLOSED(self);
+
+ if (PyNumber_Check(arg)) {
+ limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
+ if (limit == -1 && PyErr_Occurred())
+ return NULL;
+ }
+ else if (arg != Py_None) {
+ PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
+ Py_TYPE(arg)->tp_name);
+ return NULL;
+ }
+ return _stringio_readline(self, limit);
+}
+
+static PyObject *
+stringio_iternext(StringIOObject *self)
+{
+ PyObject *line;
+
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+
+ if (Py_TYPE(self) == &PyStringIO_Type) {
+ /* Skip method call overhead for speed */
+ line = _stringio_readline(self, -1);
+ }
+ else {
+ /* XXX is subclassing StringIO really supported? */
+ line = PyObject_CallMethodObjArgs((PyObject *)self,
+ _PyIO_str_readline, NULL);
+ if (line && !PyUnicode_Check(line)) {
+ PyErr_Format(PyExc_IOError,
+ "readline() should have returned an str object, "
+ "not '%.200s'", Py_TYPE(line)->tp_name);
+ Py_DECREF(line);
+ return NULL;
+ }
+ }
+
+ if (line == NULL)
+ return NULL;
+
+ if (PyUnicode_GET_SIZE(line) == 0) {
+ /* Reached EOF */
+ Py_DECREF(line);
+ return NULL;
+ }
+
+ return line;
+}
+
+PyDoc_STRVAR(stringio_truncate_doc,
+ "Truncate size to pos.\n"
+ "\n"
+ "The pos argument defaults to the current file position, as\n"
+ "returned by tell(). Imply an absolute seek to pos.\n"
+ "Returns the new absolute position.\n");
+
 static PyObject *
 stringio_truncate(StringIOObject *self, PyObject *args)
 {
@@ -271,6 +358,7 @@
 CHECK_INITIALIZED(self);
 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
 return NULL;
+ CHECK_CLOSED(self);
 
 if (PyNumber_Check(arg)) {
 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -303,6 +391,15 @@
 return PyLong_FromSsize_t(size);
 }
 
+PyDoc_STRVAR(stringio_seek_doc,
+ "Change stream position.\n"
+ "\n"
+ "Seek to character offset pos relative to position indicated by whence:\n"
+ " 0 Start of stream (the default). pos should be >= 0;\n"
+ " 1 Current position - pos must be 0;\n"
+ " 2 End of stream - pos must be 0.\n"
+ "Returns the new absolute position.\n");
+
 static PyObject *
 stringio_seek(StringIOObject *self, PyObject *args)
 {
@@ -312,6 +409,7 @@
 CHECK_INITIALIZED(self);
 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
 return NULL;
+ CHECK_CLOSED(self);
 
 if (mode != 0 && mode != 1 && mode != 2) {
 PyErr_Format(PyExc_ValueError,
@@ -344,6 +442,12 @@
 return PyLong_FromSsize_t(self->pos);
 }
 
+PyDoc_STRVAR(stringio_write_doc,
+ "Write string to file.\n"
+ "\n"
+ "Returns the number of characters written, which is always equal to\n"
+ "the length of the string.\n");
+
 static PyObject *
 stringio_write(StringIOObject *self, PyObject *obj)
 {
@@ -355,6 +459,7 @@
 Py_TYPE(obj)->tp_name);
 return NULL;
 }
+ CHECK_CLOSED(self);
 size = PyUnicode_GET_SIZE(obj);
 
 if (size > 0 && write_str(self, obj) < 0)
@@ -363,13 +468,33 @@
 return PyLong_FromSsize_t(size);
 }
 
+PyDoc_STRVAR(stringio_close_doc,
+ "Close the IO object. Attempting any further operation after the\n"
+ "object is closed will raise a ValueError.\n"
+ "\n"
+ "This method has no effect if the file is already closed.\n");
+
+static PyObject *
+stringio_close(StringIOObject *self)
+{
+ self->closed = 1;
+ /* Free up some memory */
+ if (resize_buffer(self, 0) < 0)
+ return NULL;
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ Py_RETURN_NONE;
+}
+
 static void
 stringio_dealloc(StringIOObject *self)
 {
 Py_CLEAR(self->readnl);
 Py_CLEAR(self->writenl);
 Py_CLEAR(self->decoder);
- PyMem_Free(self->buf);
+ if (self->buf)
+ PyMem_Free(self->buf);
 Py_TYPE(self)->tp_free(self);
 }
 
@@ -472,11 +597,12 @@
 }
 self->pos = 0;
 
-
+ self->closed = 0;
 self->ok = 1;
 return 0;
 }
 
+/* Properties and pseudo-properties */
 static PyObject *
 stringio_seekable(StringIOObject *self, PyObject *args)
 {
@@ -507,9 +633,17 @@
 }
 
 static PyObject *
+stringio_closed(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ return PyBool_FromLong(self->closed);
+}
+
+static PyObject *
 stringio_encoding(StringIOObject *self, void *context)
 {
 CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
 return PyUnicode_FromString("utf-8");
 }
 
@@ -517,6 +651,7 @@
 stringio_errors(StringIOObject *self, void *context)
 {
 CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
 return PyUnicode_FromString("strict");
 }
 
@@ -524,25 +659,39 @@
 stringio_line_buffering(StringIOObject *self, void *context)
 {
 CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
 Py_RETURN_FALSE;
 }
 
+static PyObject *
+stringio_newlines(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ if (self->decoder == NULL)
+ Py_RETURN_NONE;
+ return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
+}
+
 static struct PyMethodDef stringio_methods[] = {
- {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
- {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
- {"readline", (PyCFunction)stringio_readline, METH_VARARGS, NULL},
- {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
- {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
- {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
- {"write", (PyCFunction)stringio_write, METH_O, NULL},
+ {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
+ {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
+ {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
+ {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
+ {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
+ {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
+ {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
+ {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
 
- {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
- {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
- {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
+ {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
+ {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
+ {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
 {NULL, NULL} /* sentinel */
 };
 
 static PyGetSetDef stringio_getset[] = {
+ {"closed", (getter)stringio_closed, NULL, NULL},
+ {"newlines", (getter)stringio_newlines, NULL, NULL},
 /* (following comments straight off of the original Python wrapper:)
 XXX Cruft to support the TextIOWrapper API. This would only
 be meaningful if StringIO supported the buffer attribute.
@@ -558,7 +707,7 @@
 
 PyTypeObject PyStringIO_Type = {
 PyVarObject_HEAD_INIT(NULL, 0)
- "_StringIO", /*tp_name*/
+ "StringIO", /*tp_name*/
 sizeof(StringIOObject), /*tp_basicsize*/
 0, /*tp_itemsize*/
 (destructor)stringio_dealloc, /*tp_dealloc*/
@@ -577,13 +726,13 @@
 0, /*tp_setattro*/
 0, /*tp_as_buffer*/
 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- 0, /*tp_doc*/
+ stringio_doc, /*tp_doc*/
 0, /*tp_traverse*/
 0, /*tp_clear*/
 0, /*tp_richcompare*/
 0, /*tp_weaklistoffset*/
 0, /*tp_iter*/
- 0, /*tp_iternext*/
+ (iternextfunc)stringio_iternext, /*tp_iternext*/
 stringio_methods, /*tp_methods*/
 0, /*tp_members*/
 stringio_getset, /*tp_getset*/
Modified: python/branches/io-c/Modules/io.c
==============================================================================
--- python/branches/io-c/Modules/io.c	(original)
+++ python/branches/io-c/Modules/io.c	Sat Feb 21 19:54:01 2009
@@ -671,8 +671,8 @@
 ADD_TYPE(&PyBytesIO_Type, "BytesIO");
 
 /* StringIO */
- /* PyStringIO_Type.tp_base = &PyTextIOBase_Type; */
- ADD_TYPE(&PyStringIO_Type, "_StringIO");
+ PyStringIO_Type.tp_base = &PyTextIOBase_Type;
+ ADD_TYPE(&PyStringIO_Type, "StringIO");
 
 /* BufferedReader */
 PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type;


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /