[Python-checkins] r67816 - in sandbox/trunk/io-c: _bufferedio.c _fileio.c _iobase.c _textio.c build.py io.c io.py

amaury.forgeotdarc python-checkins at python.org
Tue Dec 16 23:26:10 CET 2008


Author: amaury.forgeotdarc
Date: Tue Dec 16 23:26:10 2008
New Revision: 67816
Log:
General progress in io-c:
- merge changes from py3k branch
- rewrite parameter handling in fileio_init
- start to write TextIOWrapper (not used; just compiles)
- more cosmetic changes
Added:
 sandbox/trunk/io-c/_textio.c (contents, props changed)
Modified:
 sandbox/trunk/io-c/_bufferedio.c
 sandbox/trunk/io-c/_fileio.c
 sandbox/trunk/io-c/_iobase.c
 sandbox/trunk/io-c/build.py
 sandbox/trunk/io-c/io.c
 sandbox/trunk/io-c/io.py
Modified: sandbox/trunk/io-c/_bufferedio.c
==============================================================================
--- sandbox/trunk/io-c/_bufferedio.c	(original)
+++ sandbox/trunk/io-c/_bufferedio.c	Tue Dec 16 23:26:10 2008
@@ -3,6 +3,9 @@
 #include "pythread.h"
 #include "_iomodule.h"
 
+extern PyObject *
+_PyIOBase_checkClosed(PyObject *self, PyObject *unused);
+
 /*
 * BufferedIOBase class, inherits from IOBase.
 */
@@ -108,9 +111,6 @@
 
 PyObject *raw;
 
- PyObject *name;
- PyObject *mode;
-
 PyObject *read_buf;
 Py_ssize_t read_pos;
 PyThread_type_lock read_lock;
@@ -131,8 +131,6 @@
 if (self->weakreflist != NULL)
 PyObject_ClearWeakRefs((PyObject *)self);
 Py_CLEAR(self->raw);
- Py_CLEAR(self->name);
- Py_CLEAR(self->mode);
 Py_CLEAR(self->read_buf);
 Py_CLEAR(self->write_buf);
 Py_CLEAR(self->dict);
@@ -147,7 +145,7 @@
 
 /* Positioning */
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_truncate(BufferedObject *self, PyObject *args)
 {
 PyObject *pos = Py_None;
@@ -182,7 +180,7 @@
 
 /* Flush and close */
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_flush(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "flush", NULL);
@@ -206,8 +204,7 @@
 return PyObject_GetAttrString(self->raw, "closed");
 }
 
-
-static PyObject*
+static PyObject *
 BufferedIOMixin_close(BufferedObject *self, PyObject *args)
 {
 PyObject *res;
@@ -229,33 +226,45 @@
 
 /* Inquiries */
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_seekable(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "seekable", NULL);
 }
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_readable(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "readable", NULL);
 }
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_writable(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "writable", NULL);
 }
 
+static PyObject *
+BufferedIOMixin_name_get(BufferedObject *self, void *context)
+{
+ return PyObject_GetAttrString(self->raw, "name");
+}
+
+static PyObject *
+BufferedIOMixin_mode_get(BufferedObject *self, void *context)
+{
+ return PyObject_GetAttrString(self->raw, "mode");
+}
+
 /* Lower-level APIs */
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_fileno(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "fileno", NULL);
 }
 
-static PyObject*
+static PyObject *
 BufferedIOMixin_isatty(BufferedObject *self, PyObject *args)
 {
 return PyObject_CallMethod(self->raw, "isatty", NULL);
@@ -518,6 +527,11 @@
 return NULL;
 }
 
+ if (BufferedIOMixin_closed(self)) {
+ PyErr_SetString(PyExc_ValueError, "read of closed file");
+ return NULL;
+ }
+
 Py_BEGIN_ALLOW_THREADS
 PyThread_acquire_lock(self->read_lock, 1);
 Py_END_ALLOW_THREADS
@@ -628,7 +642,7 @@
 return res;
 }
 
-static PyObject*
+static PyObject *
 BufferedReader_seek(BufferedObject *self, PyObject *args)
 {
 Py_ssize_t pos;
@@ -659,7 +673,7 @@
 return res;
 }
 
-static PyObject*
+static PyObject *
 BufferedReader_tell(BufferedObject *self, PyObject *args)
 {
 PyObject *op1, *op2, *res;
@@ -701,13 +715,14 @@
 };
 
 static PyMemberDef BufferedReader_members[] = {
- {"_name", T_OBJECT, offsetof(BufferedObject, name), 0},
- {"_mode", T_OBJECT, offsetof(BufferedObject, mode), 0},
+ {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0},
 {NULL}
 };
 
 static PyGetSetDef BufferedReader_getset[] = {
 {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL},
+ {"name", (getter)BufferedIOMixin_name_get, NULL, NULL},
+ {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL},
 {0}
 };
 
@@ -1068,13 +1083,14 @@
 };
 
 static PyMemberDef BufferedWriter_members[] = {
- {"_name", T_OBJECT, offsetof(BufferedObject, name), 0},
- {"_mode", T_OBJECT, offsetof(BufferedObject, mode), 0},
+ {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0},
 {NULL}
 };
 
 static PyGetSetDef BufferedWriter_getset[] = {
 {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL},
+ {"name", (getter)BufferedIOMixin_name_get, NULL, NULL},
+ {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL},
 {0}
 };
 
@@ -1172,7 +1188,8 @@
 Py_CLEAR(self->reader);
 return -1;
 }
- self->reader = (BufferedObject*)PyType_GenericNew(&PyBufferedReader_Type, args, NULL);
+ self->reader = (BufferedObject *)PyType_GenericNew(
+	 &PyBufferedReader_Type, args, NULL);
 Py_DECREF(args);
 if (self->reader == NULL)
 return -1;
@@ -1182,7 +1199,8 @@
 Py_CLEAR(self->reader);
 return -1;
 }
- self->writer = (BufferedObject*)PyType_GenericNew(&PyBufferedWriter_Type, args, NULL);
+ self->writer = (BufferedObject *)PyType_GenericNew(
+	 &PyBufferedWriter_Type, args, NULL);
 Py_DECREF(args);
 if (self->writer == NULL) {
 Py_CLEAR(self->reader);
@@ -1199,9 +1217,9 @@
 }
 
 static PyObject *
-_forward_call(BufferedObject *self, const char* name, PyObject *args)
+_forward_call(BufferedObject *self, const char *name, PyObject *args)
 {
- PyObject *func = PyObject_GetAttrString((PyObject*)self, name);
+ PyObject *func = PyObject_GetAttrString((PyObject *)self, name);
 PyObject *ret;
 
 if (func == NULL) {
@@ -1425,7 +1443,7 @@
 return NULL;
 }
 
- res = PyObject_CallMethod((PyObject*)self, "flush", NULL);
+ res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
 if (res == NULL)
 return NULL;
 
@@ -1583,13 +1601,14 @@
 };
 
 static PyMemberDef BufferedRandom_members[] = {
- {"_name", T_OBJECT, offsetof(BufferedObject, name), 0},
- {"_mode", T_OBJECT, offsetof(BufferedObject, mode), 0},
+ {"raw", T_OBJECT, offsetof(BufferedObject, raw), 0},
 {NULL}
 };
 
 static PyGetSetDef BufferedRandom_getset[] = {
 {"closed", (getter)BufferedIOMixin_closed_get, NULL, NULL},
+ {"name", (getter)BufferedIOMixin_name_get, NULL, NULL},
+ {"mode", (getter)BufferedIOMixin_mode_get, NULL, NULL},
 {0}
 };
 
Modified: sandbox/trunk/io-c/_fileio.c
==============================================================================
--- sandbox/trunk/io-c/_fileio.c	(original)
+++ sandbox/trunk/io-c/_fileio.c	Tue Dec 16 23:26:10 2008
@@ -28,6 +28,20 @@
 #include <windows.h>
 #endif
 
+#if BUFSIZ < (8*1024)
+#define SMALLCHUNK (8*1024)
+#elif (BUFSIZ >= (2 << 25))
+#error "unreasonable BUFSIZ > 64MB defined"
+#else
+#define SMALLCHUNK BUFSIZ
+#endif
+
+#if SIZEOF_INT < 4
+#define BIGCHUNK (512 * 32)
+#else
+#define BIGCHUNK (512 * 1024)
+#endif
+
 typedef struct {
 	PyObject_HEAD
 	int fd;
@@ -63,10 +77,7 @@
 fileio_close(PyFileIOObject *self)
 {
 	if (!self->closefd) {
-		if (PyErr_WarnEx(PyExc_RuntimeWarning,
-				 "Trying to close unclosable fd!", 3) < 0) {
-			return NULL;
-		}
+		self->fd = -1;
 		Py_RETURN_NONE;
 	}
 	errno = internal_close(self);
@@ -89,6 +100,10 @@
 	self = (PyFileIOObject *) type->tp_alloc(type, 0);
 	if (self != NULL) {
 		self->fd = -1;
+		self->readable = 0;
+		self->writable = 0;
+		self->seekable = -1;
+		self->closefd = 1;
 		self->weakreflist = NULL;
 	}
 
@@ -128,6 +143,7 @@
 	PyFileIOObject *self = (PyFileIOObject *) oself;
 	static char *kwlist[] = {"file", "mode", "closefd", NULL};
 	char *name = NULL;
+	PyObject *nameobj;
 	char *mode = "r";
 	char *s;
 #ifdef MS_WINDOWS
@@ -146,44 +162,60 @@
 			return -1;
 	}
 
-	if (PyArg_ParseTupleAndKeywords(args, kwds, "i|si:fileio",
-					kwlist, &fd, &mode, &closefd)) {
-		if (fd < 0) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:fileio",
+					 kwlist, &nameobj, &mode, &closefd))
+		return -1;
+
+	if (PyFloat_Check(nameobj)) {
+		PyErr_SetString(PyExc_TypeError,
+				"integer argument expected, got float");
+		return -1;
+	}
+
+	fd = PyLong_AsLong(nameobj);
+	if (fd < 0) {
+		if (!PyErr_Occurred()) {
 			PyErr_SetString(PyExc_ValueError,
 					"Negative filedescriptor");
 			return -1;
 		}
-	}
-	else {
 		PyErr_Clear();
+	}
 
 #ifdef Py_WIN_WIDE_FILENAMES
-	 if (GetVersion() < 0x80000000) {
+	if (GetVersion() < 0x80000000) {
 		/* On NT, so wide API available */
-		PyObject *po;
-		if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:fileio",
-						kwlist, &po, &mode, &closefd)
-						) {
-			widename = PyUnicode_AS_UNICODE(po);
-		} else {
-			/* Drop the argument parsing error as narrow
-			 strings are also valid. */
-			PyErr_Clear();
-		}
-	 }
-	 if (widename == NULL)
-#endif
-	 {
-		if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:fileio",
-						 kwlist,
-						 Py_FileSystemDefaultEncoding,
-						 &name, &mode, &closefd))
-			return -1;
-	 }
+		if (PyUnicode_Check(nameobj))
+			widename = PyUnicode_AS_UNICODE(nameobj);
+	}
+	if (widename == NULL)
+#endif
+	if (fd < 0)
+	{
+		if (PyBytes_Check(nameobj) || PyByteArray_Check(nameobj)) {
+			if (PyObject_AsCharBuffer(nameobj, &name, NULL) < 0)
+				return -1;
+		}
+		else {
+			PyObject *s;
+			PyObject *u = PyUnicode_FromObject(nameobj);
+
+			if (u == NULL)
+				return -1;
+
+			s = PyUnicode_AsEncodedString(
+				u, Py_FileSystemDefaultEncoding, NULL);
+			Py_DECREF(u);
+			if (s == NULL)
+				return -1;
+			if (!PyBytes_Check(s)) {
+				PyErr_SetString(PyExc_TypeError,
+						"encoder failed to return bytes");
+			}
+			name = PyBytes_AS_STRING(s);
+		}
 	}
 
-	self->readable = self->writable = 0;
-	self->seekable = -1;
 	s = mode;
 	while (*s) {
 		switch (*s++) {
@@ -212,6 +244,8 @@
 			flags |= O_CREAT;
 			append = 1;
 			break;
+		case 'b':
+			break;
 		case '+':
 			if (plus)
 				goto bad_mode;
@@ -252,7 +286,7 @@
 		self->closefd = 1;
 		if (!closefd) {
 			PyErr_SetString(PyExc_ValueError,
- "Cannot use closefd=True with file name");
+ "Cannot use closefd=False with file name");
 			goto error;
 		}
 
@@ -277,6 +311,9 @@
 			goto error;
 	}
 
+ if (PyObject_SetAttrString(self, "name", nameobj) < 0)
+		goto error;
+
 	goto done;
 
 error:
@@ -396,12 +433,23 @@
 	Py_ssize_t total = 0;
 	int n;
 
-	result = PyBytes_FromStringAndSize(NULL, DEFAULT_BUFFER_SIZE);
+	result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
 	if (result == NULL)
 		return NULL;
 
 	while (1) {
-		Py_ssize_t newsize = total + DEFAULT_BUFFER_SIZE;
+		Py_ssize_t newsize = (total < SMALLCHUNK) ? SMALLCHUNK : total;
+
+		/* Keep doubling until we reach BIGCHUNK;
+		 then keep adding BIGCHUNK. */
+		if (newsize <= BIGCHUNK) {
+			newsize += newsize;
+		}
+		else {
+			/* NOTE: overflow impossible due to limits on BUFSIZ */
+			newsize += BIGCHUNK;
+		}
+
 		if (PyBytes_GET_SIZE(result) < newsize) {
 			if (_PyBytes_Resize(&result, newsize) < 0) {
 				if (total == 0) {
@@ -684,12 +732,12 @@
 {
 	if (self->readable) {
 		if (self->writable)
-			return "r+";
+			return "rb+";
 		else
-			return "r";
+			return "rb";
 	}
 	else
-		return "w";
+		return "wb";
 }
 
 static PyObject *
@@ -820,6 +868,12 @@
 }
 
 static PyObject *
+get_closefd(PyFileIOObject *self, void *closure)
+{
+	return PyBool_FromLong((long)(self->closefd));
+}
+
+static PyObject *
 get_mode(PyFileIOObject *self, void *closure)
 {
 	return PyUnicode_FromString(mode_string(self));
@@ -827,6 +881,8 @@
 
 static PyGetSetDef fileio_getsetlist[] = {
 	{"closed", (getter)get_closed, NULL, "True if the file is closed"},
+	{"closefd", (getter)get_closefd, NULL, 
+		"True if the file descriptor will be closed"},
 	{"mode", (getter)get_mode, NULL, "String giving the file mode"},
 	{0},
 };
@@ -870,5 +926,5 @@
 	fileio_init,				/* tp_init */
 	PyType_GenericAlloc,			/* tp_alloc */
 	fileio_new,				/* tp_new */
-	0,				/* tp_free */
+	PyObject_Del,				/* tp_free */
 };
Modified: sandbox/trunk/io-c/_iobase.c
==============================================================================
--- sandbox/trunk/io-c/_iobase.c	(original)
+++ sandbox/trunk/io-c/_iobase.c	Tue Dec 16 23:26:10 2008
@@ -38,7 +38,7 @@
 
 /* Internal methods */
 static PyObject *
-IOBase_unsupported(const char* message)
+IOBase_unsupported(const char *message)
 {
 PyErr_SetString(PyIOExc_UnsupportedOperation, message);
 return NULL;
@@ -59,7 +59,7 @@
 "\n"
 "Return the new absolute position.");
 
-static PyObject*
+static PyObject *
 IOBase_seek(PyObject *self, PyObject *args)
 {
 return IOBase_unsupported("seek");
@@ -80,7 +80,7 @@
 "Size defaults to the current IO position as reported by tell(). Return\n"
 "the new size.");
 
-static PyObject*
+static PyObject *
 IOBase_truncate(PyObject *self, PyObject *args)
 {
 return IOBase_unsupported("seek");
@@ -93,7 +93,7 @@
 "\n"
 "This is not implemented for read-only and non-blocking streams.\n");
 
-static PyObject*
+static PyObject *
 IOBase_flush(PyObject *self, PyObject *args)
 {
 /* XXX Should this return the number of bytes written??? */
@@ -128,7 +128,7 @@
 Py_RETURN_NONE;
 }
 
-static PyObject*
+static PyObject *
 IOBase_close(PyObject *self, PyObject *args)
 {
 PyObject *res;
@@ -171,13 +171,13 @@
 "If False, seek(), tell() and truncate() will raise IOError.\n"
 "This method may need to do a test seek().");
 
-static PyObject*
+static PyObject *
 IOBase_seekable(PyObject *self, PyObject *args)
 {
 Py_RETURN_FALSE;
 }
 
-PyObject*
+PyObject *
 _PyIOBase_checkSeekable(PyObject *self, PyObject *unused)
 {
 PyObject *res = PyObject_CallMethod(self, "seekable", NULL);
@@ -195,14 +195,14 @@
 "\n"
 "If False, read() will raise IOError.");
 
-static PyObject*
+static PyObject *
 IOBase_readable(PyObject *self, PyObject *args)
 {
 Py_RETURN_FALSE;
 }
 
 /* May be called with any object */
-PyObject*
+PyObject *
 _PyIOBase_checkReadable(PyObject *self, PyObject *unused)
 {
 PyObject *res = PyObject_CallMethod(self, "readable", NULL);
@@ -220,14 +220,14 @@
 "\n"
 "If False, read() will raise IOError.");
 
-static PyObject*
+static PyObject *
 IOBase_writable(PyObject *self, PyObject *args)
 {
 Py_RETURN_FALSE;
 }
 
 /* May be called with any object */
-PyObject*
+PyObject *
 _PyIOBase_checkWritable(PyObject *self, PyObject *unused)
 {
 PyObject *res = PyObject_CallMethod(self, "writable", NULL);
Added: sandbox/trunk/io-c/_textio.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/io-c/_textio.c	Tue Dec 16 23:26:10 2008
@@ -0,0 +1,578 @@
+#include "Python.h"
+#include "_iomodule.h"
+
+extern PyObject *
+_PyIOBase_checkClosed(PyObject *self, PyObject *unused);
+
+/* TextIOBase */
+
+/* Docstring text describing the TextIOBase class.  Nothing else in the
+ * visible part of this file references TextIOBase_doc yet. */
+PyDoc_STRVAR(TextIOBase_doc,
+ "Base class for text I/O.\n"
+ "\n"
+ "This class provides a character and line based interface to stream\n"
+ "I/O. There is no readinto method because Python's character strings\n"
+ "are immutable. There is no public constructor.\n"
+ );
+
+/* Raise PyIOExc_UnsupportedOperation carrying the given text.
+ * Always returns NULL so callers can write `return _unsupported(...)`. */
+static PyObject *
+_unsupported(const char *operation)
+{
+    PyObject *const no_result = NULL;
+
+    PyErr_SetString(PyIOExc_UnsupportedOperation, operation);
+    return no_result;
+}
+
+/* Base-class read(): not implemented here, always raises
+ * UnsupportedOperation("read") and returns NULL. */
+static PyObject *
+TextIOBase_read(PyObject *self, PyObject *args)
+{
+    static const char operation[] = "read";
+
+    return _unsupported(operation);
+}
+
+/* Base-class write(): not implemented here, always raises
+ * UnsupportedOperation and returns NULL.  The message names the operation
+ * that was actually attempted ("write", not "read"). */
+static PyObject *
+TextIOBase_write(PyObject *self, PyObject *args)
+{
+    return _unsupported("write");
+}
+
+/* Base-class readline(): not implemented here, always raises
+ * UnsupportedOperation and returns NULL.  The message names the operation
+ * that was actually attempted ("readline", not "read"). */
+static PyObject *
+TextIOBase_readline(PyObject *self, PyObject *args)
+{
+    return _unsupported("readline");
+}
+
+/* XXX properties: encoding, newlines */
+
+
+/* TextIOWrapper */
+
+/* Docstring text for TextIOWrapper (spelling and grammar corrected). */
+PyDoc_STRVAR(TextIOWrapper_doc,
+    "Character and line based layer over a BufferedIOBase object, buffer.\n"
+    "\n"
+    "encoding gives the name of the encoding that the stream will be\n"
+    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
+    "\n"
+    "errors determines the strictness of encoding and decoding (see\n"
+    "codecs.register) and defaults to \"strict\".\n"
+    "\n"
+    "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
+    "handling of line endings. If it is None, universal newlines is\n"
+    "enabled. With this enabled, on input, the line endings '\\n', '\\r',\n"
+    "or '\\r\\n' are translated to '\\n' before being returned to the\n"
+    "caller. Conversely, on output, '\\n' is translated to the system\n"
+    "default line separator, os.linesep. If newline is any other of its\n"
+    "legal values, that newline becomes the newline when the file is read\n"
+    "and it is returned untranslated. On output, '\\n' is converted to the\n"
+    "newline.\n"
+    "\n"
+    "If line_buffering is True, a call to flush is implied when a call to\n"
+    "write contains a newline character."
+    );
+
+/* Per-instance state of a TextIOWrapper object. */
+typedef struct
+{
+    PyObject_HEAD
+    Py_ssize_t chunk_size;      /* size passed to buffer.read1() per chunk */
+    PyObject *buffer;           /* underlying object; read/read1/write/flush
+                                 * are called on it */
+    const char *encoding;       /* handed to PyCodec_Incremental{En,De}coder */
+    const char *errors;         /* handed to PyCodec_Incremental{En,De}coder */
+    PyObject *encoder;          /* created lazily, NULL until first needed */
+    PyObject *decoder;          /* created lazily, NULL until first needed */
+    PyObject *readnl;           /* line ending searched for by readline() in
+                                 * non-universal mode */
+    const char *writenl; /* utf-8 encoded, NULL stands for \n */
+    /* One-bit flags.  They are declared unsigned: whether a plain `int`
+     * bit-field is signed is implementation-defined, and a signed 1-bit
+     * field cannot represent the value 1. */
+    unsigned int line_buffering:1;
+    unsigned int readuniversal:1;
+    unsigned int readtranslate:1;
+    unsigned int writetranslate:1;
+    unsigned int seekable:1;
+    unsigned int telling:1;
+
+    PyObject *decoded_chars;       /* buffer for text returned from decoder */
+    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
+    PyObject *snapshot;
+    /* snapshot is either None, or a tuple (dec_flags, next_input) where
+     * dec_flags is the second (integer) item of the decoder state and
+     * next_input is the chunk of input bytes that comes next after the
+     * snapshot point.  We use this to reconstruct decoder states in tell().
+     */
+} PyTextIOWrapperObject;
+
+/* Return a pointer to the first occurrence of ch within the first `size`
+ * code units of s, or NULL when it does not occur (or size <= 0).
+ * Unlike wcschr, the scan is bounded by size and does not stop at NUL. */
+Py_LOCAL_INLINE(const Py_UNICODE *)
+findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
+{
+    Py_ssize_t i;
+
+    for (i = 0; i < size; i++) {
+        if (s[i] == ch)
+            return &s[i];
+    }
+    return NULL;
+}
+
+/* Return a new reference to the incremental encoder, creating and caching
+ * it in self->encoder on first use.  Returns NULL (exception set by
+ * PyCodec_IncrementalEncoder) when the encoder cannot be created. */
+static PyObject *
+TextIOWrapper_encoder_get(PyTextIOWrapperObject *self)
+{
+    PyObject *enc = self->encoder;
+
+    if (enc == NULL) {
+        enc = PyCodec_IncrementalEncoder(self->encoding, self->errors);
+        if (enc == NULL)
+            return NULL;
+        self->encoder = enc;    /* the cache owns this reference */
+    }
+
+    Py_INCREF(enc);             /* the caller gets its own reference */
+    return enc;
+}
+
+/* Return a new reference to the incremental decoder, creating and caching
+ * it in self->decoder on first use.  Returns NULL (exception set by
+ * PyCodec_IncrementalDecoder) when the decoder cannot be created. */
+static PyObject *
+TextIOWrapper_decoder_get(PyTextIOWrapperObject *self)
+{
+    if (self->decoder == NULL) {
+        PyObject *decoder = PyCodec_IncrementalDecoder(
+            self->encoding, self->errors);
+
+        if (decoder == NULL)
+            return NULL;
+
+        if (self->readuniversal) {
+            /* XXX
+             * decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
+             */
+        }
+
+        /* Store the new decoder: without this the object leaks and the
+         * Py_INCREF below would be applied to a NULL pointer. */
+        self->decoder = decoder;
+    }
+
+    Py_INCREF(self->decoder);
+    return self->decoder;
+}
+
+/* write(text): encode a unicode string and pass the bytes to
+ * self->buffer.write().
+ *
+ * '\n' is replaced by self->writenl when writetranslate is set and writenl
+ * is not NULL.  With line_buffering, the buffer is flushed when the text
+ * contains '\n' (before translation) or '\r' (after translation).
+ * Afterwards the snapshot is dropped and an existing decoder is reset.
+ * Returns the length of the text as passed in, or NULL on error. */
+static PyObject *
+TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
+{
+    PyObject *text;             /* owned after the Py_INCREF below */
+    PyObject *encoder, *encoded, *res;
+    Py_ssize_t written;
+    int found_lf = 0;
+    int flush_after = 0;
+
+    if (!PyArg_ParseTuple(args, "U:write", &text))
+        return NULL;
+    if (_PyIOBase_checkClosed((PyObject *)self, NULL) == NULL)
+        return NULL;
+
+    Py_INCREF(text);
+    written = PyUnicode_GetSize(text);
+
+    /* Only scan for '\n' when one of the two features needs the answer. */
+    if ((self->writetranslate || self->line_buffering) &&
+        findchar(PyUnicode_AS_UNICODE(text),
+                 PyUnicode_GET_SIZE(text), '\n') != NULL) {
+        found_lf = 1;
+    }
+
+    if (found_lf && self->writetranslate && self->writenl != NULL) {
+        PyObject *translated = PyObject_CallMethod(
+            text, "replace", "ss", "\n", self->writenl);
+        Py_DECREF(text);
+        if (translated == NULL)
+            return NULL;
+        text = translated;
+    }
+
+    if (self->line_buffering) {
+        if (found_lf ||
+            findchar(PyUnicode_AS_UNICODE(text),
+                     PyUnicode_GET_SIZE(text), '\r') != NULL) {
+            flush_after = 1;
+        }
+    }
+
+    encoder = TextIOWrapper_encoder_get(self);
+    if (encoder == NULL) {
+        Py_DECREF(text);
+        return NULL;
+    }
+    /* XXX What if we were just reading? */
+    encoded = PyObject_CallMethod(encoder, "encode", "O", text);
+    Py_DECREF(text);
+    Py_DECREF(encoder);
+    if (encoded == NULL)
+        return NULL;
+
+    res = PyObject_CallMethod(self->buffer, "write", "O", encoded);
+    Py_DECREF(encoded);
+    if (res == NULL)
+        return NULL;
+    Py_DECREF(res);
+
+    if (flush_after) {
+        res = PyObject_CallMethod(self->buffer, "flush", NULL);
+        if (res == NULL)
+            return NULL;
+        Py_DECREF(res);
+    }
+
+    Py_CLEAR(self->snapshot);
+
+    if (self->decoder) {
+        res = PyObject_CallMethod(self->decoder, "reset", NULL);
+        if (res == NULL)
+            return NULL;
+        Py_DECREF(res);
+    }
+
+    return PyLong_FromSsize_t(written);
+}
+
+/* Replace the decoded_chars buffer with chars (may be NULL) and reset the
+ * read offset to 0.  The reference to chars is stolen; the previous buffer,
+ * if any, is released.
+ */
+static void
+TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
+{
+    PyObject *stale = self->decoded_chars;
+
+    if (stale != NULL) {
+        /* Unhook before releasing, as Py_CLEAR does. */
+        self->decoded_chars = NULL;
+        Py_DECREF(stale);
+    }
+    self->decoded_chars_used = 0;
+    self->decoded_chars = chars;
+}
+
+/* Return a new unicode object holding up to n not-yet-consumed characters
+ * of the decoded_chars buffer and advance the read offset past them.
+ * A negative n, or an n larger than what is left, means "everything left".
+ * An empty string is returned when there is no buffer.
+ * Returns NULL with an exception set on failure.
+ *
+ * The buffer is text: its length is read only after the NULL check, and the
+ * slice is built with the unicode API rather than the bytes API. */
+static PyObject *
+TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
+{
+    PyObject *chars;
+    Py_ssize_t avail;
+
+    if (self->decoded_chars == NULL)
+        return PyUnicode_FromStringAndSize(NULL, 0);
+
+    if (!PyUnicode_Check(self->decoded_chars)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "decoder should return a string result");
+        return NULL;
+    }
+
+    avail = (PyUnicode_GET_SIZE(self->decoded_chars)
+             - self->decoded_chars_used);
+
+    if (n < 0 || n > avail)
+        n = avail;
+
+    chars = PyUnicode_FromUnicode(
+        PyUnicode_AS_UNICODE(self->decoded_chars) + self->decoded_chars_used,
+        n);
+    if (chars == NULL)
+        return NULL;
+
+    self->decoded_chars_used += n;
+    return chars;
+}
+
+/* Move the read offset of the decoded_chars buffer back by n characters.
+ * Returns the new offset (converted to int), or -1 with ValueError set when
+ * n is larger than the current offset. */
+static int
+TextIOWrapper_rewind_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
+{
+    if (n <= self->decoded_chars_used) {
+        self->decoded_chars_used -= n;
+        return self->decoded_chars_used;
+    }
+
+    PyErr_SetString(PyExc_ValueError,
+                    "rewind decoded_chars out of bounds");
+    return -1;
+}
+
+/* Number of characters still unread in the decoded_chars buffer; 0 when
+ * there is no buffer.  The parameter n is not used. */
+static Py_ssize_t
+TextIOWrapper_decoded_chars_length(PyTextIOWrapperObject *self, Py_ssize_t n)
+{
+    PyObject *pending = self->decoded_chars;
+
+    if (pending != NULL)
+        return PyUnicode_GetSize(pending) - self->decoded_chars_used;
+    return 0;
+}
+
+
+/* Read and decode the next chunk of data from the BufferedReader.
+ *
+ * Returns 1 when data was read, 0 when EOF was reached, and -1 with an
+ * exception set on error.  The decoded string is placed in
+ * self->decoded_chars (replacing its previous value).  The entire input
+ * chunk is sent to the decoder, though some of it may remain buffered in
+ * the decoder, yet to be converted.
+ *
+ * Reference handling: the two items unpacked from the decoder state are
+ * borrowed from the state tuple, so they are INCREF'ed before the tuple is
+ * released; input_chunk is kept alive until the snapshot has been built.
+ */
+static int
+TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
+{
+    PyObject *dec_buffer = NULL;
+    PyObject *dec_flags = NULL;
+    PyObject *input_chunk = NULL;
+    PyObject *decoded_chars;
+    int eof;
+
+    if (self->decoder == NULL) {
+        PyErr_SetString(PyExc_ValueError, "no decoder");
+        return -1;
+    }
+
+    if (self->telling) {
+        /* To prepare for tell(), we need to snapshot a point in the file
+         * where the decoder's input buffer is empty.
+         */
+        PyObject *state = PyObject_CallMethod(self->decoder, "getstate", NULL);
+        if (state == NULL)
+            return -1;
+        /* Given this, we know there was a valid snapshot point
+         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
+         * PyArg_Parse returns 0 (not a negative value) on failure.
+         */
+        if (!PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags)) {
+            Py_DECREF(state);
+            return -1;
+        }
+        Py_INCREF(dec_buffer);
+        Py_INCREF(dec_flags);
+        Py_DECREF(state);
+    }
+
+    /* Read a chunk, decode it, and put the result in self->decoded_chars. */
+    input_chunk = PyObject_CallMethod(self->buffer, "read1",
+                                      "n", self->chunk_size);
+    if (input_chunk == NULL)
+        goto fail;
+    assert(PyBytes_Check(input_chunk));
+
+    eof = (PyBytes_Size(input_chunk) == 0);
+
+    decoded_chars = PyObject_CallMethod(self->decoder, "decode",
+                                        "Oi", input_chunk, eof);
+    if (decoded_chars == NULL)
+        goto fail;
+    TextIOWrapper_set_decoded_chars(self, decoded_chars);
+
+    if (self->telling) {
+        /* At the snapshot point, len(dec_buffer) bytes before the read, the
+         * next input to be decoded is dec_buffer + input_chunk.
+         */
+        PyObject *snapshot;
+        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
+        if (next_input == NULL)
+            goto fail;
+        /* PyTuple_Pack takes its own references, so ownership of the
+         * locals stays simple on both the success and failure paths. */
+        snapshot = PyTuple_Pack(2, dec_flags, next_input);
+        Py_DECREF(next_input);
+        if (snapshot == NULL)
+            goto fail;
+        Py_CLEAR(self->snapshot);
+        self->snapshot = snapshot;
+    }
+
+    Py_DECREF(input_chunk);
+    Py_XDECREF(dec_buffer);
+    Py_XDECREF(dec_flags);
+    return (eof == 0);
+
+  fail:
+    Py_XDECREF(input_chunk);
+    Py_XDECREF(dec_buffer);
+    Py_XDECREF(dec_flags);
+    return -1;
+}
+
+/* read([n]): return up to n characters, or everything until EOF when n is
+ * omitted or negative.  Returns a new reference, or NULL on error.
+ *
+ * Read-all path: the rest of the underlying buffer is read and decoded with
+ * final=1, then appended to the unread tail of decoded_chars (the part after
+ * decoded_chars_used).  The pending buffer and the snapshot are dropped.
+ *
+ * Bounded path: pending characters are taken first (at most n), then chunks
+ * are read until n characters are available or read_chunk reports EOF.
+ */
+static PyObject *
+TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
+{
+    Py_ssize_t n = -1;
+    PyObject *decoder;
+    PyObject *result = NULL;
+
+    if (!PyArg_ParseTuple(args, "|n:read", &n))
+        return NULL;
+
+    decoder = TextIOWrapper_decoder_get(self);
+    if (decoder == NULL)
+        return NULL;
+
+    if (n < 0) {
+        /* Read everything */
+        PyObject *decoded;
+        PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
+        if (bytes == NULL)
+            goto fail;
+        decoded = PyObject_CallMethod(decoder, "decode", "Oi",
+                                      bytes, /*final=*/1);
+        Py_DECREF(bytes);
+        if (decoded == NULL)
+            goto fail;
+
+        if (self->decoded_chars != NULL) {
+            /* Only the tail that has not been handed out yet is pending. */
+            PyObject *pending = PySequence_GetSlice(
+                self->decoded_chars, self->decoded_chars_used,
+                PY_SSIZE_T_MAX);
+            if (pending == NULL) {
+                Py_DECREF(decoded);
+                goto fail;
+            }
+            result = PyNumber_Add(pending, decoded);
+            Py_DECREF(pending);
+            Py_DECREF(decoded);
+            if (result == NULL)
+                goto fail;
+            Py_CLEAR(self->decoded_chars);
+            self->decoded_chars_used = 0;
+        }
+        else {
+            result = decoded;
+        }
+        Py_CLEAR(self->snapshot);
+    }
+    else {
+        int res = 1;
+
+        result = TextIOWrapper_get_decoded_chars(self, n);
+
+        /* Keep reading chunks until we have n characters to return */
+        while (result != NULL && res == 1 &&
+               PyUnicode_GET_SIZE(result) < n) {
+            res = TextIOWrapper_read_chunk(self);
+            if (res < 0) {
+                Py_DECREF(result);
+                goto fail;
+            }
+            /* Sets result to NULL if either operand is NULL. */
+            PyUnicode_AppendAndDel(&result,
+                                   TextIOWrapper_get_decoded_chars(
+                                       self, n - PyUnicode_GET_SIZE(result)));
+        }
+        if (result == NULL)
+            goto fail;
+    }
+
+    Py_DECREF(decoder);
+    return result;
+
+  fail:
+    Py_DECREF(decoder);
+    return NULL;
+}
+
+
+/* Iterator step: return the next line, or NULL at end of input.
+ *
+ * telling is switched off before calling self.readline().  When readline
+ * returns an empty string, the snapshot is dropped, telling is restored
+ * from seekable, and NULL is returned without setting an exception.
+ * NULL is also returned, with an exception set, when readline fails or
+ * returns something that is not a unicode string. */
+static PyObject *
+TextIOWrapper_next(PyTextIOWrapperObject *self, PyObject *args)
+{
+    PyObject *line;
+
+    self->telling = 0;
+
+    line = PyObject_CallMethod((PyObject *)self, "readline", NULL);
+    if (line == NULL)
+        return NULL;
+
+    if (!PyUnicode_Check(line)) {
+        PyErr_SetString(PyExc_IOError,
+                        "readline() should have returned a str object");
+        Py_DECREF(line);
+        return NULL;
+    }
+
+    /* Use the unicode accessor for the length; Py_SIZE is meant for
+     * variable-size objects. */
+    if (PyUnicode_GET_SIZE(line) == 0) {
+        Py_DECREF(line);        /* the empty line was leaked before */
+        Py_CLEAR(self->snapshot);
+        self->telling = self->seekable;
+        return NULL;
+    }
+
+    return line;
+}
+
+/* readline([limit]): return one line, including its line ending, as a new
+ * unicode object.
+ *
+ * limit is optional; a negative value (the default) means no limit,
+ * otherwise at most limit characters are returned.  At end of file whatever
+ * has been accumulated is returned (possibly an empty string).
+ * Returns NULL with an exception set on error.
+ *
+ * Line endings are located according to the wrapper's flags:
+ *   readtranslate  - only '\n' is searched for;
+ *   readuniversal  - '\n', '\r' and '\r\n' all end a line;
+ *   otherwise      - self->readnl is searched for.
+ * Searches use findchar(), so NUL characters in the text do not stop them.
+ */
+static PyObject *
+TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
+{
+    Py_ssize_t limit = -1;
+    PyObject *line = NULL;
+    PyObject *decoder = NULL;
+    Py_ssize_t start, endpos;
+    int res;
+
+    /* limit is optional: TextIOWrapper_next calls readline() with no
+     * arguments. */
+    if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
+        return NULL;
+    }
+
+    if (_PyIOBase_checkClosed((PyObject *)self, NULL) == NULL)
+        return NULL;
+
+    /* Grab all the decoded text (we will rewind any extra bits later). */
+    line = TextIOWrapper_get_decoded_chars(self, -1);
+    if (line == NULL)
+        return NULL;
+
+    start = 0;
+
+    /* Makes sure self->decoder exists (read_chunk requires it).  The
+     * reference obtained here is released on every exit path. */
+    decoder = TextIOWrapper_decoder_get(self);
+    if (decoder == NULL)
+        goto error;
+
+    endpos = -1;
+
+    while (1) {
+        const Py_UNICODE *ptr = PyUnicode_AS_UNICODE(line);
+        Py_ssize_t size = PyUnicode_GET_SIZE(line);
+
+        if (self->readtranslate) {
+            /* Newlines are already translated, only search for \n */
+            const Py_UNICODE *pos = findchar(ptr + start, size - start, '\n');
+            if (pos != NULL) {
+                endpos = pos - ptr + 1;
+                break;
+            }
+            else
+                start = size;
+        }
+        else if (self->readuniversal) {
+            /* Universal newline search. Find any of \r, \r\n, \n
+             * The decoder ensures that \r\n are not split in two pieces
+             */
+
+            /* In C we'd look for these in parallel of course.
+             * XXX Hey!
+             */
+            const Py_UNICODE *nlpos = findchar(ptr + start, size - start, '\n');
+            const Py_UNICODE *crpos = findchar(ptr + start, size - start, '\r');
+            if (crpos == NULL) {
+                if (nlpos == NULL) {
+                    /* Nothing found */
+                    start = size;
+                }
+                else {
+                    /* Found \n */
+                    endpos = nlpos - ptr + 1;
+                    break;
+                }
+            }
+            else if (nlpos == NULL) {
+                /* Found lone \r */
+                endpos = crpos - ptr + 1;
+                break;
+            }
+            else if (nlpos < crpos) {
+                /* Found \n */
+                endpos = nlpos - ptr + 1;
+                break;
+            }
+            else if (nlpos == crpos + 1) {
+                /* Found \r\n */
+                endpos = crpos - ptr + 2;
+                break;
+            }
+            else {
+                /* Found \r */
+                endpos = crpos - ptr + 1;
+                break;
+            }
+        }
+        else {
+            /* non-universal */
+            Py_ssize_t pos = PyUnicode_Find(line, self->readnl,
+                                            start, -1, 1);
+            if (pos >= 0) {
+                endpos = pos + PyUnicode_GET_SIZE(self->readnl);
+                break;
+            }
+        }
+
+        if (limit >= 0 && size >= limit) {
+            /* reached length limit */
+            endpos = limit;
+            break;
+        }
+
+        /* No line ending seen yet - get more data */
+        while (1) {
+            res = TextIOWrapper_read_chunk(self);
+            if (res < 0)
+                goto error;
+            if (res == 0)
+                break;
+            if (self->decoded_chars &&
+                PyUnicode_GET_SIZE(self->decoded_chars))
+                break;
+        }
+        if (res == 0) {
+            /* end of file */
+            TextIOWrapper_set_decoded_chars(self, NULL);
+            Py_CLEAR(self->snapshot);
+            Py_DECREF(decoder);
+            return line;
+        }
+        else {
+            /* Sets line to NULL on failure. */
+            PyUnicode_AppendAndDel(&line,
+                                   TextIOWrapper_get_decoded_chars(
+                                       self, -1));
+            if (line == NULL)
+                goto error;
+        }
+    }
+
+    if (limit >= 0 && endpos > limit)
+        endpos = limit; /* don't exceed limit */
+
+    /* Rewind decoded_chars to just after the line ending we found. */
+    if (TextIOWrapper_rewind_decoded_chars(
+            self, PyUnicode_GET_SIZE(line) - endpos) < 0)
+        goto error;
+    if (PyUnicode_Resize(&line, endpos) < 0)
+        goto error;
+    Py_DECREF(decoder);
+    return line;
+
+  error:
+    /* Either pointer may be NULL here. */
+    Py_XDECREF(decoder);
+    Py_XDECREF(line);
+    return NULL;
+}
Modified: sandbox/trunk/io-c/build.py
==============================================================================
--- sandbox/trunk/io-c/build.py	(original)
+++ sandbox/trunk/io-c/build.py	Tue Dec 16 23:26:10 2008
@@ -3,7 +3,7 @@
 from distutils.command.build_ext import build_ext
 
 def compile():
- sources = ['io.c', '_iobase.c', '_bufferedio.c',
+ sources = ['io.c', '_iobase.c', '_bufferedio.c', '_textio.c',
 '_fileio.c', '_bytesio.c']
 sources = [os.path.join(os.path.dirname(__file__), s) for s in sources]
 io_ext = Extension('_io', sources)
Modified: sandbox/trunk/io-c/io.c
==============================================================================
--- sandbox/trunk/io-c/io.c	(original)
+++ sandbox/trunk/io-c/io.c	Tue Dec 16 23:26:10 2008
@@ -65,8 +65,8 @@
 if (baseargs == NULL)
 return -1;
 
- if (((PyTypeObject*)PyExc_IOError)->tp_init(
- (PyObject*)self, baseargs, kwds) == -1) {
+ if (((PyTypeObject *)PyExc_IOError)->tp_init(
+ (PyObject *)self, baseargs, kwds) == -1) {
 Py_DECREF(baseargs);
 return -1;
 }
@@ -144,14 +144,13 @@
 * The main open() function
 */
 PyDoc_STRVAR(open_doc,
-"Open file and return a stream. If the file cannot be opened, an IOError is\n"
-"raised.\n"
+"Open file and return a stream. Raise IOError upon failure.\n"
 "\n"
-"file is either a string giving the name (and the path if the file\n"
-"isn't in the current working directory) of the file to be opened or an\n"
-"integer file descriptor of the file to be wrapped. (If a file\n"
-"descriptor is given, it is closed when the returned I/O object is\n"
-"closed, unless closefd is set to False.)\n"
+"file is either a text or byte string giving the name (and the path\n"
+"if the file isn't in the current working directory) of the file to\n"
+"be opened or an integer file descriptor of the file to be\n"
+"wrapped. (If a file descriptor is given, it is closed when the\n"
+"returned I/O object is closed, unless closefd is set to False.)\n"
 "\n"
 "mode is an optional string that specifies the mode in which the file\n"
 "is opened. It defaults to 'r' which means open for reading in text\n"
@@ -244,7 +243,7 @@
 );
 
 static PyObject *
-io_open(PyObject* self, PyObject *args, PyObject *kwds)
+io_open(PyObject *self, PyObject *args, PyObject *kwds)
 {
 char *kwlist[] = {"file", "mode", "buffering",
 "encoding", "errors", "newline",
@@ -271,7 +270,9 @@
 return NULL;
 }
 
- if (!PyUnicode_Check(file) && !PyNumber_Check(file)) {
+ if (!PyUnicode_Check(file) &&
+	!PyBytes_Check(file) &&
+	!PyNumber_Check(file)) {
 PyErr_Format(PyExc_TypeError, "invalid file: %R", file);
 return NULL;
 }
@@ -429,10 +430,6 @@
 goto error;
 }
 
- if (PyObject_SetAttrString(raw, "_name", file) < 0)
- goto error;
- if (PyObject_SetAttrString(raw, "_mode", modeobj) < 0)
- goto error;
 Py_DECREF(modeobj);
 return raw;
 }
@@ -469,10 +466,6 @@
 
 /* if binary, returns the buffered file */
 if (binary) {
- if (PyObject_SetAttrString(buffer, "_name", file) < 0)
- goto error;
- if (PyObject_SetAttrString(buffer, "_mode", modeobj) < 0)
- goto error;
 Py_DECREF(modeobj);
 return buffer;
 }
@@ -495,8 +488,6 @@
 if (wrapper == NULL)
 goto error;
 
- if (PyObject_SetAttrString(wrapper, "name", file) < 0)
- goto error;
 if (PyObject_SetAttrString(wrapper, "mode", modeobj) < 0)
 goto error;
 Py_DECREF(modeobj);
@@ -546,7 +537,7 @@
 
 /* UnsupportedOperation inherits from ValueError and IOError */
 PyIOExc_UnsupportedOperation = PyObject_CallFunction(
- (PyObject*)&PyType_Type, "s(OO){}",
+ (PyObject *)&PyType_Type, "s(OO){}",
 "UnsupportedOperation", PyExc_ValueError, PyExc_IOError);
 if (PyIOExc_UnsupportedOperation == NULL)
 goto fail;
@@ -554,7 +545,7 @@
 PyIOExc_UnsupportedOperation);
 
 /* BlockingIOError */
- base = (PyTypeObject*)PyExc_IOError;
+ base = (PyTypeObject *)PyExc_IOError;
 _PyExc_BlockingIOError.tp_base = base;
 if (PyType_Ready(&_PyExc_BlockingIOError) < 0)
 goto fail;
Modified: sandbox/trunk/io-c/io.py
==============================================================================
--- sandbox/trunk/io-c/io.py	(original)
+++ sandbox/trunk/io-c/io.py	Tue Dec 16 23:26:10 2008
@@ -82,14 +82,13 @@
 def unused_open(file, mode="r", buffering=None, encoding=None, errors=None,
 newline=None, closefd=True):
 
- r"""Open file and return a stream. If the file cannot be opened, an IOError is
- raised.
+ r"""Open file and return a stream. Raise IOError upon failure.
 
- file is either a string giving the name (and the path if the file
- isn't in the current working directory) of the file to be opened or an
- integer file descriptor of the file to be wrapped. (If a file
- descriptor is given, it is closed when the returned I/O object is
- closed, unless closefd is set to False.)
+ file is either a text or byte string giving the name (and the path
+ if the file isn't in the current working directory) of the file to
+ be opened or an integer file descriptor of the file to be
+ wrapped. (If a file descriptor is given, it is closed when the
+ returned I/O object is closed, unless closefd is set to False.)
 
 mode is an optional string that specifies the mode in which the file
 is opened. It defaults to 'r' which means open for reading in text
@@ -180,7 +179,7 @@
 opened in a text mode, and for bytes a BytesIO can be used like a file
 opened in a binary mode.
 """
- if not isinstance(file, (str, int)):
+ if not isinstance(file, (str, bytes, int)):
 raise TypeError("invalid file: %r" % file)
 if not isinstance(mode, str):
 raise TypeError("invalid mode: %r" % mode)
@@ -240,8 +239,6 @@
 raise ValueError("invalid buffering size")
 if buffering == 0:
 if binary:
- raw._name = file
- raw._mode = mode
 return raw
 raise ValueError("can't have unbuffered text I/O")
 if updating:
@@ -253,11 +250,8 @@
 else:
 raise ValueError("unknown mode: %r" % mode)
 if binary:
- buffer.name = file
- buffer.mode = mode
 return buffer
 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
- text.name = file
 text.mode = mode
 return text
 import _io
@@ -383,7 +377,6 @@
 except IOError:
 pass # If flush() fails, just give up
 self.__closed = True
- print("__closed")
 
 def __del__(self) -> None:
 """Destructor. Calls close()."""
@@ -625,6 +618,10 @@
 # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
 # to do since _fileio.c is written in C).
 
+ def __init__(self, name, mode="r", closefd=True):
+ _fileio._FileIO.__init__(self, name, mode, closefd)
+ self._name = name
+
 def close(self):
 _fileio._FileIO.close(self)
 RawIOBase.close(self)
@@ -633,10 +630,6 @@
 def name(self):
 return self._name
 
- # XXX(gb): _FileIO already has a mode property
- @property
- def mode(self):
- return self._mode
 FileIO = _io.FileIO
 
 
@@ -773,6 +766,14 @@
 def closed(self):
 return self.raw.closed
 
+ @property
+ def name(self):
+ return self.raw.name
+
+ @property
+ def mode(self):
+ return self.raw.mode
+
 ### Lower-level APIs ###
 
 def fileno(self):
@@ -1479,6 +1480,10 @@
 def closed(self):
 return self.buffer.closed
 
+ @property
+ def name(self):
+ return self.buffer.name
+
 def fileno(self):
 return self.buffer.fileno()
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /