[Python-checkins] r72170 - in python/branches/pep-0383: Include/unicodeobject.h Modules/_io/fileio.c Modules/posixmodule.c Objects/unicodeobject.c Python/codecs.c

martin.v.loewis python-checkins at python.org
Fri May 1 21:38:21 CEST 2009


Author: martin.v.loewis
Date: Fri May 1 21:38:20 2009
New Revision: 72170
Log:
Implement PEP 383 for file IO.
Modified:
 python/branches/pep-0383/Include/unicodeobject.h
 python/branches/pep-0383/Modules/_io/fileio.c
 python/branches/pep-0383/Modules/posixmodule.c
 python/branches/pep-0383/Objects/unicodeobject.c
 python/branches/pep-0383/Python/codecs.c
Modified: python/branches/pep-0383/Include/unicodeobject.h
==============================================================================
--- python/branches/pep-0383/Include/unicodeobject.h	(original)
+++ python/branches/pep-0383/Include/unicodeobject.h	Fri May 1 21:38:20 2009
@@ -198,6 +198,7 @@
 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -296,6 +297,7 @@
 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -693,25 +695,6 @@
 PyObject *unicode,
 const char *errors);
 
-/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
-
- If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
- UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
- invalid characters with '?'.
-
- The function is intended to be used for paths and file names only
- during bootstrapping process where the codecs are not set up.
-*/
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
- const char *s /* encoded string */
- );
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
- const char *s, /* encoded string */
- Py_ssize_t size /* size */
- );
-
 /* Returns a pointer to the default encoding (normally, UTF-8) of the
 Unicode object unicode and the size of the encoded representation
 in bytes stored in *size.
@@ -1254,6 +1237,33 @@
 const char *errors		/* error handling */
 );
 
+/* --- File system encoding ---------------------------------------------- */
+
+/* ParseTuple converter which converts a Unicode object into the file
+ system encoding, using the PEP 383 error handler; bytes objects are
+ output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+ If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+ UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
+ invalid characters with '?'.
+
+ The function is intended to be used for paths and file names only
+ during bootstrapping process where the codecs are not set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+ const char *s /* encoded string */
+ );
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
+ const char *s, /* encoded string */
+ Py_ssize_t size /* size */
+ );
+
 /* --- Methods & Slots ----------------------------------------------------
 
 These are capable of handling Unicode objects and strings on input
Modified: python/branches/pep-0383/Modules/_io/fileio.c
==============================================================================
--- python/branches/pep-0383/Modules/_io/fileio.c	(original)
+++ python/branches/pep-0383/Modules/_io/fileio.c	Fri May 1 21:38:20 2009
@@ -245,7 +245,7 @@
 				return -1;
 
 			stringobj = PyUnicode_AsEncodedString(
-				u, Py_FileSystemDefaultEncoding, NULL);
+				u, Py_FileSystemDefaultEncoding, "utf8b");
 			Py_DECREF(u);
 			if (stringobj == NULL)
 				return -1;
Modified: python/branches/pep-0383/Modules/posixmodule.c
==============================================================================
--- python/branches/pep-0383/Modules/posixmodule.c	(original)
+++ python/branches/pep-0383/Modules/posixmodule.c	Fri May 1 21:38:20 2009
@@ -534,6 +534,36 @@
 	return d;
 }
 
+/* Convert a bytes object to a char*. Optionally lock the buffer if it is a
+ bytes array. */
+
+static char*
+bytes2str(PyObject* o, int lock)
+{
+	if(PyBytes_Check(o))
+		return PyBytes_AsString(o);
+	else if(PyByteArray_Check(o)) {
+		if (lock && o->ob_type->tp_as_buffer->bf_getbuffer(o, NULL, 0) < 0)
+			/* On a bytearray, this should not fail. */
+			PyErr_BadInternalCall();
+		return PyByteArray_AsString(o);
+	} else {
+		/* The FS converter should have verified that this
+		 is either bytes or bytearray. */
+		PyErr_BadInternalCall();
+		return NULL;
+	}
+}
+
+/* Release the lock, decref the object. */
+static void
+release_bytes(PyObject* o)
+{
+	if (PyByteArray_Check(o))
+		o->ob_type->tp_as_buffer->bf_releasebuffer(NULL, 0);
+	Py_DECREF(o);
+}
+
 
 /* Set a POSIX-specific error from errno, and return NULL */
 
@@ -558,10 +588,11 @@
 
 
 static PyObject *
-posix_error_with_allocated_filename(char* name)
+posix_error_with_allocated_filename(PyObject* name)
 {
-	PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
-	PyMem_Free(name);
+	PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, 
+						 bytes2str(name, 0));
+	release_bytes(name);
 	return rc;
 }
 
@@ -728,17 +759,19 @@
 static PyObject *
 posix_1str(PyObject *args, char *format, int (*func)(const char*))
 {
-	char *path1 = NULL;
+	PyObject *opath1 = NULL;
+	char *path1;
 	int res;
 	if (!PyArg_ParseTuple(args, format,
-	 Py_FileSystemDefaultEncoding, &path1))
+	 PyUnicode_FSConverter, &opath1))
 		return NULL;
+	path1 = bytes2str(opath1, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*func)(path1);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path1);
-	PyMem_Free(path1);
+		return posix_error_with_allocated_filename(opath1);
+	release_bytes(opath1);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -748,17 +781,20 @@
 	 char *format,
 	 int (*func)(const char *, const char *))
 {
-	char *path1 = NULL, *path2 = NULL;
+	PyObject *opath1, *opath2;
+	char *path1, *path2;
 	int res;
 	if (!PyArg_ParseTuple(args, format,
-	 Py_FileSystemDefaultEncoding, &path1,
-	 Py_FileSystemDefaultEncoding, &path2))
+	 PyUnicode_FSConverter, &opath1,
+	 PyUnicode_FSConverter, &opath2))
 		return NULL;
+	path1 = bytes2str(opath1, 1);
+	path2 = bytes2str(opath2, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*func)(path1, path2);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path1);
-	PyMem_Free(path2);
+	release_bytes(opath1);
+	release_bytes(opath2);
 	if (res != 0)
 		/* XXX how to report both path1 and path2??? */
 		return posix_error();
@@ -1560,8 +1596,8 @@
 	 int (*wstatfunc)(const Py_UNICODE *, STRUCT_STAT *))
 {
 	STRUCT_STAT st;
-	char *path = NULL;	/* pass this to stat; do not free() it */
-	char *pathfree = NULL; /* this memory must be free'd */
+	PyObject *opath;
+	char *path;
 	int res;
 	PyObject *result;
 
@@ -1590,25 +1626,24 @@
 #endif
 
 	if (!PyArg_ParseTuple(args, format,
-	 Py_FileSystemDefaultEncoding, &path))
+	 PyUnicode_FSConverter, &opath))
 		return NULL;
-	pathfree = path;
-
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = (*statfunc)(path, &st);
 	Py_END_ALLOW_THREADS
 
 	if (res != 0) {
 #ifdef MS_WINDOWS
-		result = win32_error("stat", pathfree);
+		result = win32_error("stat", path);
 #else
-		result = posix_error_with_filename(pathfree);
+		result = posix_error_with_filename(path);
 #endif
 	} 
 	else
 		result = _pystat_fromstructstat(&st);
 
-	PyMem_Free(pathfree);
+	release_bytes(opath);
 	return result;
 }
 
@@ -1625,6 +1660,7 @@
 static PyObject *
 posix_access(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	int mode;
 	
@@ -1644,13 +1680,14 @@
 		 are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "eti:access",
-			 Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&i:access",
+			 PyUnicode_FSConverter, &opath, &mode))
 		return 0;
+	path = bytes2str(opath);
 	Py_BEGIN_ALLOW_THREADS
 	attr = GetFileAttributesA(path);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path);
+	release_bytes(opath);
 finish:
 	if (attr == 0xFFFFFFFF)
 		/* File does not exist, or cannot read attributes */
@@ -1663,13 +1700,14 @@
 			 || (attr & FILE_ATTRIBUTE_DIRECTORY));
 #else
 	int res;
-	if (!PyArg_ParseTuple(args, "eti:access", 
-			 Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&i:access", 
+			 PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = access(path, mode);
 	Py_END_ALLOW_THREADS
-	PyMem_Free(path);
+	release_bytes(opath);
 	return PyBool_FromLong(res == 0);
 #endif
 }
@@ -1750,11 +1788,11 @@
 #ifdef MS_WINDOWS
 	return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
 #elif defined(PYOS_OS2) && defined(PYCC_GCC)
-	return posix_1str(args, "et:chdir", _chdir2);
+	return posix_1str(args, "O&:chdir", _chdir2);
 #elif defined(__VMS)
-	return posix_1str(args, "et:chdir", (int (*)(const char *))chdir);
+	return posix_1str(args, "O&:chdir", (int (*)(const char *))chdir);
 #else
-	return posix_1str(args, "et:chdir", chdir);
+	return posix_1str(args, "O&:chdir", chdir);
 #endif
 }
 
@@ -1779,6 +1817,7 @@
 static PyObject *
 posix_chmod(PyObject *self, PyObject *args)
 {
+	PyObject *opath = NULL;
 	char *path = NULL;
 	int i;
 	int res;
@@ -1809,9 +1848,10 @@
 		 are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-	 &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+	 &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	attr = GetFileAttributesA(path);
 	if (attr != 0xFFFFFFFF) {
@@ -1826,22 +1866,23 @@
 	Py_END_ALLOW_THREADS
 	if (!res) {
 		win32_error("chmod", path);
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #else /* Py_WIN_WIDE_FILENAMES */
-	if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-	 &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+	 &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chmod(path, i);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #endif
@@ -1877,18 +1918,20 @@
 static PyObject *
 posix_lchmod(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int i;
 	int res;
-	if (!PyArg_ParseTuple(args, "eti:lchmod", Py_FileSystemDefaultEncoding,
-	 &path, &i))
+	if (!PyArg_ParseTuple(args, "O&i:lchmod", PyUnicode_FSConverter,
+	 &opath, &i))
 		return NULL;
+	path = bytes2str(opath, 1)
 	Py_BEGIN_ALLOW_THREADS
 	res = lchmod(path, i);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_RETURN_NONE;
 }
 #endif /* HAVE_LCHMOD */
@@ -1902,18 +1945,20 @@
 static PyObject *
 posix_chflags(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	unsigned long flags;
 	int res;
-	if (!PyArg_ParseTuple(args, "etk:chflags",
-			 Py_FileSystemDefaultEncoding, &path, &flags))
+	if (!PyArg_ParseTuple(args, "O&k:chflags",
+			 PyUnicode_FSConverter, &opath, &flags))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chflags(path, flags);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -1928,18 +1973,20 @@
 static PyObject *
 posix_lchflags(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	unsigned long flags;
 	int res;
-	if (!PyArg_ParseTuple(args, "etk:lchflags",
-			 Py_FileSystemDefaultEncoding, &path, &flags))
+	if (!PyArg_ParseTuple(args, "O&k:lchflags",
+			 PyUnicode_FSConverter, &path, &flags))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = lchflags(path, flags);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -1953,7 +2000,7 @@
 static PyObject *
 posix_chroot(PyObject *self, PyObject *args)
 {
-	return posix_1str(args, "et:chroot", chroot);
+	return posix_1str(args, "O&:chroot", chroot);
 }
 #endif
 
@@ -1996,19 +2043,21 @@
 static PyObject *
 posix_chown(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	long uid, gid;
 	int res;
-	if (!PyArg_ParseTuple(args, "etll:chown",
-	 Py_FileSystemDefaultEncoding, &path,
+	if (!PyArg_ParseTuple(args, "O&ll:chown",
+	 PyUnicode_FSConverter, &opath,
 	 &uid, &gid))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = chown(path, (uid_t) uid, (gid_t) gid);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -2045,19 +2094,21 @@
 static PyObject *
 posix_lchown(PyObject *self, PyObject *args)
 {
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int uid, gid;
 	int res;
-	if (!PyArg_ParseTuple(args, "etii:lchown",
-	 Py_FileSystemDefaultEncoding, &path,
+	if (!PyArg_ParseTuple(args, "O&ii:lchown",
+	 PyUnicode_FSConverter, &opath,
 	 &uid, &gid))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	res = lchown(path, (uid_t) uid, (gid_t) gid);
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
@@ -2113,7 +2164,7 @@
 		return posix_error();
 	if (use_bytes)
 		return PyBytes_FromStringAndSize(buf, strlen(buf));
-	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
+	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
 }
 
 PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2146,7 +2197,7 @@
 static PyObject *
 posix_link(PyObject *self, PyObject *args)
 {
-	return posix_2str(args, "etet:link", link);
+	return posix_2str(args, "O&O&:link", link);
 }
 #endif /* HAVE_LINK */
 
@@ -2259,9 +2310,16 @@
 	}
 #endif
 
-	if (!PyArg_ParseTuple(args, "et#:listdir",
-	 Py_FileSystemDefaultEncoding, &bufptr, &len))
+	if (!PyArg_ParseTuple(args, "O&:listdir",
+	 PyUnicode_FSConverter, &opath))
 		return NULL;
+	if (PyObject_Size(opath)+1 > MAX_PATH) {
+		PyErr_SetString(PyExc_ValueError, "path too long");
+		Py_DECREF(opath);
+		return NULL;
+	}
+	strcpy(namebuf, bytes2str(opath, 0));
+	len = PyObject_Size(opath);
 	if (len > 0) {
 		char ch = namebuf[len-1];
 		if (ch != SEP && ch != ALTSEP && ch != ':')
@@ -2323,6 +2381,7 @@
 #ifndef MAX_PATH
 #define MAX_PATH CCHMAXPATH
 #endif
+ PyObject *oname;
 char *name, *pt;
 Py_ssize_t len;
 PyObject *d, *v;
@@ -2332,11 +2391,13 @@
 FILEFINDBUF3 ep;
 APIRET rc;
 
- if (!PyArg_ParseTuple(args, "et#:listdir", 
- Py_FileSystemDefaultEncoding, &name, &len))
+ if (!PyArg_ParseTuple(args, "O&:listdir", 
+ PyUnicode_FSConverter, &oname))
 return NULL;
+ name = bytes2str(oname);
+ len = PyObject_Size(oname);
 if (len >= MAX_PATH) {
- PyMem_Free(name);
+ release_bytes(oname);
 PyErr_SetString(PyExc_ValueError, "path too long");
 return NULL;
 }
@@ -2349,7 +2410,7 @@
 strcpy(namebuf + len, "*.*");
 
 if ((d = PyList_New(0)) == NULL) {
- PyMem_Free(name);
+ release_bytes(oname);
 return NULL;
 }
 
@@ -2362,7 +2423,7 @@
 
 if (rc != NO_ERROR) {
 errno = ENOENT;
- return posix_error_with_allocated_filename(name);
+ return posix_error_with_allocated_filename(oname);
 }
 
 if (srchcnt > 0) { /* If Directory is NOT Totally Empty, */
@@ -2392,11 +2453,11 @@
 } while (DosFindNext(hdir, &ep, sizeof(ep), &srchcnt) == NO_ERROR && srchcnt > 0);
 }
 
- PyMem_Free(name);
+ release_bytes(oname);
 return d;
 #else
-
-	char *name = NULL;
+	PyObject *oname;
+	char *name;
 	PyObject *d, *v;
 	DIR *dirp;
 	struct dirent *ep;
@@ -2407,14 +2468,15 @@
 		arg_is_unicode = 0;
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "et:listdir", Py_FileSystemDefaultEncoding, &name))
+	if (!PyArg_ParseTuple(args, "O&:listdir", PyUnicode_FSConverter, &oname))
 		return NULL;
+	name = bytes2str(oname, 1);
 	if ((dirp = opendir(name)) == NULL) {
-		return posix_error_with_allocated_filename(name);
+		return posix_error_with_allocated_filename(oname);
 	}
 	if ((d = PyList_New(0)) == NULL) {
 		closedir(dirp);
-		PyMem_Free(name);
+		release_bytes(oname);
 		return NULL;
 	}
 	for (;;) {
@@ -2428,7 +2490,7 @@
 			} else {
 				closedir(dirp);
 				Py_DECREF(d);
-				return posix_error_with_allocated_filename(name);
+				return posix_error_with_allocated_filename(oname);
 			}
 		}
 		if (ep->d_name[0] == '.' &&
@@ -2446,7 +2508,7 @@
 
 			w = PyUnicode_FromEncodedObject(v,
 					Py_FileSystemDefaultEncoding,
-					"strict");
+					"utf8b");
 			if (w != NULL) {
 				Py_DECREF(v);
 				v = w;
@@ -2469,7 +2531,7 @@
 		Py_DECREF(v);
 	}
 	closedir(dirp);
-	PyMem_Free(name);
+	release_bytes(oname);
 
 	return d;
 
@@ -2481,10 +2543,8 @@
 static PyObject *
 posix__getfullpathname(PyObject *self, PyObject *args)
 {
-	/* assume encoded strings won't more than double no of chars */
-	char inbuf[MAX_PATH*2];
-	char *inbufp = inbuf;
-	Py_ssize_t insize = sizeof(inbuf);
+	PyObject *opath;
+	char *path;
 	char outbuf[MAX_PATH*2];
 	char *temp;
 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2518,13 +2578,17 @@
 		PyErr_Clear();
 	}
 #endif
-	if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
-	 Py_FileSystemDefaultEncoding, &inbufp,
-	 &insize))
-		return NULL;
-	if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
-	 outbuf, &temp))
-		return win32_error("GetFullPathName", inbuf);
+	if (!PyArg_ParseTuple (args, "O&:_getfullpathname",
+	 PyUnicode_FSConverter, &opath))
+		return NULL;
+	path = bytes2str(opath, 1);
+	if (!GetFullPathName(path, sizeof(outbuf)/sizeof(outbuf[0]),
+	 outbuf, &temp)) {
+		win32_error("GetFullPathName", path);
+		release_bytes(opath);
+		return NULL;
+	}
+	release_bytes(path);
 	if (PyUnicode_Check(PyTuple_GetItem(args, 0))) {
 		return PyUnicode_Decode(outbuf, strlen(outbuf),
 			Py_FileSystemDefaultEncoding, NULL);
@@ -2541,7 +2605,8 @@
 posix_mkdir(PyObject *self, PyObject *args)
 {
 	int res;
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	int mode = 0777;
 
 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2562,9 +2627,10 @@
 		 are also valid. */
 		PyErr_Clear();
 	}
-	if (!PyArg_ParseTuple(args, "et|i:mkdir",
-	 Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+	 PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 	/* PyUnicode_AS_UNICODE OK without thread lock as
 	 it is a simple dereference. */
@@ -2572,17 +2638,18 @@
 	Py_END_ALLOW_THREADS
 	if (!res) {
 		win32_error("mkdir", path);
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #else
 
-	if (!PyArg_ParseTuple(args, "et|i:mkdir",
-	 Py_FileSystemDefaultEncoding, &path, &mode))
+	if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+	 PyUnicode_FSConverter, &opath, &mode))
 		return NULL;
+	path = bytes2str(opath, 1);
 	Py_BEGIN_ALLOW_THREADS
 #if ( defined(__WATCOMC__) || defined(PYCC_VACPP) ) && !defined(__QNX__)
 	res = mkdir(path);
@@ -2591,8 +2658,8 @@
 #endif
 	Py_END_ALLOW_THREADS
 	if (res < 0)
-		return posix_error_with_allocated_filename(path);
-	PyMem_Free(path);
+		return posix_error_with_allocated_filename(opath);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #endif
@@ -2684,7 +2751,7 @@
 	Py_INCREF(Py_None);
 	return Py_None;
 #else
-	return posix_2str(args, "etet:rename", rename);
+	return posix_2str(args, "O&O&:rename", rename);
 #endif
 }
 
@@ -2699,7 +2766,7 @@
 #ifdef MS_WINDOWS
 	return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
 #else
-	return posix_1str(args, "et:rmdir", rmdir);
+	return posix_1str(args, "O&:rmdir", rmdir);
 #endif
 }
 
@@ -2712,9 +2779,9 @@
 posix_stat(PyObject *self, PyObject *args)
 {
 #ifdef MS_WINDOWS
-	return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
+	return posix_do_stat(self, args, "O&:stat", STAT, "U:stat", win32_wstat);
 #else
-	return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
+	return posix_do_stat(self, args, "O&:stat", STAT, NULL, NULL);
 #endif
 }
 
@@ -2780,7 +2847,7 @@
 #ifdef MS_WINDOWS
 	return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
 #else
-	return posix_1str(args, "et:remove", unlink);
+	return posix_1str(args, "O&:remove", unlink);
 #endif
 }
 
@@ -2852,7 +2919,8 @@
 	PyObject *arg;
 	PyUnicodeObject *obwpath;
 	wchar_t *wpath = NULL;
-	char *apath = NULL;
+	PyObject *oapath;
+	char *apath;
 	HANDLE hFile;
 	long atimesec, mtimesec, ausec, musec;
 	FILETIME atime, mtime;
@@ -2874,9 +2942,10 @@
 			PyErr_Clear();
 	}
 	if (!wpath) {
-		if (!PyArg_ParseTuple(args, "etO:utime",
-				Py_FileSystemDefaultEncoding, &apath, &arg))
+		if (!PyArg_ParseTuple(args, "O&O:utime",
+				PyUnicode_FSConverter, &oapath, &arg))
 			return NULL;
+		apath = bytes2str(oapath);
 		Py_BEGIN_ALLOW_THREADS
 		hFile = CreateFileA(apath, FILE_WRITE_ATTRIBUTES, 0,
 				 NULL, OPEN_EXISTING,
@@ -2884,10 +2953,10 @@
 		Py_END_ALLOW_THREADS
 		if (hFile == INVALID_HANDLE_VALUE) {
 			win32_error("utime", apath);
-			PyMem_Free(apath);
+			release(oapath);
 			return NULL;
 		}
-		PyMem_Free(apath);
+		release_bytes(oapath);
 	}
 	
 	if (arg == Py_None) {
@@ -2928,7 +2997,8 @@
 	return result;
 #else /* Py_WIN_WIDE_FILENAMES */
 
-	char *path = NULL;
+	PyObject *opath;
+	char *path;
 	long atime, mtime, ausec, musec;
 	int res;
 	PyObject* arg;
@@ -2951,9 +3021,10 @@
 #endif /* HAVE_UTIMES */
 
 
-	if (!PyArg_ParseTuple(args, "etO:utime",
-				 Py_FileSystemDefaultEncoding, &path, &arg))
+	if (!PyArg_ParseTuple(args, "O&O:utime",
+				 PyUnicode_FSConverter, &opath, &arg))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (arg == Py_None) {
 		/* optional time values not given */
 		Py_BEGIN_ALLOW_THREADS
@@ -2963,18 +3034,18 @@
 	else if (!PyTuple_Check(arg) || PyTuple_Size(arg) != 2) {
 		PyErr_SetString(PyExc_TypeError,
 				"utime() arg 2 must be a tuple (atime, mtime)");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
 	else {
 		if (extract_time(PyTuple_GET_ITEM(arg, 0),
 				 &atime, &ausec) == -1) {
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 		if (extract_time(PyTuple_GET_ITEM(arg, 1),
 				 &mtime, &musec) == -1) {
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 		ATIME = atime;
@@ -2992,9 +3063,9 @@
 #endif /* HAVE_UTIMES */
 	}
 	if (res < 0) {
-		return posix_error_with_allocated_filename(path);
+		return posix_error_with_allocated_filename(opath);
 	}
-	PyMem_Free(path);
+	release_bytes(opath);
 	Py_INCREF(Py_None);
 	return Py_None;
 #undef UTIME_ARG
@@ -3029,6 +3100,22 @@
 		PyMem_Free(array[i]);
 	PyMem_DEL(array);
 }
+
+int fsconvert_strdup(PyObject *o, char**out)
+{
+	PyObject *bytes;
+	Py_ssize_t size;
+	if (!PyUnicode_FSConverter(o, &bytes))
+		return 0;
+	size = PyObject_Size(bytes);
+	*out = PyMem_Malloc(size+1);
+	if (!*out)
+		return 0;
+	/* Don't lock bytes, as we hold the GIL */
+	memcpy(*out, bytes2str(bytes, 0), size+1);
+	Py_DECREF(bytes);
+	return 1;
+}
 #endif
 
 
@@ -3043,6 +3130,7 @@
 static PyObject *
 posix_execv(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
 	char **argvlist;
@@ -3052,10 +3140,11 @@
 	/* execv has two arguments: (path, argv), where
 	 argv is a list or tuple of strings. */
 
-	if (!PyArg_ParseTuple(args, "etO:execv",
- Py_FileSystemDefaultEncoding,
- &path, &argv))
+	if (!PyArg_ParseTuple(args, "O&O:execv",
+ PyUnicode_FSConverter,
+ &opath, &argv))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3066,28 +3155,27 @@
 	}
 	else {
 		PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list");
- PyMem_Free(path);
+ release_bytes(opath);
 		return NULL;
 	}
 	if (argc < 1) {
 		PyErr_SetString(PyExc_ValueError, "execv() arg 2 must not be empty");
- PyMem_Free(path);
+ release_bytes(opath);
 		return NULL;
 	}
 
 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &argvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(PyExc_TypeError,
 					"execv() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 
 		}
@@ -3099,7 +3187,7 @@
 	/* If we get here it's definitely an error */
 
 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);
 	return posix_error();
 }
 
@@ -3115,6 +3203,7 @@
 static PyObject *
 posix_execve(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3128,10 +3217,11 @@
 	 argv is a list or tuple of strings and env is a dictionary
 	 like posix.environ. */
 
-	if (!PyArg_ParseTuple(args, "etOO:execve",
-			 Py_FileSystemDefaultEncoding,
-			 &path, &argv, &env))
+	if (!PyArg_ParseTuple(args, "O&OO:execve",
+			 PyUnicode_FSConverter,
+			 &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath, 1);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3157,10 +3247,8 @@
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-				 "et;execve() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3242,7 +3330,7 @@
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
 fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return NULL;
 }
 #endif /* HAVE_EXECV */
@@ -3260,8 +3348,10 @@
 static PyObject *
 posix_spawnv(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
+	PyObject **oargvlist;
 	char **argvlist;
 	int mode, i;
 	Py_ssize_t argc;
@@ -3271,10 +3361,11 @@
 	/* spawnv has three arguments: (mode, path, argv), where
 	 argv is a list or tuple of strings. */
 
-	if (!PyArg_ParseTuple(args, "ietO:spawnv", &mode,
-			 Py_FileSystemDefaultEncoding,
-			 &path, &argv))
+	if (!PyArg_ParseTuple(args, "iO&O:spawnv", &mode,
+			 PyUnicode_FSConverter,
+			 &opath, &argv))
 		return NULL;
+	path = bytes2str(opath);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3286,24 +3377,23 @@
 	else {
 		PyErr_SetString(PyExc_TypeError,
 				"spawnv() arg 2 must be a tuple or list");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
 
 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &oargvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(
 				PyExc_TypeError,
 				"spawnv() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 	}
@@ -3323,7 +3413,7 @@
 #endif
 
 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);
 
 	if (spawnval == -1)
 		return posix_error();
@@ -3348,6 +3438,7 @@
 static PyObject *
 posix_spawnve(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3363,10 +3454,11 @@
 	 argv is a list or tuple of strings and env is a dictionary
 	 like posix.environ. */
 
-	if (!PyArg_ParseTuple(args, "ietOO:spawnve", &mode,
-			 Py_FileSystemDefaultEncoding,
-			 &path, &argv, &env))
+	if (!PyArg_ParseTuple(args, "iO&OO:spawnve", &mode,
+			 PyUnicode_FSConverter,
+			 &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3392,10 +3484,8 @@
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-			 "et;spawnve() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3485,7 +3575,7 @@
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
 fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return res;
 }
 
@@ -3503,6 +3593,7 @@
 static PyObject *
 posix_spawnvp(PyObject *self, PyObject *args)
 {
+	PyObject *opath;
 	char *path;
 	PyObject *argv;
 	char **argvlist;
@@ -3513,10 +3604,11 @@
 	/* spawnvp has three arguments: (mode, path, argv), where
 	 argv is a list or tuple of strings. */
 
-	if (!PyArg_ParseTuple(args, "ietO:spawnvp", &mode,
-			 Py_FileSystemDefaultEncoding,
-			 &path, &argv))
+	if (!PyArg_ParseTuple(args, "iO&O:spawnvp", &mode,
+			 PyUnicode_FSConverter,
+			 &opath, &argv))
 		return NULL;
+	path = bytes2str(opath);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3528,24 +3620,23 @@
 	else {
 		PyErr_SetString(PyExc_TypeError,
 				"spawnvp() arg 2 must be a tuple or list");
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
 
 	argvlist = PyMem_NEW(char *, argc+1);
 	if (argvlist == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return PyErr_NoMemory();
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i), "et",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i])) {
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &argvlist[i])) {
 			free_string_array(argvlist, i);
 			PyErr_SetString(
 				PyExc_TypeError,
 				"spawnvp() arg 2 must contain only strings");
-			PyMem_Free(path);
+			release_bytes(opath);
 			return NULL;
 		}
 	}
@@ -3560,7 +3651,7 @@
 	Py_END_ALLOW_THREADS
 
 	free_string_array(argvlist, argc);
-	PyMem_Free(path);
+	release_bytes(opath);
 
 	if (spawnval == -1)
 		return posix_error();
@@ -3582,6 +3673,7 @@
 static PyObject *
 posix_spawnvpe(PyObject *self, PyObject *args)
 {
+	PyObject *opath
 	char *path;
 	PyObject *argv, *env;
 	char **argvlist;
@@ -3597,9 +3689,10 @@
 	 like posix.environ. */
 
 	if (!PyArg_ParseTuple(args, "ietOO:spawnvpe", &mode,
-			 Py_FileSystemDefaultEncoding,
-			 &path, &argv, &env))
+			 PyUnicode_FSConverter,
+			 &opath, &argv, &env))
 		return NULL;
+	path = bytes2str(opath);
 	if (PyList_Check(argv)) {
 		argc = PyList_Size(argv);
 		getitem = PyList_GetItem;
@@ -3625,10 +3718,8 @@
 		goto fail_0;
 	}
 	for (i = 0; i < argc; i++) {
-		if (!PyArg_Parse((*getitem)(argv, i),
-			 "et;spawnvpe() arg 2 must contain only strings",
-				 Py_FileSystemDefaultEncoding,
-				 &argvlist[i]))
+		if (!fsconvert_strdup((*getitem)(argv, i),
+				 &argvlist[i]))
 		{
 			lastarg = i;
 			goto fail_1;
@@ -3709,7 +3800,7 @@
 	Py_XDECREF(vals);
 	Py_XDECREF(keys);
 fail_0:
-	PyMem_Free(path);
+	release_bytes(opath);
 	return res;
 }
 #endif /* PYOS_OS2 */
@@ -4548,12 +4639,12 @@
 posix_lstat(PyObject *self, PyObject *args)
 {
 #ifdef HAVE_LSTAT
-	return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
+	return posix_do_stat(self, args, "O&:lstat", lstat, NULL, NULL);
 #else /* !HAVE_LSTAT */
 #ifdef MS_WINDOWS
-	return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
+	return posix_do_stat(self, args, "O&:lstat", STAT, "U:lstat", win32_wstat);
 #else
-	return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
+	return posix_do_stat(self, args, "O&:lstat", STAT, NULL, NULL);
 #endif
 #endif /* !HAVE_LSTAT */
 }
@@ -4569,16 +4660,18 @@
 {
 	PyObject* v;
 	char buf[MAXPATHLEN];
+	PyObject *opath;
 	char *path;
 	int n;
 	int arg_is_unicode = 0;
 
-	if (!PyArg_ParseTuple(args, "et:readlink", 
-				Py_FileSystemDefaultEncoding, &path))
+	if (!PyArg_ParseTuple(args, "O&:readlink", 
+				PyUnicode_FSConverter, &opath))
 		return NULL;
+	path = bytes2str(opath, 1);
 	v = PySequence_GetItem(args, 0);
 	if (v == NULL) {
-		PyMem_Free(path);
+		release_bytes(opath);
 		return NULL;
 	}
 
@@ -4591,16 +4684,16 @@
 	n = readlink(path, buf, (int) sizeof buf);
 	Py_END_ALLOW_THREADS
 	if (n < 0)
-		return posix_error_with_allocated_filename(path);
+		return posix_error_with_allocated_filename(opath);
 
-	PyMem_Free(path);
+	release_bytes(opath);
 	v = PyBytes_FromStringAndSize(buf, n);
 	if (arg_is_unicode) {
 		PyObject *w;
 
 		w = PyUnicode_FromEncodedObject(v,
 				Py_FileSystemDefaultEncoding,
-				"strict");
+				"utf8b");
 		if (w != NULL) {
 			Py_DECREF(v);
 			v = w;
@@ -4622,7 +4715,7 @@
 static PyObject *
 posix_symlink(PyObject *self, PyObject *args)
 {
-	return posix_2str(args, "etet:symlink", symlink);
+	return posix_2str(args, "O&O&:symlink", symlink);
 }
 #endif /* HAVE_SYMLINK */
 
@@ -4810,7 +4903,8 @@
 static PyObject *
 posix_open(PyObject *self, PyObject *args)
 {
-	char *file = NULL;
+	PyObject *ofile;
+	char *file;
 	int flag;
 	int mode = 0777;
 	int fd;
@@ -4834,17 +4928,17 @@
 	}
 #endif
 
-	if (!PyArg_ParseTuple(args, "eti|i",
-	 Py_FileSystemDefaultEncoding, &file,
+	if (!PyArg_ParseTuple(args, "O&i|i",
+	 PyUnicode_FSConverter, &ofile,
 	 &flag, &mode))
 		return NULL;
-
+	file = bytes2str(ofile, 1);
 	Py_BEGIN_ALLOW_THREADS
 	fd = open(file, flag, mode);
 	Py_END_ALLOW_THREADS
 	if (fd < 0)
-		return posix_error_with_allocated_filename(file);
-	PyMem_Free(file);
+		return posix_error_with_allocated_filename(ofile);
+	release_bytes(ofile);
 	return PyLong_FromLong((long)fd);
 }
 
@@ -6728,20 +6822,21 @@
 #endif
 
 normal:
-	if (!PyArg_ParseTuple(args, "et|s:startfile", 
-			 Py_FileSystemDefaultEncoding, &filepath, 
+	if (!PyArg_ParseTuple(args, "O&|s:startfile", 
+			 PyUnicode_FSConverter, &ofilepath, 
 			 &operation))
 		return NULL;
+	filepath = bytes2str(ofilepath);
 	Py_BEGIN_ALLOW_THREADS
 	rc = ShellExecute((HWND)0, operation, filepath, 
 			 NULL, NULL, SW_SHOWNORMAL);
 	Py_END_ALLOW_THREADS
 	if (rc <= (HINSTANCE)32) {
 		PyObject *errval = win32_error("startfile", filepath);
-		PyMem_Free(filepath);
+		release_bytes(ofilepath);
 		return errval;
 	}
-	PyMem_Free(filepath);
+	release_bytes(ofilepath);
 	Py_INCREF(Py_None);
 	return Py_None;
 }
Modified: python/branches/pep-0383/Objects/unicodeobject.c
==============================================================================
--- python/branches/pep-0383/Objects/unicodeobject.c	(original)
+++ python/branches/pep-0383/Objects/unicodeobject.c	Fri May 1 21:38:20 2009
@@ -1548,6 +1548,53 @@
 }
 }
 
+/* Convert the argument to a bytes object, according to the file
+ system encoding */
+
+int
+PyUnicode_FSConverter(PyObject* arg, void* addr)
+{
+ PyObject *output = NULL;
+ Py_ssize_t size;
+ void *data;
+ if (PyBytes_Check(arg) || PyByteArray_Check(arg)) {
+ output = arg;
+ Py_INCREF(output);
+ }
+ else {
+ arg = PyUnicode_FromObject(arg);
+ if (!arg)
+ return 0;
+ output = PyUnicode_AsEncodedObject(arg, 
+ Py_FileSystemDefaultEncoding,
+ "utf8b");
+ Py_DECREF(arg);
+ if (!output)
+ return 0;
+ if (!PyBytes_Check(output)) {
+ Py_DECREF(output);
+ PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
+ return 0;
+ }
+ }
+ if (PyBytes_Check(output)) {
+ size = PyBytes_GET_SIZE(output);
+ data = PyBytes_AS_STRING(output);
+ } 
+ else {
+ size = PyByteArray_GET_SIZE(output);
+ data = PyByteArray_AS_STRING(output);
+ }
+ if (size != strlen(data)) {
+ PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+ Py_DECREF(output);
+ return 0;
+ }
+ *(PyObject**)addr = output;
+ return 1;
+}
+
+
 char*
 _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
Modified: python/branches/pep-0383/Python/codecs.c
==============================================================================
--- python/branches/pep-0383/Python/codecs.c	(original)
+++ python/branches/pep-0383/Python/codecs.c	Fri May 1 21:38:20 2009
@@ -823,6 +823,75 @@
 }
 }
 
+PyObject *PyCodec_UTF8bErrors(PyObject *exc)
+{
+ PyObject *restuple;
+ PyObject *object;
+ Py_ssize_t start;
+ Py_ssize_t end;
+ PyObject *res;
+ if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+	Py_UNICODE *p;
+	Py_UNICODE *startp;
+	char *outp;
+	if (PyUnicodeEncodeError_GetStart(exc, &start))
+	 return NULL;
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	 return NULL;
+	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+	 return NULL;
+	startp = PyUnicode_AS_UNICODE(object);
+	res = PyBytes_FromStringAndSize(NULL, end-start);
+	if (!res)
+	 return NULL;
+	outp = PyBytes_AsString(res);
+	for (p = startp+start; p < startp+end; p++) {
+	 Py_UNICODE ch = *p;
+	 if (ch < 0xdc80 || ch > 0xdcff) {
+		/* Not a UTF-8b surrogate, fail with original exception */
+		PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+		Py_DECREF(res);
+		Py_DECREF(object);
+		return NULL;
+	 }
+	 *outp++ = ch - 0xdc00;
+	}
+	restuple = Py_BuildValue("(On)", res, end);
+	Py_DECREF(res);
+	Py_DECREF(object);
+	return restuple;
+ }
+ else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+	unsigned char *p;
+	Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+	int consumed = 0;
+	if (PyUnicodeDecodeError_GetStart(exc, &start))
+	 return NULL;
+	if (PyUnicodeDecodeError_GetEnd(exc, &end))
+	 return NULL;
+	if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+	 return NULL;
+	if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+	 Py_DECREF(object);
+	 return NULL;
+	}
+	while (consumed < 4 && consumed < end-start) {
+	 ch[consumed] = 0xdc00 + p[start+consumed];
+	 consumed++;
+	}
+	Py_DECREF(object);
+	if (ch == 0) {
+	 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+	 return NULL;
+	}
+	return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+ }
+ else {
+	wrong_exception_type(exc);
+	return NULL;
+ }
+}
+
 	
 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 {
@@ -858,6 +927,11 @@
 return PyCodec_SurrogateErrors(exc);
 }
 
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{
+ return PyCodec_UTF8bErrors(exc);
+}
+
 static int _PyCodecRegistry_Init(void)
 {
 static struct {
@@ -912,6 +986,14 @@
 		surrogates_errors,
 		METH_O
 	 }
+	},
+	{
+	 "utf8b",
+	 {
+		"utf8b",
+		utf8b_errors,
+		METH_O
+	 }
 	}
 };
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /