[Python-checkins] r67939 - python/branches/py3k/Objects/unicodeobject.c

alexandre.vassalotti python-checkins at python.org
Sat Dec 27 10:16:49 CET 2008


Author: alexandre.vassalotti
Date: Sat Dec 27 10:16:49 2008
New Revision: 67939
Log:
Optimize built-in unicode codecs by avoiding unnecessary copying.
The approach used is similar to what is currently used in the version
of unicodeobject.c in Python 2.x. The only difference is we use
_PyBytes_Resize instead of _PyString_Resize.
Modified:
 python/branches/py3k/Objects/unicodeobject.c
Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Sat Dec 27 10:16:49 2008
@@ -1873,7 +1873,7 @@
 int encodeWhiteSpace,
 const char *errors)
 {
- PyObject *v, *result;
+ PyObject *v;
 /* It might be possible to tighten this worst case */
 Py_ssize_t cbAllocated = 5 * size;
 int inShift = 0;
@@ -1889,11 +1889,11 @@
 if (cbAllocated / 5 != size)
 return PyErr_NoMemory();
 
- v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
+ v = PyBytes_FromStringAndSize(NULL, cbAllocated);
 if (v == NULL)
 return NULL;
 
- start = out = PyByteArray_AS_STRING(v);
+ start = out = PyBytes_AS_STRING(v);
 for (;i < size; ++i) {
 Py_UNICODE ch = s[i];
 
@@ -1958,10 +1958,9 @@
 *out++= B64(charsleft << (6-bitsleft) );
 *out++ = '-';
 }
-
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start);
- Py_DECREF(v);
- return result;
+ if (_PyBytes_Resize(&v, out - start) < 0)
+ return NULL;
+ return v;
 }
 
 #undef SPECIAL
@@ -2479,7 +2478,7 @@
 		 const char *errors,
 		 int byteorder)
 {
- PyObject *v, *result;
+ PyObject *v;
 unsigned char *p;
 Py_ssize_t nsize, bytesize;
 #ifndef Py_UNICODE_WIDE
@@ -2515,11 +2514,11 @@
 bytesize = nsize * 4;
 if (bytesize / 4 != nsize)
 	return PyErr_NoMemory();
- v = PyByteArray_FromStringAndSize(NULL, bytesize);
+ v = PyBytes_FromStringAndSize(NULL, bytesize);
 if (v == NULL)
 return NULL;
 
- p = (unsigned char *)PyByteArray_AS_STRING(v);
+ p = (unsigned char *)PyBytes_AS_STRING(v);
 if (byteorder == 0)
 	STORECHAR(0xFEFF);
 if (size == 0)
@@ -2556,9 +2555,7 @@
 }
 
 done:
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
- Py_DECREF(v);
- return result;
+ return v;
 #undef STORECHAR
 }
 
@@ -2757,7 +2754,7 @@
 		 const char *errors,
 		 int byteorder)
 {
- PyObject *v, *result;
+ PyObject *v;
 unsigned char *p;
 Py_ssize_t nsize, bytesize;
 #ifdef Py_UNICODE_WIDE
@@ -2792,11 +2789,11 @@
 bytesize = nsize * 2;
 if (bytesize / 2 != nsize)
 	return PyErr_NoMemory();
- v = PyByteArray_FromStringAndSize(NULL, bytesize);
+ v = PyBytes_FromStringAndSize(NULL, bytesize);
 if (v == NULL)
 return NULL;
 
- p = (unsigned char *)PyByteArray_AS_STRING(v);
+ p = (unsigned char *)PyBytes_AS_STRING(v);
 if (byteorder == 0)
 	STORECHAR(0xFEFF);
 if (size == 0)
@@ -2828,9 +2825,7 @@
 }
 
 done:
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
- Py_DECREF(v);
- return result;
+ return v;
 #undef STORECHAR
 }
 
@@ -3120,7 +3115,7 @@
 PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 					Py_ssize_t size)
 {
- PyObject *repr, *result;
+ PyObject *repr;
 char *p;
 
 #ifdef Py_UNICODE_WIDE
@@ -3147,17 +3142,20 @@
 escape.
 */
 
+ if (size == 0)
+ return PyBytes_FromStringAndSize(NULL, 0);
+
 if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
 	return PyErr_NoMemory();
 
- repr = PyByteArray_FromStringAndSize(NULL,
+ repr = PyBytes_FromStringAndSize(NULL,
 2
 + expandsize*size
 + 1);
 if (repr == NULL)
 return NULL;
 
- p = PyByteArray_AS_STRING(repr);
+ p = PyBytes_AS_STRING(repr);
 
 while (size-- > 0) {
 Py_UNICODE ch = *s++;
@@ -3249,13 +3247,13 @@
 *p++ = (char) ch;
 }
 
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr),
- p - PyByteArray_AS_STRING(repr));
- Py_DECREF(repr);
- return result;
+ assert(p - PyBytes_AS_STRING(repr) > 0);
+ if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
+ return NULL;
+ return repr;
 }
 
-PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
+PyObject *PyUnicodeAsUnicodeEscapeString(PyObject *unicode)
 {
 PyObject *s;
 if (!PyUnicode_Check(unicode)) {
@@ -3389,7 +3387,7 @@
 PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
 					 Py_ssize_t size)
 {
- PyObject *repr, *result;
+ PyObject *repr;
 char *p;
 char *q;
 
@@ -3402,13 +3400,13 @@
 if (size > PY_SSIZE_T_MAX / expandsize)
 	return PyErr_NoMemory();
 
- repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
+ repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
 if (repr == NULL)
 return NULL;
 if (size == 0)
- goto done;
+ return repr;
 
- p = q = PyByteArray_AS_STRING(repr);
+ p = q = PyBytes_AS_STRING(repr);
 while (size-- > 0) {
 Py_UNICODE ch = *s++;
 #ifdef Py_UNICODE_WIDE
@@ -3468,10 +3466,10 @@
 }
 size = p - q;
 
- done:
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
- Py_DECREF(repr);
- return result;
+ assert(size > 0);
+ if (_PyBytes_Resize(&repr, size) < 0)
+ return NULL;
+ return repr;
 }
 
 PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
@@ -3706,7 +3704,6 @@
 const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
 PyObject *errorHandler = NULL;
 PyObject *exc = NULL;
- PyObject *result = NULL;
 /* the following variable is used for caching string comparisons
 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
 int known_errorHandler = -1;
@@ -3715,10 +3712,10 @@
 replacements, if we need more, we'll resize */
 if (size == 0)
 return PyBytes_FromStringAndSize(NULL, 0);
- res = PyByteArray_FromStringAndSize(NULL, size);
+ res = PyBytes_FromStringAndSize(NULL, size);
 if (res == NULL)
 return NULL;
- str = PyByteArray_AS_STRING(res);
+ str = PyBytes_AS_STRING(res);
 ressize = size;
 
 while (p<endp) {
@@ -3768,7 +3765,7 @@
 		 p = collend;
 		 break;
 		case 4: /* xmlcharrefreplace */
-		 respos = str - PyByteArray_AS_STRING(res);
+		 respos = str - PyBytes_AS_STRING(res);
 		 /* determine replacement size (temporarily (mis)uses p) */
 		 for (p = collstart, repsize = 0; p < collend; ++p) {
 			if (*p<10)
@@ -3795,9 +3792,9 @@
 		 if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			 requiredsize = 2*ressize;
-			if (PyByteArray_Resize(res, requiredsize))
+			if (_PyBytes_Resize(&res, requiredsize))
 			 goto onError;
-			str = PyByteArray_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		 }
 		 /* generate replacement (temporarily (mis)uses p) */
@@ -3815,17 +3812,17 @@
 		 /* need more space? (at least enough for what we
 		 have+the replacement+the rest of the string, so
 		 we won't have to check space for encodable characters) */
-		 respos = str - PyByteArray_AS_STRING(res);
+		 respos = str - PyBytes_AS_STRING(res);
 		 repsize = PyUnicode_GET_SIZE(repunicode);
 		 requiredsize = respos+repsize+(endp-collend);
 		 if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			 requiredsize = 2*ressize;
-			if (PyByteArray_Resize(res, requiredsize)) {
+			if (_PyBytes_Resize(&res, requiredsize)) {
 			 Py_DECREF(repunicode);
 			 goto onError;
 			}
-			str = PyByteArray_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		 }
 		 /* check if there is anything unencodable in the replacement
@@ -3845,13 +3842,23 @@
 	 }
 	}
 }
- result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res),
- str - PyByteArray_AS_STRING(res));
+ /* Resize if we allocated too much */
+ size = str - PyBytes_AS_STRING(res);
+ if (size < ressize) { /* If this falls res will be NULL */
+ assert(size > 0);
+ if (_PyBytes_Resize(&res, size) < 0)
+ goto onError;
+ }
+
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ return res;
+
 onError:
- Py_DECREF(res);
+ Py_XDECREF(res);
 Py_XDECREF(errorHandler);
 Py_XDECREF(exc);
- return result;
+ return NULL;
 }
 
 PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
@@ -4104,7 +4111,7 @@
 else {
 	/* Extend string object */
 	n = PyBytes_Size(*repr);
-	if (_PyBytes_Resize(repr, n + mbcssize) < 0)
+	if (_PyBytes_Resize(&repr, n + mbcssize) < 0)
 	 return -1;
 }
 
@@ -4834,7 +4841,8 @@
 
 /* Resize if we allocated to much */
 if (respos<PyBytes_GET_SIZE(res))
-	_PyBytes_Resize(&res, respos);
+ if (_PyBytes_Resize(&res, respos) < 0)
+ goto onError;
 
 Py_XDECREF(exc);
 Py_XDECREF(errorHandler);


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /