[Python-checkins] r88709 - in python/branches/py3k: Misc/NEWS Objects/unicodeobject.c

victor.stinner python-checkins at python.org
Wed Mar 2 02:03:14 CET 2011


Author: victor.stinner
Date: Wed Mar 2 02:03:14 2011
New Revision: 88709
Log:
Issue #8923: cache str.encode() result
When a string is encoded to UTF-8 in strict mode, the result is cached into the
object. Examples: str.encode(), str.encode('utf-8'), PyUnicode_AsUTF8String()
and PyUnicode_AsEncodedString(unicode, "utf-8", NULL).
Modified:
 python/branches/py3k/Misc/NEWS
 python/branches/py3k/Objects/unicodeobject.c
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Mar 2 02:03:14 2011
@@ -10,6 +10,11 @@
 Core and Builtins
 -----------------
 
+- Issue #8923: When a string is encoded to UTF-8 in strict mode, the result is
+ cached into the object. Examples: str.encode(), str.encode('utf-8'),
+ PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
+ NULL).
+
 - Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.
 
 - Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Wed Mar 2 02:03:14 2011
@@ -1710,17 +1710,21 @@
 }
 
 if (encoding == NULL)
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ return PyUnicode_AsUTF8String(unicode);
 
 /* Shortcuts for common default encodings */
 if (normalize_encoding(encoding, lower, sizeof(lower))) {
 if ((strcmp(lower, "utf-8") == 0) ||
 (strcmp(lower, "utf8") == 0))
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ {
+ if (errors == NULL || strcmp(errors, "strict") == 0) {
+ return PyUnicode_AsUTF8String(unicode);
+ } else {
+ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ errors);
+ }
+ }
 else if ((strcmp(lower, "latin-1") == 0) ||
 (strcmp(lower, "latin1") == 0) ||
 (strcmp(lower, "iso-8859-1") == 0))
@@ -3077,13 +3081,16 @@
 PyObject *
 PyUnicode_AsUTF8String(PyObject *unicode)
 {
+ PyObject *utf8;
 if (!PyUnicode_Check(unicode)) {
 PyErr_BadArgument();
 return NULL;
 }
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
+ if (utf8 == NULL)
+ return NULL;
+ Py_INCREF(utf8);
+ return utf8;
 }
 
 /* --- UTF-32 Codec ------------------------------------------------------- */


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /