[Python-checkins] cpython (3.2): Issue #10156: In the interpreter's initialization phase, unicode globals are now initialized dynamically as needed.
serhiy.storchaka
python-checkins at python.org
Sat Jan 26 11:21:53 CET 2013
http://hg.python.org/cpython/rev/f7eda8165e6f
changeset: 81749:f7eda8165e6f
branch: 3.2
parent: 81745:d391b2849a51
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sat Jan 26 12:14:02 2013 +0200
summary:
Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
files:
Misc/NEWS | 3 +
Objects/unicodeobject.c | 105 +++++++++++++--------------
2 files changed, 52 insertions(+), 56 deletions(-)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #10156: In the interpreter's initialization phase, unicode globals
+ are now initialized dynamically as needed.
+
- Issue #16975: Fix error handling bug in the escape-decode bytes decoder.
- Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -80,8 +80,9 @@
/* --- Globals ------------------------------------------------------------
- The globals are initialized by the _PyUnicode_Init() API and should
- not be used before calling that API.
+NOTE: In the interpreter's initialization phase, some globals are currently
+ initialized dynamically as needed. In the process Unicode objects may
+ be created before the Unicode type is ready.
*/
@@ -98,18 +99,30 @@
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
-static PyObject *interned;
+static PyObject *interned = NULL;
/* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+static PyUnicodeObject *free_list = NULL;
+static int numfree = 0;
/* The empty Unicode object is shared to improve performance. */
-static PyUnicodeObject *unicode_empty;
+static PyUnicodeObject *unicode_empty = NULL;
+
+#define _Py_RETURN_UNICODE_EMPTY() \
+ do { \
+ if (unicode_empty != NULL) \
+ Py_INCREF(unicode_empty); \
+ else { \
+ unicode_empty = _PyUnicode_New(0); \
+ if (unicode_empty != NULL) \
+ Py_INCREF(unicode_empty); \
+ } \
+ return (PyObject *)unicode_empty; \
+ } while (0)
/* Single character Unicode strings in the Latin-1 range are being
shared as well. */
-static PyUnicodeObject *unicode_latin1[256];
+static PyUnicodeObject *unicode_latin1[256] = {NULL};
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
@@ -214,7 +227,7 @@
#define BLOOM_MASK unsigned long
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -479,10 +492,8 @@
if (u != NULL) {
/* Optimization for empty strings */
- if (size == 0 && unicode_empty != NULL) {
- Py_INCREF(unicode_empty);
- return (PyObject *)unicode_empty;
- }
+ if (size == 0)
+ _Py_RETURN_UNICODE_EMPTY();
/* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */
@@ -528,10 +539,8 @@
if (u != NULL) {
/* Optimization for empty strings */
- if (size == 0 && unicode_empty != NULL) {
- Py_INCREF(unicode_empty);
- return (PyObject *)unicode_empty;
- }
+ if (size == 0)
+ _Py_RETURN_UNICODE_EMPTY();
/* Single characters are shared when using this constructor.
Restrict to ASCII, since the input must be UTF-8. */
@@ -1393,15 +1402,11 @@
/* Decoding bytes objects is the most common case and should be fast */
if (PyBytes_Check(obj)) {
- if (PyBytes_GET_SIZE(obj) == 0) {
- Py_INCREF(unicode_empty);
- v = (PyObject *) unicode_empty;
- }
- else {
- v = PyUnicode_Decode(
- PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
- encoding, errors);
- }
+ if (PyBytes_GET_SIZE(obj) == 0)
+ _Py_RETURN_UNICODE_EMPTY();
+ v = PyUnicode_Decode(
+ PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+ encoding, errors);
return v;
}
@@ -1421,12 +1426,11 @@
}
if (buffer.len == 0) {
- Py_INCREF(unicode_empty);
- v = (PyObject *) unicode_empty;
- }
- else
- v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
-
+ PyBuffer_Release(&buffer);
+ _Py_RETURN_UNICODE_EMPTY();
+ }
+
+ v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
PyBuffer_Release(&buffer);
return v;
}
@@ -8323,10 +8327,8 @@
Py_ssize_t nchars;
size_t nbytes;
- if (len < 1) {
- Py_INCREF(unicode_empty);
- return (PyObject *)unicode_empty;
- }
+ if (len < 1)
+ _Py_RETURN_UNICODE_EMPTY();
if (len == 1 && PyUnicode_CheckExact(str)) {
/* no repeat, return original string */
@@ -10056,8 +10058,6 @@
void _PyUnicode_Init(void)
{
- int i;
-
/* XXX - move this array to unicodectype.c ? */
Py_UNICODE linebreak[] = {
0x000A, /* LINE FEED */
@@ -10071,14 +10071,12 @@
};
/* Init the implementation */
- free_list = NULL;
- numfree = 0;
- unicode_empty = _PyUnicode_New(0);
- if (!unicode_empty)
- return;
-
- for (i = 0; i < 256; i++)
- unicode_latin1[i] = NULL;
+ if (!unicode_empty) {
+ unicode_empty = _PyUnicode_New(0);
+ if (!unicode_empty)
+ return;
+ }
+
if (PyType_Ready(&PyUnicode_Type) < 0)
Py_FatalError("Can't initialize 'unicode'");
@@ -10123,15 +10121,11 @@
{
int i;
- Py_XDECREF(unicode_empty);
- unicode_empty = NULL;
-
- for (i = 0; i < 256; i++) {
- if (unicode_latin1[i]) {
- Py_DECREF(unicode_latin1[i]);
- unicode_latin1[i] = NULL;
- }
- }
+ Py_CLEAR(unicode_empty);
+
+ for (i = 0; i < 256; i++)
+ Py_CLEAR(unicode_latin1[i]);
+
(void)PyUnicode_ClearFreeList();
}
@@ -10250,8 +10244,7 @@
"mortal/immortal\n", mortal_size, immortal_size);
Py_DECREF(keys);
PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
+ Py_CLEAR(interned);
}
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list