diff -r 0ea77fa9f392 Include/unicodeobject.h --- a/Include/unicodeobject.h Sat May 23 21:36:27 2009 +0200 +++ b/Include/unicodeobject.h Sun May 24 01:08:40 2009 +0200 @@ -425,8 +425,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE; #define Py_UNICODE_MATCH(string, offset, substring) \ ((*((string)->str + (offset)) == *((substring)->str)) && \ - ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ - !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) + ((*((string)->str + (offset) + Py_SIZE(substring)-1) == *((substring)->str + Py_SIZE(substring)-1))) && \ + !memcmp((string)->str + (offset), (substring)->str, Py_SIZE(substring)*sizeof(Py_UNICODE))) #ifdef __cplusplus extern "C" { @@ -435,18 +435,18 @@ extern "C" { /* --- Unicode Type ------------------------------------------------------- */ typedef struct { - PyObject_HEAD - Py_ssize_t length; /* Length of raw Unicode data in buffer */ - Py_UNICODE *str; /* Raw Unicode buffer */ + PyObject_VAR_HEAD long hash; /* Hash value; -1 if not set */ - int state; /* != 0 if interned. In this case the two - * references from the dictionary to this object - * are *not* counted in ob_refcnt. */ PyObject *defenc; /* (Default) Encoded version as Python string, or NULL; this is used for implementing the buffer protocol */ + unsigned char state; /* != 0 if interned. In this case the two + * references from the dictionary to this object + * are *not* counted in ob_refcnt. */ + Py_UNICODE str[1]; /* Raw Unicode buffer */ } PyUnicodeObject; + PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; @@ -460,9 +460,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T /* Fast access macros */ #define PyUnicode_GET_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length)) + (assert(PyUnicode_Check(op)), Py_SIZE(op)) #define PyUnicode_GET_DATA_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))) + (assert(PyUnicode_Check(op)), Py_SIZE(op) * sizeof(Py_UNICODE)) #define PyUnicode_AS_UNICODE(op) \ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str)) #define PyUnicode_AS_DATA(op) \ diff -r 0ea77fa9f392 Lib/test/test_io.py --- a/Lib/test/test_io.py Sat May 23 21:36:27 2009 +0200 +++ b/Lib/test/test_io.py Sun May 24 01:08:40 2009 +0200 @@ -2263,9 +2263,9 @@ class MiscIOTest(unittest.TestCase): self.assertRaises(TypeError, self.BlockingIOError, 1, "", None) b = self.BlockingIOError(1, "") self.assertEqual(b.characters_written, 0) - class C(str): + class C: pass - c = C("") + c = C() b = self.BlockingIOError(1, c) c.b = b b.c = c diff -r 0ea77fa9f392 Lib/test/test_sys.py --- a/Lib/test/test_sys.py Sat May 23 21:36:27 2009 +0200 +++ b/Lib/test/test_sys.py Sun May 24 01:08:40 2009 +0200 @@ -693,10 +693,11 @@ class SizeofTest(unittest.TestCase): # unicode usize = len('0円'.encode('unicode-internal')) samples = ['', '1'*100] + ucode = {2: 'H', 4: 'I'}[usize] # we need to test for both sizes, because we don't know if the string # has been cached for s in samples: - basicsize = size(h + 'PPliP') + usize * (len(s) + 1) + basicsize = struct.calcsize(vh + 'lPb' + '%d%s' % (len(s) + 1, ucode)) check(s, basicsize) # weakref import weakref diff -r 0ea77fa9f392 Objects/stringlib/eq.h --- a/Objects/stringlib/eq.h Sat May 23 21:36:27 2009 +0200 +++ b/Objects/stringlib/eq.h Sun May 24 01:08:40 2009 +0200 @@ -9,13 +9,13 @@ unicode_eq(PyObject *aa, PyObject *bb) register PyUnicodeObject *a = (PyUnicodeObject *)aa; register PyUnicodeObject *b = (PyUnicodeObject *)bb; - if (a->length != b->length) + if (PyUnicode_GET_SIZE(a) != PyUnicode_GET_SIZE(b)) return 0; - if (a->length == 0) + if (PyUnicode_GET_SIZE(a) == 0) return 1; if (a->str[0] != b->str[0]) return 0; - if (a->length == 1) + if (PyUnicode_GET_SIZE(a) == 1) return 1; - return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; + return memcmp(a->str, b->str, PyUnicode_GET_DATA_SIZE(a)) == 0; } diff -r 0ea77fa9f392 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat May 23 21:36:27 2009 +0200 +++ b/Objects/unicodeobject.c Sun May 24 01:08:40 2009 +0200 @@ -46,32 +46,39 @@ OF OR IN CONNECTION WITH THE USE OR PERF #include "unicodeobject.h" #include "ucnhash.h" +#include + #ifdef MS_WINDOWS #include #endif -/* Limit for the Unicode object free list */ - -#define PyUnicode_MAXFREELIST 1024 - -/* Limit for the Unicode object free list stay alive optimization. +/* PyUnicodeObject_SIZE gives the basic physical size of an unicode string; + any memory allocation for a string of length n should request + (PyUnicodeObject_SIZE + n * sizeof(Py_UNICODE)) bytes. + + Using PyUnicodeObject_SIZE instead of sizeof(PyUnicodeObject) saves + 3 bytes per string allocation on a typical system. +*/ +#define PyUnicodeObject_SIZE (offsetof(PyUnicodeObject, str) + sizeof(Py_UNICODE)) + + +/* Number of free lists, one per unicode object size. The implementation will keep allocated Unicode memory intact for - all objects on the free list having a size less than this - limit. This reduces malloc() overhead for small Unicode objects. - - At worst this will result in PyUnicode_MAXFREELIST * - (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + - malloc()-overhead) bytes of unused garbage. + objects having a size less than this limit, within a certain number + of objects for each size (as defined by the CAN_SAVE macro below). Setting the limit to 0 effectively turns the feature off. - - Note: This is an experimental feature ! If you get core dumps when - using Unicode objects, turn this feature off. - -*/ - -#define KEEPALIVE_SIZE_LIMIT 9 +*/ + +#define MAX_SAVED_SIZE 150 + +/* We keep lots of small objects in the free lists, but less larger ones. */ + +#define CAN_SAVE(obj_length, list_size) \ + ((obj_length < 20 && list_size < 50) \ + || (obj_length < 80 && list_size < 4) \ + || (list_size < 1)) /* Endianness switches; defaults to little endian */ @@ -103,9 +110,8 @@ extern "C" { */ static PyObject *interned; -/* Free list for Unicode objects */ -static PyUnicodeObject *free_list; -static int numfree; +/* Free lists for Unicode objects */ +static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE]; /* The empty Unicode object is shared to improve performance. */ static PyUnicodeObject *unicode_empty; @@ -247,62 +253,73 @@ Py_LOCAL_INLINE(int) unicode_member(Py_U /* --- Unicode Object ----------------------------------------------------- */ static -int unicode_resize(register PyUnicodeObject *unicode, - Py_ssize_t length) -{ - void *oldstr; - - /* Shortcut if there's nothing much to do. */ - if (unicode->length == length) +PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); + +static +PyUnicodeObject *unicode_resize(register PyUnicodeObject *unicode, + Py_ssize_t length) +{ + PyUnicodeObject *v; + + /* Optimization for empty strings; yes, this sometimes happens. */ + if (length == 0 && unicode_empty != NULL) { + Py_DECREF(unicode); + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Resizing unicode_empty and single character objects is not + possible since these are being shared. We simply return a fresh + copy with the same Unicode content. */ + if (PyUnicode_GET_SIZE(unicode) != length && + (unicode == unicode_empty || PyUnicode_GET_SIZE(unicode) == 1)) { + v = _PyUnicode_New(length); + if (v == NULL) + return NULL; + Py_UNICODE_COPY(v->str, unicode->str, + length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode)); + Py_DECREF(unicode); + return v; + } + + /* PyObject_REALLOC will almost always return a new memory block, so try + to find an existing one instead */ + if (length < MAX_SAVED_SIZE && (v = unicode_freelist[length])) { + unicode_freelist[length] = (PyUnicodeObject *) v->defenc; + v->defenc = NULL; + v->state = 0; + Py_UNICODE_COPY(v->str, unicode->str, + length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode)); + Py_DECREF(unicode); goto reset; - - /* Resizing shared object (unicode_empty or single character - objects) in-place is not allowed. Use PyUnicode_Resize() - instead ! */ - - if (unicode == unicode_empty || - (unicode->length == 1 && - unicode->str[0] < 256U && - unicode_latin1[unicode->str[0]] == unicode)) { - PyErr_SetString(PyExc_SystemError, - "can't resize shared str objects"); - return -1; - } - - /* We allocate one more byte to make sure the string is Ux0000 terminated. - The overallocation is also used by fastsearch, which assumes that it's - safe to look at str[length] (without making any assumptions about what - it contains). */ - - oldstr = unicode->str; - unicode->str = PyObject_REALLOC(unicode->str, - sizeof(Py_UNICODE) * (length + 1)); - if (!unicode->str) { - unicode->str = (Py_UNICODE *)oldstr; + } + + /* Adapted from similar code in tupleobject */ + _Py_DEC_REFTOTAL; + _Py_ForgetReference(unicode); + v = (PyUnicodeObject *) PyObject_REALLOC((char *) unicode, + PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE)); + if (v == NULL) { + PyObject_DEL(unicode); PyErr_NoMemory(); - return -1; - } - unicode->str[length] = 0; - unicode->length = length; - + return NULL; + } + Py_CLEAR(v->defenc); reset: - /* Reset the object caches */ - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - unicode->hash = -1; - - return 0; -} - -/* We allocate one more byte to make sure the string is - Ux0000 terminated; some code (e.g. new_identifier) - relies on that. + Py_SIZE(v) = length; + v->str[length] = 0; + v->hash = -1; + _Py_NewReference(v); + return v; +} + +/* We allocate one more byte to make sure the string is Ux0000 terminated. + The overallocation is also used by fastsearch, which assumes that it's + safe to look at str[length] (without making any assumptions about what + it contains). XXX This allocator could further be enhanced by assuring that the free list never reduces its size below 1. - */ static @@ -322,38 +339,22 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize } /* Unicode freelist & memory allocation */ - if (free_list) { - unicode = free_list; - free_list = *(PyUnicodeObject **)unicode; - numfree--; - if (unicode->str) { - /* Keep-Alive optimization: we only upsize the buffer, - never downsize it. */ - if ((unicode->length < length) && - unicode_resize(unicode, length) < 0) { - PyObject_DEL(unicode->str); - unicode->str = NULL; - } - } - else { - size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); - unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); - } - PyObject_INIT(unicode, &PyUnicode_Type); - } - else { - size_t new_size; - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); - if (unicode == NULL) - return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); - unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); - } - - if (!unicode->str) { - PyErr_NoMemory(); - goto onError; - } + if (length < MAX_SAVED_SIZE + && (unicode = unicode_freelist[length])) { + _Py_NewReference(unicode); + unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc; + } + else { + /* Inline PyObject_NewVar */ + unicode = (PyUnicodeObject *) PyObject_MALLOC( + PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE)); + if (!unicode) { + PyErr_NoMemory(); + return NULL; + } + PyObject_INIT_VAR(unicode, &PyUnicode_Type, length); + } + /* Initialize the first element to guard against cases where * the caller fails before initializing str -- unicode_resize() * reads str[0], and the Keep-Alive optimization can keep memory @@ -363,23 +364,18 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize */ unicode->str[0] = 0; unicode->str[length] = 0; - unicode->length = length; + Py_SIZE(unicode) = length; unicode->hash = -1; unicode->state = 0; unicode->defenc = NULL; return unicode; - - onError: - /* XXX UNREF/NEWREF interface should be more symmetrical */ - _Py_DEC_REFTOTAL; - _Py_ForgetReference((PyObject *)unicode); - PyObject_Del(unicode); - return NULL; } static void unicode_dealloc(register PyUnicodeObject *unicode) { + Py_ssize_t length = PyUnicode_GET_SIZE(unicode); + switch (PyUnicode_CHECK_INTERNED(unicode)) { case SSTATE_NOT_INTERNED: break; @@ -399,28 +395,20 @@ void unicode_dealloc(register PyUnicodeO Py_FatalError("Inconsistent interned string state."); } - if (PyUnicode_CheckExact(unicode) && - numfree < PyUnicode_MAXFREELIST) { - /* Keep-Alive optimization */ - if (unicode->length>= KEEPALIVE_SIZE_LIMIT) { - PyObject_DEL(unicode->str); - unicode->str = NULL; - unicode->length = 0; - } - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - /* Add to free list */ - *(PyUnicodeObject **)unicode = free_list; - free_list = unicode; - numfree++; - } - else { - PyObject_DEL(unicode->str); - Py_XDECREF(unicode->defenc); - Py_TYPE(unicode)->tp_free((PyObject *)unicode); - } + Py_CLEAR(unicode->defenc); + + if (PyUnicode_CheckExact(unicode) && length < MAX_SAVED_SIZE) { + PyUnicodeObject *v = unicode_freelist[length]; + if (!v || CAN_SAVE(length, PyUnicode_GET_SIZE(v))) { + /* Keep track of number of items stacked on the freelist */ + Py_SIZE(unicode) = v ? PyUnicode_GET_SIZE(v) + 1 : 1; + unicode->defenc = (PyObject *) v; + unicode_freelist[length] = unicode; + return; + } + } + + Py_TYPE(unicode)->tp_free((PyObject *)unicode); } static @@ -439,29 +427,16 @@ int _PyUnicode_Resize(PyUnicodeObject ** return -1; } - /* Resizing unicode_empty and single character objects is not - possible since these are being shared. We simply return a fresh - copy with the same Unicode content. */ - if (v->length != length && - (v == unicode_empty || v->length == 1)) { - PyUnicodeObject *w = _PyUnicode_New(length); - if (w == NULL) - return -1; - Py_UNICODE_COPY(w->str, v->str, - length < v->length ? length : v->length); - Py_DECREF(*unicode); - *unicode = w; - return 0; - } - - /* Note that we don't have to modify *unicode for unshared Unicode - objects, since we can modify them in-place. */ - return unicode_resize(v, length); + v = unicode_resize(v, length); + if (v == NULL) + return -1; + *unicode = v; + return 0; } int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) { - return _PyUnicode_Resize((PyUnicodeObject **)unicode, length); + return _PyUnicode_Resize((PyUnicodeObject **) unicode, length); } PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, @@ -760,14 +735,12 @@ PyUnicode_FromFormatV(const char *format width = (width*10) + *f++ - '0'; while (*++f && *f != '%' && !ISALPHA((unsigned)*f)) ; - /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since * they don't affect the amount of space we reserve. */ if ((*f == 'l' || *f == 'z') && (f[1] == 'd' || f[1] == 'u')) ++f; - switch (*f) { case 'c': (void)va_arg(count, int); @@ -898,7 +871,6 @@ PyUnicode_FromFormatV(const char *format string = PyUnicode_FromUnicode(NULL, n); if (!string) goto fail; - s = PyUnicode_AS_UNICODE(string); callresult = callresults; @@ -5811,13 +5783,13 @@ int PyUnicode_EncodeDecimal(Py_UNICODE * /* helper macro to fixup start/end slice values */ #define FIX_START_END(obj) \ if (start < 0) \ - start += (obj)->length; \ + start += PyUnicode_GET_SIZE(obj); \ if (start < 0) \ start = 0; \ - if (end> (obj)->length) \ - end = (obj)->length; \ + if (end> PyUnicode_GET_SIZE(obj)) \ + end = PyUnicode_GET_SIZE(obj); \ if (end < 0) \ - end += (obj)->length; \ + end += PyUnicode_GET_SIZE(obj); \ if (end < 0) \ end = 0; @@ -5842,7 +5814,7 @@ Py_ssize_t PyUnicode_Count(PyObject *str FIX_START_END(str_obj); result = stringlib_count( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length + str_obj->str + start, end - start, sub_obj->str, PyUnicode_GET_SIZE(sub_obj) ); Py_DECREF(sub_obj); @@ -5894,12 +5866,12 @@ int tailmatch(PyUnicodeObject *self, Py_ssize_t end, int direction) { - if (substring->length == 0) + if (PyUnicode_GET_SIZE(substring) == 0) return 1; FIX_START_END(self); - end -= substring->length; + end -= PyUnicode_GET_SIZE(substring); if (end < start) return 0; @@ -5949,11 +5921,11 @@ PyObject *fixup(PyUnicodeObject *self, PyUnicodeObject *u; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self)); if (u == NULL) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self)); if (!fixfct(u) && PyUnicode_CheckExact(self)) { /* fixfct should return TRUE if it modified the buffer. If @@ -5969,7 +5941,7 @@ PyObject *fixup(PyUnicodeObject *self, static int fixupper(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); Py_UNICODE *s = self->str; int status = 0; @@ -5990,7 +5962,7 @@ int fixupper(PyUnicodeObject *self) static int fixlower(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); Py_UNICODE *s = self->str; int status = 0; @@ -6011,7 +5983,7 @@ int fixlower(PyUnicodeObject *self) static int fixswapcase(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); Py_UNICODE *s = self->str; int status = 0; @@ -6032,7 +6004,7 @@ int fixswapcase(PyUnicodeObject *self) static int fixcapitalize(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); Py_UNICODE *s = self->str; int status = 0; @@ -6211,6 +6183,7 @@ PyUnicodeObject *pad(PyUnicodeObject *se Py_UNICODE fill) { PyUnicodeObject *u; + Py_ssize_t length = PyUnicode_GET_SIZE(self); if (left < 0) left = 0; @@ -6222,18 +6195,18 @@ PyUnicodeObject *pad(PyUnicodeObject *se return self; } - if (left> PY_SSIZE_T_MAX - self->length || - right> PY_SSIZE_T_MAX - (left + self->length)) { + if (left> PY_SSIZE_T_MAX - length || + right> PY_SSIZE_T_MAX - (left + length)) { PyErr_SetString(PyExc_OverflowError, "padded string is too long"); return NULL; } - u = _PyUnicode_New(left + self->length + right); + u = _PyUnicode_New(left + length + right); if (u) { if (left) Py_UNICODE_FILL(u->str, fill, left); - Py_UNICODE_COPY(u->str + left, self->str, self->length); + Py_UNICODE_COPY(u->str + left, self->str, length); if (right) - Py_UNICODE_FILL(u->str + left + self->length, fill, right); + Py_UNICODE_FILL(u->str + left + length, fill, right); } return u; @@ -6257,7 +6230,7 @@ PyObject *split_whitespace(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -6349,7 +6322,7 @@ PyObject *split_char(PyUnicodeObject *se { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -6380,8 +6353,8 @@ PyObject *split_substring(PyUnicodeObjec { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); + Py_ssize_t sublen = PyUnicode_GET_SIZE(substring); PyObject *str; for (i = j = 0; i <= len - sublen; ) { @@ -6410,7 +6383,7 @@ PyObject *rsplit_whitespace(PyUnicodeObj { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -6450,7 +6423,7 @@ PyObject *rsplit_char(PyUnicodeObject *s { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -6483,8 +6456,8 @@ PyObject *rsplit_substring(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = PyUnicode_GET_SIZE(self); + Py_ssize_t sublen = PyUnicode_GET_SIZE(substring); PyObject *str; for (i = len - sublen, j = len; i>= 0; ) { @@ -6528,10 +6501,10 @@ PyObject *split(PyUnicodeObject *self, if (substring == NULL) return split_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (PyUnicode_GET_SIZE(substring) == 1) return split_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (PyUnicode_GET_SIZE(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -6557,10 +6530,10 @@ PyObject *rsplit(PyUnicodeObject *self, if (substring == NULL) return rsplit_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (PyUnicode_GET_SIZE(substring) == 1) return rsplit_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (PyUnicode_GET_SIZE(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -6580,21 +6553,21 @@ PyObject *replace(PyUnicodeObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (str1->length == str2->length) { + if (PyUnicode_GET_SIZE(str1) == PyUnicode_GET_SIZE(str2)) { /* same length */ Py_ssize_t i; - if (str1->length == 1) { + if (PyUnicode_GET_SIZE(str1) == 1) { /* replace characters */ Py_UNICODE u1, u2; - if (!findchar(self->str, self->length, str1->str[0])) + if (!findchar(self->str, PyUnicode_GET_SIZE(self), str1->str[0])) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self)); u1 = str1->str[0]; u2 = str2->str[0]; - for (i = 0; i < u->length; i++) + for (i = 0; i < PyUnicode_GET_SIZE(u); i++) if (u->str[i] == u1) { if (--maxcount < 0) break; @@ -6602,20 +6575,20 @@ PyObject *replace(PyUnicodeObject *self, } } else { i = fastsearch( - self->str, self->length, str1->str, str1->length, FAST_SEARCH + self->str, PyUnicode_GET_SIZE(self), str1->str, PyUnicode_GET_SIZE(str1), FAST_SEARCH ); if (i < 0) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); - while (i <= self->length - str1->length) + Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self)); + while (i <= PyUnicode_GET_SIZE(self) - PyUnicode_GET_SIZE(str1)) if (Py_UNICODE_MATCH(self, i, str1)) { if (--maxcount < 0) break; - Py_UNICODE_COPY(u->str+i, str2->str, str2->length); - i += str1->length; + Py_UNICODE_COPY(u->str+i, str2->str, PyUnicode_GET_SIZE(str2)); + i += PyUnicode_GET_SIZE(str1); } else i++; } @@ -6626,23 +6599,23 @@ PyObject *replace(PyUnicodeObject *self, Py_UNICODE *p; /* replace strings */ - n = stringlib_count(self->str, self->length, str1->str, str1->length); + n = stringlib_count(self->str, PyUnicode_GET_SIZE(self), str1->str, PyUnicode_GET_SIZE(str1)); if (n> maxcount) n = maxcount; if (n == 0) goto nothing; - /* new_size = self->length + n * (str2->length - str1->length)); */ - delta = (str2->length - str1->length); + /* new_size = PyUnicode_GET_SIZE(self) + n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))); */ + delta = (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1)); if (delta == 0) { - new_size = self->length; + new_size = PyUnicode_GET_SIZE(self); } else { - product = n * (str2->length - str1->length); - if ((product / (str2->length - str1->length)) != n) { + product = n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1)); + if ((product / (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))) != n) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } - new_size = self->length + product; + new_size = PyUnicode_GET_SIZE(self) + product; if (new_size < 0) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); @@ -6654,8 +6627,8 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str; - e = self->length - str1->length; - if (str1->length> 0) { + e = PyUnicode_GET_SIZE(self) - PyUnicode_GET_SIZE(str1); + if (PyUnicode_GET_SIZE(str1)> 0) { while (n--> 0) { /* look for next match */ j = i; @@ -6672,25 +6645,25 @@ PyObject *replace(PyUnicodeObject *self, p += j - i; } /* copy substitution string */ - if (str2->length> 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; - } - i = j + str1->length; - } - if (i < self->length) + if (PyUnicode_GET_SIZE(str2)> 0) { + Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2)); + p += PyUnicode_GET_SIZE(str2); + } + i = j + PyUnicode_GET_SIZE(str1); + } + if (i < PyUnicode_GET_SIZE(self)) /* copy tail [i:] */ - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i); } else { /* interleave */ while (n> 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; + Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2)); + p += PyUnicode_GET_SIZE(str2); if (--n <= 0) break; *p++ = self->str[i++]; } - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i); } } return (PyObject *) u; @@ -6701,7 +6674,7 @@ PyObject *replace(PyUnicodeObject *self, Py_INCREF(self); return (PyObject *) self; } - return PyUnicode_FromUnicode(self->str, self->length); + return PyUnicode_FromUnicode(self->str, PyUnicode_GET_SIZE(self)); } /* --- Unicode Object Methods --------------------------------------------- */ @@ -6811,12 +6784,12 @@ unicode_center(PyUnicodeObject *self, Py if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) return NULL; - if (self->length>= width && PyUnicode_CheckExact(self)) { + if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - marg = width - self->length; + marg = width - PyUnicode_GET_SIZE(self); left = marg / 2 + (marg & width & 1); return (PyObject*) pad(self, left, marg - left, fillchar); @@ -6848,8 +6821,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = PyUnicode_GET_SIZE(str1); + len2 = PyUnicode_GET_SIZE(str2); while (len1> 0 && len2> 0) { Py_UNICODE c1, c2; @@ -6882,8 +6855,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = PyUnicode_GET_SIZE(str1); + len2 = PyUnicode_GET_SIZE(str2); while (len1> 0 && len2> 0) { Py_UNICODE c1, c2; @@ -6945,8 +6918,7 @@ PyObject *PyUnicode_RichCompare(PyObject if (PyUnicode_Check(left) && PyUnicode_Check(right)) { PyObject *v; - if (((PyUnicodeObject *) left)->length != - ((PyUnicodeObject *) right)->length) { + if (PyUnicode_GET_SIZE(left) != PyUnicode_GET_SIZE(right)) { if (op == Py_EQ) { Py_INCREF(Py_False); return Py_False; @@ -7049,11 +7021,11 @@ PyObject *PyUnicode_Concat(PyObject *lef } /* Concat the two Unicode strings */ - w = _PyUnicode_New(u->length + v->length); + w = _PyUnicode_New(PyUnicode_GET_SIZE(u) + PyUnicode_GET_SIZE(v)); if (w == NULL) goto onError; - Py_UNICODE_COPY(w->str, u->str, u->length); - Py_UNICODE_COPY(w->str + u->length, v->str, v->length); + Py_UNICODE_COPY(w->str, u->str, PyUnicode_GET_SIZE(u)); + Py_UNICODE_COPY(w->str + PyUnicode_GET_SIZE(u), v->str, PyUnicode_GET_SIZE(v)); Py_DECREF(u); Py_DECREF(v); @@ -7116,7 +7088,7 @@ unicode_count(PyUnicodeObject *self, PyO result = PyLong_FromSsize_t( stringlib_count(self->str + start, end - start, - substring->str, substring->length) + substring->str, PyUnicode_GET_SIZE(substring)) ); Py_DECREF(substring); @@ -7183,7 +7155,7 @@ unicode_expandtabs(PyUnicodeObject *self /* First pass: determine size of output string */ i = 0; /* chars up to and including most recent \n or \r */ j = 0; /* chars since most recent \n or \r (use in tab calculations) */ - e = self->str + self->length; /* end of input */ + e = self->str + PyUnicode_GET_SIZE(self); /* end of input */ for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize> 0) { @@ -7215,7 +7187,7 @@ unicode_expandtabs(PyUnicodeObject *self j = 0; /* same as in first pass */ q = u->str; /* next output char */ - qe = u->str + u->length; /* end of output */ + qe = u->str + PyUnicode_GET_SIZE(u); /* end of output */ for (p = self->str; p < e; p++) if (*p == '\t') { @@ -7281,7 +7253,7 @@ unicode_find(PyUnicodeObject *self, PyOb static PyObject * unicode_getitem(PyUnicodeObject *self, Py_ssize_t index) { - if (index < 0 || index>= self->length) { + if (index < 0 || index>= PyUnicode_GET_SIZE(self)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } @@ -7716,7 +7688,7 @@ unicode_join(PyObject *self, PyObject *d static Py_ssize_t unicode_length(PyUnicodeObject *self) { - return self->length; + return PyUnicode_GET_SIZE(self); } PyDoc_STRVAR(ljust__doc__, @@ -7734,12 +7706,12 @@ unicode_ljust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) return NULL; - if (self->length>= width && PyUnicode_CheckExact(self)) { + if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, 0, width - self->length, fillchar); + return (PyObject*) pad(self, 0, width - PyUnicode_GET_SIZE(self), fillchar); } PyDoc_STRVAR(lower__doc__, @@ -7922,8 +7894,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ /* ensure # of chars needed doesn't overflow int and # of bytes * needed doesn't overflow size_t */ - nchars = len * str->length; - if (nchars / len != str->length) { + nchars = len * PyUnicode_GET_SIZE(str); + if (len && nchars / len != PyUnicode_GET_SIZE(str)) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); return NULL; @@ -7940,11 +7912,14 @@ unicode_repeat(PyUnicodeObject *str, Py_ p = u->str; - if (str->length == 1) { + if (PyUnicode_GET_SIZE(str) == 1 && len> 0) { Py_UNICODE_FILL(p, str->str[0], len); } else { - Py_ssize_t done = str->length; /* number of characters copied this far */ - Py_UNICODE_COPY(p, str->str, str->length); + Py_ssize_t done = 0; /* number of characters copied this far */ + if (done < nchars) { + Py_UNICODE_COPY(p, str->str, PyUnicode_GET_SIZE(str)); + done = PyUnicode_GET_SIZE(str); + } while (done < nchars) { Py_ssize_t n = (done <= nchars-done) ? done : nchars-done; Py_UNICODE_COPY(p+done, p, n); @@ -8248,12 +8223,12 @@ unicode_rjust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) return NULL; - if (self->length>= width && PyUnicode_CheckExact(self)) { + if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, width - self->length, 0, fillchar); + return (PyObject*) pad(self, width - PyUnicode_GET_SIZE(self), 0, fillchar); } PyObject *PyUnicode_Split(PyObject *s, @@ -8594,7 +8569,7 @@ are deleted."); static PyObject* unicode_translate(PyUnicodeObject *self, PyObject *table) { - return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore"); + return PyUnicode_TranslateCharmap(self->str, PyUnicode_GET_SIZE(self), table, "ignore"); } PyDoc_STRVAR(upper__doc__, @@ -8624,7 +8599,7 @@ unicode_zfill(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n:zfill", &width)) return NULL; - if (self->length>= width) { + if (PyUnicode_GET_SIZE(self)>= width) { if (PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; @@ -8636,7 +8611,7 @@ unicode_zfill(PyUnicodeObject *self, PyO ); } - fill = width - self->length; + fill = width - PyUnicode_GET_SIZE(self); u = pad(self, fill, 0, '0'); @@ -8652,14 +8627,6 @@ unicode_zfill(PyUnicodeObject *self, PyO return (PyObject*) u; } -#if 0 -static PyObject* -unicode_freelistsize(PyUnicodeObject *self) -{ - return PyLong_FromLong(numfree); -} -#endif - PyDoc_STRVAR(startswith__doc__, "S.startswith(prefix[, start[, end]]) -> bool\n\ \n\ @@ -8779,8 +8746,8 @@ PyDoc_STRVAR(p_format__doc__, static PyObject * unicode__sizeof__(PyUnicodeObject *v) { - return PyLong_FromSsize_t(sizeof(PyUnicodeObject) + - sizeof(Py_UNICODE) * (v->length + 1)); + return PyLong_FromSsize_t(PyUnicodeObject_SIZE + + sizeof(Py_UNICODE) * PyUnicode_GET_SIZE(v)); } PyDoc_STRVAR(sizeof__doc__, @@ -8789,7 +8756,7 @@ PyDoc_STRVAR(sizeof__doc__, static PyObject * unicode_getnewargs(PyUnicodeObject *v) { - return Py_BuildValue("(u#)", v->str, v->length); + return Py_BuildValue("(u#)", v->str, PyUnicode_GET_SIZE(v)); } @@ -8909,7 +8876,7 @@ unicode_subscript(PyUnicodeObject* self, if (slicelength <= 0) { return PyUnicode_FromUnicode(NULL, 0); - } else if (start == 0 && step == 1 && slicelength == self->length && + } else if (start == 0 && step == 1 && slicelength == PyUnicode_GET_SIZE(self) && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject *)self; @@ -9543,22 +9510,13 @@ unicode_subtype_new(PyTypeObject *type, if (tmp == NULL) return NULL; assert(PyUnicode_Check(tmp)); - pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length); - if (pnew == NULL) { + pnew = (PyUnicodeObject *) type->tp_alloc(type, n = PyUnicode_GET_SIZE(tmp)); + if (pnew != NULL) { + Py_UNICODE_COPY(pnew->str, tmp->str, n+1); + Py_SIZE(pnew) = n; + pnew->hash = tmp->hash; Py_DECREF(tmp); - return NULL; - } - pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1)); - if (pnew->str == NULL) { - _Py_ForgetReference((PyObject *)pnew); - PyObject_Del(pnew); - Py_DECREF(tmp); - return PyErr_NoMemory(); - } - Py_UNICODE_COPY(pnew->str, tmp->str, n+1); - pnew->length = n; - pnew->hash = tmp->hash; - Py_DECREF(tmp); + } return (PyObject *)pnew; } @@ -9573,9 +9531,9 @@ static PyObject *unicode_iter(PyObject * PyTypeObject PyUnicode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - "str", /* tp_name */ - sizeof(PyUnicodeObject), /* tp_size */ - 0, /* tp_itemsize */ + "str", /* tp_name */ + sizeof(PyUnicodeObject), /* tp_size */ + sizeof(Py_UNICODE), /* tp_itemsize */ /* Slots */ (destructor)unicode_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -9634,8 +9592,6 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - free_list = NULL; - numfree = 0; unicode_empty = _PyUnicode_New(0); if (!unicode_empty) return; @@ -9658,21 +9614,20 @@ void _PyUnicode_Init(void) int PyUnicode_ClearFreeList(void) { - int freelist_size = numfree; - PyUnicodeObject *u; - - for (u = free_list; u != NULL;) { - PyUnicodeObject *v = u; - u = *(PyUnicodeObject **)u; - if (v->str) - PyObject_DEL(v->str); - Py_XDECREF(v->defenc); - PyObject_Del(v); - numfree--; - } - free_list = NULL; - assert(numfree == 0); - return freelist_size; + int i, freed_objects = 0; + for (i = 0; i < MAX_SAVED_SIZE; i++) { + PyUnicodeObject *u, *v; + u = unicode_freelist[i]; + while (u != NULL) { + v = (PyUnicodeObject *) u->defenc; + Py_SIZE(u) = i; + PyObject_DEL(u); + u = v; + freed_objects++; + } + unicode_freelist[i] = NULL; + } + return freed_objects; } void @@ -9791,11 +9746,11 @@ void _Py_ReleaseInternedUnicodeStrings(v break; case SSTATE_INTERNED_IMMORTAL: Py_REFCNT(s) += 1; - immortal_size += s->length; + immortal_size += PyUnicode_GET_SIZE(s); break; case SSTATE_INTERNED_MORTAL: Py_REFCNT(s) += 2; - mortal_size += s->length; + mortal_size += PyUnicode_GET_SIZE(s); break; default: Py_FatalError("Inconsistent interned string state.");

AltStyle によって変換されたページ (->オリジナル) /