diff -r 0ea77fa9f392 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Sat May 23 21:36:27 2009 +0200
+++ b/Include/unicodeobject.h	Sun May 24 01:08:40 2009 +0200
@@ -425,8 +425,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 
 #define Py_UNICODE_MATCH(string, offset, substring) \
 ((*((string)->str + (offset)) == *((substring)->str)) && \
- ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
- !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
+ ((*((string)->str + (offset) + Py_SIZE(substring)-1) == *((substring)->str + Py_SIZE(substring)-1))) && \
+ !memcmp((string)->str + (offset), (substring)->str, Py_SIZE(substring)*sizeof(Py_UNICODE)))
 
 #ifdef __cplusplus
 extern "C" {
@@ -435,18 +435,18 @@ extern "C" {
 /* --- Unicode Type ------------------------------------------------------- */
 
 typedef struct {
- PyObject_HEAD
- Py_ssize_t length;		/* Length of raw Unicode data in buffer */
- Py_UNICODE *str;		/* Raw Unicode buffer */
+ PyObject_VAR_HEAD
 long hash;			/* Hash value; -1 if not set */
- int state;			/* != 0 if interned. In this case the two
- 				 * references from the dictionary to this object
- 				 * are *not* counted in ob_refcnt. */
 PyObject *defenc;		/* (Default) Encoded version as Python
 				 string, or NULL; this is used for
 				 implementing the buffer protocol */
+ unsigned char state; /* != 0 if interned. In this case the two
+ * references from the dictionary to this object
+ * are *not* counted in ob_refcnt. */
+ Py_UNICODE str[1];		/* Raw Unicode buffer */
 } PyUnicodeObject;
 
+
 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
@@ -460,9 +460,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T
 
 /* Fast access macros */
 #define PyUnicode_GET_SIZE(op) \
- (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
+ (assert(PyUnicode_Check(op)), Py_SIZE(op))
 #define PyUnicode_GET_DATA_SIZE(op) \
- (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
+ (assert(PyUnicode_Check(op)), Py_SIZE(op) * sizeof(Py_UNICODE))
 #define PyUnicode_AS_UNICODE(op) \
 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
 #define PyUnicode_AS_DATA(op) \
diff -r 0ea77fa9f392 Lib/test/test_io.py
--- a/Lib/test/test_io.py	Sat May 23 21:36:27 2009 +0200
+++ b/Lib/test/test_io.py	Sun May 24 01:08:40 2009 +0200
@@ -2263,9 +2263,9 @@ class MiscIOTest(unittest.TestCase):
 self.assertRaises(TypeError, self.BlockingIOError, 1, "", None)
 b = self.BlockingIOError(1, "")
 self.assertEqual(b.characters_written, 0)
- class C(str):
+ class C:
 pass
- c = C("")
+ c = C()
 b = self.BlockingIOError(1, c)
 c.b = b
 b.c = c
diff -r 0ea77fa9f392 Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Sat May 23 21:36:27 2009 +0200
+++ b/Lib/test/test_sys.py	Sun May 24 01:08:40 2009 +0200
@@ -693,10 +693,11 @@ class SizeofTest(unittest.TestCase):
 # unicode
 usize = len('\0'.encode('unicode-internal'))
 samples = ['', '1'*100]
+ ucode = {2: 'H', 4: 'I'}[usize]
 # we need to test for both sizes, because we don't know if the string
 # has been cached
 for s in samples:
- basicsize = size(h + 'PPliP') + usize * (len(s) + 1)
+ basicsize = struct.calcsize(vh + 'lPb' + '%d%s' % (len(s) + 1, ucode))
 check(s, basicsize)
 # weakref
 import weakref
diff -r 0ea77fa9f392 Objects/stringlib/eq.h
--- a/Objects/stringlib/eq.h	Sat May 23 21:36:27 2009 +0200
+++ b/Objects/stringlib/eq.h	Sun May 24 01:08:40 2009 +0200
@@ -9,13 +9,13 @@ unicode_eq(PyObject *aa, PyObject *bb)
 	register PyUnicodeObject *a = (PyUnicodeObject *)aa;
 	register PyUnicodeObject *b = (PyUnicodeObject *)bb;
 
-	if (a->length != b->length)
+	if (PyUnicode_GET_SIZE(a) != PyUnicode_GET_SIZE(b))
 		return 0;
-	if (a->length == 0)
+	if (PyUnicode_GET_SIZE(a) == 0)
 		return 1;
 	if (a->str[0] != b->str[0])
 		return 0;
-	if (a->length == 1)
+	if (PyUnicode_GET_SIZE(a) == 1)
 		return 1;
-	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
+	return memcmp(a->str, b->str, PyUnicode_GET_DATA_SIZE(a)) == 0;
 }
diff -r 0ea77fa9f392 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Sat May 23 21:36:27 2009 +0200
+++ b/Objects/unicodeobject.c	Sun May 24 01:08:40 2009 +0200
@@ -46,32 +46,39 @@ OF OR IN CONNECTION WITH THE USE OR PERF
 #include "unicodeobject.h"
 #include "ucnhash.h"
 
+#include <stddef.h>
+
 #ifdef MS_WINDOWS
 #include <windows.h>
 #endif
 
-/* Limit for the Unicode object free list */
-
-#define PyUnicode_MAXFREELIST 1024
-
-/* Limit for the Unicode object free list stay alive optimization.
+/* PyUnicodeObject_SIZE gives the basic physical size of a unicode string;
+ any memory allocation for a string of length n should request
+ (PyUnicodeObject_SIZE + n * sizeof(Py_UNICODE)) bytes.
+
+ Using PyUnicodeObject_SIZE instead of sizeof(PyUnicodeObject) saves
+ 3 bytes per string allocation on a typical system.
+*/
+#define PyUnicodeObject_SIZE (offsetof(PyUnicodeObject, str) + sizeof(Py_UNICODE))
+
+
+/* Number of free lists, one per unicode object size.
 
 The implementation will keep allocated Unicode memory intact for
- all objects on the free list having a size less than this
- limit. This reduces malloc() overhead for small Unicode objects.
-
- At worst this will result in PyUnicode_MAXFREELIST *
- (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
- malloc()-overhead) bytes of unused garbage.
+ objects having a size less than this limit, within a certain number
+ of objects for each size (as defined by the CAN_SAVE macro below).
 
 Setting the limit to 0 effectively turns the feature off.
-
- Note: This is an experimental feature ! If you get core dumps when
- using Unicode objects, turn this feature off.
-
-*/
-
-#define KEEPALIVE_SIZE_LIMIT 9
+*/
+
+#define MAX_SAVED_SIZE 150
+
+/* We keep lots of small objects in the free lists, but fewer large ones. */
+
+#define CAN_SAVE(obj_length, list_size) \
+ ((obj_length < 20 && list_size < 50) \
+ || (obj_length < 80 && list_size < 4) \
+ || (list_size < 1))
 
 /* Endianness switches; defaults to little endian */
 
@@ -103,9 +110,8 @@ extern "C" {
 */
 static PyObject *interned;
 
-/* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+/* Free lists for Unicode objects */
+static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE];
 
 /* The empty Unicode object is shared to improve performance. */
 static PyUnicodeObject *unicode_empty;
@@ -247,62 +253,73 @@ Py_LOCAL_INLINE(int) unicode_member(Py_U
 /* --- Unicode Object ----------------------------------------------------- */
 
 static
-int unicode_resize(register PyUnicodeObject *unicode,
- Py_ssize_t length)
-{
- void *oldstr;
-
- /* Shortcut if there's nothing much to do. */
- if (unicode->length == length)
+PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
+
+static
+PyUnicodeObject *unicode_resize(register PyUnicodeObject *unicode,
+ Py_ssize_t length)
+{
+ PyUnicodeObject *v;
+
+ /* Optimization for empty strings; yes, this sometimes happens. */
+ if (length == 0 && unicode_empty != NULL) {
+ Py_DECREF(unicode);
+ Py_INCREF(unicode_empty);
+ return unicode_empty;
+ }
+
+ /* Resizing unicode_empty and single character objects is not
+ possible since these are being shared. We simply return a fresh
+ copy with the same Unicode content. */
+ if (PyUnicode_GET_SIZE(unicode) != length &&
+ (unicode == unicode_empty || PyUnicode_GET_SIZE(unicode) == 1)) {
+ v = _PyUnicode_New(length);
+ if (v == NULL)
+ return NULL;
+ Py_UNICODE_COPY(v->str, unicode->str,
+ length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode));
+ Py_DECREF(unicode);
+ return v;
+ }
+
+ /* PyObject_REALLOC will almost always return a new memory block, so try
+ to find an existing one instead */
+ if (length < MAX_SAVED_SIZE && (v = unicode_freelist[length])) {
+ unicode_freelist[length] = (PyUnicodeObject *) v->defenc;
+ v->defenc = NULL;
+ v->state = 0;
+ Py_UNICODE_COPY(v->str, unicode->str,
+ length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode));
+ Py_DECREF(unicode);
 goto reset;
-
- /* Resizing shared object (unicode_empty or single character
- objects) in-place is not allowed. Use PyUnicode_Resize()
- instead ! */
-
- if (unicode == unicode_empty ||
- (unicode->length == 1 &&
- unicode->str[0] < 256U && - unicode_latin1[unicode->str[0]] == unicode)) {
- PyErr_SetString(PyExc_SystemError,
- "can't resize shared str objects");
- return -1;
- }
-
- /* We allocate one more byte to make sure the string is Ux0000 terminated.
- The overallocation is also used by fastsearch, which assumes that it's
- safe to look at str[length] (without making any assumptions about what
- it contains). */
-
- oldstr = unicode->str;
- unicode->str = PyObject_REALLOC(unicode->str,
- sizeof(Py_UNICODE) * (length + 1));
- if (!unicode->str) {
- unicode->str = (Py_UNICODE *)oldstr;
+ }
+
+ /* Adapted from similar code in tupleobject */
+ _Py_DEC_REFTOTAL;
+ _Py_ForgetReference(unicode);
+ v = (PyUnicodeObject *) PyObject_REALLOC((char *) unicode,
+ PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE));
+ if (v == NULL) {
+ PyObject_DEL(unicode);
 PyErr_NoMemory();
- return -1;
- }
- unicode->str[length] = 0;
- unicode->length = length;
-
+ return NULL;
+ }
+ Py_CLEAR(v->defenc);
 reset:
- /* Reset the object caches */
- if (unicode->defenc) {
- Py_DECREF(unicode->defenc);
- unicode->defenc = NULL;
- }
- unicode->hash = -1;
-
- return 0;
-}
-
-/* We allocate one more byte to make sure the string is
- Ux0000 terminated; some code (e.g. new_identifier)
- relies on that.
+ Py_SIZE(v) = length;
+ v->str[length] = 0;
+ v->hash = -1;
+ _Py_NewReference(v);
+ return v;
+}
+
+/* We allocate one more byte to make sure the string is Ux0000 terminated.
+ The overallocation is also used by fastsearch, which assumes that it's
+ safe to look at str[length] (without making any assumptions about what
+ it contains). 
 
 XXX This allocator could further be enhanced by assuring that the
 free list never reduces its size below 1.
-
 */
 
 static
@@ -322,38 +339,22 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize
 }
 
 /* Unicode freelist & memory allocation */
- if (free_list) {
- unicode = free_list;
- free_list = *(PyUnicodeObject **)unicode;
- numfree--;
- if (unicode->str) {
- /* Keep-Alive optimization: we only upsize the buffer,
- never downsize it. */
- if ((unicode->length < length) &&
- unicode_resize(unicode, length) < 0) {
- PyObject_DEL(unicode->str);
- unicode->str = NULL;
- }
- }
- else {
- size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
- unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
- }
- PyObject_INIT(unicode, &PyUnicode_Type);
- }
- else {
- size_t new_size;
- unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
- if (unicode == NULL)
- return NULL;
- new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
- unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
- }
-
- if (!unicode->str) {
- PyErr_NoMemory();
- goto onError;
- }
+ if (length < MAX_SAVED_SIZE
+ && (unicode = unicode_freelist[length])) {
+ _Py_NewReference(unicode);
+ unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc;
+ }
+ else {
+ /* Inline PyObject_NewVar */
+ unicode = (PyUnicodeObject *) PyObject_MALLOC(
+ PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE));
+ if (!unicode) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ PyObject_INIT_VAR(unicode, &PyUnicode_Type, length);
+ }
+
 /* Initialize the first element to guard against cases where
 * the caller fails before initializing str -- unicode_resize()
 * reads str[0], and the Keep-Alive optimization can keep memory
@@ -363,23 +364,18 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize
 */
 unicode->str[0] = 0;
 unicode->str[length] = 0;
- unicode->length = length;
+ Py_SIZE(unicode) = length;
 unicode->hash = -1;
 unicode->state = 0;
 unicode->defenc = NULL;
 return unicode;
-
- onError:
- /* XXX UNREF/NEWREF interface should be more symmetrical */
- _Py_DEC_REFTOTAL;
- _Py_ForgetReference((PyObject *)unicode);
- PyObject_Del(unicode);
- return NULL;
 }
 
 static
 void unicode_dealloc(register PyUnicodeObject *unicode)
 {
+ Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
+
 switch (PyUnicode_CHECK_INTERNED(unicode)) {
 case SSTATE_NOT_INTERNED:
 break;
@@ -399,28 +395,20 @@ void unicode_dealloc(register PyUnicodeO
 Py_FatalError("Inconsistent interned string state.");
 }
 
- if (PyUnicode_CheckExact(unicode) &&
- numfree < PyUnicode_MAXFREELIST) {
- /* Keep-Alive optimization */
- if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
- PyObject_DEL(unicode->str);
- unicode->str = NULL;
- unicode->length = 0;
- }
- if (unicode->defenc) {
- Py_DECREF(unicode->defenc);
- unicode->defenc = NULL;
- }
- /* Add to free list */
- *(PyUnicodeObject **)unicode = free_list;
- free_list = unicode;
- numfree++;
- }
- else {
- PyObject_DEL(unicode->str);
- Py_XDECREF(unicode->defenc);
- Py_TYPE(unicode)->tp_free((PyObject *)unicode);
- }
+ Py_CLEAR(unicode->defenc);
+
+ if (PyUnicode_CheckExact(unicode) && length < MAX_SAVED_SIZE) {
+ PyUnicodeObject *v = unicode_freelist[length];
+ if (!v || CAN_SAVE(length, PyUnicode_GET_SIZE(v))) {
+ /* Keep track of number of items stacked on the freelist */
+ Py_SIZE(unicode) = v ? PyUnicode_GET_SIZE(v) + 1 : 1;
+ unicode->defenc = (PyObject *) v;
+ unicode_freelist[length] = unicode;
+ return;
+ }
+ }
+
+ Py_TYPE(unicode)->tp_free((PyObject *)unicode);
 }
 
 static
@@ -439,29 +427,16 @@ int _PyUnicode_Resize(PyUnicodeObject **
 return -1;
 }
 
- /* Resizing unicode_empty and single character objects is not
- possible since these are being shared. We simply return a fresh
- copy with the same Unicode content. */
- if (v->length != length &&
- (v == unicode_empty || v->length == 1)) {
- PyUnicodeObject *w = _PyUnicode_New(length);
- if (w == NULL)
- return -1;
- Py_UNICODE_COPY(w->str, v->str,
- length < v->length ? length : v->length);
- Py_DECREF(*unicode);
- *unicode = w;
- return 0;
- }
-
- /* Note that we don't have to modify *unicode for unshared Unicode
- objects, since we can modify them in-place. */
- return unicode_resize(v, length);
+ v = unicode_resize(v, length);
+ if (v == NULL)
+ return -1;
+ *unicode = v;
+ return 0;
 }
 
 int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length)
 {
- return _PyUnicode_Resize((PyUnicodeObject **)unicode, length);
+ return _PyUnicode_Resize((PyUnicodeObject **) unicode, length);
 }
 
 PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
@@ -760,14 +735,12 @@ PyUnicode_FromFormatV(const char *format
 width = (width*10) + *f++ - '0';
 while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
 ;
-
 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 * they don't affect the amount of space we reserve.
 */
 if ((*f == 'l' || *f == 'z') &&
 (f[1] == 'd' || f[1] == 'u'))
 ++f;
-
 switch (*f) {
 case 'c':
 (void)va_arg(count, int);
@@ -898,7 +871,6 @@ PyUnicode_FromFormatV(const char *format
 string = PyUnicode_FromUnicode(NULL, n);
 if (!string)
 goto fail;
-
 s = PyUnicode_AS_UNICODE(string);
 callresult = callresults;
 
@@ -5811,13 +5783,13 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *
 /* helper macro to fixup start/end slice values */
 #define FIX_START_END(obj) \
 if (start < 0) \ - start += (obj)->length; \
+ start += PyUnicode_GET_SIZE(obj); \
 if (start < 0) \ start = 0; \ - if (end> (obj)->length) \
- end = (obj)->length; \
+ if (end> PyUnicode_GET_SIZE(obj)) \
+ end = PyUnicode_GET_SIZE(obj); \
 if (end < 0) \ - end += (obj)->length; \
+ end += PyUnicode_GET_SIZE(obj); \
 if (end < 0) \ end = 0; @@ -5842,7 +5814,7 @@ Py_ssize_t PyUnicode_Count(PyObject *str FIX_START_END(str_obj); result = stringlib_count( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length
+ str_obj->str + start, end - start, sub_obj->str, PyUnicode_GET_SIZE(sub_obj)
 );
 
 Py_DECREF(sub_obj);
@@ -5894,12 +5866,12 @@ int tailmatch(PyUnicodeObject *self,
 Py_ssize_t end,
 int direction)
 {
- if (substring->length == 0)
+ if (PyUnicode_GET_SIZE(substring) == 0)
 return 1;
 
 FIX_START_END(self);
 
- end -= substring->length;
+ end -= PyUnicode_GET_SIZE(substring);
 if (end < start) return 0; @@ -5949,11 +5921,11 @@ PyObject *fixup(PyUnicodeObject *self, PyUnicodeObject *u; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+ u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
 if (u == NULL)
 return NULL;
 
- Py_UNICODE_COPY(u->str, self->str, self->length);
+ Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
 
 if (!fixfct(u) && PyUnicode_CheckExact(self)) {
 /* fixfct should return TRUE if it modified the buffer. If
@@ -5969,7 +5941,7 @@ PyObject *fixup(PyUnicodeObject *self,
 static
 int fixupper(PyUnicodeObject *self)
 {
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 Py_UNICODE *s = self->str;
 int status = 0;
 
@@ -5990,7 +5962,7 @@ int fixupper(PyUnicodeObject *self)
 static
 int fixlower(PyUnicodeObject *self)
 {
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 Py_UNICODE *s = self->str;
 int status = 0;
 
@@ -6011,7 +5983,7 @@ int fixlower(PyUnicodeObject *self)
 static
 int fixswapcase(PyUnicodeObject *self)
 {
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 Py_UNICODE *s = self->str;
 int status = 0;
 
@@ -6032,7 +6004,7 @@ int fixswapcase(PyUnicodeObject *self)
 static
 int fixcapitalize(PyUnicodeObject *self)
 {
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 Py_UNICODE *s = self->str;
 int status = 0;
 
@@ -6211,6 +6183,7 @@ PyUnicodeObject *pad(PyUnicodeObject *se
 Py_UNICODE fill)
 {
 PyUnicodeObject *u;
+ Py_ssize_t length = PyUnicode_GET_SIZE(self);
 
 if (left < 0)
 left = 0;
@@ -6222,18 +6195,18 @@ PyUnicodeObject *pad(PyUnicodeObject *se
 return self;
 }
 
- if (left > PY_SSIZE_T_MAX - self->length ||
- right > PY_SSIZE_T_MAX - (left + self->length)) {
+ if (left > PY_SSIZE_T_MAX - length ||
+ right > PY_SSIZE_T_MAX - (left + length)) {
 PyErr_SetString(PyExc_OverflowError, "padded string is too long");
 return NULL;
 }
- u = _PyUnicode_New(left + self->length + right);
+ u = _PyUnicode_New(left + length + right);
 if (u) {
 if (left)
 Py_UNICODE_FILL(u->str, fill, left);
- Py_UNICODE_COPY(u->str + left, self->str, self->length);
+ Py_UNICODE_COPY(u->str + left, self->str, length);
 if (right)
- Py_UNICODE_FILL(u->str + left + self->length, fill, right);
+ Py_UNICODE_FILL(u->str + left + length, fill, right);
 }
 
 return u;
@@ -6257,7 +6230,7 @@ PyObject *split_whitespace(PyUnicodeObje
 {
 register Py_ssize_t i;
 register Py_ssize_t j;
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 PyObject *str;
 register const Py_UNICODE *buf = self->str;
 
@@ -6349,7 +6322,7 @@ PyObject *split_char(PyUnicodeObject *se
 {
 register Py_ssize_t i;
 register Py_ssize_t j;
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 PyObject *str;
 register const Py_UNICODE *buf = self->str;
 
@@ -6380,8 +6353,8 @@ PyObject *split_substring(PyUnicodeObjec
 {
 register Py_ssize_t i;
 register Py_ssize_t j;
- Py_ssize_t len = self->length;
- Py_ssize_t sublen = substring->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
+ Py_ssize_t sublen = PyUnicode_GET_SIZE(substring);
 PyObject *str;
 
 for (i = j = 0; i <= len - sublen; ) { @@ -6410,7 +6383,7 @@ PyObject *rsplit_whitespace(PyUnicodeObj { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 PyObject *str;
 register const Py_UNICODE *buf = self->str;
 
@@ -6450,7 +6423,7 @@ PyObject *rsplit_char(PyUnicodeObject *s
 {
 register Py_ssize_t i;
 register Py_ssize_t j;
- Py_ssize_t len = self->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
 PyObject *str;
 register const Py_UNICODE *buf = self->str;
 
@@ -6483,8 +6456,8 @@ PyObject *rsplit_substring(PyUnicodeObje
 {
 register Py_ssize_t i;
 register Py_ssize_t j;
- Py_ssize_t len = self->length;
- Py_ssize_t sublen = substring->length;
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
+ Py_ssize_t sublen = PyUnicode_GET_SIZE(substring);
 PyObject *str;
 
 for (i = len - sublen, j = len; i>= 0; ) {
@@ -6528,10 +6501,10 @@ PyObject *split(PyUnicodeObject *self,
 if (substring == NULL)
 return split_whitespace(self,list,maxcount);
 
- else if (substring->length == 1)
+ else if (PyUnicode_GET_SIZE(substring) == 1)
 return split_char(self,list,substring->str[0],maxcount);
 
- else if (substring->length == 0) {
+ else if (PyUnicode_GET_SIZE(substring) == 0) {
 Py_DECREF(list);
 PyErr_SetString(PyExc_ValueError, "empty separator");
 return NULL;
@@ -6557,10 +6530,10 @@ PyObject *rsplit(PyUnicodeObject *self,
 if (substring == NULL)
 return rsplit_whitespace(self,list,maxcount);
 
- else if (substring->length == 1)
+ else if (PyUnicode_GET_SIZE(substring) == 1)
 return rsplit_char(self,list,substring->str[0],maxcount);
 
- else if (substring->length == 0) {
+ else if (PyUnicode_GET_SIZE(substring) == 0) {
 Py_DECREF(list);
 PyErr_SetString(PyExc_ValueError, "empty separator");
 return NULL;
@@ -6580,21 +6553,21 @@ PyObject *replace(PyUnicodeObject *self,
 if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (str1->length == str2->length) {
+ if (PyUnicode_GET_SIZE(str1) == PyUnicode_GET_SIZE(str2)) {
 /* same length */
 Py_ssize_t i;
- if (str1->length == 1) {
+ if (PyUnicode_GET_SIZE(str1) == 1) {
 /* replace characters */
 Py_UNICODE u1, u2;
- if (!findchar(self->str, self->length, str1->str[0]))
+ if (!findchar(self->str, PyUnicode_GET_SIZE(self), str1->str[0]))
 goto nothing;
- u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+ u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
 if (!u)
 return NULL;
- Py_UNICODE_COPY(u->str, self->str, self->length);
+ Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
 u1 = str1->str[0];
 u2 = str2->str[0];
- for (i = 0; i < u->length; i++)
+ for (i = 0; i < PyUnicode_GET_SIZE(u); i++) if (u->str[i] == u1) {
 if (--maxcount < 0) break; @@ -6602,20 +6575,20 @@ PyObject *replace(PyUnicodeObject *self, } } else { i = fastsearch( - self->str, self->length, str1->str, str1->length, FAST_SEARCH
+ self->str, PyUnicode_GET_SIZE(self), str1->str, PyUnicode_GET_SIZE(str1), FAST_SEARCH
 );
 if (i < 0) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+ u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
 if (!u)
 return NULL;
- Py_UNICODE_COPY(u->str, self->str, self->length);
- while (i <= self->length - str1->length)
+ Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
+ while (i <= PyUnicode_GET_SIZE(self) - PyUnicode_GET_SIZE(str1)) if (Py_UNICODE_MATCH(self, i, str1)) { if (--maxcount < 0) break; - Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
- i += str1->length;
+ Py_UNICODE_COPY(u->str+i, str2->str, PyUnicode_GET_SIZE(str2));
+ i += PyUnicode_GET_SIZE(str1);
 } else
 i++;
 }
@@ -6626,23 +6599,23 @@ PyObject *replace(PyUnicodeObject *self,
 Py_UNICODE *p;
 
 /* replace strings */
- n = stringlib_count(self->str, self->length, str1->str, str1->length);
+ n = stringlib_count(self->str, PyUnicode_GET_SIZE(self), str1->str, PyUnicode_GET_SIZE(str1));
 if (n> maxcount)
 n = maxcount;
 if (n == 0)
 goto nothing;
- /* new_size = self->length + n * (str2->length - str1->length)); */
- delta = (str2->length - str1->length);
+ /* new_size = PyUnicode_GET_SIZE(self) + n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))); */
+ delta = (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1));
 if (delta == 0) {
- new_size = self->length;
+ new_size = PyUnicode_GET_SIZE(self);
 } else {
- product = n * (str2->length - str1->length);
- if ((product / (str2->length - str1->length)) != n) {
+ product = n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1));
+ if ((product / (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))) != n) {
 PyErr_SetString(PyExc_OverflowError,
 "replace string is too long");
 return NULL;
 }
- new_size = self->length + product;
+ new_size = PyUnicode_GET_SIZE(self) + product;
 if (new_size < 0) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); @@ -6654,8 +6627,8 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str;
- e = self->length - str1->length;
- if (str1->length> 0) {
+ e = PyUnicode_GET_SIZE(self) - PyUnicode_GET_SIZE(str1);
+ if (PyUnicode_GET_SIZE(str1)> 0) {
 while (n--> 0) {
 /* look for next match */
 j = i;
@@ -6672,25 +6645,25 @@ PyObject *replace(PyUnicodeObject *self,
 p += j - i;
 }
 /* copy substitution string */
- if (str2->length> 0) {
- Py_UNICODE_COPY(p, str2->str, str2->length);
- p += str2->length;
- }
- i = j + str1->length;
- }
- if (i < self->length)
+ if (PyUnicode_GET_SIZE(str2)> 0) {
+ Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2));
+ p += PyUnicode_GET_SIZE(str2);
+ }
+ i = j + PyUnicode_GET_SIZE(str1);
+ }
+ if (i < PyUnicode_GET_SIZE(self)) /* copy tail [i:] */ - Py_UNICODE_COPY(p, self->str+i, self->length-i);
+ Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i);
 } else {
 /* interleave */
 while (n> 0) {
- Py_UNICODE_COPY(p, str2->str, str2->length);
- p += str2->length;
+ Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2));
+ p += PyUnicode_GET_SIZE(str2);
 if (--n <= 0) break; *p++ = self->str[i++];
 }
- Py_UNICODE_COPY(p, self->str+i, self->length-i);
+ Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i);
 }
 }
 return (PyObject *) u;
@@ -6701,7 +6674,7 @@ PyObject *replace(PyUnicodeObject *self,
 Py_INCREF(self);
 return (PyObject *) self;
 }
- return PyUnicode_FromUnicode(self->str, self->length);
+ return PyUnicode_FromUnicode(self->str, PyUnicode_GET_SIZE(self));
 }
 
 /* --- Unicode Object Methods --------------------------------------------- */
@@ -6811,12 +6784,12 @@ unicode_center(PyUnicodeObject *self, Py
 if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar))
 return NULL;
 
- if (self->length>= width && PyUnicode_CheckExact(self)) {
+ if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) {
 Py_INCREF(self);
 return (PyObject*) self;
 }
 
- marg = width - self->length;
+ marg = width - PyUnicode_GET_SIZE(self);
 left = marg / 2 + (marg & width & 1);
 
 return (PyObject*) pad(self, left, marg - left, fillchar);
@@ -6848,8 +6821,8 @@ unicode_compare(PyUnicodeObject *str1, P
 Py_UNICODE *s1 = str1->str;
 Py_UNICODE *s2 = str2->str;
 
- len1 = str1->length;
- len2 = str2->length;
+ len1 = PyUnicode_GET_SIZE(str1);
+ len2 = PyUnicode_GET_SIZE(str2);
 
 while (len1> 0 && len2> 0) {
 Py_UNICODE c1, c2;
@@ -6882,8 +6855,8 @@ unicode_compare(PyUnicodeObject *str1, P
 Py_UNICODE *s1 = str1->str;
 Py_UNICODE *s2 = str2->str;
 
- len1 = str1->length;
- len2 = str2->length;
+ len1 = PyUnicode_GET_SIZE(str1);
+ len2 = PyUnicode_GET_SIZE(str2);
 
 while (len1> 0 && len2> 0) {
 Py_UNICODE c1, c2;
@@ -6945,8 +6918,7 @@ PyObject *PyUnicode_RichCompare(PyObject
 
 if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
 PyObject *v;
- if (((PyUnicodeObject *) left)->length !=
- ((PyUnicodeObject *) right)->length) {
+ if (PyUnicode_GET_SIZE(left) != PyUnicode_GET_SIZE(right)) {
 if (op == Py_EQ) {
 Py_INCREF(Py_False);
 return Py_False;
@@ -7049,11 +7021,11 @@ PyObject *PyUnicode_Concat(PyObject *lef
 }
 
 /* Concat the two Unicode strings */
- w = _PyUnicode_New(u->length + v->length);
+ w = _PyUnicode_New(PyUnicode_GET_SIZE(u) + PyUnicode_GET_SIZE(v));
 if (w == NULL)
 goto onError;
- Py_UNICODE_COPY(w->str, u->str, u->length);
- Py_UNICODE_COPY(w->str + u->length, v->str, v->length);
+ Py_UNICODE_COPY(w->str, u->str, PyUnicode_GET_SIZE(u));
+ Py_UNICODE_COPY(w->str + PyUnicode_GET_SIZE(u), v->str, PyUnicode_GET_SIZE(v));
 
 Py_DECREF(u);
 Py_DECREF(v);
@@ -7116,7 +7088,7 @@ unicode_count(PyUnicodeObject *self, PyO
 
 result = PyLong_FromSsize_t(
 stringlib_count(self->str + start, end - start,
- substring->str, substring->length)
+ substring->str, PyUnicode_GET_SIZE(substring))
 );
 
 Py_DECREF(substring);
@@ -7183,7 +7155,7 @@ unicode_expandtabs(PyUnicodeObject *self
 /* First pass: determine size of output string */
 i = 0; /* chars up to and including most recent \n or \r */
 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
- e = self->str + self->length; /* end of input */
+ e = self->str + PyUnicode_GET_SIZE(self); /* end of input */
 for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize> 0) {
@@ -7215,7 +7187,7 @@ unicode_expandtabs(PyUnicodeObject *self
 
 j = 0; /* same as in first pass */
 q = u->str; /* next output char */
- qe = u->str + u->length; /* end of output */
+ qe = u->str + PyUnicode_GET_SIZE(u); /* end of output */
 
 for (p = self->str; p < e; p++)
 if (*p == '\t') {
@@ -7281,7 +7253,7 @@ unicode_find(PyUnicodeObject *self, PyOb
 static PyObject *
 unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
 {
- if (index < 0 || index >= self->length) {
+ if (index < 0 || index >= PyUnicode_GET_SIZE(self)) {
 PyErr_SetString(PyExc_IndexError, "string index out of range");
 return NULL;
 }
@@ -7716,7 +7688,7 @@ unicode_join(PyObject *self, PyObject *d
 static Py_ssize_t
 unicode_length(PyUnicodeObject *self)
 {
- return self->length;
+ return PyUnicode_GET_SIZE(self);
 }
 
 PyDoc_STRVAR(ljust__doc__,
@@ -7734,12 +7706,12 @@ unicode_ljust(PyUnicodeObject *self, PyO
 if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar))
 return NULL;
 
- if (self->length>= width && PyUnicode_CheckExact(self)) {
+ if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) {
 Py_INCREF(self);
 return (PyObject*) self;
 }
 
- return (PyObject*) pad(self, 0, width - self->length, fillchar);
+ return (PyObject*) pad(self, 0, width - PyUnicode_GET_SIZE(self), fillchar);
 }
 
 PyDoc_STRVAR(lower__doc__,
@@ -7922,8 +7894,8 @@ unicode_repeat(PyUnicodeObject *str, Py_
 /* ensure # of chars needed doesn't overflow int and # of bytes
 * needed doesn't overflow size_t
 */
- nchars = len * str->length;
- if (nchars / len != str->length) {
+ nchars = len * PyUnicode_GET_SIZE(str);
+ if (len && nchars / len != PyUnicode_GET_SIZE(str)) {
 PyErr_SetString(PyExc_OverflowError,
 "repeated string is too long");
 return NULL;
@@ -7940,11 +7912,14 @@ unicode_repeat(PyUnicodeObject *str, Py_
 
 p = u->str;
 
- if (str->length == 1) {
+ if (PyUnicode_GET_SIZE(str) == 1 && len> 0) {
 Py_UNICODE_FILL(p, str->str[0], len);
 } else {
- Py_ssize_t done = str->length; /* number of characters copied this far */
- Py_UNICODE_COPY(p, str->str, str->length);
+ Py_ssize_t done = 0; /* number of characters copied this far */
+ if (done < nchars) { + Py_UNICODE_COPY(p, str->str, PyUnicode_GET_SIZE(str));
+ done = PyUnicode_GET_SIZE(str);
+ }
 while (done < nchars) {
 Py_ssize_t n = (done <= nchars-done) ? done : nchars-done;
 Py_UNICODE_COPY(p+done, p, n);
@@ -8248,12 +8223,12 @@ unicode_rjust(PyUnicodeObject *self, PyO
 if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar))
 return NULL;
 
- if (self->length >= width && PyUnicode_CheckExact(self)) {
+ if (PyUnicode_GET_SIZE(self)>= width && PyUnicode_CheckExact(self)) {
 Py_INCREF(self);
 return (PyObject*) self;
 }
 
- return (PyObject*) pad(self, width - self->length, 0, fillchar);
+ return (PyObject*) pad(self, width - PyUnicode_GET_SIZE(self), 0, fillchar);
 }
 
 PyObject *PyUnicode_Split(PyObject *s,
@@ -8594,7 +8569,7 @@ are deleted.");
 static PyObject*
 unicode_translate(PyUnicodeObject *self, PyObject *table)
 {
- return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
+ return PyUnicode_TranslateCharmap(self->str, PyUnicode_GET_SIZE(self), table, "ignore");
 }
 
 PyDoc_STRVAR(upper__doc__,
@@ -8624,7 +8599,7 @@ unicode_zfill(PyUnicodeObject *self, PyO
 if (!PyArg_ParseTuple(args, "n:zfill", &width))
 return NULL;
 
- if (self->length>= width) {
+ if (PyUnicode_GET_SIZE(self)>= width) {
 if (PyUnicode_CheckExact(self)) {
 Py_INCREF(self);
 return (PyObject*) self;
@@ -8636,7 +8611,7 @@ unicode_zfill(PyUnicodeObject *self, PyO
 );
 }
 
- fill = width - self->length;
+ fill = width - PyUnicode_GET_SIZE(self);
 
 u = pad(self, fill, 0, '0');
 
@@ -8652,14 +8627,6 @@ unicode_zfill(PyUnicodeObject *self, PyO
 return (PyObject*) u;
 }
 
-#if 0
-static PyObject*
-unicode_freelistsize(PyUnicodeObject *self)
-{
- return PyLong_FromLong(numfree);
-}
-#endif
-
 PyDoc_STRVAR(startswith__doc__,
 "S.startswith(prefix[, start[, end]]) -> bool\n\
 \n\
@@ -8779,8 +8746,8 @@ PyDoc_STRVAR(p_format__doc__,
 static PyObject *
 unicode__sizeof__(PyUnicodeObject *v)
 {
- return PyLong_FromSsize_t(sizeof(PyUnicodeObject) +
- sizeof(Py_UNICODE) * (v->length + 1));
+ return PyLong_FromSsize_t(PyUnicodeObject_SIZE +
+ sizeof(Py_UNICODE) * PyUnicode_GET_SIZE(v));
 }
 
 PyDoc_STRVAR(sizeof__doc__,
@@ -8789,7 +8756,7 @@ PyDoc_STRVAR(sizeof__doc__,
 static PyObject *
 unicode_getnewargs(PyUnicodeObject *v)
 {
- return Py_BuildValue("(u#)", v->str, v->length);
+ return Py_BuildValue("(u#)", v->str, PyUnicode_GET_SIZE(v));
 }
 
 
@@ -8909,7 +8876,7 @@ unicode_subscript(PyUnicodeObject* self,
 
 if (slicelength <= 0) {
 return PyUnicode_FromUnicode(NULL, 0);
- } else if (start == 0 && step == 1 && slicelength == self->length &&
+ } else if (start == 0 && step == 1 && slicelength == PyUnicode_GET_SIZE(self) &&
 PyUnicode_CheckExact(self)) {
 Py_INCREF(self);
 return (PyObject *)self;
@@ -9543,22 +9510,13 @@ unicode_subtype_new(PyTypeObject *type, 
 if (tmp == NULL)
 return NULL;
 assert(PyUnicode_Check(tmp));
- pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
- if (pnew == NULL) {
+ pnew = (PyUnicodeObject *) type->tp_alloc(type, n = PyUnicode_GET_SIZE(tmp));
+ if (pnew != NULL) {
+ Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
+ Py_SIZE(pnew) = n;
+ pnew->hash = tmp->hash;
 Py_DECREF(tmp);
- return NULL;
- }
- pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1));
- if (pnew->str == NULL) {
- _Py_ForgetReference((PyObject *)pnew);
- PyObject_Del(pnew);
- Py_DECREF(tmp);
- return PyErr_NoMemory();
- }
- Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
- pnew->length = n;
- pnew->hash = tmp->hash;
- Py_DECREF(tmp);
+ }
 return (PyObject *)pnew;
 }
 
@@ -9573,9 +9531,9 @@ static PyObject *unicode_iter(PyObject *
 
 PyTypeObject PyUnicode_Type = {
 PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str", /* tp_name */
- sizeof(PyUnicodeObject), /* tp_size */
- 0, /* tp_itemsize */
+ "str", /* tp_name */
+ sizeof(PyUnicodeObject), /* tp_size */
+ sizeof(Py_UNICODE), /* tp_itemsize */
 /* Slots */
 (destructor)unicode_dealloc, /* tp_dealloc */
 0, /* tp_print */
@@ -9634,8 +9592,6 @@ void _PyUnicode_Init(void)
 };
 
 /* Init the implementation */
- free_list = NULL;
- numfree = 0;
 unicode_empty = _PyUnicode_New(0);
 if (!unicode_empty)
 return;
@@ -9658,21 +9614,20 @@ void _PyUnicode_Init(void)
 int
 PyUnicode_ClearFreeList(void)
 {
- int freelist_size = numfree;
- PyUnicodeObject *u;
-
- for (u = free_list; u != NULL;) {
- PyUnicodeObject *v = u;
- u = *(PyUnicodeObject **)u;
- if (v->str)
- PyObject_DEL(v->str);
- Py_XDECREF(v->defenc);
- PyObject_Del(v);
- numfree--;
- }
- free_list = NULL;
- assert(numfree == 0);
- return freelist_size;
+ int i, freed_objects = 0;
+ for (i = 0; i < MAX_SAVED_SIZE; i++) {
+ PyUnicodeObject *u, *v;
+ u = unicode_freelist[i];
+ while (u != NULL) {
+ v = (PyUnicodeObject *) u->defenc;
+ Py_SIZE(u) = i;
+ PyObject_DEL(u);
+ u = v;
+ freed_objects++;
+ }
+ unicode_freelist[i] = NULL;
+ }
+ return freed_objects;
 }
 
 void
@@ -9791,11 +9746,11 @@ void _Py_ReleaseInternedUnicodeStrings(v
 break;
 case SSTATE_INTERNED_IMMORTAL:
 Py_REFCNT(s) += 1;
- immortal_size += s->length;
+ immortal_size += PyUnicode_GET_SIZE(s);
 break;
 case SSTATE_INTERNED_MORTAL:
 Py_REFCNT(s) += 2;
- mortal_size += s->length;
+ mortal_size += PyUnicode_GET_SIZE(s);
 break;
 default:
 Py_FatalError("Inconsistent interned string state.");
</div><div class="naked_ctrl">
<form action="/index.cgi/contrast" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://bugs.python.org/file14048/unialloc5.patch">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://bugs.python.org/file14048/unialloc5.patch" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast" selected="selected">配色反転</option>
<option value="larger-text">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>