diff -r a88310d86455 Include/bytesobject.h --- a/Include/bytesobject.h Sun Apr 14 19:22:47 2013 +0200 +++ b/Include/bytesobject.h Mon Apr 15 23:53:39 2013 +0200 @@ -122,6 +122,65 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertTh #define F_ALT (1<<3) #define F_ZERO (1<<4) +#ifndef Py_LIMITED_API +typedef struct { + char small_buffer[500]; /* inline storage, used while no bytes object has been allocated */ + + PyObject *buffer; /* bytes object holding the data, or NULL */ + char *str; /* start of the current storage: small_buffer or the data of buffer */ + Py_ssize_t size; /* writable capacity of str, in bytes */ + Py_ssize_t pos; /* number of bytes written so far */ + + Py_ssize_t min_size; /* lower bound for a heap allocation; only applied when overallocate is set */ + unsigned char overallocate; /* if non-zero, heap allocations get 25% extra room */ + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly; +} _PyBytesWriter ; + +/* Initialize a bytes writer. */ +PyAPI_FUNC(void) +_PyBytesWriter_Init(_PyBytesWriter *writer); + +/* Prepare the buffer to write 'count' bytes. + + Return 0 on success, raise an exception and return -1 on error. The macro expands WRITER and COUNT more than once: pass expressions without side effects. */ +#define _PyBytesWriter_Prepare(WRITER, COUNT) \ + (((COUNT) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((COUNT) == 0) \ + ? 0 \ + : _PyBytesWriter_PrepareInternal((WRITER), (COUNT)))) + +/* Don't call this function directly, use the _PyBytesWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyBytesWriter_PrepareInternal(_PyBytesWriter *writer, Py_ssize_t count); + +/* Append a bytes string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyBytesWriter_WriteStr(_PyBytesWriter *writer, + PyObject *str /* Bytes string */ + ); + +/* Append a byte. + Return 0 on success, or raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyBytesWriter_WriteChar(_PyBytesWriter *writer, + char byte + ); + +/* Get the value of the writer as a bytes object. Clear the + buffer of the writer. Raise an exception and return NULL on error. */ +PyAPI_FUNC(PyObject *) +_PyBytesWriter_Finish(_PyBytesWriter *writer); + +/* Deallocate memory of a writer (clear its internal buffer). 
*/ +PyAPI_FUNC(void) +_PyBytesWriter_Dealloc(_PyBytesWriter *writer); +#endif + #ifdef __cplusplus } #endif diff -r a88310d86455 Include/unicodeobject.h --- a/Include/unicodeobject.h Sun Apr 14 19:22:47 2013 +0200 +++ b/Include/unicodeobject.h Mon Apr 15 23:53:39 2013 +0200 @@ -964,7 +964,7 @@ PyAPI_FUNC(int) Py_ssize_t len /* length in bytes */ ); -/* Get the value of the write as an Unicode string. Clear the +/* Get the value of the writer as an Unicode string. Clear the buffer of the writer. Raise an exception and return NULL on error. */ PyAPI_FUNC(PyObject *) diff -r a88310d86455 Objects/bytesobject.c --- a/Objects/bytesobject.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Objects/bytesobject.c Mon Apr 15 23:53:39 2013 +0200 @@ -2975,3 +2975,169 @@ bytes_iter(PyObject *seq) _PyObject_GC_TRACK(it); return (PyObject *)it; } + +void +_PyBytesWriter_Init(_PyBytesWriter *writer) +{ + memset(writer, 0, sizeof(*writer)); /* zero every field of the writer */ +} + +Py_LOCAL_INLINE(void) +_PyBytesWriter_Update(_PyBytesWriter *writer) +{ /* Re-derive the cached str and size fields from buffer. */ + if (writer->buffer != NULL) { + if (!writer->readonly) + writer->size = PyBytes_GET_SIZE(writer->buffer); + else + writer->size = 0; /* read-only buffer: report no room, so any further write goes through PrepareInternal */ + writer->str = PyBytes_AS_STRING(writer->buffer); + } + else { + assert(writer->str == writer->small_buffer); + } +} + +int +_PyBytesWriter_PrepareInternal(_PyBytesWriter *writer, Py_ssize_t count) +{ /* Slow path of _PyBytesWriter_Prepare(): make room for count more bytes after pos. Returns 0, or -1 with an exception set. */ + Py_ssize_t newsize; + PyObject *newbuffer; + + assert(count> 0); + + if (count> PY_SSIZE_T_MAX - writer->pos) { /* pos + count would overflow Py_ssize_t */ + PyErr_NoMemory(); + return -1; + } + newsize = writer->pos + count; + + if (writer->buffer == NULL && newsize <= sizeof(writer->small_buffer)) { /* no bytes object yet and the request fits: use the inline buffer */ + assert(writer->pos == 0); + assert(!writer->readonly); + writer->str = writer->small_buffer; + writer->size = sizeof(writer->small_buffer); + } + else if (writer->buffer == NULL) { /* no bytes object yet: allocate one and carry over any inline data */ + assert(!writer->readonly); + if (writer->overallocate) { + /* overallocate 25% to limit the number of resizes */ + if (newsize <= (PY_SSIZE_T_MAX - newsize / 4)) + newsize += newsize / 4; + if (newsize < 
writer->min_size) + newsize = writer->min_size; + } + writer->buffer = PyBytes_FromStringAndSize(NULL, newsize); + if (writer->buffer == NULL) + return -1; + if (writer->str == writer->small_buffer) { + memcpy(PyBytes_AS_STRING(writer->buffer), + writer->str, writer->pos); + } + else { + assert(writer->pos == 0); + } + } + else if (newsize> writer->size) { /* a bytes object exists but offers too little writable room */ + if (writer->overallocate) { + /* overallocate 25% to limit the number of resizes */ + if (newsize <= (PY_SSIZE_T_MAX - newsize / 4)) + newsize += newsize / 4; + if (newsize < writer->min_size) + newsize = writer->min_size; + } + + if (writer->readonly) { + /* the shared string must not be modified: copy its first pos bytes into a new, larger bytes object */ + newbuffer = PyBytes_FromStringAndSize(NULL, newsize); + if (newbuffer == NULL) + return -1; + memcpy(PyBytes_AS_STRING(newbuffer), writer->str, writer->pos); + Py_DECREF(writer->buffer); + writer->readonly = 0; + writer->buffer = newbuffer; + } + else { + _PyBytes_Resize(&writer->buffer, newsize); + if (writer->buffer == NULL) + return -1; + } + } + else { + /* cannot happen if the _PyBytesWriter_Prepare() macro is used */ + assert(0); + } + _PyBytesWriter_Update(writer); + return 0; +} + +int +_PyBytesWriter_WriteStr(_PyBytesWriter *writer, PyObject *str) +{ /* Append the content of the bytes object str; a non-bytes argument raises TypeError. */ + Py_ssize_t len; + + if (!PyBytes_Check(str)) { + PyErr_Format(PyExc_TypeError, + "expected bytes, %.200s found", Py_TYPE(str)->tp_name); + return -1; + } + len = PyBytes_GET_SIZE(str); + + if (len == 0) + return 0; + if (len> writer->size - writer->pos) { + if (writer->buffer == NULL && !writer->overallocate) { /* keep a new reference to str itself instead of copying it. NOTE(review): this path does not check writer->pos; bytes already written to small_buffer appear to be left behind -- confirm */ + Py_INCREF(str); + writer->buffer = str; + writer->readonly = 1; + _PyBytesWriter_Update(writer); + writer->pos += len; + return 0; + } + if (_PyBytesWriter_PrepareInternal(writer, len) == -1) + return -1; + } + memcpy(writer->str + writer->pos, PyBytes_AS_STRING(str), len); + writer->pos += len; + return 0; +} + +int +_PyBytesWriter_WriteChar(_PyBytesWriter *writer, char byte) +{ + if (_PyBytesWriter_Prepare(writer, 1) < 0) + return -1; + writer->str[writer->pos] = byte; 
+ writer->pos++; + return 0; +} + +PyObject * +_PyBytesWriter_Finish(_PyBytesWriter *writer) +{ /* Return the pos bytes written so far as a bytes object, or NULL with an exception set. */ + if (writer->pos == 0) { + Py_XDECREF(writer->buffer); /* NOTE(review): buffer is released here but not reset to NULL, and the other return paths also leave it set; confirm no caller runs _PyBytesWriter_Dealloc() after Finish */ + return PyBytes_FromStringAndSize(NULL, 0); + } + if (writer->readonly) { + assert(PyBytes_GET_SIZE(writer->buffer) == writer->pos); + return writer->buffer; /* the shared string itself is the result, no copy is made */ + } + if (writer->str == writer->small_buffer) { + writer->buffer = PyBytes_FromStringAndSize(writer->str, writer->pos); /* data is still in the inline buffer: copy it into a new bytes object */ + } + else if (PyBytes_GET_SIZE(writer->buffer) != writer->pos) { + _PyBytes_Resize(&writer->buffer, writer->pos); /* resize the bytes object to exactly pos bytes */ + if (writer->buffer == NULL) + return NULL; + } + return writer->buffer; +} + +void +_PyBytesWriter_Dealloc(_PyBytesWriter *writer) +{ + if (writer->buffer != NULL) + Py_DECREF(writer->buffer); + _PyBytesWriter_Init(writer); /* back to the all-zero state set up by Init */ +} + diff -r a88310d86455 Objects/stringlib/codecs.h --- a/Objects/stringlib/codecs.h Sun Apr 14 19:22:47 2013 +0200 +++ b/Objects/stringlib/codecs.h Mon Apr 15 23:53:39 2013 +0200 @@ -260,13 +260,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicod Py_ssize_t size, const char *errors) { -#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ - Py_ssize_t i; /* index into s of next input byte */ - PyObject *result; /* result string object */ char *p; /* next free byte in output buffer */ - Py_ssize_t nallocated; /* number of result bytes allocated */ - Py_ssize_t nneeded; /* number of result bytes needed */ #if STRINGLIB_SIZEOF_CHAR> 1 PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -274,38 +269,26 @@ STRINGLIB(utf8_encoder)(PyObject *unicod #endif #if STRINGLIB_SIZEOF_CHAR == 1 const Py_ssize_t max_char_size = 2; - char stackbuf[MAX_SHORT_UNICHARS * 2]; #elif STRINGLIB_SIZEOF_CHAR == 2 const Py_ssize_t max_char_size = 3; - char stackbuf[MAX_SHORT_UNICHARS * 3]; #else /* STRINGLIB_SIZEOF_CHAR == 4 */ const Py_ssize_t max_char_size = 4; - char stackbuf[MAX_SHORT_UNICHARS * 4]; #endif + _PyBytesWriter writer; assert(size>= 0); - if (size <= MAX_SHORT_UNICHARS) { - /* Write into the stack 
buffer; nallocated can't overflow. - * At the end, we'll allocate exactly as much heap space as it - * turns out we need. - */ - nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); - result = NULL; /* will allocate after we're done */ - p = stackbuf; + _PyBytesWriter_Init(&writer); + if (size * max_char_size> PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + goto error; } - else { - if (size> PY_SSIZE_T_MAX / max_char_size) { - /* integer overflow */ - return PyErr_NoMemory(); - } - /* Overallocate on the heap, and give the excess back at the end. */ - nallocated = size * max_char_size; - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - return NULL; - p = PyBytes_AS_STRING(result); - } + writer.min_size = size * max_char_size; + writer.overallocate = 1; + + if (_PyBytesWriter_Prepare(&writer, writer.min_size) < 0) + goto error; + p = writer.str; for (i = 0; i < size;) { Py_UCS4 ch = data[i++]; @@ -329,6 +312,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicod Py_ssize_t newpos; Py_ssize_t repsize, k, startpos; startpos = i-1; + + + writer.pos = p - writer.str; + rep = unicode_encode_call_errorhandler( errors, &errorHandler, "utf-8", "surrogates not allowed", unicode, &exc, startpos, startpos+1, &newpos); @@ -341,29 +328,15 @@ STRINGLIB(utf8_encoder)(PyObject *unicod repsize = PyUnicode_GET_LENGTH(rep); if (repsize> max_char_size) { - Py_ssize_t offset; - - if (result == NULL) - offset = p - stackbuf; - else - offset = p - PyBytes_AS_STRING(result); - - if (nallocated> PY_SSIZE_T_MAX - repsize + max_char_size) { + if (writer.min_size> PY_SSIZE_T_MAX - (repsize - max_char_size)) { /* integer overflow */ PyErr_NoMemory(); goto error; } - nallocated += repsize - max_char_size; - if (result != NULL) { - if (_PyBytes_Resize(&result, nallocated) < 0) - goto error; - } else { - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - goto error; - Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset); - } - p = 
PyBytes_AS_STRING(result) + offset; + writer.min_size += (repsize - max_char_size); + if (_PyBytesWriter_Prepare(&writer, writer.min_size) < 0) + goto error; + p = writer.str + writer.pos; } if (PyBytes_Check(rep)) { @@ -391,6 +364,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicod *p++ = (char)c; } } + Py_CLEAR(rep); } else @@ -415,36 +389,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicod #endif /* STRINGLIB_SIZEOF_CHAR> 2 */ #endif /* STRINGLIB_SIZEOF_CHAR> 1 */ } - - if (result == NULL) { - /* This was stack allocated. */ - nneeded = p - stackbuf; - assert(nneeded <= nallocated); - result = PyBytes_FromStringAndSize(stackbuf, nneeded); - } - else { - /* Cut back to size actually needed. */ - nneeded = p - PyBytes_AS_STRING(result); - assert(nneeded <= nallocated); - _PyBytes_Resize(&result, nneeded); - } + writer.pos = p - writer.str; #if STRINGLIB_SIZEOF_CHAR> 1 Py_XDECREF(errorHandler); Py_XDECREF(exc); #endif - return result; + return _PyBytesWriter_Finish(&writer); + error: + _PyBytesWriter_Dealloc(&writer); #if STRINGLIB_SIZEOF_CHAR> 1 - error: Py_XDECREF(rep); Py_XDECREF(errorHandler); Py_XDECREF(exc); - Py_XDECREF(result); +#endif return NULL; -#endif - -#undef MAX_SHORT_UNICHARS } /* The pattern for constructing UCS2-repeated masks. */ diff -r a88310d86455 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Objects/unicodeobject.c Mon Apr 15 23:53:39 2013 +0200 @@ -6178,12 +6178,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t pos=0, size; int kind; void *data; - /* output object */ - PyObject *res; - /* pointer into the output */ - char *str; /* current output position */ - Py_ssize_t ressize; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *reason = (limit == 256) ? 
"ordinal not in range(256)" : "ordinal not in range(128)"; PyObject *errorHandler = NULL; @@ -6191,21 +6186,21 @@ unicode_encode_ucs1(PyObject *unicode, /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; + _PyBytesWriter writer; if (PyUnicode_READY(unicode) == -1) return NULL; size = PyUnicode_GET_LENGTH(unicode); kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); + if (size == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + _PyBytesWriter_Init(&writer); /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ - if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) - return NULL; - str = PyBytes_AS_STRING(res); - ressize = size; + if (_PyBytesWriter_Prepare(&writer, size) < 0) + return NULL; while (pos < size) { Py_UCS4 c = PyUnicode_READ(kind, data, pos); @@ -6213,13 +6208,13 @@ unicode_encode_ucs1(PyObject *unicode, /* can we encode this? 
*/ if (c ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) - goto onError; - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } + + if (repsize> 1) + writer.overallocate = 1; + if (_PyBytesWriter_Prepare(&writer, repsize) < 0) + goto onError; + /* generate replacement */ for (i = collstart; i < collend; ++i) { - str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + c = PyUnicode_READ(kind, data, i); + writer.pos += sprintf(writer.str + writer.pos, "&#%d;", c); } pos = collend; break; @@ -6290,47 +6284,41 @@ unicode_encode_ucs1(PyObject *unicode, repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, encoding, reason, unicode, &exc, collstart, collend, &newpos); - if (repunicode == NULL || (PyUnicode_Check(repunicode) && - PyUnicode_READY(repunicode) == -1)) + if (repunicode == NULL) goto onError; + if (PyBytes_Check(repunicode)) { /* Directly copy bytes result to output. */ repsize = PyBytes_Size(repunicode); - if (repsize> 1) { - /* Make room for all additional bytes. */ - respos = str - PyBytes_AS_STRING(res); - if (_PyBytes_Resize(&res, ressize+repsize-1)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize += repsize-1; - } - memcpy(str, PyBytes_AsString(repunicode), repsize); - str += repsize; + + if (repsize> 1) + writer.overallocate = 1; + if (_PyBytesWriter_Prepare(&writer, repsize) < 0) + goto onError; + + memcpy(writer.str + writer.pos, + PyBytes_AsString(repunicode), repsize); + writer.pos += repsize; + Py_DECREF(repunicode); pos = newpos; - Py_DECREF(repunicode); break; } + + if (PyUnicode_READY(repunicode) == -1) + goto onError; + /* need more space? 
(at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); - if (requiredsize> ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } + if (repsize> 1) + writer.overallocate = 1; + if (_PyBytesWriter_Prepare(&writer, repsize) < 0) + goto onError; + /* check if there is anything unencodable in the replacement and copy it to the output */ - for (i = 0; repsize-->0; ++i, ++str) { + for (i = 0; repsize-->0; ++i) { c = PyUnicode_READ_CHAR(repunicode, i); if (c>= limit) { raise_encode_exception(&exc, encoding, unicode, @@ -6338,27 +6326,21 @@ unicode_encode_ucs1(PyObject *unicode, Py_DECREF(repunicode); goto onError; } - *str = (char)c; + writer.str[writer.pos] = (char)c; + writer.pos++; } + Py_DECREF(repunicode); pos = newpos; - Py_DECREF(repunicode); - } - } - } - /* Resize if we allocated to much */ - size = str - PyBytes_AS_STRING(res); - if (size < ressize) { /* If this falls res will be NULL */ - assert(size>= 0); - if (_PyBytes_Resize(&res, size) < 0) - goto onError; + } + } } Py_XDECREF(errorHandler); Py_XDECREF(exc); - return res; + return _PyBytesWriter_Finish(&writer); onError: - Py_XDECREF(res); + _PyBytesWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -7740,18 +7722,6 @@ charmapencode_lookup(Py_UCS4 c, PyObject } } -static int -charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) -{ - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (_PyBytes_Resize(outobj, requiredsize)) - return -1; - 
return 0; -} - typedef enum charmapencode_result { enc_SUCCESS, enc_FAILED, enc_EXCEPTION } charmapencode_result; @@ -7763,54 +7733,40 @@ typedef enum charmapencode_result { reallocation error occurred. The caller must decref the result */ static charmapencode_result charmapencode_output(Py_UCS4 c, PyObject *mapping, - PyObject **outobj, Py_ssize_t *outpos) + _PyBytesWriter *writer) { PyObject *rep; - char *outstart; - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); + char ch; if (Py_TYPE(mapping) == &EncodingMapType) { int res = encoding_map_lookup(c, mapping); - Py_ssize_t requiredsize = *outpos+1; if (res == -1) return enc_FAILED; - if (outsize outsize) - /* Make room for all additional bytes. */ - if (charmapencode_resize(res, respos, requiredsize)) { - Py_DECREF(repunicode); - return -1; - } - memcpy(PyBytes_AsString(*res) + *respos, - PyBytes_AsString(repunicode), repsize); - *respos += repsize; + if (_PyBytesWriter_WriteStr(writer, repunicode) < 0) { + Py_DECREF(repunicode); + return -1; + } + Py_DECREF(repunicode); *inpos = newpos; - Py_DECREF(repunicode); break; } /* generate replacement */ @@ -7954,7 +7900,7 @@ charmap_encoding_error( kind = PyUnicode_KIND(repunicode); for (index = 0; index < repsize; index++) { Py_UCS4 repch = PyUnicode_READ(kind, data, index); - x = charmapencode_output(repch, mapping, res, respos); + x = charmapencode_output(repch, mapping, writer); if (x==enc_EXCEPTION) { Py_DECREF(repunicode); return -1; @@ -7976,13 +7922,9 @@ PyObject * PyObject *mapping, const char *errors) { - /* output object */ - PyObject *res = NULL; /* current input position */ - Py_ssize_t inpos = 0; + Py_ssize_t inpos; Py_ssize_t size; - /* current output position */ - Py_ssize_t respos = 0; PyObject *errorHandler = NULL; PyObject *exc = NULL; /* the following variable is used for caching string comparisons @@ -7991,6 +7933,7 @@ PyObject * int known_errorHandler = -1; void *data; int kind; + _PyBytesWriter writer; if (PyUnicode_READY(unicode) == -1) return 
NULL; @@ -8002,25 +7945,23 @@ PyObject * if (mapping == NULL) return unicode_encode_ucs1(unicode, errors, 256); - /* allocate enough for a simple encoding without - replacements, if we need more, we'll resize */ - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) + _PyBytesWriter_Init(&writer); + + if (_PyBytesWriter_Prepare(&writer, size) < 0) goto onError; - if (size == 0) - return res; - + + inpos = 0; while (inpos

AltStyle によって変換されたページ (->オリジナル) /