diff -r 47f392d6547d -r c77116610d11 Include/bytesobject.h --- a/Include/bytesobject.h Sat Apr 20 14:08:16 2013 -0700 +++ b/Include/bytesobject.h Sun Apr 21 02:15:01 2013 +0200 @@ -122,6 +122,96 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertTh #define F_ALT (1<<3) #define F_ZERO (1<<4) +#ifndef Py_LIMITED_API +/* Bytes writer: API designed to reduce the number of memory reallocations + when the output size is unknown. + + The structure should be allocated on the stack, so no heap allocation is + done before _PyBytesWriter_Finish() for short strings (bytes are written into + small_buffer, allocated on the stack). + + To reduce the number of memory reallocations, overallocate can be set to 1. + In this case, the buffer will be larger than the requested size and so further + writes should not need to grow the buffer. + */ +typedef struct { + /* Current write position in the buffer */ + char *str; + + /* Start of the buffer (small_buffer, or the data of 'buffer') */ + char *start; + + /* End of the buffer (one past the last allocated byte) */ + char *end; + + /* Minimum number of allocated bytes (sum of the counts passed to _PyBytesWriter_Alloc()) */ + Py_ssize_t min_size; + + /* Overallocate buffer size? If non-zero, growing the buffer adds 25% extra */ + int overallocate; + + /* Buffer for large strings: a bytes object, NULL while small_buffer is in use */ + PyObject *buffer; + + /* Buffer for short strings */ + char small_buffer[512]; +} _PyBytesWriter ; + +/* Initialize a bytes writer: use the small buffer, set min_size and + * overallocate to zero. */ +PyAPI_FUNC(void) +_PyBytesWriter_Init(_PyBytesWriter *writer); + +/* Allocate 'count' more bytes: add count to min_size and grow the buffer if + needed. + + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t count); + +/* Prepare the buffer to write 'count' bytes: check if buffer is big enough + using the current position. + + Only use this function if the write is done immediately. If the writer may + be used before the write, _PyBytesWriter_Alloc() should be used instead. This is a macro: WRITER and COUNT are evaluated more than once, so pass expressions without side effects. + + Return 0 on success, raise an exception and return -1 on error. 
*/ +#define _PyBytesWriter_Prepare(WRITER, COUNT) \ + (((Py_ssize_t)(COUNT) <= (Py_ssize_t)((WRITER)->end - (WRITER)->str)) \ + ? 0 \ + : _PyBytesWriter_PrepareInternal((WRITER), (COUNT))) + +/* Slow path of _PyBytesWriter_Prepare(): grow the buffer. Don't call this function directly, use the _PyBytesWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyBytesWriter_PrepareInternal(_PyBytesWriter *writer, Py_ssize_t count); + +/* Write a byte. + Return 0 on success, or raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyBytesWriter_WriteByte(_PyBytesWriter *writer, + char byte + ); + +/* Write a bytes string. + Return 0 on success, raise an exception (TypeError if the argument is not a bytes object) and return -1 on error. */ +PyAPI_FUNC(int) +_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, + PyObject *bytes /* Bytes string */ + ); + +/* Get the value of the writer as a bytes object; the caller owns the + returned reference. Raise an exception and return NULL on error. + + After calling this function, the writer cannot be used anymore. NOTE(review): the implementation does not reset writer->buffer, so do not call _PyBytesWriter_Dealloc() on the same writer afterwards. */ +PyAPI_FUNC(PyObject *) +_PyBytesWriter_Finish(_PyBytesWriter *writer); + +/* Deallocate memory of a writer (release its internal buffer). Meant for error paths, instead of _PyBytesWriter_Finish(). */ +PyAPI_FUNC(void) +_PyBytesWriter_Dealloc(_PyBytesWriter *writer); +#endif + #ifdef __cplusplus } #endif diff -r 47f392d6547d -r c77116610d11 Misc/NEWS --- a/Misc/NEWS Sat Apr 20 14:08:16 2013 -0700 +++ b/Misc/NEWS Sun Apr 21 02:15:01 2013 +0200 @@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1? Core and Builtins ----------------- +- Issue #17742: Add a new "bytes writer API" (_PyBytesWriter), API designed to + reduce the number of memory reallocations when the output size is unknown. + - Issue #17413: sys.settrace callbacks were being passed a string instead of an exception instance for the 'value' element of the arg tuple if the exception originated from C code; now an exception instance is always provided. 
diff -r 47f392d6547d -r c77116610d11 Objects/bytesobject.c --- a/Objects/bytesobject.c Sat Apr 20 14:08:16 2013 -0700 +++ b/Objects/bytesobject.c Sun Apr 21 02:15:01 2013 +0200 @@ -2975,3 +2975,147 @@ bytes_iter(PyObject *seq) _PyObject_GC_TRACK(it); return (PyObject *)it; } + +void +_PyBytesWriter_Init(_PyBytesWriter *writer) +{ + writer->start = writer->small_buffer; + writer->end = writer->start + sizeof(writer->small_buffer); + writer->str = writer->start; + writer->min_size = 0; + writer->overallocate = 0; + writer->buffer = NULL; +#ifdef DEBUG + memset(writer->small_buffer, 0xff, sizeof(writer->small_buffer)); +#endif +} + +static int +_PyBytesWriter_AllocInternal(_PyBytesWriter *writer, Py_ssize_t size) +{ + Py_ssize_t pos; + + assert(size> 0); + pos = writer->str - writer->start; + + if (writer->buffer == NULL) { + assert(writer->start == writer->small_buffer); + assert(size> sizeof(writer->small_buffer)); + + /* overallocate 25% to limit the number of resize */ + if (writer->overallocate && size <= (PY_SSIZE_T_MAX - size / 4)) + size += size / 4; + if (size < writer->min_size) + size = writer->min_size; + + writer->buffer = PyBytes_FromStringAndSize(NULL, size); + if (writer->buffer == NULL) + return -1; + memcpy(PyBytes_AS_STRING(writer->buffer), + writer->small_buffer, pos); + } + else { + assert (size> (writer->end - writer->start)); + + /* overallocate 25% to limit the number of resize */ + if (writer->overallocate && size <= (PY_SSIZE_T_MAX - size / 4)) + size += size / 4; + if (size < writer->min_size) + size = writer->min_size; + + _PyBytes_Resize(&writer->buffer, size); + if (writer->buffer == NULL) + return -1; + } + writer->start = PyBytes_AS_STRING(writer->buffer); + writer->end = writer->start + PyBytes_GET_SIZE(writer->buffer); + writer->str = writer->start + pos; + return 0; +} + +int +_PyBytesWriter_PrepareInternal(_PyBytesWriter *writer, Py_ssize_t count) +{ + Py_ssize_t pos; + + assert(count> 0); + pos = writer->str - writer->start; + + if 
(count> PY_SSIZE_T_MAX - pos) { + PyErr_NoMemory(); + return -1; + } + return _PyBytesWriter_AllocInternal(writer, pos + count); +} + +int +_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t count) +{ + Py_ssize_t pos; + + if (count> PY_SSIZE_T_MAX - writer->min_size) { + PyErr_NoMemory(); + return -1; + } + writer->min_size += count; + + if (writer->min_size <= (Py_ssize_t)(writer->end - writer->start)) + return 0; + + return _PyBytesWriter_AllocInternal(writer, writer->min_size); +} + +int +_PyBytesWriter_WriteByte(_PyBytesWriter *writer, char byte) +{ + if (_PyBytesWriter_Prepare(writer, 1) < 0) + return -1; + *writer->str = byte; + writer->str++; + return 0; +} + +int +_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, PyObject *bytes) +{ + Py_ssize_t len; + + if (!PyBytes_Check(bytes)) { + PyErr_Format(PyExc_TypeError, + "expected bytes, %.200s found", Py_TYPE(bytes)->tp_name); + return -1; + } + len = PyBytes_GET_SIZE(bytes); + if (len == 0) + return 0; + + if (_PyBytesWriter_Prepare(writer, len) < 0) + return -1; + memcpy(writer->str, PyBytes_AS_STRING(bytes), len); + writer->str += len; + return 0; +} + +PyObject * +_PyBytesWriter_Finish(_PyBytesWriter *writer) +{ + Py_ssize_t pos = writer->str - writer->start; + if (pos == 0) { + Py_XDECREF(writer->buffer); + return PyBytes_FromStringAndSize(NULL, 0); + } + if (writer->start == writer->small_buffer) { + writer->buffer = PyBytes_FromStringAndSize(writer->start, pos); + } + else if (PyBytes_GET_SIZE(writer->buffer) != pos) { + _PyBytes_Resize(&writer->buffer, pos); + } + return writer->buffer; +} + +void +_PyBytesWriter_Dealloc(_PyBytesWriter *writer) +{ + Py_XDECREF(writer->buffer); +} + diff -r 47f392d6547d -r c77116610d11 Objects/stringlib/codecs.h --- a/Objects/stringlib/codecs.h Sat Apr 20 14:08:16 2013 -0700 +++ b/Objects/stringlib/codecs.h Sun Apr 21 02:15:01 2013 +0200 @@ -260,13 +260,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicod Py_ssize_t size, const char *errors) { -#define MAX_SHORT_UNICHARS 
300 /* largest size we'll do on the stack */ - Py_ssize_t i; /* index into s of next input byte */ - PyObject *result; /* result string object */ - char *p; /* next free byte in output buffer */ - Py_ssize_t nallocated; /* number of result bytes allocated */ - Py_ssize_t nneeded; /* number of result bytes needed */ #if STRINGLIB_SIZEOF_CHAR> 1 PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -274,45 +268,29 @@ STRINGLIB(utf8_encoder)(PyObject *unicod #endif #if STRINGLIB_SIZEOF_CHAR == 1 const Py_ssize_t max_char_size = 2; - char stackbuf[MAX_SHORT_UNICHARS * 2]; #elif STRINGLIB_SIZEOF_CHAR == 2 const Py_ssize_t max_char_size = 3; - char stackbuf[MAX_SHORT_UNICHARS * 3]; #else /* STRINGLIB_SIZEOF_CHAR == 4 */ const Py_ssize_t max_char_size = 4; - char stackbuf[MAX_SHORT_UNICHARS * 4]; #endif + _PyBytesWriter writer; assert(size>= 0); - if (size <= MAX_SHORT_UNICHARS) { - /* Write into the stack buffer; nallocated can't overflow. - * At the end, we'll allocate exactly as much heap space as it - * turns out we need. - */ - nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); - result = NULL; /* will allocate after we're done */ - p = stackbuf; + _PyBytesWriter_Init(&writer); + if (size> PY_SSIZE_T_MAX / max_char_size) { + PyErr_NoMemory(); + goto error; } - else { - if (size> PY_SSIZE_T_MAX / max_char_size) { - /* integer overflow */ - return PyErr_NoMemory(); - } - /* Overallocate on the heap, and give the excess back at the end. 
*/ - nallocated = size * max_char_size; - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - return NULL; - p = PyBytes_AS_STRING(result); - } + if (_PyBytesWriter_Alloc(&writer, size * max_char_size) < 0) + goto error; for (i = 0; i < size;) { Py_UCS4 ch = data[i++]; if (ch < 0x80) { /* Encode ASCII */ - *p++ = (char) ch; + *writer.str++ = (char) ch; } else @@ -321,14 +299,16 @@ STRINGLIB(utf8_encoder)(PyObject *unicod #endif { /* Encode Latin-1 */ - *p++ = (char)(0xc0 | (ch>> 6)); - *p++ = (char)(0x80 | (ch & 0x3f)); + *writer.str++ = (char)(0xc0 | (ch>> 6)); + *writer.str++ = (char)(0x80 | (ch & 0x3f)); } #if STRINGLIB_SIZEOF_CHAR> 1 else if (Py_UNICODE_IS_SURROGATE(ch)) { Py_ssize_t newpos; Py_ssize_t repsize, k, startpos; + startpos = i-1; + rep = unicode_encode_call_errorhandler( errors, &errorHandler, "utf-8", "surrogates not allowed", unicode, &exc, startpos, startpos+1, &newpos); @@ -341,35 +321,15 @@ STRINGLIB(utf8_encoder)(PyObject *unicod repsize = PyUnicode_GET_LENGTH(rep); if (repsize> max_char_size) { - Py_ssize_t offset; - - if (result == NULL) - offset = p - stackbuf; - else - offset = p - PyBytes_AS_STRING(result); - - if (nallocated> PY_SSIZE_T_MAX - repsize + max_char_size) { - /* integer overflow */ - PyErr_NoMemory(); + writer.overallocate = 1; + if (_PyBytesWriter_Alloc(&writer, repsize - max_char_size) < 0) goto error; - } - nallocated += repsize - max_char_size; - if (result != NULL) { - if (_PyBytes_Resize(&result, nallocated) < 0) - goto error; - } else { - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - goto error; - Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset); - } - p = PyBytes_AS_STRING(result) + offset; } if (PyBytes_Check(rep)) { char *prep = PyBytes_AS_STRING(rep); for(k = repsize; k> 0; k--) - *p++ = *prep++; + *writer.str++ = *prep++; } else /* rep is unicode */ { enum PyUnicode_Kind repkind; void *repdata; @@ -388,7 +348,7 @@ STRINGLIB(utf8_encoder)(PyObject 
*unicod "surrogates not allowed"); goto error; } - *p++ = (char)c; + *writer.str++ = (char)c; } } Py_CLEAR(rep); @@ -398,53 +358,38 @@ STRINGLIB(utf8_encoder)(PyObject *unicod if (ch < 0x10000) #endif { - *p++ = (char)(0xe0 | (ch>> 12)); - *p++ = (char)(0x80 | ((ch>> 6) & 0x3f)); - *p++ = (char)(0x80 | (ch & 0x3f)); + *writer.str++ = (char)(0xe0 | (ch>> 12)); + *writer.str++ = (char)(0x80 | ((ch>> 6) & 0x3f)); + *writer.str++ = (char)(0x80 | (ch & 0x3f)); } #if STRINGLIB_SIZEOF_CHAR> 2 else /* ch>= 0x10000 */ { assert(ch <= MAX_UNICODE); /* Encode UCS4 Unicode ordinals */ - *p++ = (char)(0xf0 | (ch>> 18)); - *p++ = (char)(0x80 | ((ch>> 12) & 0x3f)); - *p++ = (char)(0x80 | ((ch>> 6) & 0x3f)); - *p++ = (char)(0x80 | (ch & 0x3f)); + *writer.str++ = (char)(0xf0 | (ch>> 18)); + *writer.str++ = (char)(0x80 | ((ch>> 12) & 0x3f)); + *writer.str++ = (char)(0x80 | ((ch>> 6) & 0x3f)); + *writer.str++ = (char)(0x80 | (ch & 0x3f)); } #endif /* STRINGLIB_SIZEOF_CHAR> 2 */ #endif /* STRINGLIB_SIZEOF_CHAR> 1 */ } - if (result == NULL) { - /* This was stack allocated. */ - nneeded = p - stackbuf; - assert(nneeded <= nallocated); - result = PyBytes_FromStringAndSize(stackbuf, nneeded); - } - else { - /* Cut back to size actually needed. */ - nneeded = p - PyBytes_AS_STRING(result); - assert(nneeded <= nallocated); - _PyBytes_Resize(&result, nneeded); - } - #if STRINGLIB_SIZEOF_CHAR> 1 Py_XDECREF(errorHandler); Py_XDECREF(exc); #endif - return result; + return _PyBytesWriter_Finish(&writer); + error: + _PyBytesWriter_Dealloc(&writer); #if STRINGLIB_SIZEOF_CHAR> 1 - error: Py_XDECREF(rep); Py_XDECREF(errorHandler); Py_XDECREF(exc); - Py_XDECREF(result); +#endif return NULL; -#endif - -#undef MAX_SHORT_UNICHARS } /* The pattern for constructing UCS2-repeated masks. 
*/ diff -r 47f392d6547d -r c77116610d11 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Apr 20 14:08:16 2013 -0700 +++ b/Objects/unicodeobject.c Sun Apr 21 02:15:01 2013 +0200 @@ -6182,12 +6182,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t pos=0, size; int kind; void *data; - /* output object */ - PyObject *res; - /* pointer into the output */ - char *str; /* current output position */ - Py_ssize_t ressize; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; PyObject *errorHandler = NULL; @@ -6195,21 +6190,21 @@ unicode_encode_ucs1(PyObject *unicode, /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; + _PyBytesWriter writer; if (PyUnicode_READY(unicode) == -1) return NULL; size = PyUnicode_GET_LENGTH(unicode); kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); + if (size == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + _PyBytesWriter_Init(&writer); /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ - if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) - return NULL; - str = PyBytes_AS_STRING(res); - ressize = size; + if (_PyBytesWriter_Alloc(&writer, size) < 0) + return NULL; while (pos < size) { Py_UCS4 c = PyUnicode_READ(kind, data, pos); @@ -6217,13 +6212,12 @@ unicode_encode_ucs1(PyObject *unicode, /* can we encode this? 
*/ if (c ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) - goto onError; - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } + + writer.overallocate = 1; + if (_PyBytesWriter_Alloc(&writer, repsize - 1) < 0) + goto onError; + /* generate replacement */ for (i = collstart; i < collend; ++i) { - str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + c = PyUnicode_READ(kind, data, i); + writer.str += sprintf(writer.str, "&#%d;", c); } pos = collend; break; @@ -6294,75 +6285,65 @@ unicode_encode_ucs1(PyObject *unicode, repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, encoding, reason, unicode, &exc, collstart, collend, &newpos); - if (repunicode == NULL || (PyUnicode_Check(repunicode) && - PyUnicode_READY(repunicode) == -1)) + if (repunicode == NULL) goto onError; + if (PyBytes_Check(repunicode)) { /* Directly copy bytes result to output. */ repsize = PyBytes_Size(repunicode); - if (repsize> 1) { - /* Make room for all additional bytes. */ - respos = str - PyBytes_AS_STRING(res); - if (_PyBytes_Resize(&res, ressize+repsize-1)) { + if (repsize != 0) { + if (repsize> 1) + writer.overallocate = 1; + if (_PyBytesWriter_Alloc(&writer, repsize - 1) < 0) + goto onError; + + memcpy(writer.str, + PyBytes_AsString(repunicode), repsize); + writer.str += repsize; + } + Py_DECREF(repunicode); + pos = newpos; + break; + } + + if (PyUnicode_READY(repunicode) == -1) + goto onError; + + /* need more space? 
(at least enough for what we + have+the replacement+the rest of the string, so + we won't have to check space for encodable characters) */ + repsize = PyUnicode_GET_LENGTH(repunicode); + if (repsize != 0) { + if (repsize> 1) + writer.overallocate = 1; + if (_PyBytesWriter_Alloc(&writer, repsize - 1) < 0) + goto onError; + + /* check if there is anything unencodable in the replacement + and copy it to the output */ + for (i = 0; repsize-->0; ++i) { + c = PyUnicode_READ_CHAR(repunicode, i); + if (c>= limit) { + raise_encode_exception(&exc, encoding, unicode, + pos, pos+1, reason); Py_DECREF(repunicode); goto onError; } - str = PyBytes_AS_STRING(res) + respos; - ressize += repsize-1; + *writer.str++ = (char)c; } - memcpy(str, PyBytes_AsString(repunicode), repsize); - str += repsize; - pos = newpos; - Py_DECREF(repunicode); - break; } - /* need more space? (at least enough for what we - have+the replacement+the rest of the string, so - we won't have to check space for encodable characters) */ - respos = str - PyBytes_AS_STRING(res); - repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); - if (requiredsize> ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } - /* check if there is anything unencodable in the replacement - and copy it to the output */ - for (i = 0; repsize-->0; ++i, ++str) { - c = PyUnicode_READ_CHAR(repunicode, i); - if (c>= limit) { - raise_encode_exception(&exc, encoding, unicode, - pos, pos+1, reason); - Py_DECREF(repunicode); - goto onError; - } - *str = (char)c; - } + Py_DECREF(repunicode); pos = newpos; - Py_DECREF(repunicode); - } - } - } - /* Resize if we allocated to much */ - size = str - PyBytes_AS_STRING(res); - if (size < ressize) { /* If this falls res will be NULL */ - assert(size>= 0); - if (_PyBytes_Resize(&res, size) < 0) 
- goto onError; + } + } } Py_XDECREF(errorHandler); Py_XDECREF(exc); - return res; + return _PyBytesWriter_Finish(&writer); onError: - Py_XDECREF(res); + _PyBytesWriter_Dealloc(&writer); Py_XDECREF(errorHandler); Py_XDECREF(exc); return NULL; @@ -7781,18 +7762,6 @@ charmapencode_lookup(Py_UCS4 c, PyObject } } -static int -charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) -{ - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (_PyBytes_Resize(outobj, requiredsize)) - return -1; - return 0; -} - typedef enum charmapencode_result { enc_SUCCESS, enc_FAILED, enc_EXCEPTION } charmapencode_result; @@ -7804,54 +7773,39 @@ typedef enum charmapencode_result { reallocation error occurred. The caller must decref the result */ static charmapencode_result charmapencode_output(Py_UCS4 c, PyObject *mapping, - PyObject **outobj, Py_ssize_t *outpos) + _PyBytesWriter *writer) { PyObject *rep; - char *outstart; - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); if (Py_TYPE(mapping) == &EncodingMapType) { int res = encoding_map_lookup(c, mapping); - Py_ssize_t requiredsize = *outpos+1; if (res == -1) return enc_FAILED; - if (outsizeoverallocate = 1; for (cp = buffer; *cp; ++cp) { - x = charmapencode_output(*cp, mapping, res, respos); + x = charmapencode_output(*cp, mapping, writer); if (x==enc_EXCEPTION) return -1; else if (x==enc_FAILED) { @@ -7967,22 +7922,14 @@ charmap_encoding_error( if (repunicode == NULL) return -1; if (PyBytes_Check(repunicode)) { - /* Directly copy bytes result to output. */ - Py_ssize_t outsize = PyBytes_Size(*res); - Py_ssize_t requiredsize; - repsize = PyBytes_Size(repunicode); - requiredsize = *respos + repsize; - if (requiredsize> outsize) - /* Make room for all additional bytes. 
*/ - if (charmapencode_resize(res, respos, requiredsize)) { - Py_DECREF(repunicode); - return -1; - } - memcpy(PyBytes_AsString(*res) + *respos, - PyBytes_AsString(repunicode), repsize); - *respos += repsize; + if (PyBytes_GET_SIZE(repunicode)> 1) + writer->overallocate = 1; + if (_PyBytesWriter_WriteBytes(writer, repunicode) < 0) { + Py_DECREF(repunicode); + return -1; + } + Py_DECREF(repunicode); *inpos = newpos; - Py_DECREF(repunicode); break; } /* generate replacement */ @@ -7991,11 +7938,13 @@ charmap_encoding_error( return -1; } repsize = PyUnicode_GET_LENGTH(repunicode); + if (repsize> 1) + writer->overallocate = 1; data = PyUnicode_DATA(repunicode); kind = PyUnicode_KIND(repunicode); for (index = 0; index < repsize; index++) { Py_UCS4 repch = PyUnicode_READ(kind, data, index); - x = charmapencode_output(repch, mapping, res, respos); + x = charmapencode_output(repch, mapping, writer); if (x==enc_EXCEPTION) { Py_DECREF(repunicode); return -1; @@ -8017,13 +7966,9 @@ PyObject * PyObject *mapping, const char *errors) { - /* output object */ - PyObject *res = NULL; /* current input position */ - Py_ssize_t inpos = 0; + Py_ssize_t inpos; Py_ssize_t size; - /* current output position */ - Py_ssize_t respos = 0; PyObject *errorHandler = NULL; PyObject *exc = NULL; /* the following variable is used for caching string comparisons @@ -8032,6 +7977,7 @@ PyObject * int known_errorHandler = -1; void *data; int kind; + _PyBytesWriter writer; if (PyUnicode_READY(unicode) == -1) return NULL; @@ -8043,25 +7989,25 @@ PyObject * if (mapping == NULL) return unicode_encode_ucs1(unicode, errors, 256); + _PyBytesWriter_Init(&writer); + /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) + if (_PyBytesWriter_Alloc(&writer, size) < 0) goto onError; - if (size == 0) - return res; - + + inpos = 0; while (inpos

AltStyle によって変換されたページ (->オリジナル) /