[Python-checkins] cpython: Issue #25349: Optimize bytes % args using the new private _PyBytesWriter API

Fri Oct 9 06:21:56 EDT 2015

https://hg.python.org/cpython/rev/b2f3cbdc0f2d
changeset: 98615:b2f3cbdc0f2d
user: Victor Stinner <victor.stinner at gmail.com>
date: Fri Oct 09 11:48:06 2015 +0200
summary:
 Issue #25349: Optimize bytes % args using the new private _PyBytesWriter API
* Thanks to the _PyBytesWriter API, output smaller than 512 bytes are allocated
 on the stack and so avoid calling _PyBytes_Resize(). Because of that, change
 the default buffer size to fmtcnt instead of fmtcnt+100.
* Rely on _PyBytesWriter algorithm to overallocate the buffer instead of using
 a custom code. For example, _PyBytesWriter uses a different overallocation
 factor (25% or 50%) depending on the platform to get best performances.
* Disable overallocation for the last write.
* Replace C loops to fill characters with memset()
* Add also many comments to _PyBytes_Format()
* Remove unused FORMATBUFLEN constant
* Avoid the creation of a temporary bytes object when formatting a floating
 point number (when no custom formatting option is used)
* Fix also reference leaks on error handling
* Use Py_MEMCPY() to copy bytes between two formatters (%)
files:
 Misc/NEWS | 2 +
 Objects/bytesobject.c | 187 ++++++++++++++++++++---------
 2 files changed, 130 insertions(+), 59 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@
 Core and Builtins
 -----------------
 
+- Issue #25349: Optimize bytes % args using the new private _PyBytesWriter API.
+
 - Issue #24806: Prevent builtin types that are not allowed to be subclassed from
 being subclassed through multiple inheritance.
 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -409,12 +409,15 @@
 
 /* Returns a new reference to a PyBytes object, or NULL on failure. */
 
-static PyObject *
-formatfloat(PyObject *v, int flags, int prec, int type)
+static char*
+formatfloat(PyObject *v, int flags, int prec, int type,
+ PyObject **p_result, _PyBytesWriter *writer, char *str,
+ Py_ssize_t prealloc)
 {
 char *p;
 PyObject *result;
 double x;
+ size_t len;
 
 x = PyFloat_AsDouble(v);
 if (x == -1.0 && PyErr_Occurred()) {
@@ -431,9 +434,23 @@
 
 if (p == NULL)
 return NULL;
- result = PyBytes_FromStringAndSize(p, strlen(p));
+
+ len = strlen(p);
+ if (writer != NULL) {
+ if ((Py_ssize_t)len > prealloc) {
+ str = _PyBytesWriter_Prepare(writer, str, len - prealloc);
+ if (str == NULL)
+ return NULL;
+ }
+ Py_MEMCPY(str, p, len);
+ str += len;
+ return str;
+ }
+
+ result = PyBytes_FromStringAndSize(p, len);
 PyMem_Free(p);
- return result;
+ *p_result = result;
+ return str;
 }
 
 static PyObject *
@@ -557,36 +574,32 @@
 return NULL;
 }
 
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the ints &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
 
 PyObject *
 _PyBytes_Format(PyObject *format, PyObject *args)
 {
 char *fmt, *res;
 Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
+ Py_ssize_t fmtcnt;
 int args_owned = 0;
- PyObject *result;
 PyObject *dict = NULL;
+ _PyBytesWriter writer;
+
 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
 PyErr_BadInternalCall();
 return NULL;
 }
 fmt = PyBytes_AS_STRING(format);
 fmtcnt = PyBytes_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyBytes_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
+
+ _PyBytesWriter_Init(&writer);
+
+ res = _PyBytesWriter_Alloc(&writer, fmtcnt);
+ if (res == NULL)
 return NULL;
- res = PyBytes_AsString(result);
+ writer.overallocate = 1;
+
 if (PyTuple_Check(args)) {
 arglen = PyTuple_GET_SIZE(args);
 argidx = 0;
@@ -600,18 +613,25 @@
 !PyByteArray_Check(args)) {
 dict = args;
 }
+
 while (--fmtcnt >= 0) {
 if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyBytes_Resize(&result, reslen))
- return NULL;
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
- --rescnt;
+ Py_ssize_t len;
+ char *pos;
+
+ pos = strchr(fmt + 1, '%');
+ if (pos != NULL)
+ len = pos - fmt;
+ else {
+ len = PyBytes_GET_SIZE(format);
+ len -= (fmt - PyBytes_AS_STRING(format));
 }
- *res++ = *fmt++;
+ assert(len != 0);
+
+ Py_MEMCPY(res, fmt, len);
+ res += len;
+ fmt += len;
+ fmtcnt -= (len - 1);
 }
 else {
 /* Got a format specifier */
@@ -626,6 +646,10 @@
 int sign;
 Py_ssize_t len = 0;
 char onechar; /* For byte_converter() */
+ Py_ssize_t alloc;
+#ifdef Py_DEBUG
+ char *before;
+#endif
 
 fmt++;
 if (*fmt == '(') {
@@ -673,6 +697,8 @@
 arglen = -1;
 argidx = -2;
 }
+
+ /* Parse flags. Example: "%+i" => flags=F_SIGN. */
 while (--fmtcnt >= 0) {
 switch (c = *fmt++) {
 case '-': flags |= F_LJUST; continue;
@@ -683,6 +709,8 @@
 }
 break;
 }
+
+ /* Parse width. Example: "%10s" => width=10 */
 if (c == '*') {
 v = getnextarg(args, arglen, &argidx);
 if (v == NULL)
@@ -717,6 +745,8 @@
 width = width*10 + (c - '0');
 }
 }
+
+ /* Parse precision. Example: "%.3f" => prec=3 */
 if (c == '.') {
 prec = 0;
 if (--fmtcnt >= 0)
@@ -771,6 +801,12 @@
 if (v == NULL)
 goto error;
 }
+
+ if (fmtcnt < 0) {
+ /* last writer: disable writer overallocation */
+ writer.overallocate = 0;
+ }
+
 sign = 0;
 fill = ' ';
 switch (c) {
@@ -778,6 +814,7 @@
 pbuf = "%";
 len = 1;
 break;
+
 case 'r':
 // %r is only for 2/3 code; 3 only code should use %a
 case 'a':
@@ -790,6 +827,7 @@
 if (prec >= 0 && len > prec)
 len = prec;
 break;
+
 case 's':
 // %s is only for 2/3 code; 3 only code should use %b
 case 'b':
@@ -799,6 +837,7 @@
 if (prec >= 0 && len > prec)
 len = prec;
 break;
+
 case 'i':
 case 'd':
 case 'u':
@@ -815,14 +854,24 @@
 if (flags & F_ZERO)
 fill = '0';
 break;
+
 case 'e':
 case 'E':
 case 'f':
 case 'F':
 case 'g':
 case 'G':
- temp = formatfloat(v, flags, prec, c);
- if (temp == NULL)
+ if (width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ res = formatfloat(v, flags, prec, c, NULL, &writer, res, 1);
+ if (res == NULL)
+ goto error;
+ continue;
+ }
+
+ if (!formatfloat(v, flags, prec, c, &temp, NULL, res, 1))
 goto error;
 pbuf = PyBytes_AS_STRING(temp);
 len = PyBytes_GET_SIZE(temp);
@@ -830,12 +879,14 @@
 if (flags & F_ZERO)
 fill = '0';
 break;
+
 case 'c':
 pbuf = &onechar;
 len = byte_converter(v, &onechar);
 if (!len)
 goto error;
 break;
+
 default:
 PyErr_Format(PyExc_ValueError,
 "unsupported format character '%c' (0x%x) "
@@ -845,6 +896,7 @@
 PyBytes_AsString(format)));
 goto error;
 }
+
 if (sign) {
 if (*pbuf == '-' || *pbuf == '+') {
 sign = *pbuf++;
@@ -859,29 +911,30 @@
 }
 if (width < len)
 width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_DECREF(result);
- Py_XDECREF(temp);
- return PyErr_NoMemory();
- }
- if (_PyBytes_Resize(&result, reslen)) {
- Py_XDECREF(temp);
- return NULL;
- }
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
+
+ alloc = width;
+ if (sign != 0 && len == width)
+ alloc++;
+ if (alloc > 1) {
+ res = _PyBytesWriter_Prepare(&writer, res, alloc - 1);
+ if (res == NULL)
+ goto error;
 }
+#ifdef Py_DEBUG
+ before = res;
+#endif
+
+ /* Write the sign if needed */
 if (sign) {
 if (fill != ' ')
 *res++ = sign;
- rescnt--;
 if (width > len)
 width--;
 }
+
+ /* Write the numeric prefix for "x", "X" and "o" formats
+ if the alternate form is used.
+ For example, write "0x" for the "%#x" format. */
 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
 assert(pbuf[0] == '0');
 assert(pbuf[1] == c);
@@ -889,18 +942,21 @@
 *res++ = *pbuf++;
 *res++ = *pbuf++;
 }
- rescnt -= 2;
 width -= 2;
 if (width < 0)
 width = 0;
 len -= 2;
 }
+
+ /* Pad left with the fill character if needed */
 if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
+ memset(res, fill, width - len);
+ res += (width - len);
+ width = len;
 }
+
+ /* If padding with spaces: write sign if needed and/or numeric
+ prefix if the alternate form is used */
 if (fill == ' ') {
 if (sign)
 *res++ = sign;
@@ -912,13 +968,17 @@
 *res++ = *pbuf++;
 }
 }
+
+ /* Copy bytes */
 Py_MEMCPY(res, pbuf, len);
 res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
+
+ /* Pad right with the fill character if needed */
+ if (width > len) {
+ memset(res, ' ', width - len);
+ res += (width - len);
 }
+
 if (dict && (argidx < arglen) && c != '%') {
 PyErr_SetString(PyExc_TypeError,
 "not all arguments converted during bytes formatting");
@@ -926,22 +986,31 @@
 goto error;
 }
 Py_XDECREF(temp);
+
+#ifdef Py_DEBUG
+ /* check that we computed the exact size for this write */
+ assert((res - before) == alloc);
+#endif
 } /* '%' */
+
+ /* If overallocation was disabled, ensure that it was the last
+ write. Otherwise, we missed an optimization */
+ assert(writer.overallocate || fmtcnt < 0);
 } /* until end */
+
 if (argidx < arglen && !dict) {
 PyErr_SetString(PyExc_TypeError,
 "not all arguments converted during bytes formatting");
 goto error;
 }
+
 if (args_owned) {
 Py_DECREF(args);
 }
- if (_PyBytes_Resize(&result, reslen - rescnt))
- return NULL;
- return result;
+ return _PyBytesWriter_Finish(&writer, res);
 
 error:
- Py_DECREF(result);
+ _PyBytesWriter_Dealloc(&writer);
 if (args_owned) {
 Py_DECREF(args);
 }
-- 
Repository URL: https://hg.python.org/cpython