[Python-checkins] cpython: Optimize bytearray % args

victor.stinner python-checkins at python.org
Wed Oct 14 04:02:08 EDT 2015


https://hg.python.org/cpython/rev/f369b79c0153
changeset: 98741:f369b79c0153
user: Victor Stinner <victor.stinner at gmail.com>
date: Wed Oct 14 09:56:53 2015 +0200
summary:
 Optimize bytearray % args
Issue #25399: Don't create temporary bytes objects: modify _PyBytes_Format() to
work directly on bytearray objects.
* Rename _PyBytes_Format() to _PyBytes_FormatEx() just in case something
 outside CPython uses it
* _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so
 bytearray_format() doesn't need to create a temporary input bytes object
* Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to
 _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer
Most formatting operations are now between 2.5 and 5 times faster.
files:
 Include/bytesobject.h | 6 +++-
 Objects/bytearrayobject.c | 22 +++-----------
 Objects/bytesobject.c | 41 +++++++++++++++-----------
 3 files changed, 33 insertions(+), 36 deletions(-)
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@@ -62,7 +62,11 @@
 PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
-PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *);
+PyAPI_FUNC(PyObject*) _PyBytes_FormatEx(
+ const char *format,
+ Py_ssize_t format_len,
+ PyObject *args,
+ int use_bytearray);
 #endif
 PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
 						 const char *, Py_ssize_t,
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -282,26 +282,14 @@
 static PyObject *
 bytearray_format(PyByteArrayObject *self, PyObject *args)
 {
- PyObject *bytes_in, *bytes_out, *res;
- char *bytestring;
-
- if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
+ if (self == NULL || !PyByteArray_Check(self)) {
 PyErr_BadInternalCall();
 return NULL;
 }
- bytestring = PyByteArray_AS_STRING(self);
- bytes_in = PyBytes_FromString(bytestring);
- if (bytes_in == NULL)
- return NULL;
- bytes_out = _PyBytes_Format(bytes_in, args);
- Py_DECREF(bytes_in);
- if (bytes_out == NULL)
- return NULL;
- res = PyByteArray_FromObject(bytes_out);
- Py_DECREF(bytes_out);
- if (res == NULL)
- return NULL;
- return res;
+
+ return _PyBytes_FormatEx(PyByteArray_AS_STRING(self),
+ PyByteArray_GET_SIZE(self),
+ args, 1);
 }
 
 /* Functions stuffed into the type object */
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -568,28 +568,32 @@
 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
 
 PyObject *
-_PyBytes_Format(PyObject *format, PyObject *args)
+_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
+ PyObject *args, int use_bytearray)
 {
- char *fmt, *res;
+ const char *fmt;
+ char *res;
 Py_ssize_t arglen, argidx;
 Py_ssize_t fmtcnt;
 int args_owned = 0;
 PyObject *dict = NULL;
 _PyBytesWriter writer;
 
- if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+ if (args == NULL) {
 PyErr_BadInternalCall();
 return NULL;
 }
- fmt = PyBytes_AS_STRING(format);
- fmtcnt = PyBytes_GET_SIZE(format);
+ fmt = format;
+ fmtcnt = format_len;
 
 _PyBytesWriter_Init(&writer);
+ writer.use_bytearray = use_bytearray;
 
 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
 if (res == NULL)
 return NULL;
- writer.overallocate = 1;
+ if (!use_bytearray)
+ writer.overallocate = 1;
 
 if (PyTuple_Check(args)) {
 arglen = PyTuple_GET_SIZE(args);
@@ -613,10 +617,8 @@
 pos = strchr(fmt + 1, '%');
 if (pos != NULL)
 len = pos - fmt;
- else {
- len = PyBytes_GET_SIZE(format);
- len -= (fmt - PyBytes_AS_STRING(format));
- }
+ else
+ len = format_len - (fmt - format);
 assert(len != 0);
 
 Py_MEMCPY(res, fmt, len);
@@ -644,7 +646,7 @@
 
 fmt++;
 if (*fmt == '(') {
- char *keystart;
+ const char *keystart;
 Py_ssize_t keylen;
 PyObject *key;
 int pcount = 1;
@@ -924,8 +926,7 @@
 "unsupported format character '%c' (0x%x) "
 "at index %zd",
 c, c,
- (Py_ssize_t)(fmt - 1 -
- PyBytes_AsString(format)));
+ (Py_ssize_t)(fmt - 1 - format));
 goto error;
 }
 
@@ -1028,7 +1029,7 @@
 
 /* If overallocation was disabled, ensure that it was the last
 write. Otherwise, we missed an optimization */
- assert(writer.overallocate || fmtcnt < 0);
+ assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
 } /* until end */
 
 if (argidx < arglen && !dict) {
@@ -3233,11 +3234,15 @@
 };
 
 static PyObject *
-bytes_mod(PyObject *v, PyObject *w)
+bytes_mod(PyObject *self, PyObject *args)
 {
- if (!PyBytes_Check(v))
- Py_RETURN_NOTIMPLEMENTED;
- return _PyBytes_Format(v, w);
+ if (self == NULL || !PyBytes_Check(self)) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
+ args, 0);
 }
 
 static PyNumberMethods bytes_as_number = {
-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /